Convert CSV to JSON using python with new headers
Question:
I’m new to python and want to convert the following csv file
1.0,100.0,303.0,619.0,figure
338.0,162.0,143.0,423.0,text
85.0,768.0,554.0,39.0,text
504.0,164.0,24.0,238.0,subtitle
120.0,727.0,182.0,13.0,caption
540.0,165.0,62.0,428.0,title
614.0,163.0,23.0,133.0,tagline
317.0,629.0,113.0,113.0,figure
443.0,629.0,112.0,113.0,figure
568.0,628.0,121.0,114.0,figure
into this format
{
"record_01": {
"x": "1.0",
"y": "100.0",
"width": "303.0",
"height": "619.0",
"tag": "figure"
},
"record_02": {
"x": "338.0",
"y": "162.0",
"width": "143.0",
"height": "423.0",
"tag": "text"
},
"record_03": {
"x": "85.0",
"y": "768.0",
"width": "554.0",
"height": "39.0",
"tag": "text"
}, .... and so on }
This is the current code
import csv
import json
def convert_json(csvPath, jsonPath):
fieldnames = ["x", "y", "width", "height", "tag"]
with open(csvPath, "r", encoding="utf-8") as csvFile:
csvReader = csv.DictReader(csvFile, fieldnames)
data = []
for rows in csvReader:
data.append(rows)
with open(jsonPath, "w", encoding="utf-8") as jsonFile:
jsonFile.write(json.dumps(data, indent=4))
and the output look like this
[
{
"x": "1.0",
"y": "100.0",
"width": "303.0",
"height": "619.0",
"tag": "figure"
},
{
"x": "338.0",
"y": "162.0",
"width": "143.0",
"height": "423.0",
"tag": "text"
},
{
"x": "85.0",
"y": "768.0",
"width": "554.0",
"height": "39.0",
"tag": "text"
}, ..... ]
How do I make sure the json file is in curly bracket instead of ‘[ ]’ and add record with number for each entry?
I tried using data={}
but it didn’t work with data.append(rows)
Edit: Thanks to the solution and explanation provided by Antonio, I changed my code and it output the expected result.
import csv
import json
fieldnames = ["x", "y", "width", "height", "tag"]
def convert_json(csvPath, jsonPath):
with open(csvPath, "r", encoding="utf-8") as csvFile:
csvReader = csv.DictReader(csvFile, fieldnames)
data = {}
for record, rows in enumerate(csvReader, start=1):
data.update({"record_{:02d}".format(record): rows})
with open(jsonPath, "w", encoding="utf-8") as jsonFile:
json.dump(data, jsonFile, indent=4)
csvPath = "data.csv"
jsonPath = "data.json"
convert_json(csvPath, jsonPath)
Answers:
You were creating a list while you need to use dictionary. You must declare a dictionary before adding your elements or you could also use dict comprehension to create your dictionary in a more pythonic way. To create your records number you can format integer numbers with zero padding. To get the current record number you can use enumerate(item)
.
import csv
import json
def convert_json(csvPath, jsonPath, fieldnames):
with open(csvPath, "r", encoding="utf-8") as csvFile:
csvReader = csv.DictReader(csvFile, fieldnames)
data = {}
for record, rows in enumerate(csvReader):
data.update({"record_{:02d}".format(record): rows})
with open(jsonPath, "w", encoding="utf-8") as jsonFile:
json.dump(data, jsonFile, indent=4)
convert_json("./data.csv", "json_file.json", ["x", "y", "width", "height", "tag"])
Edit:
Version with dict comprehension:
import csv
import json
def convert_json(csvPath, jsonPath, fieldnames):
with open(csvPath, "r", encoding="utf-8") as csvFile:
csvReader = csv.DictReader(csvFile, fieldnames)
data = {"record_{:02d}".format(record): rows for record, rows in enumerate(csvReader)}
with open(jsonPath, "w", encoding="utf-8") as jsonFile:
json.dump(data, jsonFile, indent=4)
convert_json("./data.csv", "json_file.json", ["x", "y", "width", "height", "tag"])
If your data set is small or fit in the memory, it can be done more easily with pandas as follows:
from pathlib import Path
import pandas as pd
DATA_PATH = Path("data").joinpath("data.csv")
WRITE_PATH = Path("data").joinpath("data.json")
COL_SCHEMA = ["x", "y", "width", "height", "tag"]
df = pd.read_csv(DATA_PATH, header=None)
df.columns = COL_SCHEMA
df["id"] = "record_" + df.index.astype(str)
df = df.set_index("id")
df.to_json(WRITE_PATH, orient="index", indent=2)
Here I have the same data in the data
directory in a csv file. To make the functions platform-agnostic, I have used pathlib
now that code can be run in Windows/Unix/Linux with no change. Then I have loaded the data into a data frame and added a new ID column into it. Then I have set that ID column as index of the data frame. I have written the data back into the same directory with proper orientation and for JSON I have use indent = 2
just for better beautification.
Though, you have to install pandas for that with pip install pandas
command.
I’m new to python and want to convert the following csv file
1.0,100.0,303.0,619.0,figure
338.0,162.0,143.0,423.0,text
85.0,768.0,554.0,39.0,text
504.0,164.0,24.0,238.0,subtitle
120.0,727.0,182.0,13.0,caption
540.0,165.0,62.0,428.0,title
614.0,163.0,23.0,133.0,tagline
317.0,629.0,113.0,113.0,figure
443.0,629.0,112.0,113.0,figure
568.0,628.0,121.0,114.0,figure
into this format
{
"record_01": {
"x": "1.0",
"y": "100.0",
"width": "303.0",
"height": "619.0",
"tag": "figure"
},
"record_02": {
"x": "338.0",
"y": "162.0",
"width": "143.0",
"height": "423.0",
"tag": "text"
},
"record_03": {
"x": "85.0",
"y": "768.0",
"width": "554.0",
"height": "39.0",
"tag": "text"
}, .... and so on }
This is the current code
import csv
import json
def convert_json(csvPath, jsonPath):
fieldnames = ["x", "y", "width", "height", "tag"]
with open(csvPath, "r", encoding="utf-8") as csvFile:
csvReader = csv.DictReader(csvFile, fieldnames)
data = []
for rows in csvReader:
data.append(rows)
with open(jsonPath, "w", encoding="utf-8") as jsonFile:
jsonFile.write(json.dumps(data, indent=4))
and the output look like this
[
{
"x": "1.0",
"y": "100.0",
"width": "303.0",
"height": "619.0",
"tag": "figure"
},
{
"x": "338.0",
"y": "162.0",
"width": "143.0",
"height": "423.0",
"tag": "text"
},
{
"x": "85.0",
"y": "768.0",
"width": "554.0",
"height": "39.0",
"tag": "text"
}, ..... ]
How do I make sure the json file is in curly bracket instead of ‘[ ]’ and add record with number for each entry?
I tried using data={}
but it didn’t work with data.append(rows)
Edit: Thanks to the solution and explanation provided by Antonio, I changed my code and it output the expected result.
import csv
import json
fieldnames = ["x", "y", "width", "height", "tag"]
def convert_json(csvPath, jsonPath):
with open(csvPath, "r", encoding="utf-8") as csvFile:
csvReader = csv.DictReader(csvFile, fieldnames)
data = {}
for record, rows in enumerate(csvReader, start=1):
data.update({"record_{:02d}".format(record): rows})
with open(jsonPath, "w", encoding="utf-8") as jsonFile:
json.dump(data, jsonFile, indent=4)
csvPath = "data.csv"
jsonPath = "data.json"
convert_json(csvPath, jsonPath)
You were creating a list while you need to use dictionary. You must declare a dictionary before adding your elements or you could also use dict comprehension to create your dictionary in a more pythonic way. To create your records number you can format integer numbers with zero padding. To get the current record number you can use enumerate(item)
.
import csv
import json
def convert_json(csvPath, jsonPath, fieldnames):
with open(csvPath, "r", encoding="utf-8") as csvFile:
csvReader = csv.DictReader(csvFile, fieldnames)
data = {}
for record, rows in enumerate(csvReader):
data.update({"record_{:02d}".format(record): rows})
with open(jsonPath, "w", encoding="utf-8") as jsonFile:
json.dump(data, jsonFile, indent=4)
convert_json("./data.csv", "json_file.json", ["x", "y", "width", "height", "tag"])
Edit:
Version with dict comprehension:
import csv
import json
def convert_json(csvPath, jsonPath, fieldnames):
with open(csvPath, "r", encoding="utf-8") as csvFile:
csvReader = csv.DictReader(csvFile, fieldnames)
data = {"record_{:02d}".format(record): rows for record, rows in enumerate(csvReader)}
with open(jsonPath, "w", encoding="utf-8") as jsonFile:
json.dump(data, jsonFile, indent=4)
convert_json("./data.csv", "json_file.json", ["x", "y", "width", "height", "tag"])
If your data set is small or fit in the memory, it can be done more easily with pandas as follows:
from pathlib import Path
import pandas as pd
DATA_PATH = Path("data").joinpath("data.csv")
WRITE_PATH = Path("data").joinpath("data.json")
COL_SCHEMA = ["x", "y", "width", "height", "tag"]
df = pd.read_csv(DATA_PATH, header=None)
df.columns = COL_SCHEMA
df["id"] = "record_" + df.index.astype(str)
df = df.set_index("id")
df.to_json(WRITE_PATH, orient="index", indent=2)
Here I have the same data in the data
directory in a csv file. To make the functions platform-agnostic, I have used pathlib
now that code can be run in Windows/Unix/Linux with no change. Then I have loaded the data into a data frame and added a new ID column into it. Then I have set that ID column as index of the data frame. I have written the data back into the same directory with proper orientation and for JSON I have use indent = 2
just for better beautification.
Though, you have to install pandas for that with pip install pandas
command.