Convert json with data from each id in different lines into one line per id with python
Question:
I have a json file with the following format:
{
"responses":[
{
"id":"123",
"cid":"01A",
"response":{nested lists and dictionaries}
},
{
"id":"456",
"cid":"54G",
"response":{nested lists and dictionaries}
}
]}
And so on.
And I want to convert it into a json file like this:
{"id":"123", "cid":"01A", "response":{nested lists and dictionaries}},
{"id":"456", "cid":"54G", "response":{nested lists and dictionaries}}
or
{responses:[
{"id":"123", "cid":"01A", "response":{nested lists and dictionaries}},
{"id":"456", "cid":"54G", "response":{nested lists and dictionaries}}
]}
I don’t care about the surrounding format as long as I have the information for each ID in just one line.
I have to do this while reading it because things like pd.read_json
don’t read this kind of file.
Thanks!
Answers:
You can use the built-in json library to print each response on a separate line. The json.dumps() function has an option to indent, if you want that, but its default is to put everything on one line, which is what you want.
Here’s an example that works for the input you showed in your post.
#!/usr/bin/env python3
# Rewrite a {"responses": [...]} JSON file so that every response occupies
# exactly one line, and print the result to standard output.
import json
import sys


def render_one_per_line(obj):
    """Return *obj* as JSON text with each response on its own line.

    Args:
        obj: A mapping with a "responses" key holding a list of
            JSON-serializable records.

    Returns:
        A string that is still valid JSON: an opening line, one compact
        line per response (comma-separated), and a closing line.

    Raises:
        KeyError: If *obj* has no "responses" key.
    """
    # json.dumps() without indent= keeps each record on a single line.
    records = ",\n".join(json.dumps(response) for response in obj["responses"])
    # The key is quoted and records are comma-separated so the output can be
    # parsed again by any JSON reader.
    return '{"responses":[\n' + records + '\n]}'


def main(argv):
    """Convert the file named by argv[1]; return a process exit status."""
    if len(argv) != 2:
        print(f"usage: {argv[0]} INPUT.json", file=sys.stderr)
        return 2
    with open(argv[1], encoding="utf-8") as json_file:
        obj = json.load(json_file)
    print(render_one_per_line(obj))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Usage (assuming you named the program format_json.py):
# Make the script executable, then run it from the current directory.
# The ./ prefix is required unless the script's directory is on your PATH.
$ chmod +x format_json.py
$ ./format_json.py my_json_input.json > my_json_output.json
Or, if you’re not in a command-line environment, you can also hardcode the input and output filenames:
#!/usr/bin/env python3
# Rewrite a {"responses": [...]} JSON file so that every response occupies
# exactly one line, using hardcoded input and output filenames.
import json
import sys

infile = 'my_json_input.json'
outfile = 'my_json_output.json'


def convert(in_path, out_path):
    """Read *in_path* and write it to *out_path* with one response per line.

    Args:
        in_path: Path of a JSON file whose top level has a "responses" list.
        out_path: Path of the file to create or overwrite.

    Raises:
        KeyError: If the input has no "responses" key.
    """
    with open(in_path, encoding="utf-8") as json_file:
        obj = json.load(json_file)
    # print(file=...) needs an open, writable file object, not a filename.
    with open(out_path, "w", encoding="utf-8") as out:
        # The key is quoted and records are comma-separated so the result is
        # still valid JSON.
        print('{"responses":[', file=out)
        print(",\n".join(json.dumps(response) for response in obj["responses"]), file=out)
        print("]}", file=out)


if __name__ == "__main__":
    convert(infile, outfile)
Maybe just dump it line wise? But I guess I didn’t understand your question right?
import json

# Placeholder from the answer: substitute the parsed input here, e.g. the
# result of json.load() on the source file.
input_lines = {"responses": ...}


def write_json_lines(records, path="output.json"):
    """Write each record as one compact JSON document per line.

    Args:
        records: An iterable of JSON-serializable objects.
        path: Path of the file to create or overwrite.

    The result is in JSON Lines layout, which pandas can load with
    pd.read_json(path, lines=True).
    """
    with open(path, "w", encoding="utf-8") as f:
        # "\n" (a real newline, not the letter n) terminates every record.
        f.writelines(json.dumps(record) + "\n" for record in records)


if __name__ == "__main__":
    write_json_lines(input_lines["responses"])
You can customize the structure of your JSON output as shown below, which also shows how to remove the duplicated last record from the JSON file.
import json
class MyListLength(list):
    """A list that can report the index of its final element."""

    def last_index(self):
        """Return the index of the last element, or -1 when the list is empty."""
        return len(self) - 1
def fresh_addresses(full_list, new_list):
    """Return one record per address in *new_list* that *full_list* lacks.

    Args:
        full_list: Iterable of dicts that each have an "address" key.
        new_list: Iterable of dicts that each have an "address" key.

    Returns:
        A list of {"address": ...} dicts, de-duplicated, in the order the
        addresses first appear in *new_list*.
    """
    # Create a set of the addresses in the full list
    known = {entry['address'] for entry in full_list}
    # dict.fromkeys() drops repeats while keeping first-seen order, so the
    # output is deterministic (iterating a plain set is not).
    fresh = dict.fromkeys(
        entry['address'] for entry in new_list if entry['address'] not in known
    )
    return [{"address": address} for address in fresh]


def write_address_list(records, path):
    """Write *records* to *path* as a JSON array with one record per line.

    Every record is written exactly once; only the separators between
    records carry a comma, so the last record is neither duplicated nor
    followed by a trailing comma. An empty list still yields a valid array.
    """
    # Build the padded layout around the serialized value rather than with
    # str.replace(), which would also alter a '}' inside an address.
    rendered = [
        ' { "address": ' + json.dumps(record["address"]) + ' }'
        for record in records
    ]
    with open(path, "w", encoding="utf-8") as out:
        out.write("[ \n")
        out.write(",\n".join(rendered))
        out.write("\n ]")


if __name__ == "__main__":
    # Read in the full list of addresses
    with open('full_list.json', 'r') as f:
        full_list = json.load(f)
    # Read in the new list of addresses
    with open('newlist.json', 'r') as s:
        new_list = json.load(s)
    filename = "fresh_list"
    desired_dir = filename + ".json"
    write_address_list(fresh_addresses(full_list, new_list), desired_dir)
I have a json file with the following format:
{
"responses":[
{
"id":"123",
"cid":"01A",
"response":{nested lists and dictionaries}
},
{
"id":"456",
"cid":"54G",
"response":{nested lists and dictionaries}
}
]}
And so on.
And I want to convert it into a json file like this:
{"id":"123", "cid":"01A", "response":{nested lists and dictionaries}},
{"id":"456", "cid":"54G", "response":{nested lists and dictionaries}}
or
{responses:[
{"id":"123", "cid":"01A", "response":{nested lists and dictionaries}},
{"id":"456", "cid":"54G", "response":{nested lists and dictionaries}}
]}
I don’t care about the surrounding format as long as I have the information for each ID in just one line.
I have to do this while reading it because things like pd.read_json
don’t read this kind of file.
Thanks!
You can use the built-in json library to print each response on a separate line. The json.dumps() function has an option to indent, if you want that, but its default is to put everything on one line, which is what you want.
Here’s an example that works for the input you showed in your post.
#!/usr/bin/env python3
# Rewrite a {"responses": [...]} JSON file so that every response occupies
# exactly one line, and print the result to standard output.
import json
import sys


def render_one_per_line(obj):
    """Return *obj* as JSON text with each response on its own line.

    Args:
        obj: A mapping with a "responses" key holding a list of
            JSON-serializable records.

    Returns:
        A string that is still valid JSON: an opening line, one compact
        line per response (comma-separated), and a closing line.

    Raises:
        KeyError: If *obj* has no "responses" key.
    """
    # json.dumps() without indent= keeps each record on a single line.
    records = ",\n".join(json.dumps(response) for response in obj["responses"])
    # The key is quoted and records are comma-separated so the output can be
    # parsed again by any JSON reader.
    return '{"responses":[\n' + records + '\n]}'


def main(argv):
    """Convert the file named by argv[1]; return a process exit status."""
    if len(argv) != 2:
        print(f"usage: {argv[0]} INPUT.json", file=sys.stderr)
        return 2
    with open(argv[1], encoding="utf-8") as json_file:
        obj = json.load(json_file)
    print(render_one_per_line(obj))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Usage (assuming you named the program format_json.py):
# Make the script executable, then run it from the current directory.
# The ./ prefix is required unless the script's directory is on your PATH.
$ chmod +x format_json.py
$ ./format_json.py my_json_input.json > my_json_output.json
Or, if you’re not in a command-line environment, you can also hardcode the input and output filenames:
#!/usr/bin/env python3
# Rewrite a {"responses": [...]} JSON file so that every response occupies
# exactly one line, using hardcoded input and output filenames.
import json
import sys

infile = 'my_json_input.json'
outfile = 'my_json_output.json'


def convert(in_path, out_path):
    """Read *in_path* and write it to *out_path* with one response per line.

    Args:
        in_path: Path of a JSON file whose top level has a "responses" list.
        out_path: Path of the file to create or overwrite.

    Raises:
        KeyError: If the input has no "responses" key.
    """
    with open(in_path, encoding="utf-8") as json_file:
        obj = json.load(json_file)
    # print(file=...) needs an open, writable file object, not a filename.
    with open(out_path, "w", encoding="utf-8") as out:
        # The key is quoted and records are comma-separated so the result is
        # still valid JSON.
        print('{"responses":[', file=out)
        print(",\n".join(json.dumps(response) for response in obj["responses"]), file=out)
        print("]}", file=out)


if __name__ == "__main__":
    convert(infile, outfile)
Maybe just dump it line wise? But I guess I didn’t understand your question right?
import json

# Placeholder from the answer: substitute the parsed input here, e.g. the
# result of json.load() on the source file.
input_lines = {"responses": ...}


def write_json_lines(records, path="output.json"):
    """Write each record as one compact JSON document per line.

    Args:
        records: An iterable of JSON-serializable objects.
        path: Path of the file to create or overwrite.

    The result is in JSON Lines layout, which pandas can load with
    pd.read_json(path, lines=True).
    """
    with open(path, "w", encoding="utf-8") as f:
        # "\n" (a real newline, not the letter n) terminates every record.
        f.writelines(json.dumps(record) + "\n" for record in records)


if __name__ == "__main__":
    write_json_lines(input_lines["responses"])
You can customize the structure of your JSON output as shown below, which also shows how to remove the duplicated last record from the JSON file.
import json
class MyListLength(list):
    """A list that can report the index of its final element."""

    def last_index(self):
        """Return the index of the last element, or -1 when the list is empty."""
        return len(self) - 1
def fresh_addresses(full_list, new_list):
    """Return one record per address in *new_list* that *full_list* lacks.

    Args:
        full_list: Iterable of dicts that each have an "address" key.
        new_list: Iterable of dicts that each have an "address" key.

    Returns:
        A list of {"address": ...} dicts, de-duplicated, in the order the
        addresses first appear in *new_list*.
    """
    # Create a set of the addresses in the full list
    known = {entry['address'] for entry in full_list}
    # dict.fromkeys() drops repeats while keeping first-seen order, so the
    # output is deterministic (iterating a plain set is not).
    fresh = dict.fromkeys(
        entry['address'] for entry in new_list if entry['address'] not in known
    )
    return [{"address": address} for address in fresh]


def write_address_list(records, path):
    """Write *records* to *path* as a JSON array with one record per line.

    Every record is written exactly once; only the separators between
    records carry a comma, so the last record is neither duplicated nor
    followed by a trailing comma. An empty list still yields a valid array.
    """
    # Build the padded layout around the serialized value rather than with
    # str.replace(), which would also alter a '}' inside an address.
    rendered = [
        ' { "address": ' + json.dumps(record["address"]) + ' }'
        for record in records
    ]
    with open(path, "w", encoding="utf-8") as out:
        out.write("[ \n")
        out.write(",\n".join(rendered))
        out.write("\n ]")


if __name__ == "__main__":
    # Read in the full list of addresses
    with open('full_list.json', 'r') as f:
        full_list = json.load(f)
    # Read in the new list of addresses
    with open('newlist.json', 'r') as s:
        new_list = json.load(s)
    filename = "fresh_list"
    desired_dir = filename + ".json"
    write_address_list(fresh_addresses(full_list, new_list), desired_dir)