Generating an XML output file using Python
Question:
My input data is in a CSV file, and I need to generate an XML output file based on the data in that file. I am trying to implement this in Python. I have tried the code below, but I am not getting the required output.
Input Data(Input.csv file):
CustID,CardNo
A00001,C000000001
A00001,C000000002
A00002,C000000003
I have tried the code below:
import csv
import io
import xml.etree.ElementTree as ET
from collections import defaultdict
def generate_xml(input_file, output_file):
# Convert the CSV at `input_file` (columns CustID and CardNo) into an XML
# document written to `output_file`, with one <Card> element per customer.
# NOTE(review): the indentation of this listing was lost, so the nesting
# described in the comments below is the presumed intent -- confirm against
# the original script.
# Read every line of the input file into memory.
with open(input_file, 'r') as file:
input_data = file.readlines()
# Group card numbers by customer ID, in the order the rows appear in the CSV.
cards_per_custid = defaultdict(list)
for line in csv.DictReader(input_data):
cards_per_custid[line["CustID"]].append(line["CardNo"])
# Root element; the namespace is set as a plain "xmlns" attribute.
root = ET.Element(
"ReceivableAccounting",
attrib={"xmlns": "http://www.sample.com/testing"},
)
# One <Card> with a <CustHolderHeader>/<CustID> per customer, in sorted CustID order.
for cust_id, cards in sorted(cards_per_custid.items()):
card_el = ET.SubElement(root, "Card")
header = ET.SubElement(card_el, "CustHolderHeader")
ET.SubElement(header, "CustID").text = cust_id
# One <CardLine>/<CardDetail> per card number.
# NOTE(review): presumably this loop must be nested inside the `for cust_id`
# loop above; if it runs only after that loop has finished, `card_el` and
# `cards` refer to the last customer, so only the last <Card> receives
# <CardLine> children -- verify the indentation.
for card_no in cards:
line = ET.SubElement(card_el, "CardLine")
ET.SubElement(line, "CardDetail").text = card_no
# Pretty-print the tree in place, then write it to output_file.
# NOTE(review): with its default arguments tree.write() emits no XML
# declaration; pass encoding="UTF-8", xml_declaration=True if the
# <?xml ...?> header is required.
tree = ET.ElementTree(root)
ET.indent(tree)
tree.write(output_file)
# Example usage: convert Input.csv into output.xml (both are relative paths).
input_file = "Input.csv"
output_file = "output.xml"
generate_xml(input_file, output_file)
It generates the result below:
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00003</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
I am expecting the result below:
<?xml version="1.0" encoding="UTF-8"?>
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000001</CardDetail>
</CardLine>
<CardLine>
<CardDetail>C000000002</CardDetail>
</CardLine>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00002</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
</ReceivableAccounting>
Answers:
Sure thing.
I’ve inlined your example data into an io.StringIO(), but you can just as well substitute an open file handle. Note that the xmlns handling here is not 100% the correct way to do it (it sets xmlns as a plain attribute instead of using ElementTree's namespace support), but it works for your example case.
import csv
import io
import xml.etree.ElementTree as ET
from collections import defaultdict
# Sample CSV inlined for the demo. Any open text file handle works in its
# place; open real files with newline="" as the csv module recommends.
input_file = io.StringIO(
    """
CustID,CardNo
A00001,C000000001
A00001,C000000002
A00002,C000000003
""".strip()
)

# Group card numbers by customer ID, keeping the order they appear in the CSV.
cards_per_custid = defaultdict(list)
for row in csv.DictReader(input_file):
    cards_per_custid[row["CustID"]].append(row["CardNo"])

# NOTE: writing "xmlns" as a plain attribute is a shortcut that yields the
# expected text for this document; it does not put the elements into that
# namespace as far as ElementTree itself is concerned.
root = ET.Element(
    "ReceivableAccounting",
    attrib={"xmlns": "http://www.sample.com/testing"},
)

# Emit one <Card> per customer, in sorted CustID order.
for cust_id, cards in sorted(cards_per_custid.items()):
    card_el = ET.SubElement(root, "Card")
    header = ET.SubElement(card_el, "CustHolderHeader")
    ET.SubElement(header, "CustID").text = cust_id
    # This loop has to stay nested inside the customer loop: every customer's
    # cards are attached to that customer's own <Card> element.
    for card_no in cards:
        card_line = ET.SubElement(card_el, "CardLine")
        ET.SubElement(card_line, "CardDetail").text = card_no

# ET.indent() (Python 3.9+) pretty-prints the tree in place.
tree = ET.ElementTree(root)
ET.indent(tree)
# To write a file with an XML declaration instead of printing, use
# tree.write(path, encoding="UTF-8", xml_declaration=True).
print(ET.tostring(root, encoding="unicode"))
This prints
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000001</CardDetail>
</CardLine>
<CardLine>
<CardDetail>C000000002</CardDetail>
</CardLine>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00002</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
</ReceivableAccounting>
My input data is in a CSV file, and I need to generate an XML output file based on the data in that file. I am trying to implement this in Python. I have tried the code below, but I am not getting the required output.
Input Data(Input.csv file):
CustID,CardNo
A00001,C000000001
A00001,C000000002
A00002,C000000003
I have tried the code below:
import csv
import io
import xml.etree.ElementTree as ET
from collections import defaultdict
def generate_xml(input_file, output_file):
# Convert the CSV at `input_file` (columns CustID and CardNo) into an XML
# document written to `output_file`, with one <Card> element per customer.
# NOTE(review): the indentation of this listing was lost, so the nesting
# described in the comments below is the presumed intent -- confirm against
# the original script.
# Read every line of the input file into memory.
with open(input_file, 'r') as file:
input_data = file.readlines()
# Group card numbers by customer ID, in the order the rows appear in the CSV.
cards_per_custid = defaultdict(list)
for line in csv.DictReader(input_data):
cards_per_custid[line["CustID"]].append(line["CardNo"])
# Root element; the namespace is set as a plain "xmlns" attribute.
root = ET.Element(
"ReceivableAccounting",
attrib={"xmlns": "http://www.sample.com/testing"},
)
# One <Card> with a <CustHolderHeader>/<CustID> per customer, in sorted CustID order.
for cust_id, cards in sorted(cards_per_custid.items()):
card_el = ET.SubElement(root, "Card")
header = ET.SubElement(card_el, "CustHolderHeader")
ET.SubElement(header, "CustID").text = cust_id
# One <CardLine>/<CardDetail> per card number.
# NOTE(review): presumably this loop must be nested inside the `for cust_id`
# loop above; if it runs only after that loop has finished, `card_el` and
# `cards` refer to the last customer, so only the last <Card> receives
# <CardLine> children -- verify the indentation.
for card_no in cards:
line = ET.SubElement(card_el, "CardLine")
ET.SubElement(line, "CardDetail").text = card_no
# Pretty-print the tree in place, then write it to output_file.
# NOTE(review): with its default arguments tree.write() emits no XML
# declaration; pass encoding="UTF-8", xml_declaration=True if the
# <?xml ...?> header is required.
tree = ET.ElementTree(root)
ET.indent(tree)
tree.write(output_file)
# Example usage: convert Input.csv into output.xml (both are relative paths).
input_file = "Input.csv"
output_file = "output.xml"
generate_xml(input_file, output_file)
It generates the result below:
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00003</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
I am expecting the result below:
<?xml version="1.0" encoding="UTF-8"?>
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000001</CardDetail>
</CardLine>
<CardLine>
<CardDetail>C000000002</CardDetail>
</CardLine>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00002</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
</ReceivableAccounting>
Sure thing.
I’ve inlined your example data into an io.StringIO(), but you can just as well substitute an open file handle. Note that the xmlns handling here is not 100% the correct way to do it (it sets xmlns as a plain attribute instead of using ElementTree's namespace support), but it works for your example case.
import csv
import io
import xml.etree.ElementTree as ET
from collections import defaultdict
# Sample CSV inlined for the demo. Any open text file handle works in its
# place; open real files with newline="" as the csv module recommends.
input_file = io.StringIO(
    """
CustID,CardNo
A00001,C000000001
A00001,C000000002
A00002,C000000003
""".strip()
)

# Group card numbers by customer ID, keeping the order they appear in the CSV.
cards_per_custid = defaultdict(list)
for row in csv.DictReader(input_file):
    cards_per_custid[row["CustID"]].append(row["CardNo"])

# NOTE: writing "xmlns" as a plain attribute is a shortcut that yields the
# expected text for this document; it does not put the elements into that
# namespace as far as ElementTree itself is concerned.
root = ET.Element(
    "ReceivableAccounting",
    attrib={"xmlns": "http://www.sample.com/testing"},
)

# Emit one <Card> per customer, in sorted CustID order.
for cust_id, cards in sorted(cards_per_custid.items()):
    card_el = ET.SubElement(root, "Card")
    header = ET.SubElement(card_el, "CustHolderHeader")
    ET.SubElement(header, "CustID").text = cust_id
    # This loop has to stay nested inside the customer loop: every customer's
    # cards are attached to that customer's own <Card> element.
    for card_no in cards:
        card_line = ET.SubElement(card_el, "CardLine")
        ET.SubElement(card_line, "CardDetail").text = card_no

# ET.indent() (Python 3.9+) pretty-prints the tree in place.
tree = ET.ElementTree(root)
ET.indent(tree)
# To write a file with an XML declaration instead of printing, use
# tree.write(path, encoding="UTF-8", xml_declaration=True).
print(ET.tostring(root, encoding="unicode"))
This prints
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000001</CardDetail>
</CardLine>
<CardLine>
<CardDetail>C000000002</CardDetail>
</CardLine>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00002</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
</ReceivableAccounting>