Generating xml ouput file by using python

Question:

My input data is present in csv file and need to generate XML output file based on data available in input file. I am trying to implement this requirement with python. I have tried the below code but not getting required output.

Input Data(Input.csv file):

CustID,CardNo
A00001,C000000001  
A00001,C000000002  
A00002,C000000003  

I have tried below code:

import csv
import io
import xml.etree.ElementTree as ET
from collections import defaultdict

def generate_xml(input_file, output_file):

    # Read input data from the file
    with open(input_file, 'r') as file:
        input_data = file.readlines()

    cards_per_custid = defaultdict(list)
    for line in csv.DictReader(input_data):
        cards_per_custid[line["CustID"]].append(line["CardNo"])

    root = ET.Element(
    "ReceivableAccounting",
    attrib={"xmlns": "http://www.sample.com/testing"},
    )
    for cust_id, cards in sorted(cards_per_custid.items()):
        card_el = ET.SubElement(root, "Card")
        header = ET.SubElement(card_el, "CustHolderHeader")
        ET.SubElement(header, "CustID").text = cust_id
    for card_no in cards:
        line = ET.SubElement(card_el, "CardLine")
        ET.SubElement(line, "CardDetail").text = card_no

    tree = ET.ElementTree(root)
    ET.indent(tree)
    tree.write(output_file)

# Example usage:
input_file = "Input.csv"
output_file = "output.xml"
generate_xml(input_file, output_file)

Generating the below result:

<ReceivableAccounting xmlns="http://www.sample.com/testing">
  <Card>
    <CustHolderHeader>
      <CustID>A00001</CustID>
    </CustHolderHeader>
  </Card>
  <Card>
    <CustHolderHeader>
      <CustID>A00003</CustID>
    </CustHolderHeader>
    <CardLine>
      <CardDetail>C000000003</CardDetail>
    </CardLine>
  </Card>

Expecting the below result:

<?xml version="1.0" encoding="UTF-8"?>
<ReceivableAccounting xmlns=http://www.sample.com/testing>
  <Card>
     <CustHolderHeader>
        <CustID>A00001</CustID>
     </CustHolderHeader>
     <CardLine>
        <CardDetail>C000000001</CardDetail>
     </CardLine>
       <CardLine>
        <CardDetail>C000000002</CardDetail>
     </CardLine> 
  </Card>
  <Card>
     <CustHolderHeader>
        <CustID>A00002</CustID>
     </CustHolderHeader>
     <CardLine>
        <CardDetail>C000000003</CardDetail>
     </CardLine>  
     </Card>
Asked By: vijay

||

Answers:

Sure thing.

I’ve inlined your example data into an io.StringIO(), but you can just as well substitute an open file handle. Note that the xmlns handling I’m doing here is not 100% the correct way to do it, but it works for your example case.

import csv
import io
import xml.etree.ElementTree as ET
from collections import defaultdict

input_file = io.StringIO(
    """
CustID,CardNo
A00001,C000000001
A00001,C000000002
A00002,C000000003
""".strip()
)

cards_per_custid = defaultdict(list)
for line in csv.DictReader(input_file):
    cards_per_custid[line["CustID"]].append(line["CardNo"])

root = ET.Element(
    "ReceivableAccounting",
    attrib={"xmlns": "http://www.sample.com/testing"},
)
for cust_id, cards in sorted(cards_per_custid.items()):
    card_el = ET.SubElement(root, "Card")
    header = ET.SubElement(card_el, "CustHolderHeader")
    ET.SubElement(header, "CustID").text = cust_id
    for card_no in cards:
        line = ET.SubElement(card_el, "CardLine")
        ET.SubElement(line, "CardDetail").text = card_no

tree = ET.ElementTree(root)
ET.indent(tree)

print(ET.tostring(root, encoding="unicode"))

This prints

<ReceivableAccounting xmlns="http://www.sample.com/testing">
  <Card>
    <CustHolderHeader>
      <CustID>A00001</CustID>
    </CustHolderHeader>
    <CardLine>
      <CardDetail>C000000001</CardDetail>
    </CardLine>
    <CardLine>
      <CardDetail>C000000002</CardDetail>
    </CardLine>
  </Card>
  <Card>
    <CustHolderHeader>
      <CustID>A00002</CustID>
    </CustHolderHeader>
    <CardLine>
      <CardDetail>C000000003</CardDetail>
    </CardLine>
  </Card>
</ReceivableAccounting>
Answered By: AKX
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.