Convert pandas DataFrame to nested XML file in Python
Question:
I have a pandas DataFrame that looks like the following:
# Build the sample frame: one row per (Item, Day, Type, Hour, SegmentID)
# together with its quant and cost.
# NOTE: routing the mixed str/int/float rows through np.array coerces every
# value to a string, so all seven columns hold strings.
df2 = pd.DataFrame(
    data=np.array(
        [
            ["A", "2023-10-17", "C", 4, 1, 1, 1.5],
            ["A", "2023-10-17", "C", 5, 1, 5, 2.4],
            ["A", "2023-10-17", "D", 16, 1, 2, 2.3],
            ["B", "2023-10-17", "C", 4, 1, 1, 1.5],
            ["B", "2023-10-17", "D", 5, 1, 5, 2.4],
            ["B", "2023-10-17", "D", 5, 2, 2, 3.3],
        ]
    ),
    columns=["Item", "Day", "Type", "Hour", "SegmentID", "quant", "cost"],
)
df2  # notebook-style display of the frame
and I would like to convert it to XML like this:
<ItemRecord Item="A" Day="2023-10-17">
  <C>
    <ItemRecordHourly hour="4">
      <Segment id="1">
        <quant>1</quant>
        <cost>1.5</cost>
      </Segment>
    </ItemRecordHourly>
    <ItemRecordHourly hour="5">
      <Segment id="1">
        <quant>5</quant>
        <cost>2.4</cost>
      </Segment>
    </ItemRecordHourly>
  </C>
  <D>
    <ItemRecordHourly hour="16">
      <Segment id="1">
        <quant>2</quant>
        <cost>2.3</cost>
      </Segment>
    </ItemRecordHourly>
  </D>
</ItemRecord>
<ItemRecord Item="B" Day="2023-10-17">
  <C>
    <ItemRecordHourly hour="4">
      <Segment id="1">
        <quant>1</quant>
        <cost>1.5</cost>
      </Segment>
    </ItemRecordHourly>
  </C>
  <D>
    <ItemRecordHourly hour="5">
      <Segment id="1">
        <quant>5</quant>
        <cost>2.4</cost>
      </Segment>
      <Segment id="2">
        <quant>2</quant>
        <cost>3.3</cost>
      </Segment>
    </ItemRecordHourly>
  </D>
</ItemRecord>
I tried to use xml.etree.ElementTree and pandas' DataFrame.to_xml(), but I could not get either of them to convert the data correctly. Any suggestions would be very helpful! Thanks in advance!
Thanks @mcjeb! Here is the working solution:
import xml.etree.ElementTree as ET

# Build a nested XML tree from df2:
#   <Data> / <ItemRecord Item Day> / <Type> / <ItemRecordHourly hour> / <Segment id>
# One <ItemRecord> element is emitted per distinct (Item, Day) pair.
grouped = df2.groupby(['Item', 'Day'])

# Create the root element of the XML
root = ET.Element("Data")

# Iterate through the grouped data and construct the XML structure
for (item, day), group in grouped:
    # XML attribute values must be strings, hence the str() conversions.
    item_record = ET.SubElement(root, "ItemRecord", Item=str(item), Day=str(day))
    # sort=False keeps types, hours and segments in their order of appearance.
    for item_type, type_group in group.groupby('Type', sort=False):
        item_type_element = ET.SubElement(item_record, str(item_type))
        # Rows that share an hour become sibling <Segment> elements under a
        # single <ItemRecordHourly>, rather than one <ItemRecordHourly> per row.
        for hour, hour_group in type_group.groupby('Hour', sort=False):
            item_hourly = ET.SubElement(item_type_element, "ItemRecordHourly", hour=str(hour))
            for _, row in hour_group.iterrows():
                # segment tags
                segment = ET.SubElement(item_hourly, "Segment", id=str(row['SegmentID']))
                # quant tags
                quant = ET.SubElement(segment, "quant")
                quant.text = str(row["quant"])
                # cost tags -- the column is named 'cost'; a key with a
                # trailing space ("cost ") raises KeyError.
                cost = ET.SubElement(segment, "cost")
                cost.text = str(row["cost"])

# Wrap the root so the document can be serialized, e.g. tree.write("output.xml").
tree = ET.ElementTree(root)
Answers:
Using xml.etree.ElementTree should work.
Because each "<ItemRecord>" element corresponds to one combination of Item and Day, first group your DataFrame by those two columns:
# Group the rows by (Item, Day); each group is iterated as one unit below.
grouped = df2.groupby(['Item', 'Day'])
Then you can start creating the xml file and iterating over the entries:
import xml.etree.ElementTree as ET

# Create the root element of the XML
root = ET.Element("Data")

# Iterate through the grouped data and construct the XML structure.
# `grouped` is the (Item, Day) groupby produced in the previous step.
for (item, day), group in grouped:
    # XML attribute values must be strings, hence the str() conversions.
    item_record = ET.SubElement(root, "ItemRecord", Item=str(item), Day=str(day))

    # Then run down the structure and create tags:
    # sort=False keeps types, hours and segments in their order of appearance.
    for item_type, type_group in group.groupby('Type', sort=False):
        item_type_element = ET.SubElement(item_record, str(item_type))
        # Rows that share an hour become sibling <Segment> elements under a
        # single <ItemRecordHourly>, rather than one <ItemRecordHourly> per row.
        for hour, hour_group in type_group.groupby('Hour', sort=False):
            item_hourly = ET.SubElement(item_type_element, "ItemRecordHourly", hour=str(hour))
            for _, row in hour_group.iterrows():
                # segment tags
                segment = ET.SubElement(item_hourly, "Segment", id=str(row['SegmentID']))
                # quant tags -- the text must be set, otherwise <quant/> is empty
                quant = ET.SubElement(segment, "quant")
                quant.text = str(row["quant"])
                # cost tags -- likewise filled from the row
                cost = ET.SubElement(segment, "cost")
                cost.text = str(row["cost"])
Then you can save the xml file:
# Wrap the root element in an ElementTree so it can be serialized.
tree = ET.ElementTree(root)
# Write the document to "output.xml" (a path relative to the working directory).
tree.write("output.xml")
Let me know what you think or where you had more specific problems.
I have a pandas DataFrame that looks like the following:
# Build the sample frame: one row per (Item, Day, Type, Hour, SegmentID)
# together with its quant and cost.
# NOTE: routing the mixed str/int/float rows through np.array coerces every
# value to a string, so all seven columns hold strings.
df2 = pd.DataFrame(
    data=np.array(
        [
            ["A", "2023-10-17", "C", 4, 1, 1, 1.5],
            ["A", "2023-10-17", "C", 5, 1, 5, 2.4],
            ["A", "2023-10-17", "D", 16, 1, 2, 2.3],
            ["B", "2023-10-17", "C", 4, 1, 1, 1.5],
            ["B", "2023-10-17", "D", 5, 1, 5, 2.4],
            ["B", "2023-10-17", "D", 5, 2, 2, 3.3],
        ]
    ),
    columns=["Item", "Day", "Type", "Hour", "SegmentID", "quant", "cost"],
)
df2  # notebook-style display of the frame
and I would like to convert it to XML like this:
<ItemRecord Item="A" Day="2023-10-17">
  <C>
    <ItemRecordHourly hour="4">
      <Segment id="1">
        <quant>1</quant>
        <cost>1.5</cost>
      </Segment>
    </ItemRecordHourly>
    <ItemRecordHourly hour="5">
      <Segment id="1">
        <quant>5</quant>
        <cost>2.4</cost>
      </Segment>
    </ItemRecordHourly>
  </C>
  <D>
    <ItemRecordHourly hour="16">
      <Segment id="1">
        <quant>2</quant>
        <cost>2.3</cost>
      </Segment>
    </ItemRecordHourly>
  </D>
</ItemRecord>
<ItemRecord Item="B" Day="2023-10-17">
  <C>
    <ItemRecordHourly hour="4">
      <Segment id="1">
        <quant>1</quant>
        <cost>1.5</cost>
      </Segment>
    </ItemRecordHourly>
  </C>
  <D>
    <ItemRecordHourly hour="5">
      <Segment id="1">
        <quant>5</quant>
        <cost>2.4</cost>
      </Segment>
      <Segment id="2">
        <quant>2</quant>
        <cost>3.3</cost>
      </Segment>
    </ItemRecordHourly>
  </D>
</ItemRecord>
I tried to use xml.etree.ElementTree and pandas' DataFrame.to_xml(), but I could not get either of them to convert the data correctly. Any suggestions would be very helpful! Thanks in advance!
Thanks @mcjeb! Here is the working solution:
import xml.etree.ElementTree as ET

# Build a nested XML tree from df2:
#   <Data> / <ItemRecord Item Day> / <Type> / <ItemRecordHourly hour> / <Segment id>
# One <ItemRecord> element is emitted per distinct (Item, Day) pair.
grouped = df2.groupby(['Item', 'Day'])

# Create the root element of the XML
root = ET.Element("Data")

# Iterate through the grouped data and construct the XML structure
for (item, day), group in grouped:
    # XML attribute values must be strings, hence the str() conversions.
    item_record = ET.SubElement(root, "ItemRecord", Item=str(item), Day=str(day))
    # sort=False keeps types, hours and segments in their order of appearance.
    for item_type, type_group in group.groupby('Type', sort=False):
        item_type_element = ET.SubElement(item_record, str(item_type))
        # Rows that share an hour become sibling <Segment> elements under a
        # single <ItemRecordHourly>, rather than one <ItemRecordHourly> per row.
        for hour, hour_group in type_group.groupby('Hour', sort=False):
            item_hourly = ET.SubElement(item_type_element, "ItemRecordHourly", hour=str(hour))
            for _, row in hour_group.iterrows():
                # segment tags
                segment = ET.SubElement(item_hourly, "Segment", id=str(row['SegmentID']))
                # quant tags
                quant = ET.SubElement(segment, "quant")
                quant.text = str(row["quant"])
                # cost tags -- the column is named 'cost'; a key with a
                # trailing space ("cost ") raises KeyError.
                cost = ET.SubElement(segment, "cost")
                cost.text = str(row["cost"])

# Wrap the root so the document can be serialized, e.g. tree.write("output.xml").
tree = ET.ElementTree(root)
Using xml.etree.ElementTree should work.
Because each "<ItemRecord>" element corresponds to one combination of Item and Day, first group your DataFrame by those two columns:
# Group the rows by (Item, Day); each group is iterated as one unit below.
grouped = df2.groupby(['Item', 'Day'])
Then you can start creating the xml file and iterating over the entries:
import xml.etree.ElementTree as ET

# Create the root element of the XML
root = ET.Element("Data")

# Iterate through the grouped data and construct the XML structure.
# `grouped` is the (Item, Day) groupby produced in the previous step.
for (item, day), group in grouped:
    # XML attribute values must be strings, hence the str() conversions.
    item_record = ET.SubElement(root, "ItemRecord", Item=str(item), Day=str(day))

    # Then run down the structure and create tags:
    # sort=False keeps types, hours and segments in their order of appearance.
    for item_type, type_group in group.groupby('Type', sort=False):
        item_type_element = ET.SubElement(item_record, str(item_type))
        # Rows that share an hour become sibling <Segment> elements under a
        # single <ItemRecordHourly>, rather than one <ItemRecordHourly> per row.
        for hour, hour_group in type_group.groupby('Hour', sort=False):
            item_hourly = ET.SubElement(item_type_element, "ItemRecordHourly", hour=str(hour))
            for _, row in hour_group.iterrows():
                # segment tags
                segment = ET.SubElement(item_hourly, "Segment", id=str(row['SegmentID']))
                # quant tags -- the text must be set, otherwise <quant/> is empty
                quant = ET.SubElement(segment, "quant")
                quant.text = str(row["quant"])
                # cost tags -- likewise filled from the row
                cost = ET.SubElement(segment, "cost")
                cost.text = str(row["cost"])
Then you can save the xml file:
# Wrap the root element in an ElementTree so it can be serialized.
tree = ET.ElementTree(root)
# Write the document to "output.xml" (a path relative to the working directory).
tree.write("output.xml")
Let me know what you think or where you had more specific problems.