Python Converting Nested array of objects in JSON to flattened excel

Question:

Python Converting Nested array of objects in JSON to flattened excel

Current Code

def read_json(filename):
jsonData = {}
try:
with open(filename, "r", encoding="utf-8") as f:
jsonData = json.loads(f.read())
except:
raise Exception(f"Reading {filename} file encountered an error")
return jsonData
json_data = read_json(filename=".json")

df = pd.json_normalize(json_data,meta=['Documents']).explode('Documents').reset_index(drop=True)
df = df.join(pd.json_normalize(df.pop('Documents')))
df = df.join(pd.json_normalize(df.pop("lossInfo").apply(pd.Series).stack().reset_index(drop=True)))

Expected:

  metaData.form metaData.userNm report.date policy.effdate policy.efftime year.current year.previous
0             1               1           1             11             11           11            11
1             1               1           1             12             12           12            12
3             2               2           2             21             21           21            21
4             2               2           2             22             22           22            22

How to flatten below JSON file.
Collection of 2 or more array of objects with nested data in a single json:

{
  "Documents": [
    {
      "metaData": {
        "form": "1",
        "userNm": "1"
      },
      "report": {
        "date": "1"
      },
      "lossInfo": [
        {
          "policy": {
            "effdate": "11",
            "efftime": "11"
          },
          "year": {
            "current": "11",
            "previous": "11"
          }
        },
        {
          "policy": {
            "effdate": "12",
            "efftime": "12"
          },
          "year": {
            "current": "12",
            "previous": "12"
          }
        }
      ]
    },
    {
      "metaData": {
        "form": "2",
        "userNm": "2"
      },
      "report": {
        "date": "2"
      },
      "lossInfo": [
        {
          "policy": {
            "effdate": "21",
            "efftime": "21"
          },
          "year": {
            "current": "22",
            "previous": "22"
          }
        },
        {
          "policy": {
            "effdate": "21",
            "efftime": "21"
          },
          "year": {
            "current": "22",
            "previous": "22"
          }
        }
      ]
    }
  ]
}
Asked By: Chetan

||

Answers:

@Chetan

For your specific case, this should do the trick.

import pandas as pd

data = {'Documents':[{'metaData':{'form':'1','userNm':'1'},'report':{'date':'1'},'lossInfo':[{'policy':{'effdate':'11','efftime':'11'},'year':{'current':'11','previous':'11'}},{'policy':{'effdate':'12','efftime':'12'},'year':{'current':'12','previous':'12'}}]},{'metaData':{'form':'2','userNm':'2'},'report':{'date':'2'},'lossInfo':[{'policy':{'effdate':'21','efftime':'21'},'year':{'current':'22','previous':'22'}},{'policy':{'effdate':'21','efftime':'21'},'year':{'current':'22','previous':'22'}}]}]}

def flattened_dataframe(data: dict) -> pd.DataFrame:
    metadata_cols = list(pd.json_normalize(data["Documents"]).columns)[1:]

    df = pd.json_normalize(
        data["Documents"], record_path="lossInfo", meta=["metaData", "report"]
    )

    data_pending_right_cols = pd.concat(
        [df, df["metaData"].apply(pd.Series), df["report"].apply(pd.Series)], axis=1
    ).drop(columns=["metaData", "report"])

    lossinfo_cols = list(data_pending_right_cols.columns)[:-3]

    return data_pending_right_cols.set_axis(
        lossinfo_cols + metadata_cols, axis="columns"
    )

flattened_dataframe(data)

Result sought

Answered By: Daniel Cárdenas