Trasform Json into Pandas Dataframe

Question:

I have this kind of json I would transform it into a pandas dataframe, with specific columns names.

{
    "data": [
        {
            "id": 1,
            "name": "3Way Result",
            "suspended": false,
            "bookmaker": {
                "data": [
                    {
                        "id": 27802,
                        "name": "Ladbrokes",
                        "odds": {
                            "data": [
                                {
                                    "label": "1",
                                    "value": "1.61",
                                    "probability": "62.11%",
                                    "dp3": "1.610",
                                    "american": -164,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:41:27.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                },
                                {
                                    "label": "X",
                                    "value": "3.90",
                                    "probability": "25.64%",
                                    "dp3": "3.900",
                                    "american": 290,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:41:27.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                },
                                {
                                    "label": "2",
                                    "value": "5.20",
                                    "probability": "19.23%",
                                    "dp3": "5.200",
                                    "american": 420,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:41:27.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                }
                            ]
                        }
                    },
                    {
                        "id": 70,
                        "name": "Pncl",
                        "odds": {
                            "data": [
                                {
                                    "label": "1",
                                    "value": "1.65",
                                    "probability": "60.61%",
                                    "dp3": "1.645",
                                    "american": -154,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:59:18.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                },
                                {
                                    "label": "X",
                                    "value": "4.20",
                                    "probability": "23.81%",
                                    "dp3": "4.200",
                                    "american": 320,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:59:18.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                },
                                {
                                    "label": "2",
                                    "value": "5.43",
                                    "probability": "18.42%",
                                    "dp3": "5.430",
                                    "american": 443,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:59:18.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                }
                            ]
                        }
                    }
                ]
            }
        }
    ],
    "meta": {
        "plans": [
            {
                "name": "Football Free Plan",
                "features": "Standard",
                "request_limit": "180,60",
                "sport": "Soccer"
            }
        ],
        "sports": [
            {
                "id": 1,
                "name": "Soccer",
                "current": true
            }
        ]
    }
}

All columns name contains the name of the bookmaker plus the label value.
I would take the value in label and use it as column name with the name of the bookmaker in name. Then the float in value use it as row of the dataframe

Here the Expected Output

   1_LadBrokes  X_LadBrokes  2_LadBrokes       last_update_LadBrokes  1_Pncl  X_Pncl  2_Pncl            last_update_Pncl
0         1.61          3.9          5.2  2021-10-01 16:41:27.000000    1.65     4.2    5.43  2021-10-01 16:59:18.000000
Asked By: luka

||

Answers:

You can achieve it like so using json_normalize + apply.

def set_values(x):
    data = x["odds.data"]
    label = data.get("label")
    value = data.get("value")
    last_update_date = data["last_update"]["date"]
    name = x["name"]
    x[f"{label}_{name}"] = value
    x[f"last_update_{name}"] = last_update_date
    return x


df = (
    pd.json_normalize(data["data"], record_path=["bookmaker", "data"])
    .explode("odds.data")
    .apply(lambda x: set_values(x), axis=1)
    .drop(["odds.data", "id", "name"], axis=1)
    .ffill()
    .bfill()
    .head(1)
)

In [39]: df
Out[39]: 
  1_Ladbrokes 1_Pncl 2_Ladbrokes 2_Pncl X_Ladbrokes X_Pncl       last_update_Ladbrokes            last_update_Pncl
0        1.61   1.65        5.20   5.43        3.90   4.20  2021-10-01 16:41:27.000000  2021-10-01 16:59:18.000000
Answered By: yudhiesh

Use pd.json_normalize and create two subdataframes for value and last_update them join them.

out = pd.json_normalize(
          data=data['data'],
          record_path=['bookmaker', 'data', 'odds', 'data'], 
          meta=[['bookmaker', 'data', 'name']]
      )[['label', 'value', 'last_update.date', 'bookmaker.data.name']]

df1 = out.set_index(out['label'] + '_' + out['bookmaker.data.name'])['value']
df2 = out.set_index('bookmaker.data.name')['last_update.date'] 
         .add_prefix('last_update_').drop_duplicates()

df = pd.concat([df1, df2]).to_frame().T

Output:

>>> df
  1_Ladbrokes_Ladbrokes X_Ladbrokes_Ladbrokes 2_Ladbrokes_Ladbrokes 1_Pncl_Pncl X_Pncl_Pncl 2_Pncl_Pncl       last_update_Ladbrokes            last_update_Pncl
0                  1.61                  3.90                  5.20        1.65        4.20        5.43  2021-10-01 16:41:27.000000  2021-10-01 16:59:18.000000
Answered By: Corralien
Categories: questions Tags: , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.