problem when try to convert json file to dataframe with pandas

Question:

I’m trying to read this local JSON file using pandas, but when I use the JSON NORMALIZE library to extract data from the JSON it throws me errors, I’ve tried many ways to do it but I don’t know if it’s a code problem or a JSON structure problem

I need the transportEvents part of this file to save and upload to bigquery project

 [
  {
    "event": {
      "eventPublishTime": 0,
      "eventSubmissionTime": 0,
      "correlationId": "string",
      "eventName": "string",
      "senderOrgName": "string",
      "senderOrgTypes": [
        "string"
      ],
      "originatorId": "string",
      "eventOccurrenceTime": 1589574600000,
      "eventOccurrenceTime8601": "2020-05-15T15:30:00.000-05:00",
      "fromOceanAggregator": "false",
      "originatorName": "string",
      "containerTransportId": "string",
      "physicalId": "string",
      "eventSubmissionGpsLocation": "string",
      "action": "string",
      "doc": {
        "description": "string",
        "contentUrl": "string",
        "docId": "string",
        "docVersion": 0,
        "docType": "string",
        "docReferences": [
          {
            "referenceName": "string",
            "referenceValue": "string"
          }
        ]
      },
      "destinationOrgName": "string",
      "documentType": "string",
      "vehicleId": "string",
      "vehicleName": "string",
      "voyageId": "string",
      "transportationPhase": "Import",
      "eventTransactionId": "string",
      "shipmentRef": "string",
      "carrierBookingNumber": "string",
      "containerTransportRef": "string",
      "firstPortOfEntryCountry": "AD",
      "cargoRemainingOnboardCountries": [
        "AD"
      ],
      "transitImportCountries": [
        "AD"
      ],
      "transitExportCountries": [
        "AD"
      ],
      "transshipmentPorts": [
        "string"
      ],
      "transshipmentPortDescriptions": [
        "string"
      ],
      "entryPort": "string",
      "exitPort": "string",
      "terminal": "string",
      "terminalDescription": "string",
      "consignmentId": "string",
      "consignmentRef": "string",
      "transportEquipmentId": "string",
      "transportEquipmentRef": "string",
      "equipmentNumber": "string",
      "equipmentType": "string",
      "transportPlanIssuanceTime8601": "2020-05-15T15:30:00.000-05:00",
      "transportPlanIssuanceTime": 1589574600000,
      "transportPlanSequenceNumber": 0,
      "transportPlanTotalNumber": 0,
      "subscriptionId": "string",
      "documentId": "string",
      "documentVersion": 0,
      "dangerousGoodsType": "string",
      "packingListNumber": "string",
      "billOfLadingNumber": "string",
      "consigneeId": "string",
      "consigneeIdType": "string",
      "shipperId": "string",
      "shipperIdType": "string",
      "operatorCode": "string",
      "operatorName": "string",
      "sealType": "Carrier",
      "sealNumber": "string",
      "seals": [
        {
          "sealNumber": "string",
          "sealType": "Carrier"
        }
      ],
      "reasonForSurvey": "string",
      "verifiedGrossMass": "string",
      "netWeight": "string",
      "certificateType": "string",
      "waypointId": "string",
      "newSlotNumber": "string",
      "fullStatus": "Full",
      "equipmentNumbers": [
        "string"
      ],
      "reasonCode": "string",
      "cutOffType": "Cargo",
      "cutOffDateTime8601": "2020-05-15T15:30:00.000-05:00",
      "cutOffDateTime": 1589574600000,
      "cutOffEventType": "string",
      "harmonizedCodes": [
        "string"
      ],
      "eventType": "string",
      "associatedConsignmentIds": [
        "string"
      ],
      "associatedCarrierBookingNumbers": [
        "string"
      ],
      "associatedBillOfLadingNumbers": [
        "string"
      ],
      "eBLProvider": "string",
      "consignmentsToAccess": [
        {
          "transactionRef": {
            "reference": "string",
            "type": "string"
          },
          "carrierIdentifier": "string",
          "consignmentId": "string",
          "carrierBookingNumber": "string",
          "billOfLadingNumber": "string",
          "equipmentNumbers": [
            "string"
          ]
        }
      ],
      "data": [
        {
          "type": "Barge",
          "value": "string"
        }
      ],
      "document": {
        "description": "string",
        "uri": "string",
        "hash": "string",
        "hashEncodingType": "string",
        "platformDocumentId": "string",
        "platformDocumentVersion": 0
      },
      "documents": [
        {
          "description": "string",
          "uri": "string",
          "hash": "string",
          "hashEncodingType": "string",
          "platformDocumentId": "string",
          "platformDocumentVersion": 0
        }
      ],
      "location": {
        "unlocode": "string",
        "address": {
          "address1": "string",
          "address2": "string",
          "city": "string",
          "stateProvince": "string",
          "zipPostal": "string",
          "country": "US"
        },
        "smdgTerminal": "string",
        "geoCoord": {
          "latitude": 0,
          "longitude": 0
        },
        "splc": "string",
        "gln": "string",
        "type": "UN/Locode",
        "value": "string",
        "description": "string",
        "locationKey": "string"
      },
      "bookingData": {
        "originLocation": {
          "unlocode": "string",
          "address": {
            "address1": "string",
            "address2": "string",
            "city": "string",
            "stateProvince": "string",
            "zipPostal": "string",
            "country": "US"
          },
          "smdgTerminal": "string",
          "geoCoord": {
            "latitude": 0,
            "longitude": 0
          },
          "splc": "string",
          "gln": "string",
          "type": "UN/Locode",
          "value": "string",
          "description": "string",
          "locationKey": "string"
        },
        "destinationLocation": {
          "unlocode": "string",
          "address": {
            "address1": "string",
            "address2": "string",
            "city": "string",
            "stateProvince": "string",
            "zipPostal": "string",
            "country": "US"
          },
          "smdgTerminal": "string",
          "geoCoord": {
            "latitude": 0,
            "longitude": 0
          },
          "splc": "string",
          "gln": "string",
          "type": "UN/Locode",
          "value": "string",
          "description": "string",
          "locationKey": "string"
        },
        "contractCarriageCondition": "SD_SD",
        "departureTime8601": "2020-05-15T15:30:00.000-05:00",
        "departureDateEpoch": 1589574600000,
        "vehicleId": "string",
        "vehicleName": "string",
        "voyageId": "string",
        "transportEquipmentDetails": [
          {
            "equipmentType": "string",
            "equipmentQuantity": 0,
            "description": "string"
          }
        ],
        "commodities": [
          {
            "itemNumber": 0,
            "commodityDescription": "string",
            "commodityHarmonizedCode": "string",
            "commodityQuantity": 0,
            "commodityWeight": "string"
          }
        ]
      },
      "subcontractId": "string",
      "subcontractCBN": "string",
      "subcontractBOL": "string",
      "messages": [
        "string"
      ],
      "consignmentStatus": "Active",
      "transferFromConsignment": {
        "carrierBookingNumber": "string",
        "billOfLadingNumber": "string",
        "consignmentId": "string"
      },
      "partLoadWithConsignment": {
        "carrierBookingNumber": "string",
        "billOfLadingNumber": "string",
        "consignmentId": "string"
      },
      "transferFromEquipment": "string",
      "splitFromConsignment": {
        "carrierBookingNumber": "string",
        "billOfLadingNumber": "string",
        "consignmentId": "string"
      },
      "combinedCarrierBookingNumbers": [
        "string"
      ],
      "partBill": "false",
      "partBillOf": {
        "carrierBookingNumber": "string",
        "billOfLadingNumber": "string",
        "consignmentId": "string"
      },
      "shipmentIdentifier": {
        "reference": "string",
        "type": "string",
        "label": "string"
      },
      "plannedCompletionDate": 1589574600000,
      "plannedCompletionDate8601": "2020-05-15T15:30:00.000-05:00",
      "plannedDeliveryDate": 1589574600000,
      "plannedDeliveryDate8601": "2020-05-15T15:30:00.000-05:00",
      "consignmentIdentifiers": [
        {
          "consignmentId": "string",
          "carrierBookingNumber": "string",
          "billOfLadingNumber": "string"
        }
      ],
      "references": [
        {
          "reference": "string",
          "type": "string",
          "label": "string"
        }
      ],
      "goodsDescription": "string",
      "issuanceTime8601": "2020-05-15T15:30:00.000-05:00",
      "callersPartyRole": "SELLER",
      "parties": [
        {
          "partyRef": "string",
          "partyRole": "BUYER"
        }
      ],
      "consignments": [
        "string"
      ],
      "transportEquipment": [
        "string"
      ],
      "shipments": [
        "string"
      ],
      "transportEquipmentDetails": [
        {
          "equipmentType": "string",
          "equipmentQuantity": 0
        }
      ],
      "equipmentPickUpComments": "string",
      "minimumTemperature": 0,
      "maximumTemperature": 0,
      "unit": "degreeCelsius",
      "temperatureInstructions": "string",
      "eventClassifier": "Planned",
      "transportMode": "Vessel",
      "eventAction": "Arrival",
      "facilityType": "PortTerminal",
      "emptyIndicator": "Empty",
      "eventPriority": 0,
      "releaseParty": {
        "partyRef": "string",
        "orgName": "string",
        "address": {
          "address1": "string",
          "address2": "string",
          "city": "string",
          "stateProvince": "string",
          "zipPostal": "string",
          "country": "US"
        }
      },
      "controlBy": "CUSTOMS",
      "controlEventAction": "CONTROL_START",
      "nationalControlCode": "string",
      "procedure": "IMPORTATION",
      "conditionalRelease": "string",
      "releaseObject": "FULL_CONTAINER",
      "releasedBy": "CUSTOMS",
      "declarationRef": "string",
      "nationalProcedureDetails": {
        "procedureCode": "string",
        "procedureDescription": "string",
        "procedureType": "string",
        "procedureStatus": "string",
        "procedureNumber": "string",
        "createTime8601": "2018-03-10T11:30:00.000-05:00",
        "closeTime8601": "2018-03-10T11:30:00.000-05:00",
        "carrierIdentification": "string",
        "procedureLocation": "string"
      },
      "transportEvents": [
        {
          "eventAction": "Arrival",
          "transportMode": "Rail",
          "eventOccurrenceTime8601": "2018-03-13T11:30:00.000-05:00",
          "location": {
            "unlocode": "NLRTM"
          },
          "transportPlanSequenceNumber": 1,
          "transportationPhase": "Import",
          "vehicleId": "JEV4568",
          "vehicleName": "Vehicle Name",
          "voyageId": "1234",
          "emptyIndicator": "Laden"
        }
      ]
    }
  }
]
   

and this is the python code I am using

import pandas as pd
import json
from pandas.io.json import json_normalize

ruta_archivo_json = 'response_1660310720193.json'
data = pd.read_json(ruta_archivo_json)
df = pd.json_normalize(data, 'transportEvents')
print(df)
df.head(10)

Answers:

I’m not sure what you would like to achieve in the final outcome. But if you run below code you should get something in return

import pandas as pd
import json
# from pandas.io.json import json_normalize    #no need this
pd.options.display.max_colwidth = 100    #default=50
pd.options.display.max_rows = 250        #default=60

data = pd.read_json('response_1660310720193.json')
df = pd.json_normalize(data['event']).T
print(df)

Output:

eventPublishTime                                                                0
eventSubmissionTime                                                             0
correlationId                                                              string
eventName                                                                  string
senderOrgName                                                              string
senderOrgTypes                                                           [string]
originatorId                                                               string
eventOccurrenceTime                                                 1589574600000
eventOccurrenceTime8601                             2020-05-15T15:30:00.000-05:00
fromOceanAggregator                                                         false
originatorName                                                             string

<truncated>

declarationRef                                                             string
transportEvents                           [{'eventAction': 'Arrival', 'transpo...
doc.description                                                            string

<truncated>

You should see ‘transportEvents’ in the df, so if you drill deeper you can get the data

df1 = pd.json_normalize(df.T['transportEvents'].explode().tolist()).T
df1.columns = ['transportEvents']
print(df1)
    
                                           transportEvents
eventAction                                        Arrival
transportMode                                         Rail
eventOccurrenceTime8601      2018-03-13T11:30:00.000-05:00
transportPlanSequenceNumber                              1
transportationPhase                                 Import
vehicleId                                          JEV4568
vehicleName                                   Vehicle Name
voyageId                                              1234
emptyIndicator                                       Laden
location.unlocode                                    NLRTM

After the above exploration, you would then know the correct keys to apply the function pd.json_normalize(), with the first key event and nested with second key transportEvents, same output as above

df1 = pd.json_normalize(data, ['event', 'transportEvents'])
df1.columns = ['transportEvents']
print(df1)
Answered By: perpetualstudent

I am not sure what is the expected result, but from what I understood, the below code should help:

import pandas as pd
import json
from pandas.io.json import json_normalize

with open('response_1660310720193.json','r') as f:
    data = json.loads(f.read())

ruta_archivo_json = 'response_1660310720193.json'
df = pd.json_normalize(data, ["event","transportEvents"])
print(df)
df.head(10)
Answered By: Meh