Serializing complex object containing multiple nested objects with data frames

Question:

Below is a runnable code sample. I have a Chart1 object which can contain many panes, and each pane can contain many series. I would like to serialize this to JSON so I can send it to a Flask application to render. To deal with the DataFrames, I am using a custom encoder (ChartEncoder below):

from abc import ABC, abstractmethod
from datetime import datetime
import pandas as pd
import copy
import json
from json import JSONEncoder
import pandas_datareader.data as reader
import datetime as dt

class Series1:
    """Thin wrapper that holds one data set for a chart series."""

    def __init__(self, data):
        # The payload is stored by reference; it is not copied or validated.
        self.data = data

class Pane1(object):
    """A chart pane that holds a list of series.

    Args:
        series: Existing list of series objects to use for this pane. When
            omitted (None), a fresh empty list is created per instance, so
            panes never share a list.
        rel_height: Stored unchanged on the instance; this class does not
            interpret it.
    """

    # The annotation is a string forward reference: the attribute is a list of
    # Series1 objects (callers append to it), not a single Series1.
    def __init__(self, series: "list[Series1] | None" = None, rel_height=None):
        self.series = [] if series is None else series
        self.rel_height = rel_height

class Chart1(ABC):
    """Chart made of panes, with a helper to serialize itself to JSON.

    A new chart starts with a single empty pane and no symbol or interval.
    """

    def __init__(self, show_volume=True, *args, **kwargs):
        # Extra positional and keyword arguments are accepted but unused.
        self.show_volume = show_volume
        self.panes = [Pane1()]
        self.symbol = None
        self.interval = None

    def to_json(self):
        """Return this chart encoded as a JSON string via ChartEncoder.

        Works on a shallow copy, so setting ``data`` to None below does not
        touch the original chart (the panes list itself is still shared).
        """
        snapshot = copy.copy(self)
        # Adds a "data": null entry to the serialized output.
        snapshot.data = None
        return json.dumps(snapshot, cls=ChartEncoder)

class ChartEncoder(JSONEncoder):
    """JSON encoder fallback for pandas objects and plain Python objects.

    Note that pandas objects are rendered with ``to_json``, which returns a
    ``str``; the json module then encodes that str as a JSON *string*, so the
    records end up quoted and escaped inside the final document.
    """

    def default(self, obj):
        exact_type = type(obj)

        # Exact type match only: subclasses of DataFrame/Series fall through.
        if exact_type is pd.DataFrame or exact_type is pd.Series:
            frame = obj if exact_type is pd.DataFrame else pd.DataFrame(obj)
            # reset_index() turns the index into a regular column so it is
            # included in every record.
            return frame.reset_index().to_json(orient="records", date_format='iso')

        # Any other object with attributes is serialized as its attribute dict.
        if hasattr(obj, '__dict__'):
            return obj.__dict__

        # Everything else is silently replaced by an empty string.
        return ''

if __name__ == '__main__':
    # Build a chart whose first pane holds one data set per ticker, then
    # print the chart's JSON representation.
    chart = Chart1()
    start = dt.datetime(2022, 7, 25)
    end = dt.datetime(2022, 7, 29)
    tickers = ['AAPL', 'MSFT']
    for t in tickers:
        # Fetches data for the ticker from the 'yahoo' source for the range.
        prices = reader.DataReader(t, 'yahoo', start, end)
        chart.panes[0].series.append(Series1(prices))

    # Deliberately not named `json`: rebinding that name at module level would
    # shadow the imported json module and break any later chart.to_json() call.
    chart_json = chart.to_json()
    print(chart_json)

After running the code there are two problems with the json string returned:

  1. It looks like escape characters are being added that cannot be read by JavaScript's JSON.parse.
'{"show_volume": true, "panes": [{"series": [{"data": "[{\"Date\":\"2022-07-25T00:00:00.000Z\",\"High\":155.0399932861,\"Low\":152.2799987793,\"Open\":154.0099945068,\"Close\":152.9499969482,\"Volume\":53623900,\"Adj Close\":152.9499969482},{\"Date\":\"2022-07-26T00:00:00.000Z\",\"High\":153.0899963379,\"Low\":150.8000030518,\"Open\":152.2599945068,\"Close\":151.6000061035,\"Volume\":55138700,\"Adj Close\":151.6000061035},{\"Date\":\"2022-07-27T00:00:00.000Z\",\"High\":157.3300018311,\"Low\":152.1600036621,\"Open\":152.5800018311,\"Close\":156.7899932861,\"Volume\":78620700,\"Adj Close\":156.7899932861},{\"Date\":\"2022-07-28T00:00:00.000Z\",\"High\":157.6399993896,\"Low\":154.4100036621,\"Open\":156.9799957275,\"Close\":157.3500061035,\"Volume\":81378700,\"Adj Close\":157.3500061035},{\"Date\":\"2022-07-29T00:00:00.000Z\",\"High\":163.6300048828,\"Low\":159.5,\"Open\":161.2400054932,\"Close\":162.5099945068,\"Volume\":101689200,\"Adj Close\":162.5099945068}]"}, {"data": "[{\"Date\":\"2022-07-25T00:00:00.000Z\",\"High\":261.5,\"Low\":256.8099975586,\"Open\":261.0,\"Close\":258.8299865723,\"Volume\":21056000,\"Adj Close\":258.8299865723},{\"Date\":\"2022-07-26T00:00:00.000Z\",\"High\":259.8800048828,\"Low\":249.5700073242,\"Open\":259.8599853516,\"Close\":251.8999938965,\"Volume\":39348000,\"Adj Close\":251.8999938965},{\"Date\":\"2022-07-27T00:00:00.000Z\",\"High\":270.049987793,\"Low\":258.8500061035,\"Open\":261.1600036621,\"Close\":268.7399902344,\"Volume\":45994000,\"Adj Close\":268.7399902344},{\"Date\":\"2022-07-28T00:00:00.000Z\",\"High\":277.8399963379,\"Low\":267.8699951172,\"Open\":269.75,\"Close\":276.4100036621,\"Volume\":33459300,\"Adj Close\":276.4100036621},{\"Date\":\"2022-07-29T00:00:00.000Z\",\"High\":282.0,\"Low\":276.6300048828,\"Open\":277.700012207,\"Close\":280.7399902344,\"Volume\":32129400,\"Adj Close\":280.7399902344}]"}], "rel_height": null}], "symbol": null, "interval": null, "data": null}'
  2. After stripping away these characters manually, I'm left with the below, but even this is not valid, parseable JSON according to: https://jsonformatter.org/json-parser. You will notice that this is because the series.data property ("data:" below) is a quoted string, as opposed to an array.
{"show_volume": true, "panes": [{"series": [{"data": "[{"Date":"2022-07-25T00:00:00.000Z","High":155.0399932861,"Low":152.2799987793,"Open":154.0099945068,"Close":152.9499969482,"Volume":53623900,"Adj Close":152.9499969482},{"Date":"2022-07-26T00:00:00.000Z","High":153.0899963379,"Low":150.8000030518,"Open":152.2599945068,"Close":151.6000061035,"Volume":55138700,"Adj Close":151.6000061035},{"Date":"2022-07-27T00:00:00.000Z","High":157.3300018311,"Low":152.1600036621,"Open":152.5800018311,"Close":156.7899932861,"Volume":78620700,"Adj Close":156.7899932861},{"Date":"2022-07-28T00:00:00.000Z","High":157.6399993896,"Low":154.4100036621,"Open":156.9799957275,"Close":157.3500061035,"Volume":81378700,"Adj Close":157.3500061035},{"Date":"2022-07-29T00:00:00.000Z","High":163.6300048828,"Low":159.5,"Open":161.2400054932,"Close":162.5099945068,"Volume":101689200,"Adj Close":162.5099945068}]"}, {"data": "[{"Date":"2022-07-25T00:00:00.000Z","High":261.5,"Low":256.8099975586,"Open":261.0,"Close":258.8299865723,"Volume":21056000,"Adj Close":258.8299865723},{"Date":"2022-07-26T00:00:00.000Z","High":259.8800048828,"Low":249.5700073242,"Open":259.8599853516,"Close":251.8999938965,"Volume":39348000,"Adj Close":251.8999938965},{"Date":"2022-07-27T00:00:00.000Z","High":270.049987793,"Low":258.8500061035,"Open":261.1600036621,"Close":268.7399902344,"Volume":45994000,"Adj Close":268.7399902344},{"Date":"2022-07-28T00:00:00.000Z","High":277.8399963379,"Low":267.8699951172,"Open":269.75,"Close":276.4100036621,"Volume":33459300,"Adj Close":276.4100036621},{"Date":"2022-07-29T00:00:00.000Z","High":282.0,"Low":276.6300048828,"Open":277.700012207,"Close":280.7399902344,"Volume":32129400,"Adj Close":280.7399902344}]"}], "rel_height": null}], "symbol": null, "interval": null, "data": null}

Any help with avoiding these two issues would be appreciated.

Asked By: mike01010

||

Answers:

For starters, the JSON that is emitted is perfectly parsable by JavaScript's JSON.parse. The issue is that your default implementation returns a str object, so that gets serialized as a JSON string rather than as a nested array.

You probably want to use to_dict (which returns plain Python objects; with orient="records", a list of dicts) instead of to_json. You just have to handle the pd.Timestamp objects:

class ChartEncoder(JSONEncoder):
    """JSON encoder fallback that emits pandas data as real JSON arrays.

    Returns plain Python containers (not pre-encoded strings) so the json
    module nests them as arrays/objects instead of quoting them.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Raises:
            TypeError: via the base class, for objects that are neither
                pandas data, a Timestamp, nor something with ``__dict__``.
        """
        # Series.to_dict() has no `orient` parameter, so promote a Series to a
        # one-column frame and share the DataFrame path.
        if isinstance(obj, pd.Series):
            obj = obj.to_frame()
        if isinstance(obj, pd.DataFrame):
            # to_dict(orient="records") omits the index; reset_index() first
            # so the index (e.g. a date index) appears in every record.
            return obj.reset_index().to_dict(orient="records")
        if isinstance(obj, pd.Timestamp):
            return obj.isoformat()
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        # you probably want this, it doesn't make sense to return an empty string
        return super().default(obj)

Note, return obj.__dict__ probably isn’t the right way to do this. You should handle your custom types explicitly.

Answered By: juanpa.arrivillaga
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.