Serializing complex object containing multiple nested objects with data frames
Question:
Below should be a runnable sample of code. i have a Chart1 object which can contain many panes, and each pane can can contain many series. I would like to serialize this to json so i can send to a flask application to render. To do deal with the dataframes, i am using a custom encoder (ChartEncoder below):
from abc import ABC, abstractmethod
from datetime import datetime
import pandas as pd
import copy
import json
from json import JSONEncoder
import pandas_datareader.data as reader
import datetime as dt
class Series1(object):
def __init__(self, data):
self.data = data
class Pane1(object):
def __init__(self, series: Series1 = None , rel_height = None):
self.series = [] if series is None else series
self.rel_height = rel_height
class Chart1(ABC):
def __init__(self, show_volume = True, *args, **kwargs):
self.show_volume = show_volume
self.panes=[Pane1()]
self.symbol = None
self.interval = None
def to_json(self):
obj = copy.copy(self)
obj.data = None
jsn = json.dumps(obj, cls=ChartEncoder)
return jsn
class ChartEncoder(JSONEncoder):
def default(self, obj):
if type(obj) is pd.DataFrame:
return obj.reset_index().to_json(orient="records", date_format = 'iso')
elif type(obj) is pd.Series:
df = pd.DataFrame(obj)
return df.reset_index().to_json(orient="records", date_format = 'iso')
elif hasattr(obj, '__dict__'):
return obj.__dict__
else:
return ''
if __name__ == '__main__':
chart = Chart1()
start = dt.datetime(2022,7,25)
end = dt.datetime(2022,7,29)
tickers = ['AAPL', 'MSFT']
data = {}
for t in tickers:
series = reader.DataReader(t,'yahoo', start, end)
chart.panes[0].series.append(Series1(series))
json = chart.to_json()
print(json)
After running the code there are two problems with the json string returned:
- it looks like there are escape characters being added that can not be read by javascript JSON.parse.
'{"show_volume": true, "panes": [{"series": [{"data": "[{\"Date\":\"2022-07-25T00:00:00.000Z\",\"High\":155.0399932861,\"Low\":152.2799987793,\"Open\":154.0099945068,\"Close\":152.9499969482,\"Volume\":53623900,\"Adj Close\":152.9499969482},{\"Date\":\"2022-07-26T00:00:00.000Z\",\"High\":153.0899963379,\"Low\":150.8000030518,\"Open\":152.2599945068,\"Close\":151.6000061035,\"Volume\":55138700,\"Adj Close\":151.6000061035},{\"Date\":\"2022-07-27T00:00:00.000Z\",\"High\":157.3300018311,\"Low\":152.1600036621,\"Open\":152.5800018311,\"Close\":156.7899932861,\"Volume\":78620700,\"Adj Close\":156.7899932861},{\"Date\":\"2022-07-28T00:00:00.000Z\",\"High\":157.6399993896,\"Low\":154.4100036621,\"Open\":156.9799957275,\"Close\":157.3500061035,\"Volume\":81378700,\"Adj Close\":157.3500061035},{\"Date\":\"2022-07-29T00:00:00.000Z\",\"High\":163.6300048828,\"Low\":159.5,\"Open\":161.2400054932,\"Close\":162.5099945068,\"Volume\":101689200,\"Adj Close\":162.5099945068}]"}, {"data": "[{\"Date\":\"2022-07-25T00:00:00.000Z\",\"High\":261.5,\"Low\":256.8099975586,\"Open\":261.0,\"Close\":258.8299865723,\"Volume\":21056000,\"Adj Close\":258.8299865723},{\"Date\":\"2022-07-26T00:00:00.000Z\",\"High\":259.8800048828,\"Low\":249.5700073242,\"Open\":259.8599853516,\"Close\":251.8999938965,\"Volume\":39348000,\"Adj Close\":251.8999938965},{\"Date\":\"2022-07-27T00:00:00.000Z\",\"High\":270.049987793,\"Low\":258.8500061035,\"Open\":261.1600036621,\"Close\":268.7399902344,\"Volume\":45994000,\"Adj Close\":268.7399902344},{\"Date\":\"2022-07-28T00:00:00.000Z\",\"High\":277.8399963379,\"Low\":267.8699951172,\"Open\":269.75,\"Close\":276.4100036621,\"Volume\":33459300,\"Adj Close\":276.4100036621},{\"Date\":\"2022-07-29T00:00:00.000Z\",\"High\":282.0,\"Low\":276.6300048828,\"Open\":277.700012207,\"Close\":280.7399902344,\"Volume\":32129400,\"Adj Close\":280.7399902344}]"}], "rel_height": null}], "symbol": null, "interval": null, "data": null}'
- After stripping away these characters manually, im left with the below. but even this is not a valid parseable json according to: https://jsonformatter.org/json-parser. You will notice that this is because the the series.data property ("data:" below) is quoted, as opposed to an array
{"show_volume": true, "panes": [{"series": [{"data": "[{"Date":"2022-07-25T00:00:00.000Z","High":155.0399932861,"Low":152.2799987793,"Open":154.0099945068,"Close":152.9499969482,"Volume":53623900,"Adj Close":152.9499969482},{"Date":"2022-07-26T00:00:00.000Z","High":153.0899963379,"Low":150.8000030518,"Open":152.2599945068,"Close":151.6000061035,"Volume":55138700,"Adj Close":151.6000061035},{"Date":"2022-07-27T00:00:00.000Z","High":157.3300018311,"Low":152.1600036621,"Open":152.5800018311,"Close":156.7899932861,"Volume":78620700,"Adj Close":156.7899932861},{"Date":"2022-07-28T00:00:00.000Z","High":157.6399993896,"Low":154.4100036621,"Open":156.9799957275,"Close":157.3500061035,"Volume":81378700,"Adj Close":157.3500061035},{"Date":"2022-07-29T00:00:00.000Z","High":163.6300048828,"Low":159.5,"Open":161.2400054932,"Close":162.5099945068,"Volume":101689200,"Adj Close":162.5099945068}]"}, {"data": "[{"Date":"2022-07-25T00:00:00.000Z","High":261.5,"Low":256.8099975586,"Open":261.0,"Close":258.8299865723,"Volume":21056000,"Adj Close":258.8299865723},{"Date":"2022-07-26T00:00:00.000Z","High":259.8800048828,"Low":249.5700073242,"Open":259.8599853516,"Close":251.8999938965,"Volume":39348000,"Adj Close":251.8999938965},{"Date":"2022-07-27T00:00:00.000Z","High":270.049987793,"Low":258.8500061035,"Open":261.1600036621,"Close":268.7399902344,"Volume":45994000,"Adj Close":268.7399902344},{"Date":"2022-07-28T00:00:00.000Z","High":277.8399963379,"Low":267.8699951172,"Open":269.75,"Close":276.4100036621,"Volume":33459300,"Adj Close":276.4100036621},{"Date":"2022-07-29T00:00:00.000Z","High":282.0,"Low":276.6300048828,"Open":277.700012207,"Close":280.7399902344,"Volume":32129400,"Adj Close":280.7399902344}]"}], "rel_height": null}], "symbol": null, "interval": null, "data": null}
Any help with being able to avoid these two issues would be grateful
Answers:
For starters, the JSON that is emitted is perfectly parsble by Javascript JSON.load
, the issue is that your default
implementation returns a str
object, so that gets serialized as a JSON str
.
You probably want to use to_dict
(which returns a dict
) instead of to_json
, you just have to handle the pd.Timestamp
objects:
class ChartEncoder(JSONEncoder):
def default(self, obj):
if isinstance(obj, (pd.DataFrame, pd.Series)):
return obj.to_dict(orient="records")
elif isinstance(obj, pd.Timestamp):
return obj.isoformat()
elif hasattr(obj, '__dict__'):
return obj.__dict__
else:
# you probably want this, it doesn't make sense to return an empty string
return JSONEncoder.default(self, obj)
Note, return obj.__dict__
probably isn’t the right way to do this. You should handle your custom types explicitly.
Below should be a runnable sample of code. i have a Chart1 object which can contain many panes, and each pane can can contain many series. I would like to serialize this to json so i can send to a flask application to render. To do deal with the dataframes, i am using a custom encoder (ChartEncoder below):
from abc import ABC, abstractmethod
from datetime import datetime
import pandas as pd
import copy
import json
from json import JSONEncoder
import pandas_datareader.data as reader
import datetime as dt
class Series1(object):
def __init__(self, data):
self.data = data
class Pane1(object):
def __init__(self, series: Series1 = None , rel_height = None):
self.series = [] if series is None else series
self.rel_height = rel_height
class Chart1(ABC):
def __init__(self, show_volume = True, *args, **kwargs):
self.show_volume = show_volume
self.panes=[Pane1()]
self.symbol = None
self.interval = None
def to_json(self):
obj = copy.copy(self)
obj.data = None
jsn = json.dumps(obj, cls=ChartEncoder)
return jsn
class ChartEncoder(JSONEncoder):
def default(self, obj):
if type(obj) is pd.DataFrame:
return obj.reset_index().to_json(orient="records", date_format = 'iso')
elif type(obj) is pd.Series:
df = pd.DataFrame(obj)
return df.reset_index().to_json(orient="records", date_format = 'iso')
elif hasattr(obj, '__dict__'):
return obj.__dict__
else:
return ''
if __name__ == '__main__':
chart = Chart1()
start = dt.datetime(2022,7,25)
end = dt.datetime(2022,7,29)
tickers = ['AAPL', 'MSFT']
data = {}
for t in tickers:
series = reader.DataReader(t,'yahoo', start, end)
chart.panes[0].series.append(Series1(series))
json = chart.to_json()
print(json)
After running the code there are two problems with the json string returned:
- it looks like there are escape characters being added that can not be read by javascript JSON.parse.
'{"show_volume": true, "panes": [{"series": [{"data": "[{\"Date\":\"2022-07-25T00:00:00.000Z\",\"High\":155.0399932861,\"Low\":152.2799987793,\"Open\":154.0099945068,\"Close\":152.9499969482,\"Volume\":53623900,\"Adj Close\":152.9499969482},{\"Date\":\"2022-07-26T00:00:00.000Z\",\"High\":153.0899963379,\"Low\":150.8000030518,\"Open\":152.2599945068,\"Close\":151.6000061035,\"Volume\":55138700,\"Adj Close\":151.6000061035},{\"Date\":\"2022-07-27T00:00:00.000Z\",\"High\":157.3300018311,\"Low\":152.1600036621,\"Open\":152.5800018311,\"Close\":156.7899932861,\"Volume\":78620700,\"Adj Close\":156.7899932861},{\"Date\":\"2022-07-28T00:00:00.000Z\",\"High\":157.6399993896,\"Low\":154.4100036621,\"Open\":156.9799957275,\"Close\":157.3500061035,\"Volume\":81378700,\"Adj Close\":157.3500061035},{\"Date\":\"2022-07-29T00:00:00.000Z\",\"High\":163.6300048828,\"Low\":159.5,\"Open\":161.2400054932,\"Close\":162.5099945068,\"Volume\":101689200,\"Adj Close\":162.5099945068}]"}, {"data": "[{\"Date\":\"2022-07-25T00:00:00.000Z\",\"High\":261.5,\"Low\":256.8099975586,\"Open\":261.0,\"Close\":258.8299865723,\"Volume\":21056000,\"Adj Close\":258.8299865723},{\"Date\":\"2022-07-26T00:00:00.000Z\",\"High\":259.8800048828,\"Low\":249.5700073242,\"Open\":259.8599853516,\"Close\":251.8999938965,\"Volume\":39348000,\"Adj Close\":251.8999938965},{\"Date\":\"2022-07-27T00:00:00.000Z\",\"High\":270.049987793,\"Low\":258.8500061035,\"Open\":261.1600036621,\"Close\":268.7399902344,\"Volume\":45994000,\"Adj Close\":268.7399902344},{\"Date\":\"2022-07-28T00:00:00.000Z\",\"High\":277.8399963379,\"Low\":267.8699951172,\"Open\":269.75,\"Close\":276.4100036621,\"Volume\":33459300,\"Adj Close\":276.4100036621},{\"Date\":\"2022-07-29T00:00:00.000Z\",\"High\":282.0,\"Low\":276.6300048828,\"Open\":277.700012207,\"Close\":280.7399902344,\"Volume\":32129400,\"Adj Close\":280.7399902344}]"}], "rel_height": null}], "symbol": null, "interval": null, "data": null}'
- After stripping away these characters manually, im left with the below. but even this is not a valid parseable json according to: https://jsonformatter.org/json-parser. You will notice that this is because the the series.data property ("data:" below) is quoted, as opposed to an array
{"show_volume": true, "panes": [{"series": [{"data": "[{"Date":"2022-07-25T00:00:00.000Z","High":155.0399932861,"Low":152.2799987793,"Open":154.0099945068,"Close":152.9499969482,"Volume":53623900,"Adj Close":152.9499969482},{"Date":"2022-07-26T00:00:00.000Z","High":153.0899963379,"Low":150.8000030518,"Open":152.2599945068,"Close":151.6000061035,"Volume":55138700,"Adj Close":151.6000061035},{"Date":"2022-07-27T00:00:00.000Z","High":157.3300018311,"Low":152.1600036621,"Open":152.5800018311,"Close":156.7899932861,"Volume":78620700,"Adj Close":156.7899932861},{"Date":"2022-07-28T00:00:00.000Z","High":157.6399993896,"Low":154.4100036621,"Open":156.9799957275,"Close":157.3500061035,"Volume":81378700,"Adj Close":157.3500061035},{"Date":"2022-07-29T00:00:00.000Z","High":163.6300048828,"Low":159.5,"Open":161.2400054932,"Close":162.5099945068,"Volume":101689200,"Adj Close":162.5099945068}]"}, {"data": "[{"Date":"2022-07-25T00:00:00.000Z","High":261.5,"Low":256.8099975586,"Open":261.0,"Close":258.8299865723,"Volume":21056000,"Adj Close":258.8299865723},{"Date":"2022-07-26T00:00:00.000Z","High":259.8800048828,"Low":249.5700073242,"Open":259.8599853516,"Close":251.8999938965,"Volume":39348000,"Adj Close":251.8999938965},{"Date":"2022-07-27T00:00:00.000Z","High":270.049987793,"Low":258.8500061035,"Open":261.1600036621,"Close":268.7399902344,"Volume":45994000,"Adj Close":268.7399902344},{"Date":"2022-07-28T00:00:00.000Z","High":277.8399963379,"Low":267.8699951172,"Open":269.75,"Close":276.4100036621,"Volume":33459300,"Adj Close":276.4100036621},{"Date":"2022-07-29T00:00:00.000Z","High":282.0,"Low":276.6300048828,"Open":277.700012207,"Close":280.7399902344,"Volume":32129400,"Adj Close":280.7399902344}]"}], "rel_height": null}], "symbol": null, "interval": null, "data": null}
Any help with being able to avoid these two issues would be grateful
For starters, the JSON that is emitted is perfectly parsble by Javascript JSON.load
, the issue is that your default
implementation returns a str
object, so that gets serialized as a JSON str
.
You probably want to use to_dict
(which returns a dict
) instead of to_json
, you just have to handle the pd.Timestamp
objects:
class ChartEncoder(JSONEncoder):
def default(self, obj):
if isinstance(obj, (pd.DataFrame, pd.Series)):
return obj.to_dict(orient="records")
elif isinstance(obj, pd.Timestamp):
return obj.isoformat()
elif hasattr(obj, '__dict__'):
return obj.__dict__
else:
# you probably want this, it doesn't make sense to return an empty string
return JSONEncoder.default(self, obj)
Note, return obj.__dict__
probably isn’t the right way to do this. You should handle your custom types explicitly.