Is it possible to upload a csv file in Dash and also store it as a pandas DataFrame?
Question:
I am developing a dashboard in Dash with Python and in one of the core components I am trying to upload a csv file and display it in a datatable format (see below). That works well (see picture), I followed this example: https://dash.plotly.com/dash-core-components/upload
However, I would also like to use the table as a pandas DataFrame later in the code. Since I upload the csv file after I’ve run the code for the dashboard, I could not find a way to return the csv contents as a DataFrame. Any way in which this can be done? My code is below.
Thank you in advance!
###############################################################################
# Upload files
# https://dash.plotly.com/dash-core-components/upload
###############################################################################
def parse_contents(contents, filename, date):
    """Decode one uploaded file and render it as a Bootstrap table.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64 data>"``.
        filename: Name of the uploaded file; it selects the parser
            (``csv`` -> ``pd.read_csv``, ``xls`` -> ``pd.read_excel``).
        date: Last-modified value of the upload. Unused here; kept so the
            signature matches what the callback passes in.

    Returns:
        A ``dbc.Table`` built from the parsed data, or an ``html.Div`` with an
        error message when the file cannot be parsed or its type is not
        recognised.
    """
    # Split on the first comma only: everything after it is the base64 payload.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)

    # Match case-insensitively so that e.g. "DATA.CSV" is accepted as well.
    name = filename.lower()
    df = None
    try:
        if 'csv' in name:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in name:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        print(e)

    # ``df`` is still None when parsing failed OR when the file type was not
    # recognised; previously the latter case crashed on an unbound ``df``.
    if df is None:
        return html.Div([
            'There was an error processing this file.'
        ])

    return dbc.Table.from_dataframe(df)
@app.callback(Output('output-data-upload', 'children'),
              [Input('upload-data', 'contents')],
              [State('upload-data', 'filename'),
               State('upload-data', 'last_modified')])
def update_output(list_of_contents, list_of_names, list_of_dates):
    """Render every uploaded file into the 'output-data-upload' container.

    Each (contents, filename, last_modified) triple is handed to
    ``parse_contents``; the list of results becomes the container's children.
    Returns ``None`` while nothing has been uploaded.
    """
    if list_of_contents is None:
        return None
    # ``map`` over several iterables stops at the shortest one, like ``zip``.
    return list(map(parse_contents,
                    list_of_contents, list_of_names, list_of_dates))
if __name__ == '__main__':
    # ``run_server`` is obsolete in recent Dash releases in favour of ``run``;
    # fall back to it only when the installed Dash predates ``app.run``.
    run = app.run if hasattr(app, 'run') else app.run_server
    run(port=8051, debug=False)
Answers:
When you define the `parse_contents` function, you can simply return `df`:
def parse_contents(contents, filename):
    """Decode one uploaded file into a pandas DataFrame.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64 data>"``.
        filename: Name of the uploaded file; it selects the parser
            (``csv`` -> ``pd.read_csv``, ``xls`` -> ``pd.read_excel``).

    Returns:
        The parsed ``pd.DataFrame``. When the file cannot be parsed or its
        type is not recognised, an ``html.Div`` with an error message is
        returned instead, so callers should check the result type before
        using DataFrame methods on it.
    """
    # Split on the first comma only: everything after it is the base64 payload.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)

    # Match case-insensitively so that e.g. "DATA.CSV" is accepted as well.
    name = filename.lower()
    df = None
    try:
        if 'csv' in name:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in name:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        print(e)

    # ``df`` is still None when parsing failed OR when the file type was not
    # recognised; previously the latter case crashed on an unbound ``df``.
    if df is None:
        return html.Div([
            'There was an error processing this file.'
        ])

    return df
Then you can call `parse_contents` in your subsequent callbacks to obtain a pandas DataFrame:
@app.callback(
    Output('table-container', 'data'),
    [Input('file_upload', 'contents')],
    State('file_upload', 'filename'))
def filter_df(content, name):
    """Parse the uploaded file and return its filtered rows as table data.

    Args:
        content: ``contents`` value of the 'file_upload' component, or ``None``
            while nothing has been uploaded.
        name: Filename of the upload.

    Returns:
        A list of row dicts (``DataFrame.to_dict('records')``) for the
        'table-container' ``data`` output; an empty list when there is no
        upload yet or the file could not be parsed.
    """
    if content is None:
        # Nothing uploaded yet: there is nothing to parse, show no rows.
        return []

    df = parse_contents(content, name)
    if not isinstance(df, pd.DataFrame):
        # parse_contents returns an error component instead of a DataFrame
        # when the file cannot be read.
        return []

    # ``df`` already is a DataFrame; no JSON round-trip is needed.
    # Example filter; assumes the upload has a ``column_A`` column. The value
    # is written without a leading zero because ``012345`` is not a valid
    # integer literal in a query expression.
    dff_pandas_filtered = df.query('column_A == 12345')
    return dff_pandas_filtered.to_dict('records')
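You can keep the DataFrame as a global variable (note that a global is shared by everyone using the app, so this only suits single-user use). Here is the code for a single-file upload.
1. Layout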
# Layout fragment: an upload control (id 'upload-data') styled as a dashed
# drop zone, followed by the Div (id 'output-data-upload').
# The closing "])" at the end belongs to a container opened outside this snippet.
dcc.Upload(
id='upload-data',
children=html.Div([
'Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '100%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'center',
'margin': '10px'
},
# multiple=False: the control is configured for a single file, not several
multiple=False
),
html.Div(id='output-data-upload'),
])
2. Function
def parse_contents(contents, filename, date):
    """Parse one uploaded file, publish it as the global ``df`` and render it.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64 data>"``.
        filename: Name of the uploaded file; it selects the parser
            (``csv`` -> ``pd.read_csv``, ``xls`` -> ``pd.read_excel``).
        date: Last-modified timestamp of the upload, in seconds, as accepted
            by ``datetime.datetime.fromtimestamp``.

    Returns:
        An ``html.Div`` showing the filename, timestamp, a ``DataTable`` of the
        data and the first 200 characters of the raw payload; or an
        ``html.Div`` with an error message when the file cannot be parsed or
        its type is not recognised.

    Side effects:
        On success, rebinds the module-level ``df`` to the parsed DataFrame.
    """
    # NOTE(review): module-level state is shared by every user and request
    # served by this process, so concurrent uploads overwrite each other.
    # Kept because other code reads ``df``; consider dcc.Store instead.
    global df  # define data frame as global

    # Split on the first comma only: everything after it is the base64 payload.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)

    # Match case-insensitively so that e.g. "DATA.CSV" is accepted as well.
    name = filename.lower()
    parsed = None
    try:
        if 'csv' in name:
            # Assume that the user uploaded a CSV file
            parsed = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in name:
            # Assume that the user uploaded an excel file
            parsed = pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        print(e)

    # ``parsed`` is None on a parse failure OR an unrecognised file type.
    # Previously the latter rendered a stale global ``df`` from an earlier
    # upload, or raised NameError when no upload had succeeded yet.
    if parsed is None:
        return html.Div([
            'There was an error processing this file.'
        ])

    # Publish only a successfully parsed frame.
    df = parsed

    return html.Div([
        html.H5(filename),
        html.H6(datetime.datetime.fromtimestamp(date)),
        dash_table.DataTable(
            data=parsed.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in parsed.columns]
        ),
        html.Hr(),  # horizontal line
        # For debugging, display the raw contents provided by the web browser
        html.Div('Raw Content'),
        html.Pre(contents[0:200] + '...', style={
            'whiteSpace': 'pre-wrap',
            'wordBreak': 'break-all'
        })
    ])
3. Callback
@app.callback(Output('output-data-upload', 'children'),
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'),
              State('upload-data', 'last_modified'))
def update_output(content, filename, date):
    """Render the single uploaded file and echo the stored DataFrame.

    Args:
        content: ``contents`` of the 'upload-data' component, or ``None``
            while nothing has been uploaded.
        filename: Filename of the upload.
        date: ``last_modified`` value of the upload.

    Returns:
        Whatever ``parse_contents`` returns for the upload, or ``None`` when
        there is no upload yet.
    """
    if content is None:
        # No file yet: calling parse_contents would fail on ``None.split``.
        return None

    children = parse_contents(content, filename, date)

    # Read the global defensively: it does not exist until a file has been
    # parsed successfully, and a bare ``df`` would then raise NameError.
    uploaded = globals().get('df')
    print(type(uploaded))  # this will show data type as a pandas dataframe
    print(uploaded)
    return children
I am developing a dashboard in Dash with Python and in one of the core components I am trying to upload a csv file and display it in a datatable format (see below). That works well (see picture), I followed this example: https://dash.plotly.com/dash-core-components/upload
However, I would also like to use the table as a pandas DataFrame later in the code. Since I upload the csv file after I’ve run the code for the dashboard, I could not find a way to return the csv contents as a DataFrame. Any way in which this can be done? My code is below.
Thank you in advance!
###############################################################################
# Upload files
# https://dash.plotly.com/dash-core-components/upload
###############################################################################
def parse_contents(contents, filename, date):
    """Decode one uploaded file and render it as a Bootstrap table.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64 data>"``.
        filename: Name of the uploaded file; it selects the parser
            (``csv`` -> ``pd.read_csv``, ``xls`` -> ``pd.read_excel``).
        date: Last-modified value of the upload. Unused here; kept so the
            signature matches what the callback passes in.

    Returns:
        A ``dbc.Table`` built from the parsed data, or an ``html.Div`` with an
        error message when the file cannot be parsed or its type is not
        recognised.
    """
    # Split on the first comma only: everything after it is the base64 payload.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)

    # Match case-insensitively so that e.g. "DATA.CSV" is accepted as well.
    name = filename.lower()
    df = None
    try:
        if 'csv' in name:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in name:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        print(e)

    # ``df`` is still None when parsing failed OR when the file type was not
    # recognised; previously the latter case crashed on an unbound ``df``.
    if df is None:
        return html.Div([
            'There was an error processing this file.'
        ])

    return dbc.Table.from_dataframe(df)
@app.callback(Output('output-data-upload', 'children'),
              [Input('upload-data', 'contents')],
              [State('upload-data', 'filename'),
               State('upload-data', 'last_modified')])
def update_output(list_of_contents, list_of_names, list_of_dates):
    """Render every uploaded file into the 'output-data-upload' container.

    Each (contents, filename, last_modified) triple is handed to
    ``parse_contents``; the list of results becomes the container's children.
    Returns ``None`` while nothing has been uploaded.
    """
    if list_of_contents is None:
        return None
    # ``map`` over several iterables stops at the shortest one, like ``zip``.
    return list(map(parse_contents,
                    list_of_contents, list_of_names, list_of_dates))
if __name__ == '__main__':
    # ``run_server`` is obsolete in recent Dash releases in favour of ``run``;
    # fall back to it only when the installed Dash predates ``app.run``.
    run = app.run if hasattr(app, 'run') else app.run_server
    run(port=8051, debug=False)
When you define the `parse_contents` function, you can simply return `df`:
def parse_contents(contents, filename):
    """Decode one uploaded file into a pandas DataFrame.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64 data>"``.
        filename: Name of the uploaded file; it selects the parser
            (``csv`` -> ``pd.read_csv``, ``xls`` -> ``pd.read_excel``).

    Returns:
        The parsed ``pd.DataFrame``. When the file cannot be parsed or its
        type is not recognised, an ``html.Div`` with an error message is
        returned instead, so callers should check the result type before
        using DataFrame methods on it.
    """
    # Split on the first comma only: everything after it is the base64 payload.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)

    # Match case-insensitively so that e.g. "DATA.CSV" is accepted as well.
    name = filename.lower()
    df = None
    try:
        if 'csv' in name:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in name:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        print(e)

    # ``df`` is still None when parsing failed OR when the file type was not
    # recognised; previously the latter case crashed on an unbound ``df``.
    if df is None:
        return html.Div([
            'There was an error processing this file.'
        ])

    return df
Then you can call `parse_contents` in your subsequent callbacks to obtain a pandas DataFrame:
@app.callback(
    Output('table-container', 'data'),
    [Input('file_upload', 'contents')],
    State('file_upload', 'filename'))
def filter_df(content, name):
    """Parse the uploaded file and return its filtered rows as table data.

    Args:
        content: ``contents`` value of the 'file_upload' component, or ``None``
            while nothing has been uploaded.
        name: Filename of the upload.

    Returns:
        A list of row dicts (``DataFrame.to_dict('records')``) for the
        'table-container' ``data`` output; an empty list when there is no
        upload yet or the file could not be parsed.
    """
    if content is None:
        # Nothing uploaded yet: there is nothing to parse, show no rows.
        return []

    df = parse_contents(content, name)
    if not isinstance(df, pd.DataFrame):
        # parse_contents returns an error component instead of a DataFrame
        # when the file cannot be read.
        return []

    # ``df`` already is a DataFrame; no JSON round-trip is needed.
    # Example filter; assumes the upload has a ``column_A`` column. The value
    # is written without a leading zero because ``012345`` is not a valid
    # integer literal in a query expression.
    dff_pandas_filtered = df.query('column_A == 12345')
    return dff_pandas_filtered.to_dict('records')
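You can keep the DataFrame as a global variable (note that a global is shared by everyone using the app, so this only suits single-user use). Here is the code for a single-file upload.
1. Layout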
# Layout fragment: an upload control (id 'upload-data') styled as a dashed
# drop zone, followed by the Div (id 'output-data-upload').
# The closing "])" at the end belongs to a container opened outside this snippet.
dcc.Upload(
id='upload-data',
children=html.Div([
'Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '100%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'center',
'margin': '10px'
},
# multiple=False: the control is configured for a single file, not several
multiple=False
),
html.Div(id='output-data-upload'),
])
2. Function
def parse_contents(contents, filename, date):
    """Parse one uploaded file, publish it as the global ``df`` and render it.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64 data>"``.
        filename: Name of the uploaded file; it selects the parser
            (``csv`` -> ``pd.read_csv``, ``xls`` -> ``pd.read_excel``).
        date: Last-modified timestamp of the upload, in seconds, as accepted
            by ``datetime.datetime.fromtimestamp``.

    Returns:
        An ``html.Div`` showing the filename, timestamp, a ``DataTable`` of the
        data and the first 200 characters of the raw payload; or an
        ``html.Div`` with an error message when the file cannot be parsed or
        its type is not recognised.

    Side effects:
        On success, rebinds the module-level ``df`` to the parsed DataFrame.
    """
    # NOTE(review): module-level state is shared by every user and request
    # served by this process, so concurrent uploads overwrite each other.
    # Kept because other code reads ``df``; consider dcc.Store instead.
    global df  # define data frame as global

    # Split on the first comma only: everything after it is the base64 payload.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)

    # Match case-insensitively so that e.g. "DATA.CSV" is accepted as well.
    name = filename.lower()
    parsed = None
    try:
        if 'csv' in name:
            # Assume that the user uploaded a CSV file
            parsed = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in name:
            # Assume that the user uploaded an excel file
            parsed = pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        print(e)

    # ``parsed`` is None on a parse failure OR an unrecognised file type.
    # Previously the latter rendered a stale global ``df`` from an earlier
    # upload, or raised NameError when no upload had succeeded yet.
    if parsed is None:
        return html.Div([
            'There was an error processing this file.'
        ])

    # Publish only a successfully parsed frame.
    df = parsed

    return html.Div([
        html.H5(filename),
        html.H6(datetime.datetime.fromtimestamp(date)),
        dash_table.DataTable(
            data=parsed.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in parsed.columns]
        ),
        html.Hr(),  # horizontal line
        # For debugging, display the raw contents provided by the web browser
        html.Div('Raw Content'),
        html.Pre(contents[0:200] + '...', style={
            'whiteSpace': 'pre-wrap',
            'wordBreak': 'break-all'
        })
    ])
3. Callback
@app.callback(Output('output-data-upload', 'children'),
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'),
              State('upload-data', 'last_modified'))
def update_output(content, filename, date):
    """Render the single uploaded file and echo the stored DataFrame.

    Args:
        content: ``contents`` of the 'upload-data' component, or ``None``
            while nothing has been uploaded.
        filename: Filename of the upload.
        date: ``last_modified`` value of the upload.

    Returns:
        Whatever ``parse_contents`` returns for the upload, or ``None`` when
        there is no upload yet.
    """
    if content is None:
        # No file yet: calling parse_contents would fail on ``None.split``.
        return None

    children = parse_contents(content, filename, date)

    # Read the global defensively: it does not exist until a file has been
    # parsed successfully, and a bare ``df`` would then raise NameError.
    uploaded = globals().get('df')
    print(type(uploaded))  # this will show data type as a pandas dataframe
    print(uploaded)
    return children