How to add a hyperlink for downloading a dataframe as csv using Pandas
Question:
I’m working with a .csv
cars dataset (that I got from here) and I’m trying to:
- Convert this
.csv
to a pandas dataframe (Done, see df)
- Group the dataframe by cars models (Done, see grouped_df)
- Make a column with a hyperlink to download the
.csv/df
for each car in the grouped_df
(To be finalized)
CODE :
import pandas as pd
from IPython.display import HTML
df = pd.read_csv("cars.csv", delimiter=';', skiprows=[1])
display(df.head(5))
grouped_df = df.groupby('Car').agg(NumberOfCars=('Car', 'size'),
MeanOfWeight=('Weight', 'mean'),
MeanOfAcceleration=('Acceleration', 'mean'))
grouped_df.insert(len(grouped_df.columns), "DownloadCsv", 'Download the details (.csv)')
grouped_df['DownloadCsv'] = grouped_df['DownloadCsv'].apply(lambda x: f'<a href="https://stackoverflow.com/{x}">{x}</a>')
HTML(grouped_df.sample(5).to_html(escape=False))
>>> display(df.head(5))
>>> HTML(grouped_df.sample(5).to_html(escape=False))
For example, if the user click on "Download the details (.csv
)" of the second row, he should get a .csv
of all rows in df
where the column car equals to "Ford Torino 500".
By the way, the code will be implemented in a streamlit app.
Is there any way to achieve this ? Do you have any propositions, please ?
EDIT :
By following @furas’s suggestion, I made this code :
b_buf = io.BytesIO()
with gzip.open(b_buf, 'wb') as f:
f.write(df.to_csv().encode())
b_buf.seek(0)
with open("my_zip.zip", "wb") as f:
f.write(b_buf.getvalue())
csv_url = base64.b64encode(b_buf.getvalue()).decode()
grouped_df['DownloadCsv'] = grouped_df['DownloadCsv'].apply(
lambda x: '<a.href="data:text/csv;base64,"' + csv_url)
st.write(HTML(grouped_df.sample(5).to_html(escape=False)), unsafe_allow_html=True)
Unfortunately, the column grouped_df['DownloadCsv']
seems to be empty in the streamlit app !
What am I missing ?
Answers:
Since you aim to integrate this in streamlit
, why not use a streamlit button to download the csv?
The example below writes the dataframe to streamlit columns and adds download buttons for the individual csv files.
import streamlit as st
import pandas as pd
df = pd.read_csv("cars.csv", delimiter=';', skiprows=[1])
grouped_df = df.groupby('Car').agg(NumberOfCars=('Car', 'size'),
MeanOfWeight=('Weight', 'mean'),
MeanOfAcceleration=('Acceleration', 'mean')).reset_index()
grouped_df.insert(len(grouped_df.columns), "DownloadCsv", 'Download the details (.csv)')
colms = st.columns((2, 1, 1, 1, 2))
#write headers
for col, field_name in zip(colms, grouped_df.columns):
col.write(f'**{field_name}**')
def writerow(row):
col1, col2, col3, col4, col5 = st.columns((2, 1, 1, 1, 2))
with col1:
st.write(row['Car'])
with col2:
st.write(row['NumberOfCars'])
with col3:
st.write(row['MeanOfWeight'])
with col4:
st.write(row['MeanOfAcceleration'])
with col5:
st.download_button("Download details (.csv)", df[df['Car'] == row['Car']].to_csv(), f"details_{row['Car']}.csv", "text/csv")
grouped_df.apply(writerow, axis=1)
Result:
I’m working with a .csv
cars dataset (that I got from here) and I’m trying to:
- Convert this
.csv
to a pandas dataframe (Done, see df) - Group the dataframe by cars models (Done, see grouped_df)
- Make a column with a hyperlink to download the
.csv/df
for each car in thegrouped_df
(To be finalized)
CODE :
import pandas as pd
from IPython.display import HTML
df = pd.read_csv("cars.csv", delimiter=';', skiprows=[1])
display(df.head(5))
grouped_df = df.groupby('Car').agg(NumberOfCars=('Car', 'size'),
MeanOfWeight=('Weight', 'mean'),
MeanOfAcceleration=('Acceleration', 'mean'))
grouped_df.insert(len(grouped_df.columns), "DownloadCsv", 'Download the details (.csv)')
grouped_df['DownloadCsv'] = grouped_df['DownloadCsv'].apply(lambda x: f'<a href="https://stackoverflow.com/{x}">{x}</a>')
HTML(grouped_df.sample(5).to_html(escape=False))
>>> display(df.head(5))
>>> HTML(grouped_df.sample(5).to_html(escape=False))
For example, if the user click on "Download the details (.csv
)" of the second row, he should get a .csv
of all rows in df
where the column car equals to "Ford Torino 500".
By the way, the code will be implemented in a streamlit app.
Is there any way to achieve this ? Do you have any propositions, please ?
EDIT :
By following @furas’s suggestion, I made this code :
b_buf = io.BytesIO()
with gzip.open(b_buf, 'wb') as f:
f.write(df.to_csv().encode())
b_buf.seek(0)
with open("my_zip.zip", "wb") as f:
f.write(b_buf.getvalue())
csv_url = base64.b64encode(b_buf.getvalue()).decode()
grouped_df['DownloadCsv'] = grouped_df['DownloadCsv'].apply(
lambda x: '<a.href="data:text/csv;base64,"' + csv_url)
st.write(HTML(grouped_df.sample(5).to_html(escape=False)), unsafe_allow_html=True)
Unfortunately, the column grouped_df['DownloadCsv']
seems to be empty in the streamlit app !
What am I missing ?
Since you aim to integrate this in streamlit
, why not use a streamlit button to download the csv?
The example below writes the dataframe to streamlit columns and adds download buttons for the individual csv files.
import streamlit as st
import pandas as pd
df = pd.read_csv("cars.csv", delimiter=';', skiprows=[1])
grouped_df = df.groupby('Car').agg(NumberOfCars=('Car', 'size'),
MeanOfWeight=('Weight', 'mean'),
MeanOfAcceleration=('Acceleration', 'mean')).reset_index()
grouped_df.insert(len(grouped_df.columns), "DownloadCsv", 'Download the details (.csv)')
colms = st.columns((2, 1, 1, 1, 2))
#write headers
for col, field_name in zip(colms, grouped_df.columns):
col.write(f'**{field_name}**')
def writerow(row):
col1, col2, col3, col4, col5 = st.columns((2, 1, 1, 1, 2))
with col1:
st.write(row['Car'])
with col2:
st.write(row['NumberOfCars'])
with col3:
st.write(row['MeanOfWeight'])
with col4:
st.write(row['MeanOfAcceleration'])
with col5:
st.download_button("Download details (.csv)", df[df['Car'] == row['Car']].to_csv(), f"details_{row['Car']}.csv", "text/csv")
grouped_df.apply(writerow, axis=1)
Result: