Replacing data in xlsx sheet with pandas dataframe
Question:
I have an xlsx file with multiple tabs, one of them being Town_names
that already has some data in it.
I’d like to overwrite that data with a dataframe – Town_namesDF
– while keeping the rest of the xlsx tabs intact.
I’ve tried the following:
with pd.ExcelWriter(r'path/to/file.xlsx', engine='openpyxl', mode='a') as writer:
Town_namesDF.to_excel(writer,sheet_name='Town_names')
writer.save()
writer.close()
But it ends up creating a new tab Town_names1
instead of overwriting the Town_names
tab. Am I missing something? Thanks.
Answers:
You could try this to store all of the other sheets temporarily and then add them back. I don’t think this would save any formulas or formatting though.
Store_sheet1=pd.read_excel('path/to/file.xlsx',sheetname='Sheet1')
Store_sheet2=pd.read_excel('path/to/file.xlsx',sheetname='Sheet2')
Store_sheet3=pd.read_excel('path/to/file.xlsx',sheetname='Sheet3')
with pd.ExcelWriter(r'path/to/file.xlsx', engine='openpyxl', mode='a') as writer:
Town_namesDF.to_excel(writer,sheet_name='Town_names')
Store_sheet1.to_excel(writer,sheet_name='Sheet1')
Store_sheet2.to_excel(writer,sheet_name='Sheet2')
Store_sheet3.to_excel(writer,sheet_name='Sheet3')
writer.save()
writer.close()
Since you want to overwrite, but there is no direct option for that(like in julia’s XLSX there is option for cell_ref). Simply delete the duplicate if it exists and then write.
with pd.ExcelWriter('/path/to/file.xlsx',engine = "openpyxl", mode='a') as writer:
workBook = writer.book
try:
workBook.remove(workBook['Town_names'])
except:
print("worksheet doesn't exist")
finally:
df.to_excel(writer, sheet_name='Town_names')
writer.save()
Well, I’ve managed to do this. This is not a clean solution and not fast at all, but I’ve made use of openpyxl documentation for working with pandas found here: https://openpyxl.readthedocs.io/en/latest/pandas.html
I’m effectively selecting the Town_names
sheet, clearing it with ws.delete_rows()
and then appending each row of my dataframe to the sheet.
wb = openpyxl.load_workbook(r'path/to/file.xlsx')
ws = wb.get_sheet_by_name('Town_names')
ws.delete_rows(0, 1000)
wb.save(r'path/to/file.xlsx')
wb = openpyxl.load_workbook(r'path/to/file.xlsx')
activeSheet = wb.get_sheet_by_name('Town_names')
for r in dataframe_to_rows(Town_namesDF, index=False, header=True):
activeSheet.append(r)
for cell in activeSheet['A'] + activeSheet[1]:
cell.style = 'Pandas'
wb.save(r'path/to/file.xlsx')
A bit messy and I hope there’s a better solution than mine, but this worked for me.
Hi you could use xlwings for that task. Here is an example.
import xlwings as xw
import pandas as pd
filename = "test.xlsx"
df = pd.read_excel(filename, "Town_names")
# Do your modifications of the worksheet here. For example, the following line "df * 2".
df = df * 2
app = xw.App(visible=False)
wb = xw.Book(filename)
ws = wb.sheets["Town_names"]
ws.clear()
ws["A1"].options(pd.DataFrame, header=1, index=False, expand='table').value = df
# If formatting of column names and index is needed as xlsxwriter does it, the following lines will do it.
ws["A1"].expand("right").api.Font.Bold = True
ws["A1"].expand("down").api.Font.Bold = True
ws["A1"].expand("right").api.Borders.Weight = 2
ws["A1"].expand("down").api.Borders.Weight = 2
wb.save(filename)
app.quit()
since pandas version 1.3.0. there is a new parameter: "if_sheet_exists"
{‘error’, ‘new’, ‘replace’}
pd.ExcelWriter(r'path/to/file.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace')
I have an xlsx file with multiple tabs, one of them being Town_names
that already has some data in it.
I’d like to overwrite that data with a dataframe – Town_namesDF
– while keeping the rest of the xlsx tabs intact.
I’ve tried the following:
with pd.ExcelWriter(r'path/to/file.xlsx', engine='openpyxl', mode='a') as writer:
Town_namesDF.to_excel(writer,sheet_name='Town_names')
writer.save()
writer.close()
But it ends up creating a new tab Town_names1
instead of overwriting the Town_names
tab. Am I missing something? Thanks.
You could try this to store all of the other sheets temporarily and then add them back. I don’t think this would save any formulas or formatting though.
Store_sheet1=pd.read_excel('path/to/file.xlsx',sheetname='Sheet1')
Store_sheet2=pd.read_excel('path/to/file.xlsx',sheetname='Sheet2')
Store_sheet3=pd.read_excel('path/to/file.xlsx',sheetname='Sheet3')
with pd.ExcelWriter(r'path/to/file.xlsx', engine='openpyxl', mode='a') as writer:
Town_namesDF.to_excel(writer,sheet_name='Town_names')
Store_sheet1.to_excel(writer,sheet_name='Sheet1')
Store_sheet2.to_excel(writer,sheet_name='Sheet2')
Store_sheet3.to_excel(writer,sheet_name='Sheet3')
writer.save()
writer.close()
Since you want to overwrite, but there is no direct option for that(like in julia’s XLSX there is option for cell_ref). Simply delete the duplicate if it exists and then write.
with pd.ExcelWriter('/path/to/file.xlsx',engine = "openpyxl", mode='a') as writer:
workBook = writer.book
try:
workBook.remove(workBook['Town_names'])
except:
print("worksheet doesn't exist")
finally:
df.to_excel(writer, sheet_name='Town_names')
writer.save()
Well, I’ve managed to do this. This is not a clean solution and not fast at all, but I’ve made use of openpyxl documentation for working with pandas found here: https://openpyxl.readthedocs.io/en/latest/pandas.html
I’m effectively selecting the Town_names
sheet, clearing it with ws.delete_rows()
and then appending each row of my dataframe to the sheet.
wb = openpyxl.load_workbook(r'path/to/file.xlsx')
ws = wb.get_sheet_by_name('Town_names')
ws.delete_rows(0, 1000)
wb.save(r'path/to/file.xlsx')
wb = openpyxl.load_workbook(r'path/to/file.xlsx')
activeSheet = wb.get_sheet_by_name('Town_names')
for r in dataframe_to_rows(Town_namesDF, index=False, header=True):
activeSheet.append(r)
for cell in activeSheet['A'] + activeSheet[1]:
cell.style = 'Pandas'
wb.save(r'path/to/file.xlsx')
A bit messy and I hope there’s a better solution than mine, but this worked for me.
Hi you could use xlwings for that task. Here is an example.
import xlwings as xw
import pandas as pd
filename = "test.xlsx"
df = pd.read_excel(filename, "Town_names")
# Do your modifications of the worksheet here. For example, the following line "df * 2".
df = df * 2
app = xw.App(visible=False)
wb = xw.Book(filename)
ws = wb.sheets["Town_names"]
ws.clear()
ws["A1"].options(pd.DataFrame, header=1, index=False, expand='table').value = df
# If formatting of column names and index is needed as xlsxwriter does it, the following lines will do it.
ws["A1"].expand("right").api.Font.Bold = True
ws["A1"].expand("down").api.Font.Bold = True
ws["A1"].expand("right").api.Borders.Weight = 2
ws["A1"].expand("down").api.Borders.Weight = 2
wb.save(filename)
app.quit()
since pandas version 1.3.0. there is a new parameter: "if_sheet_exists"
{‘error’, ‘new’, ‘replace’}
pd.ExcelWriter(r'path/to/file.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace')