How can I simplify my pandas script using a loop?
Question:
I have the following code:
import pandas as pd

# Table22 is read first so that its rows come first in the combined output.
# Raw strings (r"...") keep the Windows backslashes from being read as escapes.
df22 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table22.xlsx")
# Select the sheets that are to be transformed
df3 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table3.xlsx")
df4 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table4.xlsx")
df5 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table5.xlsx")
df6 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table6.xlsx")
df7 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table7.xlsx")
df8 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table8.xlsx")
df9 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table9.xlsx")
df10 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table10.xlsx")
df11 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table11.xlsx")
df12 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table12.xlsx")
df13 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table13.xlsx")
df14 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table14.xlsx")
df15 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table15.xlsx")
df16 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table16.xlsx")
df17 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table17.xlsx")
df18 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table18.xlsx")
df19 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table19.xlsx")
df20 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table20.xlsx")
df21 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table21.xlsx")
# join='inner' keeps only the columns that are present in every table.
df = pd.concat([df22, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17, df18, df19, df20, df21], join='inner')
df.to_excel(r'C:\Users\H\Desktop\Files\Allweeks.xlsx', sheet_name='sheet1', index=False)
It appends Table22.xlsx with the tables for all weeks from 3 to 21. I’m trying to find out if anyone knows how this script can be improved. I was trying to use loops but I just couldn’t get it to work.
Answers:
Use a list comprehension:
# Table22 is read separately so that it can be placed first in the result.
df22 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table22.xlsx")
# range(3, 22) yields 3..21 because the stop value is exclusive.
dfs = [pd.read_excel(rf"C:\Users\H\Desktop\Files\Table{x}.xlsx") for x in range(3, 22)]
# join='inner' keeps only the columns that are present in every table.
df = pd.concat([df22] + dfs, join='inner')
df.to_excel(r'C:\Users\H\Desktop\Files\Allweeks.xlsx', sheet_name='sheet1', index=False)
Or create a list of all the DataFrames and then move the last DataFrame to the front of the list:
# range(3, 23) yields 3..22, so Table22 ends up as the last element of the list.
dfs = [pd.read_excel(rf"C:\Users\H\Desktop\Files\Table{x}.xlsx") for x in range(3, 23)]
# Put the last DataFrame (Table22) first, followed by Table3..Table21 in order.
df = pd.concat(dfs[-1:] + dfs[:-1], join='inner')
# Another idea is to reverse the order instead - 22, 21, 20 ... 3:
# df = pd.concat(dfs[::-1], join='inner')
df.to_excel(r'C:\Users\H\Desktop\Files\Allweeks.xlsx', sheet_name='sheet1', index=False)
You could use a for-loop to read the files Table3 through Table21 and concatenate each DataFrame with Table22, for example:
import pandas as pd

# Start with Table22 so that its rows come first in the combined output.
frames = [pd.read_excel(r'C:\Users\H\Desktop\Files\Table22.xlsx')]
for i in range(3, 22):  # 3..21; the stop value is exclusive
    # The integer i must be converted with str(i) before string concatenation.
    frames.append(pd.read_excel(r'C:\Users\H\Desktop\Files\Table' + str(i) + '.xlsx'))
# Concatenate once after the loop instead of on every iteration, which would
# re-copy the accumulated rows each time. join='inner' matches the original
# script by keeping only the columns that are present in every table.
df = pd.concat(frames, join='inner')
# NOTE: keep the r prefix on Windows paths; in a normal string literal the
# "\U" in "C:\Users" is parsed as a Unicode escape and raises a SyntaxError.
df.to_excel(r'C:\Users\H\Desktop\Files\Allweeks.xlsx', sheet_name='sheet1', index=False)
Note that the integer i has to be converted to a string with str(i) before it is concatenated into the file path.
I have the following code:
import pandas as pd

# Table22 is read first so that its rows come first in the combined output.
# Raw strings (r"...") keep the Windows backslashes from being read as escapes.
df22 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table22.xlsx")
# Select the sheets that are to be transformed
df3 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table3.xlsx")
df4 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table4.xlsx")
df5 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table5.xlsx")
df6 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table6.xlsx")
df7 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table7.xlsx")
df8 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table8.xlsx")
df9 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table9.xlsx")
df10 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table10.xlsx")
df11 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table11.xlsx")
df12 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table12.xlsx")
df13 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table13.xlsx")
df14 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table14.xlsx")
df15 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table15.xlsx")
df16 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table16.xlsx")
df17 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table17.xlsx")
df18 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table18.xlsx")
df19 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table19.xlsx")
df20 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table20.xlsx")
df21 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table21.xlsx")
# join='inner' keeps only the columns that are present in every table.
df = pd.concat([df22, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17, df18, df19, df20, df21], join='inner')
df.to_excel(r'C:\Users\H\Desktop\Files\Allweeks.xlsx', sheet_name='sheet1', index=False)
It appends Table22.xlsx with the tables for all weeks from 3 to 21. I’m trying to find out if anyone knows how this script can be improved. I was trying to use loops but I just couldn’t get it to work.
Use a list comprehension:
# Table22 is read separately so that it can be placed first in the result.
df22 = pd.read_excel(r"C:\Users\H\Desktop\Files\Table22.xlsx")
# range(3, 22) yields 3..21 because the stop value is exclusive.
dfs = [pd.read_excel(rf"C:\Users\H\Desktop\Files\Table{x}.xlsx") for x in range(3, 22)]
# join='inner' keeps only the columns that are present in every table.
df = pd.concat([df22] + dfs, join='inner')
df.to_excel(r'C:\Users\H\Desktop\Files\Allweeks.xlsx', sheet_name='sheet1', index=False)
Or create a list of all the DataFrames and then move the last DataFrame to the front of the list:
# range(3, 23) yields 3..22, so Table22 ends up as the last element of the list.
dfs = [pd.read_excel(rf"C:\Users\H\Desktop\Files\Table{x}.xlsx") for x in range(3, 23)]
# Put the last DataFrame (Table22) first, followed by Table3..Table21 in order.
df = pd.concat(dfs[-1:] + dfs[:-1], join='inner')
# Another idea is to reverse the order instead - 22, 21, 20 ... 3:
# df = pd.concat(dfs[::-1], join='inner')
df.to_excel(r'C:\Users\H\Desktop\Files\Allweeks.xlsx', sheet_name='sheet1', index=False)
You could use a for-loop to read the files Table3 through Table21 and concatenate each DataFrame with Table22, for example:
import pandas as pd

# Start with Table22 so that its rows come first in the combined output.
frames = [pd.read_excel(r'C:\Users\H\Desktop\Files\Table22.xlsx')]
for i in range(3, 22):  # 3..21; the stop value is exclusive
    # The integer i must be converted with str(i) before string concatenation.
    frames.append(pd.read_excel(r'C:\Users\H\Desktop\Files\Table' + str(i) + '.xlsx'))
# Concatenate once after the loop instead of on every iteration, which would
# re-copy the accumulated rows each time. join='inner' matches the original
# script by keeping only the columns that are present in every table.
df = pd.concat(frames, join='inner')
# NOTE: keep the r prefix on Windows paths; in a normal string literal the
# "\U" in "C:\Users" is parsed as a Unicode escape and raises a SyntaxError.
df.to_excel(r'C:\Users\H\Desktop\Files\Allweeks.xlsx', sheet_name='sheet1', index=False)
Note that the integer i has to be converted to a string with str(i) before it is concatenated into the file path.