Concat dataframes with same names from multiple folders using pandas

Question:

I have three folders folder1, folder2, and folder3. They have data frames as follows:

folder1/
df1.csv
df4.csv
df5.csv

folder2/
df1.csv
df3.csv
df4.csv

folder3/
df4.csv

I am confused about how to concatenate the data frames that have the same name across all three folders using pandas.concat(), and save them in a new folder "finalfolder" such that finalfolder contains the concatenated files:

finalfolder/
df1.csv (concat from folder1 and folder2)
df3.csv (From folder 2)
df4.csv (concat from folder1, 2, and 3)
df5.csv (From folder 1)
Asked By: Gun

||

Answers:

edit to first answer:

from os import listdir, makedirs
from os.path import isfile, join

import pandas as pd


def concat_same_named_csvs(folder_paths, output_folder='finalfolder'):
    """Concatenate CSV files that share a name across several folders.

    Every ``*.csv`` file found directly inside one of ``folder_paths`` is
    grouped by file name.  Each group is concatenated in the order the
    folders are listed and written to ``output_folder`` under that name, so
    a file that exists in only one folder is simply copied through.

    Args:
        folder_paths: Iterable of folder paths to scan.
        output_folder: Folder to write the merged files to; created if it
            does not exist.

    Returns:
        The sorted list of file names written to ``output_folder``.
    """
    # Map each file name to the full paths that carry it, keeping the
    # order of ``folder_paths`` so the rows are stacked predictably.
    sources = {}
    for folder_path in folder_paths:
        for file in listdir(folder_path):
            full_path = join(folder_path, file)
            # Skip sub-folders and non-CSV files, which read_csv cannot parse.
            if file.lower().endswith('.csv') and isfile(full_path):
                sources.setdefault(file, []).append(full_path)

    makedirs(output_folder, exist_ok=True)

    written = []
    for file in sorted(sources):
        # Read everything first and concatenate once; calling pd.concat
        # inside the loop would re-copy the accumulated rows every time.
        frames = [pd.read_csv(path) for path in sources[file]]
        merged = pd.concat(frames, ignore_index=True)
        # index=False: the row index is not data and would otherwise show
        # up as an extra unnamed column in the saved file.
        merged.to_csv(join(output_folder, file), index=False)
        written.append(file)
    return written


if __name__ == '__main__':
    folder_paths = ['put all the folder paths here']
    concat_same_named_csvs(folder_paths)

This should do the trick: just fill in the list with your folder paths and it should do what you want.

Answered By: maxxel_
import os
import csv as cs
import pandas as pd

def concat_csv_files(csv_paths, output_path):
    """Concatenate several CSV files into one and save the result.

    Args:
        csv_paths: Iterable of paths to the CSV files to read, in the order
            their rows should appear in the output.
        output_path: Path of the CSV file to write.  Its parent directory is
            created if it does not exist.

    Returns:
        The concatenated DataFrame that was written.

    Raises:
        ValueError: If ``csv_paths`` is empty (raised by ``pd.concat``).
    """
    # map() reads each path lazily; pd.concat accepts any iterable of frames.
    fi = pd.concat(map(pd.read_csv, csv_paths), ignore_index=True)

    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
    # Write with DataFrame.to_csv: passing the frame to csv.writer.writerow()
    # iterates the DataFrame, which yields only the column labels, so no data
    # rows would reach the file.  to_csv also closes the file for us.
    fi.to_csv(output_path, index=False, encoding='UTF8')
    return fi


if __name__ == '__main__':
    # Base directory where the program is saved.
    # Make sure to change this path to match your own machine.
    base = os.path.abspath('/home/hari/Documents/python/pandas/')

    print(os.path.join(base, 'dir1/df1.csv'))
    csv = [
        os.path.join(base, 'dir1/df1.csv'),
        os.path.join(base, 'dir1/df4.csv'),
        os.path.join(base, 'dir1/df5.csv'),
        os.path.join(base, 'dir2/df1.csv'),
        os.path.join(base, 'dir2/df3.csv'),
        os.path.join(base, 'dir2/df4.csv'),
        os.path.join(base, 'dir3/df4.csv'),
    ]

    fi = concat_csv_files(csv, os.path.join(base, 'final/final.csv'))

    print(fi)  # for testing
Answered By: Hari
import os

import pandas as pd


def concat_files_by_name(folders_list, output_folder='final_folder'):
    """Concatenate CSV files that have the same name in several folders.

    Files are matched by *name*, not by their position in the directory
    listing: ``os.listdir`` returns entries in arbitrary order and folders
    hold different sets of files, so positional pairing would merge
    unrelated files.  Matching by name also means no maximum file count has
    to be configured and any number of folders is supported.

    Args:
        folders_list: Folder names/paths to scan, in the order their rows
            should be stacked.
        output_folder: Folder the merged files are written to; created if
            it does not exist.

    Returns:
        Dict mapping each written file name to the ``(rows, columns)`` shape
        of its concatenated DataFrame.  Empty if no CSV file was found.
    """
    paths_by_name = {}
    for folder in folders_list:
        folder_path = os.path.abspath(folder)
        for name in os.listdir(folder_path):
            full_path = os.path.join(folder_path, name)
            # Only regular CSV files can be read; skip everything else
            # instead of swallowing the resulting errors.
            if name.lower().endswith('.csv') and os.path.isfile(full_path):
                paths_by_name.setdefault(name, []).append(full_path)

    if not paths_by_name:
        print("No")
        return {}

    output_path = os.path.abspath(output_folder)
    os.makedirs(output_path, exist_ok=True)

    shapes = {}
    for name in sorted(paths_by_name):
        final_df = pd.concat([pd.read_csv(path) for path in paths_by_name[name]])
        final_df.to_csv(os.path.join(output_path, name), index=False)
        print(final_df.shape)
        shapes[name] = final_df.shape
    return shapes


if __name__ == '__main__':
    folders_list = ['folder1', 'folder2', 'folder3']
    concat_files_by_name(folders_list)

It is a bit complicated, but it should work if you are working on Windows. Provide all the folder names in the list. max_files_size is the maximum number of files any folder can have. The final_folder directory in which you want to save the new files must already exist.
Because the folders do not all have the same number of files, the try blocks handle that. It will check files with the same names in all folders, concatenate them, and save them in the new folder. os.path.abspath gets the absolute path to the files.

Answered By: Hira
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.