Create new column and assign value to new column
Question:
I would like help assigning the filename (e.g. file1.txt) to a new column (e.g. filename). However, I'm stuck at the step of creating the new column and assigning the filename to it: no new column appears when I export the result as .csv. I would appreciate advice on whether my logic is wrong.
-raw text (no column names)-
file1.txt -> AL; 1A;
file1.txt -> BL; 2A;
file1.txt -> CL; 3A;
-sample file path - C:\Users\CL\Desktop\folder\file1.txt-
-desired output (add filename col)-
name class filename
AL 1A file1.txt
BL 2A file2.txt
CL 3A file3.txt
-current progress-
import os
import pandas as pd
import glob
# Attempt: read every .txt file under src_path, add a 'filename' column,
# and export the combined data as df.csv.
# NOTE(review): indentation was lost in this paste; the lines following the
# `for` presumably form the loop body -- confirm against the original post.
src_path = r'C:UsersCLDesktopfolder' #3 files total
for f in glob.glob(os.path.join(src_path ,"*.txt")):
filename = f #f is the path matched by glob (directory included); 'filename' is not used again below
# one-element list holding the DataFrame parsed from f
files = [pd.read_csv(f, delimiter=';', names = ['name', 'class'], index_col = False)]
# builds a new DataFrame from that list rather than using the parsed DataFrame itself
files_df = pd.DataFrame(files) #convert to df to add new column
files_df['filename'] = f #assign value to new column
# pd.concat is handed a single DataFrame here, not a list of DataFrames
files_df = pd.concat(files_df) #concat all file data together
files_df.to_csv("df.csv")
---update---
#trying to include index value for each row's data.
#desired output
name class filename
AL 1A file1_1.txt
AL 1A file1_2.txt
BL 2A file2_1.txt
BL 2A file2_2.txt
CL 3A file3_1.txt
CL 3A file3_2.txt
import os
import pandas as pd
import glob
# Attempt: same read/concat/export pipeline, trying to add a running counter
# to each filename value.
i = 0
src_path = r'C:UsersCLDesktopfolder' #3 files total
pd.concat([pd.read_csv(f, delimiter=';', names=['name', 'class'], index_col=False
).assign(filename=f) # the value assigned is still just f; i is not part of it
for i, f in enumerate(glob.glob(os.path.join(src_path ,"*.txt"))), i+=1] # `i+=1` is a statement placed inside the comprehension; enumerate already supplies i per file
).to_csv("df.csv")
Answers:
Untested, but I imagine your code should be changed to:
import os
import pandas as pd
import glob
# Combine every .txt file in src_path into one CSV, tagging each row with
# the name of the file it came from.
src_path = r'C:\Users\CL\Desktop\folder'  # 3 files total
all_dfs = []
for f in glob.glob(os.path.join(src_path, "*.txt")):
    # index_col=False stops pandas from using the first field as the index
    # when every line ends with a trailing ';'.
    tmp_df = pd.read_csv(f, delimiter=';', names=['name', 'class'], index_col=False)
    # glob yields the full path; keep only the base name (e.g. file1.txt)
    # so the column matches the desired output.
    all_dfs.append(tmp_df.assign(filename=os.path.basename(f)))
files_df = pd.concat(all_dfs)  # concat all file data together
files_df.to_csv("df.csv")
As a "one-liner":
import os
import pandas as pd
import glob
src_path = r'C:\Users\CL\Desktop\folder'  # 3 files total
# Read each .txt file, tag its rows with the file's base name (glob yields
# full paths, the desired column holds e.g. file1.txt), then concatenate
# everything and write a single CSV.
pd.concat([
    pd.read_csv(f, delimiter=';', names=['name', 'class'], index_col=False)
    .assign(filename=os.path.basename(f))
    for f in glob.glob(os.path.join(src_path, "*.txt"))
]).to_csv("df.csv")
I would like help assigning the filename (e.g. file1.txt) to a new column (e.g. filename). However, I'm stuck at the step of creating the new column and assigning the filename to it: no new column appears when I export the result as .csv. I would appreciate advice on whether my logic is wrong.
-raw text (no column names)-
file1.txt -> AL; 1A;
file1.txt -> BL; 2A;
file1.txt -> CL; 3A;
-sample file path - C:\Users\CL\Desktop\folder\file1.txt-
-desired output (add filename col)-
name class filename
AL 1A file1.txt
BL 2A file2.txt
CL 3A file3.txt
-current progress-
import os
import pandas as pd
import glob
# Attempt: read every .txt file under src_path, add a 'filename' column,
# and export the combined data as df.csv.
# NOTE(review): indentation was lost in this paste; the lines following the
# `for` presumably form the loop body -- confirm against the original post.
src_path = r'C:UsersCLDesktopfolder' #3 files total
for f in glob.glob(os.path.join(src_path ,"*.txt")):
filename = f #f is the path matched by glob (directory included); 'filename' is not used again below
# one-element list holding the DataFrame parsed from f
files = [pd.read_csv(f, delimiter=';', names = ['name', 'class'], index_col = False)]
# builds a new DataFrame from that list rather than using the parsed DataFrame itself
files_df = pd.DataFrame(files) #convert to df to add new column
files_df['filename'] = f #assign value to new column
# pd.concat is handed a single DataFrame here, not a list of DataFrames
files_df = pd.concat(files_df) #concat all file data together
files_df.to_csv("df.csv")
---update---
#trying to include index value for each row's data.
#desired output
name class filename
AL 1A file1_1.txt
AL 1A file1_2.txt
BL 2A file2_1.txt
BL 2A file2_2.txt
CL 3A file3_1.txt
CL 3A file3_2.txt
import os
import pandas as pd
import glob
# Attempt: same read/concat/export pipeline, trying to add a running counter
# to each filename value.
i = 0
src_path = r'C:UsersCLDesktopfolder' #3 files total
pd.concat([pd.read_csv(f, delimiter=';', names=['name', 'class'], index_col=False
).assign(filename=f) # the value assigned is still just f; i is not part of it
for i, f in enumerate(glob.glob(os.path.join(src_path ,"*.txt"))), i+=1] # `i+=1` is a statement placed inside the comprehension; enumerate already supplies i per file
).to_csv("df.csv")
Untested, but I imagine your code should be changed to:
import os
import pandas as pd
import glob
# Combine every .txt file in src_path into one CSV, tagging each row with
# the name of the file it came from.
src_path = r'C:\Users\CL\Desktop\folder'  # 3 files total
all_dfs = []
for f in glob.glob(os.path.join(src_path, "*.txt")):
    # index_col=False stops pandas from using the first field as the index
    # when every line ends with a trailing ';'.
    tmp_df = pd.read_csv(f, delimiter=';', names=['name', 'class'], index_col=False)
    # glob yields the full path; keep only the base name (e.g. file1.txt)
    # so the column matches the desired output.
    all_dfs.append(tmp_df.assign(filename=os.path.basename(f)))
files_df = pd.concat(all_dfs)  # concat all file data together
files_df.to_csv("df.csv")
As a "one-liner":
import os
import pandas as pd
import glob
src_path = r'C:\Users\CL\Desktop\folder'  # 3 files total
# Read each .txt file, tag its rows with the file's base name (glob yields
# full paths, the desired column holds e.g. file1.txt), then concatenate
# everything and write a single CSV.
pd.concat([
    pd.read_csv(f, delimiter=';', names=['name', 'class'], index_col=False)
    .assign(filename=os.path.basename(f))
    for f in glob.glob(os.path.join(src_path, "*.txt"))
]).to_csv("df.csv")