How do I read files from two folders in the same order in Python?
Question:
I have two folders containing files with the same names, but when I try to read all the text files from the folders in Python, they are read in a different order. I need to read the files from the two folders in the same order because they correspond to each other. I used the following code to read all the text files in a folder:
# Build the sub-folder path portably; the trailing '' keeps a trailing
# path separator on the result, as the hand-written concatenation did.
dir_psnr = os.path.join(current_path, dir_psnr, '')
os.chdir(dir_psnr)  # change the working directory to that folder
# With no argument, os.listdir() lists the current working directory.
files_path = [os.path.abspath(x) for x in os.listdir()]
fnames_psnr_tmp = [x for x in files_path if x.endswith(".txt")]
The paths of the folders are as follows:
F:\RD_data_from_twitch_system\RD_data_from_twitch_system\psnr
F:\RD_data_from_twitch_system\RD_data_from_twitch_system\bitrate
The names of the text files in both folders are as follows:
asmr_1.txt
asmr_2.txt
Counter_strike_1.txt
Counter_strike_2.txt
dota2_1.txt
What is the problem, and how can I read the files in the same order?
The full code is:
def reading_file_to_array(dir_psnr, current_path):
    """Collect the absolute paths of the .txt files in a sub-folder.

    Args:
        dir_psnr: Name of the sub-folder to read.
        current_path: Folder that contains the sub-folder.

    Returns:
        An empty tuple; the processing that builds the real result was
        elided in the original post.

    Note:
        Changes the process working directory as a side effect.
    """
    # Build the path portably; the trailing '' keeps a trailing separator
    # on the result, as the hand-written concatenation did.
    dir_psnr = os.path.join(current_path, dir_psnr, '')
    os.chdir(dir_psnr)  # change the working directory to that folder
    # With no argument, os.listdir() lists the current working directory.
    files_path = [os.path.abspath(x) for x in os.listdir()]
    fnames_psnr_tmp = [x for x in files_path if x.endswith(".txt")]
    # ... remaining processing omitted in the original post ...
    return ()
# Root folder whose sub-folders are processed below.
current_path = 'F:/RD_data_from_twitch_system/RD_data_from_twitch_system'
current_dir = 'F:/RD_data_from_twitch_system/RD_data_from_twitch_system'
# The trailing '/' in the pattern restricts the matches to directories.
all_sub_dir_paths = glob(str(current_dir) + '/*/')
all_sub_dir_names = [Path(sub_dir).name for sub_dir in all_sub_dir_paths]
# Iterate over the names directly instead of indexing with range(len(...)).
for sub_dir_name in all_sub_dir_names:
    if sub_dir_name == 'bitrate':
        bitrate_1080p, bitrate_720p, bitrate_480p, bitrate_360p, bitrate_160p = reading_file_to_array(sub_dir_name, current_path)
    else:
        # Every sub-folder other than 'bitrate' is read as PSNR data.
        psnr_1080p, psnr_720p, psnr_480p, psnr_360p, psnr_160p = reading_file_to_array(sub_dir_name, current_path)
Answers:
Since the file names are the same, you could list the files in one directory and then add the bases to both for processing. This could be done in a generator that you can use in a loop. For example
# Raw strings keep the Windows backslashes literal.
folder1 = r"F:\RD_data_from_twitch_system\RD_data_from_twitch_system\psnr"
folder2 = r"F:\RD_data_from_twitch_system\RD_data_from_twitch_system\bitrate"
def list_directories(primary, secondary):
    """Yield matching .txt file paths from two folders, in sorted name order.

    File names are taken from *primary* only and joined onto both folders,
    so the two paths in every pair always share the same file name.

    Args:
        primary: Folder whose .txt files are listed.
        secondary: Folder assumed to hold files with the same names.

    Yields:
        Tuples ``(path_in_primary, path_in_secondary)`` of absolute paths.
    """
    primary = os.path.abspath(primary)
    secondary = os.path.abspath(secondary)
    # os.listdir() returns names in arbitrary order; sort them so the
    # iteration order is reproducible.
    for fn in sorted(os.listdir(primary)):
        if fn.endswith(".txt"):
            yield (os.path.join(primary, fn),
                   os.path.join(secondary, fn))
# print files for test
for f1, f2 in list_directories(folder1, folder2):
    print(f1, f2)
It's usually a bad idea to call os.chdir – especially without remembering which directory you came from. As long as your code builds absolute path names, the current working directory doesn't matter.
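The easiest way would be to use os.listdir and to prepend the directory path to every element of the list. Because os.listdir returns the entries in arbitrary order, sort each list before pairing the two up.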
import os
#hardcoded folders
def reading_file_to_array(dir_1, dir_2):
    """Pair up the entries of two folders in sorted name order.

    Args:
        dir_1: First folder.
        dir_2: Second folder.

    Returns:
        A zip iterator of ``(path_in_dir_1, path_in_dir_2)`` tuples. zip
        stops at the shorter listing, so extra entries in the longer
        folder are dropped.
    """
    # os.listdir() gives no ordering guarantee, so sort both listings;
    # otherwise the pairs produced by zip may not correspond.
    list_1 = [f"{dir_1}/" + f for f in sorted(os.listdir(dir_1))]
    list_2 = [f"{dir_2}/" + f for f in sorted(os.listdir(dir_2))]
    # Add more lists
    return zip(list_1, list_2)
for f1, f2 in reading_file_to_array("./f_1", "./f_2"):
    print(f1, f2)
#more dynamic approach
def reading_file_to_array_dyn(dirs):
    """Pair up the entries of any number of folders in sorted name order.

    Args:
        dirs: Iterable of folder paths.

    Returns:
        A zip iterator of tuples holding one path per folder. zip stops
        at the shortest listing.
    """
    # os.listdir() gives no ordering guarantee, so sort every listing;
    # otherwise the tuples produced by zip may not correspond.
    results = [
        [f"{directory}/" + f for f in sorted(os.listdir(directory))]
        for directory in dirs
    ]
    return zip(*results)
for f1, f2 in reading_file_to_array_dyn(["./f_1", "./f_2"]):
    print(f1, f2)
The result of this test code looks like this for me:
./f_1/a.txt ./f_2/a.txt
./f_1/b.txt ./f_2/b.txt
./f_1/c.txt ./f_2/c.txt
If you want to filter the files in the folder by type, I recommend the standard-library glob module.
I have two folders containing files with the same names, but when I try to read all the text files from the folders in Python, they are read in a different order. I need to read the files from the two folders in the same order because they correspond to each other. I used the following code to read all the text files in a folder:
# Build the sub-folder path portably; the trailing '' keeps a trailing
# path separator on the result, as the hand-written concatenation did.
dir_psnr = os.path.join(current_path, dir_psnr, '')
os.chdir(dir_psnr)  # change the working directory to that folder
# With no argument, os.listdir() lists the current working directory.
files_path = [os.path.abspath(x) for x in os.listdir()]
fnames_psnr_tmp = [x for x in files_path if x.endswith(".txt")]
The paths of the folders are as follows:
F:\RD_data_from_twitch_system\RD_data_from_twitch_system\psnr
F:\RD_data_from_twitch_system\RD_data_from_twitch_system\bitrate
The names of the text files in both folders are as follows:
asmr_1.txt
asmr_2.txt
Counter_strike_1.txt
Counter_strike_2.txt
dota2_1.txt
What is the problem, and how can I read the files in the same order?
The full code is:
def reading_file_to_array(dir_psnr, current_path):
    """Collect the absolute paths of the .txt files in a sub-folder.

    Args:
        dir_psnr: Name of the sub-folder to read.
        current_path: Folder that contains the sub-folder.

    Returns:
        An empty tuple; the processing that builds the real result was
        elided in the original post.

    Note:
        Changes the process working directory as a side effect.
    """
    # Build the path portably; the trailing '' keeps a trailing separator
    # on the result, as the hand-written concatenation did.
    dir_psnr = os.path.join(current_path, dir_psnr, '')
    os.chdir(dir_psnr)  # change the working directory to that folder
    # With no argument, os.listdir() lists the current working directory.
    files_path = [os.path.abspath(x) for x in os.listdir()]
    fnames_psnr_tmp = [x for x in files_path if x.endswith(".txt")]
    # ... remaining processing omitted in the original post ...
    return ()
# Root folder whose sub-folders are processed below.
current_path = 'F:/RD_data_from_twitch_system/RD_data_from_twitch_system'
current_dir = 'F:/RD_data_from_twitch_system/RD_data_from_twitch_system'
# The trailing '/' in the pattern restricts the matches to directories.
all_sub_dir_paths = glob(str(current_dir) + '/*/')
all_sub_dir_names = [Path(sub_dir).name for sub_dir in all_sub_dir_paths]
# Iterate over the names directly instead of indexing with range(len(...)).
for sub_dir_name in all_sub_dir_names:
    if sub_dir_name == 'bitrate':
        bitrate_1080p, bitrate_720p, bitrate_480p, bitrate_360p, bitrate_160p = reading_file_to_array(sub_dir_name, current_path)
    else:
        # Every sub-folder other than 'bitrate' is read as PSNR data.
        psnr_1080p, psnr_720p, psnr_480p, psnr_360p, psnr_160p = reading_file_to_array(sub_dir_name, current_path)
Since the file names are the same, you could list the files in one directory and then add the bases to both for processing. This could be done in a generator that you can use in a loop. For example
# Raw strings keep the Windows backslashes literal.
folder1 = r"F:\RD_data_from_twitch_system\RD_data_from_twitch_system\psnr"
folder2 = r"F:\RD_data_from_twitch_system\RD_data_from_twitch_system\bitrate"
def list_directories(primary, secondary):
    """Yield matching .txt file paths from two folders, in sorted name order.

    File names are taken from *primary* only and joined onto both folders,
    so the two paths in every pair always share the same file name.

    Args:
        primary: Folder whose .txt files are listed.
        secondary: Folder assumed to hold files with the same names.

    Yields:
        Tuples ``(path_in_primary, path_in_secondary)`` of absolute paths.
    """
    primary = os.path.abspath(primary)
    secondary = os.path.abspath(secondary)
    # os.listdir() returns names in arbitrary order; sort them so the
    # iteration order is reproducible.
    for fn in sorted(os.listdir(primary)):
        if fn.endswith(".txt"):
            yield (os.path.join(primary, fn),
                   os.path.join(secondary, fn))
# print files for test
for f1, f2 in list_directories(folder1, folder2):
    print(f1, f2)
It's usually a bad idea to call os.chdir – especially without remembering which directory you came from. As long as your code builds absolute path names, the current working directory doesn't matter.
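The easiest way would be to use os.listdir and to prepend the directory path to every element of the list. Because os.listdir returns the entries in arbitrary order, sort each list before pairing the two up.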
import os
#hardcoded folders
def reading_file_to_array(dir_1, dir_2):
    """Pair up the entries of two folders in sorted name order.

    Args:
        dir_1: First folder.
        dir_2: Second folder.

    Returns:
        A zip iterator of ``(path_in_dir_1, path_in_dir_2)`` tuples. zip
        stops at the shorter listing, so extra entries in the longer
        folder are dropped.
    """
    # os.listdir() gives no ordering guarantee, so sort both listings;
    # otherwise the pairs produced by zip may not correspond.
    list_1 = [f"{dir_1}/" + f for f in sorted(os.listdir(dir_1))]
    list_2 = [f"{dir_2}/" + f for f in sorted(os.listdir(dir_2))]
    # Add more lists
    return zip(list_1, list_2)
for f1, f2 in reading_file_to_array("./f_1", "./f_2"):
    print(f1, f2)
#more dynamic approach
def reading_file_to_array_dyn(dirs):
    """Pair up the entries of any number of folders in sorted name order.

    Args:
        dirs: Iterable of folder paths.

    Returns:
        A zip iterator of tuples holding one path per folder. zip stops
        at the shortest listing.
    """
    # os.listdir() gives no ordering guarantee, so sort every listing;
    # otherwise the tuples produced by zip may not correspond.
    results = [
        [f"{directory}/" + f for f in sorted(os.listdir(directory))]
        for directory in dirs
    ]
    return zip(*results)
for f1, f2 in reading_file_to_array_dyn(["./f_1", "./f_2"]):
    print(f1, f2)
The result of this test code looks like this for me:
./f_1/a.txt ./f_2/a.txt
./f_1/b.txt ./f_2/b.txt
./f_1/c.txt ./f_2/c.txt
If you want to filter the files in the folder by type, I recommend the standard-library glob module.