OS Looping over files is printing out twice
Question:
The following code works as intended, looping over files in my folder and performing some read write actions.
The files are updated correctly too.
Issue is with the print out. It ends up printing the same set of results twice in console. Why?
import os
import csv

root_directory = '.'

# Walk the tree under root_directory and process every file name ending in .csv.
for subdir, dirs, files in os.walk(root_directory):
    for file in files:
        if file.endswith('.csv'):
            initial = {}      # first row[2] value seen for each row[1] key
            repeated = set()  # keys seen again with a different row[2] value
            my_file = file
            sample = my_file[:2]  # first two characters of the file name
            output_file = '{}_result.txt'.format(sample.lower())
            with open(my_file, mode='r') as in_file, \
                    open(output_file, mode='w') as out_file:
                next(in_file)  # skip the first line (presumably a header)
                reader = csv.reader(in_file, delimiter="\t")
                for i, line in enumerate(reader):
                    # The first tab-delimited field is split on commas.
                    row = line[0].split(',')
                    if row[1] in initial:
                        shouldAdd = initial[row[1]] != row[2]
                        if shouldAdd:
                            repeated.add(row[1])
                            out_file.write('{}\n'.format(row[1]))  # writing to file here no issues, no duplicates
                    else:
                        initial[row[1]] = row[2]
            #Issue is here. Why printing twice
            print('Total repeats for {} sample: {}'.format(sample, len(repeated)))
Prints out as follows:
Total repeats for AA sample: 123
Total repeats for BB sample: 45
Total repeats for AA sample: 123
Total repeats for BB sample: 45
Got 4 lines above. It should have stopped after line 2.
Answers:
It appears to print twice because you save `_result.txt`s that also end in `.txt` in the same directory. Try saving your result files, say, with `.out` instead so `file.endswith('.txt')` does not pick them up.
You’re ignoring the `subdir` variable. The same filename exists in a subdirectory, but you’re processing the file in the root directory twice. You need to use `os.path.join(subdir, file)` to get the full pathname of the file.
# Walk the tree and process every file name ending in .csv, addressing each
# file by its full path so files in subdirectories are opened where they live.
for subdir, dirs, files in os.walk(root_directory):
    for file in files:
        if file.endswith('.csv'):
            # os.walk yields bare file names; join with subdir to get the path.
            fullname = os.path.join(subdir, file)
            initial = {}      # first row[2] value seen for each row[1] key
            repeated = set()  # keys seen again with a different row[2] value
            my_file = file
            sample = my_file[:2]  # first two characters of the file name
            # Write the result file next to the CSV it was derived from.
            output_file = os.path.join(subdir, '{}_result.txt'.format(sample.lower()))
            with open(fullname, mode='r') as in_file, \
                    open(output_file, mode='w') as out_file:
                next(in_file)  # skip the first line (presumably a header)
                reader = csv.reader(in_file, delimiter="\t")
                for i, line in enumerate(reader):
                    # The first tab-delimited field is split on commas.
                    row = line[0].split(',')
                    if row[1] in initial:
                        shouldAdd = initial[row[1]] != row[2]
                        if shouldAdd:
                            repeated.add(row[1])
                            out_file.write('{}\n'.format(row[1]))  # writing to file here no issues, no duplicates
                    else:
                        initial[row[1]] = row[2]
            #Issue is here. Why printing twice
            print('Total repeats for {}/{} sample: {}'.format(subdir, sample, len(repeated)))
The following code works as intended, looping over files in my folder and performing some read write actions.
The files are updated correctly too.
Issue is with the print out. It ends up printing the same set of results twice in console. Why?
import os
import csv

root_directory = '.'

# Walk the tree under root_directory and process every file name ending in .csv.
for subdir, dirs, files in os.walk(root_directory):
    for file in files:
        if file.endswith('.csv'):
            initial = {}      # first row[2] value seen for each row[1] key
            repeated = set()  # keys seen again with a different row[2] value
            my_file = file
            sample = my_file[:2]  # first two characters of the file name
            output_file = '{}_result.txt'.format(sample.lower())
            with open(my_file, mode='r') as in_file, \
                    open(output_file, mode='w') as out_file:
                next(in_file)  # skip the first line (presumably a header)
                reader = csv.reader(in_file, delimiter="\t")
                for i, line in enumerate(reader):
                    # The first tab-delimited field is split on commas.
                    row = line[0].split(',')
                    if row[1] in initial:
                        shouldAdd = initial[row[1]] != row[2]
                        if shouldAdd:
                            repeated.add(row[1])
                            out_file.write('{}\n'.format(row[1]))  # writing to file here no issues, no duplicates
                    else:
                        initial[row[1]] = row[2]
            #Issue is here. Why printing twice
            print('Total repeats for {} sample: {}'.format(sample, len(repeated)))
Prints out as follows:
Total repeats for AA sample: 123
Total repeats for BB sample: 45
Total repeats for AA sample: 123
Total repeats for BB sample: 45
Got 4 lines above. It should have stopped after line 2.
It appears to print twice because you save `_result.txt`s that also end in `.txt` in the same directory. Try saving your result files, say, with `.out` instead so `file.endswith('.txt')` does not pick them up.
You’re ignoring the `subdir` variable. The same filename exists in a subdirectory, but you’re processing the file in the root directory twice. You need to use `os.path.join(subdir, file)` to get the full pathname of the file.
# Walk the tree and process every file name ending in .csv, addressing each
# file by its full path so files in subdirectories are opened where they live.
for subdir, dirs, files in os.walk(root_directory):
    for file in files:
        if file.endswith('.csv'):
            # os.walk yields bare file names; join with subdir to get the path.
            fullname = os.path.join(subdir, file)
            initial = {}      # first row[2] value seen for each row[1] key
            repeated = set()  # keys seen again with a different row[2] value
            my_file = file
            sample = my_file[:2]  # first two characters of the file name
            # Write the result file next to the CSV it was derived from.
            output_file = os.path.join(subdir, '{}_result.txt'.format(sample.lower()))
            with open(fullname, mode='r') as in_file, \
                    open(output_file, mode='w') as out_file:
                next(in_file)  # skip the first line (presumably a header)
                reader = csv.reader(in_file, delimiter="\t")
                for i, line in enumerate(reader):
                    # The first tab-delimited field is split on commas.
                    row = line[0].split(',')
                    if row[1] in initial:
                        shouldAdd = initial[row[1]] != row[2]
                        if shouldAdd:
                            repeated.add(row[1])
                            out_file.write('{}\n'.format(row[1]))  # writing to file here no issues, no duplicates
                    else:
                        initial[row[1]] = row[2]
            #Issue is here. Why printing twice
            print('Total repeats for {}/{} sample: {}'.format(subdir, sample, len(repeated)))