How can I concatenate all .txt files in a folder and separate all of them with a line?
Question:
I’m very new to using Python, and I suspect this is easier than I think, but I have a lot (more than 200) .txt files in a folder that I would like to concatenate in a single one.
The problem: I want each .txt file to be separated by a new line in this new file.
I’m on Mac, by the way.
I saw some options online that require a list of all file names. I just have a lot of them, and would like to save some time by doing it differently. Any idea?
Answers:
The OS only answer
It’s as simple as:
import os  # OS is the only module you'll need

# Gather the contents of every .txt file in the folder into one string.
l = os.listdir('text folder')  # Folder to access
text = ""  # Main string
for file in l:
    if file.endswith(".txt"):  # Check file extension
        with open(f'text folder/{file}', "r") as t:
            text += t.read()  # Read file and add to the main string
l
is the list of file names in the folder. text
is the full text from all the .txt
files. We use os.listdir()
to retrieve all items from a folder.
You can use the glob
package to get all the text files from a given folder. Then, iterate each file and gather the contents into a list. Finally, write the contents separated by a newline in the output file using the .join()
method of Python str
.
Here is an example:
from glob import glob


def main():
    """Concatenate every folder/*.txt file into out_file.txt.

    The files' contents are joined with a newline separator.
    """
    txt_files = glob("folder/*.txt")
    contents = []
    for file in txt_files:
        with open(file) as f_in:
            contents.append(f_in.read())
    with open("out_file.txt", mode="w") as f_out:
        # "\n" (a real newline, not the letter "n") separates the files.
        f_out.write("\n".join(contents))


if __name__ == "__main__":
    main()
If you have lots of files or/and the files are huge, consider using a lazy version to avoid saturating the RAM:
from glob import glob


def gen_contents(txt_files: list[str]):
    """Lazily yield each file's lines, followed by a newline separator.

    Streaming the lines keeps memory usage flat even for huge files.
    """
    for file in txt_files:
        with open(file) as f_in:
            yield from f_in.readlines()
        # Real newline ("\n", not the letter "n") between files.
        yield "\n"


def main():
    """Write every *.txt in the current folder into result.txt."""
    txt_files = glob("*.txt")
    with open("result.txt", mode="w") as f_out:
        contents = gen_contents(txt_files)
        f_out.writelines(contents)


if __name__ == "__main__":
    main()
As you would like to concatenate whole files you can read in the contents of one file in a single statement, and write it out in a single statement, like this:
import glob
import os


def main(folder='H:\\'):
    """Concatenate every *.txt file in *folder* into out.txt there.

    *folder* defaults to the original hard-coded drive, so existing
    callers keep working; each input file is followed by a blank
    separator line.
    """
    os.chdir(folder)
    with open('out.txt', 'w') as outfile:
        for fname in glob.glob('*.txt'):
            if fname == 'out.txt':  # don't feed the output back into itself
                continue
            with open(fname, 'r') as infile:
                txt = infile.readlines()
                outfile.writelines(txt)
            outfile.write('\n')  # separator line


if __name__ == '__main__':
    main()
Using the with
statement takes care of closing the file properly after the with-block is left, or even if the script crashes.
I’m very new to using Python, and I suspect this is easier than I think, but I have a lot (more than 200) .txt files in a folder that I would like to concatenate in a single one.
The problem: I want each .txt file to be separated by a new line in this new file.
I’m on Mac, by the way.
I saw some options online that require a list of all file names. I just have a lot of them, and would like to save some time by doing it differently. Any idea?
The OS only answer
It’s as simple as:
import os  # OS is the only module you'll need

# Gather the contents of every .txt file in the folder into one string.
l = os.listdir('text folder')  # Folder to access
text = ""  # Main string
for file in l:
    if file.endswith(".txt"):  # Check file extension
        with open(f'text folder/{file}', "r") as t:
            text += t.read()  # Read file and add to the main string
l
is the list of file names in the folder. text
is the full text from all the .txt
files. We use os.listdir()
to retrieve all items from a folder.
You can use the glob
package to get all the text files from a given folder. Then, iterate each file and gather the contents into a list. Finally, write the contents separated by a newline in the output file using the .join()
method of Python str
.
Here is an example:
from glob import glob


def main():
    """Concatenate every folder/*.txt file into out_file.txt.

    The files' contents are joined with a newline separator.
    """
    txt_files = glob("folder/*.txt")
    contents = []
    for file in txt_files:
        with open(file) as f_in:
            contents.append(f_in.read())
    with open("out_file.txt", mode="w") as f_out:
        # "\n" (a real newline, not the letter "n") separates the files.
        f_out.write("\n".join(contents))


if __name__ == "__main__":
    main()
If you have lots of files or/and the files are huge, consider using a lazy version to avoid saturating the RAM:
from glob import glob


def gen_contents(txt_files: list[str]):
    """Lazily yield each file's lines, followed by a newline separator.

    Streaming the lines keeps memory usage flat even for huge files.
    """
    for file in txt_files:
        with open(file) as f_in:
            yield from f_in.readlines()
        # Real newline ("\n", not the letter "n") between files.
        yield "\n"


def main():
    """Write every *.txt in the current folder into result.txt."""
    txt_files = glob("*.txt")
    with open("result.txt", mode="w") as f_out:
        contents = gen_contents(txt_files)
        f_out.writelines(contents)


if __name__ == "__main__":
    main()
As you would like to concatenate whole files you can read in the contents of one file in a single statement, and write it out in a single statement, like this:
import glob
import os


def main(folder='H:\\'):
    """Concatenate every *.txt file in *folder* into out.txt there.

    *folder* defaults to the original hard-coded drive, so existing
    callers keep working; each input file is followed by a blank
    separator line.
    """
    os.chdir(folder)
    with open('out.txt', 'w') as outfile:
        for fname in glob.glob('*.txt'):
            if fname == 'out.txt':  # don't feed the output back into itself
                continue
            with open(fname, 'r') as infile:
                txt = infile.readlines()
                outfile.writelines(txt)
            outfile.write('\n')  # separator line


if __name__ == '__main__':
    main()
Using the with
statement takes care of closing the file properly after the with-block is left, or even if the script crashes.