Insert a line into the middle of a text file in Python
Question:
I want to insert a line into the middle of a text file in Python, so I tried
with open(erroredFilepath, 'r+t') as erroredFile:
fileContents = erroredFile.read()
if 'insert_here' in fileContents:
insertString.join(fileContents.rsplit('insert_here'))
erroredFile.truncate()
erroredFile.write(insertString)
However, insertString
got written at the end of the file. Why?
As an aside, I tried to simplify things by just using strings instead of files.
'123456789'.join('qwertyuiop'.split('y'))
gives
'qwert123456789uiop'
what happened to the ‘y’ ?
Answers:
Although OS-level details of files vary, in general, when you have a file open in r+
mode and do some read or write operation, the “current position” is left after the last read or write.
When you did:
fileContents = erroredFile.read()
the stream erroredFile
was read to the end, so the current position is now “at the end”.
The truncate function defaults to using the current position as the size to which to truncate. Assume the file is 100 bytes long, so that the current position “at the end” is byte 100. Then:
erroredFile.truncate()
means “make the file 100 bytes long”—which it already is.
The current position remains at the end of the file, so the subsequent write
appends.
Presumably you wanted to seek back to the beginning of the file, and/or use truncate(0)
(note that just truncate(0)
will, at least on Unix-like systems, leave the seek position at the end of the file so that the next write
leaves a hole where the original data used to be). You could also be slightly more clever: if you’re inserting, just overwrite-and-extend in place (no truncate
is required at all).
(Joel Hinz already answered the second question, I see.)
If you want to write in the middle of the file use the fileinput
module.
import fileinput
for line in fileinput.input(erroredFilepath, inplace=True):
print("something", end ="")
from the docs
if the keyword argument inplace=True
is passed to fileinput.input()
or to the FileInput
constructor, the file is moved to a backup file and standard output is directed to the input file (if a file of the same name as the backup file already exists, it will be replaced silently).
Whatever you print
will go in the file. So you have to read and print
every line and modify whichever you want to replace. Also, when print
ing existing lines, use end=""
as it will prevent print
from adding an extra newline.
Not a Python answer but it may widen your horizon. Use sed
:
$ cat input.txt
foo
bar
baz
INSERT HERE
qux
quux
$ sed '/INSERT HERE/anew stuff' < input.txt
foo
bar
baz
INSERT HERE
new stuff
qux
quux
The command a
will append the text on a new line. If you want to insert the text before the match, use the command i
:
$ sed '/INSERT HERE/inew stuff' < input.txt
foo
bar
baz
new stuff
INSERT HERE
qux
quux
Why not try a two-step solution? In the first step, you read the file and fix the string; in the second step, you rewrite the file. It is probably not the most efficient algorithm, but I think it works.
# Step 1: read the whole file into memory.
with open(erroredFilepath, 'r') as erroredFile:
    fileContents = erroredFile.read()

# str.replace() returns a NEW string (strings are immutable), so the result
# has to be assigned back; calling it as a bare statement changes nothing.
fileContents = fileContents.replace('insert_here', 'insert_string')

# Step 2: reopen the same path in 'w' mode (which truncates the file) and
# write the modified text back.
with open(erroredFilepath, 'w') as fixingFile:
    fixingFile.write(fileContents)
test.txt
a
b
c
d
e
1. Read into a list then overwrite
def match_then_insert(filename, match, content):
    """Insert ``content`` as a new line before the first line equal to ``match``.

    The whole file is read into a list of lines, the new line is inserted,
    and the file is rewritten. Lines are re-joined with '\\n', so the
    rewritten file has no trailing newline.

    Raises:
        ValueError: if no line of the file equals ``match``.
    """
    with open(filename) as src:
        lines = src.read().splitlines()
    # list.index raises ValueError when the line is missing; this happens
    # before the file is reopened for writing, so the file is left untouched.
    index = lines.index(match)
    lines.insert(index, content)
    with open(filename, mode='w') as dst:
        dst.write('\n'.join(lines))
match_then_insert('test.txt', match='c', content='123')
Result
a
b
123
c
d
e
2. FileInput
from fileinput import FileInput
def match_then_insert(filename, match, content):
    """Insert ``content`` as a new line before every line containing ``match``.

    Uses ``FileInput(..., inplace=True)``: while iterating, standard output
    is redirected into the file, so whatever is printed becomes the new
    file contents. ``match`` is tested as a substring of each line.
    """
    # The context manager closes the FileInput, which also restores stdout.
    with FileInput(filename, inplace=True) as lines:
        for line in lines:
            if match in line:
                line = content + '\n' + line
            # `line` already ends with its own newline, hence end=''.
            print(line, end='')  # Redirected to the original file
match_then_insert('test.txt', match='c', content='123')
3. seek
def match_then_insert(filename, match, content):
    """Insert ``content`` as a new line before the first line equal to ``match``.

    The file is opened in binary read/write mode and scanned line by line.
    At the first matching line, everything from that line to the end of the
    file is read, then rewritten after the inserted line. Bytes before the
    match are never rewritten. If no line matches, the file is unchanged.
    """
    with open(filename, mode='rb+') as f:
        while True:
            line_start = f.tell()
            line = f.readline()
            if not line:
                # readline() returns b'' at end of file (it does not raise),
                # so this is the only reliable stop condition.
                break
            # Compare the line without its line terminator.
            if line.decode().splitlines()[0] == match:
                rest = line + f.read()
                f.seek(line_start)
                # The new data is longer than what it overwrites, so the
                # file simply grows; no truncate() is needed.
                f.write((content + '\n').encode())
                f.write(rest)
                break
match_then_insert('test.txt', match='c', content='123')
Compare
Method
Time/s
Read into a list then overwrite
54.42
FileInput
121.59
seek
3.53
from timeit import timeit
from fileinput import FileInput
def init_txt():
    """(Re)create ``test.txt`` in the current directory with lines a..e."""
    with open('test.txt', mode='w') as f:
        f.write('\n'.join(['a', 'b', 'c', 'd', 'e']))
def f1(filename='test.txt', match='c', content='123'):
    """Benchmark case 1: read all lines into a list, insert, rewrite the file.

    ``content`` is inserted before the first line equal to ``match``.

    Raises:
        ValueError: if no line of the file equals ``match``.
    """
    with open(filename) as src:
        lines = src.read().splitlines()
    index = lines.index(match)
    lines.insert(index, content)
    with open(filename, mode='w') as dst:
        dst.write('\n'.join(lines))
def f2(filename='test.txt', match='c', content='123'):
    """Benchmark case 2: rewrite the file in place through ``FileInput``.

    ``content`` is inserted as a new line before every line that contains
    ``match`` as a substring.
    """
    # With inplace=True, stdout is redirected into the file while iterating;
    # leaving the `with` block closes the FileInput and restores stdout.
    with FileInput(filename, inplace=True) as lines:
        for line in lines:
            if match in line:
                line = content + '\n' + line
            # `line` keeps its own newline, so suppress print's.
            print(line, end='')
def f3(filename='test.txt', match='c', content='123'):
    """Benchmark case 3: seek to the matching line and rewrite only the tail.

    ``content`` is inserted as a new line before the first line equal to
    ``match``. If no line matches, the file is left unchanged.
    """
    with open(filename, mode='rb+') as f:
        while True:
            line_start = f.tell()
            line = f.readline()
            if not line:
                # readline() signals end of file with b'', not an exception.
                break
            # Compare the line without its line terminator.
            if line.decode().splitlines()[0] == match:
                rest = line + f.read()
                f.seek(line_start)
                # Writing more bytes than are overwritten extends the file,
                # so no truncate() is required.
                f.write((content + '\n').encode())
                f.write(rest)
                break
# Time each strategy in turn: recreate the fixture file, then run the
# strategy 1000 times and print the total elapsed seconds.
for strategy in (f1, f2, f3):
    init_txt()
    print(timeit(strategy, number=1000))
I want to insert a line into the middle of a text file in Python, so I tried
with open(erroredFilepath, 'r+t') as erroredFile:
fileContents = erroredFile.read()
if 'insert_here' in fileContents:
insertString.join(fileContents.rsplit('insert_here'))
erroredFile.truncate()
erroredFile.write(insertString)
However, insertString
got written at the end of the file. Why?
As an aside, I tried to simplify things by just using strings instead of files.
'123456789'.join('qwertyuiop'.split('y'))
gives
'qwert123456789uiop'
what happened to the ‘y’ ?
Although OS-level details of files vary, in general, when you have a file open in r+
mode and do some read or write operation, the “current position” is left after the last read or write.
When you did:
fileContents = erroredFile.read()
the stream erroredFile
was read to the end, so the current position is now “at the end”.
The truncate function defaults to using the current position as the size to which to truncate. Assume the file is 100 bytes long, so that the current position “at the end” is byte 100. Then:
erroredFile.truncate()
means “make the file 100 bytes long”—which it already is.
The current position remains at the end of the file, so the subsequent write
appends.
Presumably you wanted to seek back to the beginning of the file, and/or use truncate(0)
(note that just truncate(0)
will, at least on Unix-like systems, leave the seek position at the end of the file so that the next write
leaves a hole where the original data used to be). You could also be slightly more clever: if you’re inserting, just overwrite-and-extend in place (no truncate
is required at all).
(Joel Hinz already answered the second question, I see.)
If you want to write in the middle of the file use the fileinput
module.
import fileinput
for line in fileinput.input(erroredFilepath, inplace=True):
print("something", end ="")
from the docs
if the keyword argument
inplace=True
is passed to fileinput.input()
or to the FileInput
constructor, the file is moved to a backup file and standard output is directed to the input file (if a file of the same name as the backup file already exists, it will be replaced silently).
Whatever you print
will go in the file. So you have to read and print
every line and modify whichever you want to replace. Also, when print
ing existing lines, use end=""
as it will prevent print
from adding an extra newline.
Not a Python answer but it may widen your horizon. Use sed
:
$ cat input.txt
foo
bar
baz
INSERT HERE
qux
quux
$ sed '/INSERT HERE/anew stuff' < input.txt
foo
bar
baz
INSERT HERE
new stuff
qux
quux
The command a
will append the text on a new line. If you want to insert the text before the match, use the command i
:
$ sed '/INSERT HERE/inew stuff' < input.txt
foo
bar
baz
new stuff
INSERT HERE
qux
quux
Why not try a two-step solution? In the first step, you read the file and fix the string; in the second step, you rewrite the file. It is probably not the most efficient algorithm, but I think it works.
# Step 1: read the whole file into memory.
with open(erroredFilepath, 'r') as erroredFile:
    fileContents = erroredFile.read()

# str.replace() returns a NEW string (strings are immutable), so the result
# has to be assigned back; calling it as a bare statement changes nothing.
fileContents = fileContents.replace('insert_here', 'insert_string')

# Step 2: reopen the same path in 'w' mode (which truncates the file) and
# write the modified text back.
with open(erroredFilepath, 'w') as fixingFile:
    fixingFile.write(fileContents)
test.txt
a
b
c
d
e
1. Read into a list then overwrite
def match_then_insert(filename, match, content):
    """Insert ``content`` as a new line before the first line equal to ``match``.

    The whole file is read into a list of lines, the new line is inserted,
    and the file is rewritten. Lines are re-joined with '\\n', so the
    rewritten file has no trailing newline.

    Raises:
        ValueError: if no line of the file equals ``match``.
    """
    with open(filename) as src:
        lines = src.read().splitlines()
    # list.index raises ValueError when the line is missing; this happens
    # before the file is reopened for writing, so the file is left untouched.
    index = lines.index(match)
    lines.insert(index, content)
    with open(filename, mode='w') as dst:
        dst.write('\n'.join(lines))
match_then_insert('test.txt', match='c', content='123')
Result
a
b
123
c
d
e
2. FileInput
from fileinput import FileInput
def match_then_insert(filename, match, content):
    """Insert ``content`` as a new line before every line containing ``match``.

    Uses ``FileInput(..., inplace=True)``: while iterating, standard output
    is redirected into the file, so whatever is printed becomes the new
    file contents. ``match`` is tested as a substring of each line.
    """
    # The context manager closes the FileInput, which also restores stdout.
    with FileInput(filename, inplace=True) as lines:
        for line in lines:
            if match in line:
                line = content + '\n' + line
            # `line` already ends with its own newline, hence end=''.
            print(line, end='')  # Redirected to the original file
match_then_insert('test.txt', match='c', content='123')
3. seek
def match_then_insert(filename, match, content):
    """Insert ``content`` as a new line before the first line equal to ``match``.

    The file is opened in binary read/write mode and scanned line by line.
    At the first matching line, everything from that line to the end of the
    file is read, then rewritten after the inserted line. Bytes before the
    match are never rewritten. If no line matches, the file is unchanged.
    """
    with open(filename, mode='rb+') as f:
        while True:
            line_start = f.tell()
            line = f.readline()
            if not line:
                # readline() returns b'' at end of file (it does not raise),
                # so this is the only reliable stop condition.
                break
            # Compare the line without its line terminator.
            if line.decode().splitlines()[0] == match:
                rest = line + f.read()
                f.seek(line_start)
                # The new data is longer than what it overwrites, so the
                # file simply grows; no truncate() is needed.
                f.write((content + '\n').encode())
                f.write(rest)
                break
match_then_insert('test.txt', match='c', content='123')
Compare
Method | Time/s |
---|---|
Read into a list then overwrite | 54.42 |
FileInput | 121.59 |
seek | 3.53 |
from timeit import timeit
from fileinput import FileInput
def init_txt():
    """(Re)create ``test.txt`` in the current directory with lines a..e."""
    with open('test.txt', mode='w') as f:
        f.write('\n'.join(['a', 'b', 'c', 'd', 'e']))
def f1(filename='test.txt', match='c', content='123'):
    """Benchmark case 1: read all lines into a list, insert, rewrite the file.

    ``content`` is inserted before the first line equal to ``match``.

    Raises:
        ValueError: if no line of the file equals ``match``.
    """
    with open(filename) as src:
        lines = src.read().splitlines()
    index = lines.index(match)
    lines.insert(index, content)
    with open(filename, mode='w') as dst:
        dst.write('\n'.join(lines))
def f2(filename='test.txt', match='c', content='123'):
    """Benchmark case 2: rewrite the file in place through ``FileInput``.

    ``content`` is inserted as a new line before every line that contains
    ``match`` as a substring.
    """
    # With inplace=True, stdout is redirected into the file while iterating;
    # leaving the `with` block closes the FileInput and restores stdout.
    with FileInput(filename, inplace=True) as lines:
        for line in lines:
            if match in line:
                line = content + '\n' + line
            # `line` keeps its own newline, so suppress print's.
            print(line, end='')
def f3(filename='test.txt', match='c', content='123'):
    """Benchmark case 3: seek to the matching line and rewrite only the tail.

    ``content`` is inserted as a new line before the first line equal to
    ``match``. If no line matches, the file is left unchanged.
    """
    with open(filename, mode='rb+') as f:
        while True:
            line_start = f.tell()
            line = f.readline()
            if not line:
                # readline() signals end of file with b'', not an exception.
                break
            # Compare the line without its line terminator.
            if line.decode().splitlines()[0] == match:
                rest = line + f.read()
                f.seek(line_start)
                # Writing more bytes than are overwritten extends the file,
                # so no truncate() is required.
                f.write((content + '\n').encode())
                f.write(rest)
                break
# Time each strategy in turn: recreate the fixture file, then run the
# strategy 1000 times and print the total elapsed seconds.
for strategy in (f1, f2, f3):
    init_txt()
    print(timeit(strategy, number=1000))