Ignore words with punctuation when replacing them in text file using dictionary
Question:
I have a text file with multiple lines. I need to use a dictionary to replace words in that text file with new ones.
I need to exclude words with punctuation on them (e.g. cat! dance?).
I need to save the results in a new text file and also keep the formatting of the original one.
Here is what I have been trying to do so far in Python.
The result I want is a new text file with the following three lines:
I love my cat!
I love to drink water
Do I really want to dance?
#--NOTE(review): this is the asker's original, non-working attempt, kept exactly as posted.
#--Block indentation and the backslash of each newline escape ("n" / 'n') appear to have
#--been lost when the page was extracted -- TODO confirm against the original post.
#--create my custom dictionary
mydict = {
"cat": "dog",
"milk": "water", #--mistakenly had these switched in original post
"dance": "sit"
}
#--import list of punctuations
from string import punctuation
#--create sample data
f=open("mytextfile.txt", "w+")
#--NOTE(review): as written, "n" is the letter n; presumably a newline escape -- confirm
f.write("I love my cat!" + "n" + "I love to drink milk" + "n" + "Do I really want to dance?" + "n")
f.close()
#--read in sample data
i = open("mytextfile.txt", "r")
#--read() returns the whole file content as one single string
txtfile = i.read()
#--NOTE(review): `i.close` only references the method; without () it is never called
i.close
#print(txtfile)
#--create an empty list for new textfile
new_txtfile = []
#--for loop through each sentence
#--NOTE(review): txtfile is one string (see i.read() above), so iterating it yields
#--single characters rather than lines
for line in txtfile:
words = line.split()
for word in words:
new_word = []
#--NOTE(review): this is a substring test against the punctuation string, so a word
#--such as "cat!" that merely ends in punctuation does not match
if word in punctuation:
new_word = word #--do not replace word if it is punctuated
else:
#--dict.get() returns None for any word that is not a key of mydict
new_word = mydict.get(word) #--replace word using dictionary
new_line= 'n'.join(new_word)
#--NOTE(review): appends to `new_textfile`, but the list created above is `new_txtfile`
new_textfile.append(new_line)
print(new_txtfile)
#--save the new textfile
#--make sure the new lines are preserved
with open("my_new_file.txt", 'w') as file:
#--NOTE(review): iterates `new_textfile`, which is never defined (see the note above)
for row in new_textfile:
s = " ".join(map(str, row))
file.write(s+'n')
Answers:
You can just remove the extra characters (i.e. `\n`) and check whether the word matches an entry in the dict; if it does, map it across. If there is punctuation, the word will not match, so we do not need a separate rule for this: `"cat" != "dog"`, but also `"cat!" != "cat"`.
#--Replace whole words of a text file using a dictionary, in both directions
#--(key -> value and value -> key), and write the result to a new file.
#--A punctuated word such as "cat!" is left alone simply because it is not
#--equal to any dictionary entry.

#--import list of punctuations (kept from the question; not needed by this approach)
from string import punctuation

#--create my custom dictionary
my_dict = {
    "cat": "dog",
    "water": "milk",
    "dance": "sit",
}
#--reverse lookup (value -> key); if two keys share a value the last key wins
reverse_dict = {value: key for key, value in my_dict.items()}

#--create sample data
with open("mytextfile.txt", "w+") as f:
    f.write("I love my cat!" + "\n" + "I love to drink milk" + "\n" +
            "Do I really want to dance?" + "\n")

#--read in sample data
with open("mytextfile.txt", "r") as i:
    txtfile = i.readlines()

#--create an empty list for new textfile
new_txtfile = []

#--for loop through each sentence
for line in txtfile:
    print(line)
    #--drop the line break before splitting; it is added back after the loop
    words = line.replace("\n", "").split(" ")
    print(words)
    for index, word in enumerate(words):
        if word in my_dict:
            print(f'replacing {word} with {my_dict[word]}')
            words[index] = my_dict[word]
        elif word in reverse_dict:
            words[index] = reverse_dict[word]
    #--split(" ") always yields at least one item, so words[-1] is safe
    words[-1] = words[-1] + "\n"
    new_txtfile.append(" ".join(words))

#--save the new textfile
#--make sure the new lines are preserved
with open("my_new_file.txt", 'w') as file:
    file.writelines(new_txtfile)
You forgot the parentheses in your close call: it should be `i.close()` instead of `i.close`. The best practice is to use a context manager instead of explicit open/close calls. Read more.
Also, you try to append to a non-existent list `new_textfile` instead of `new_txtfile`.
Below you will find a solution with comments:
# Replace whole words of a text file using a dictionary and write the result
# to a new file. The bracketed markers refer to the numbered notes that
# follow this snippet.
from string import punctuation  # [1]

mydict = {
    "cat": "dog",
    "milk": "water",  # [2]
    "dance": "sit",
}

with open("mytextfile.txt", "w+") as f:  # [3]
    f.write(
        "I love my cat!" + "\n"
        + "I love to drink milk" + "\n"
        + "Do I really want to dance?" + "\n"
    )

new_txtfile = []
with open("mytextfile.txt", "r") as file:
    for line in file:
        # Strip the line break so it does not stick to the last word.
        words = line.replace("\n", "").split()
        for index, word in enumerate(words):
            replacement = mydict.get(word)
            # Only replace exact dictionary matches that do not end in punctuation.
            if replacement and word[-1] not in punctuation:  # [4]
                words[index] = replacement
        new_txtfile.append(" ".join(words))

# Join with real line breaks so the original line structure is preserved.
with open("my_new_file.txt", 'w') as file:
    file.write("\n".join(new_txtfile))
- [1] All imports should be at the top of the file. Read more.
- [2] I swapped the key and the value here. It makes more sense in this case.
- [3] Replaced open/close with a context manager to avoid errors (like missing brackets 😉)
- [4] I check whether the word is in the dict. If it is, and its last character is not punctuation, I replace the word at that index in the `words` list with the value from the dict.
The final file output:
I love my cat!
I love to drink water
Do I really want to dance?
You don’t really need to consider punctuation because, for example, ‘cat!’ is not equal to ‘cat’. So, all you need is this:
# Copy INFILE to OUTFILE line by line, replacing every whitespace-separated
# word that is exactly equal to a key of `mydict` with the mapped value.
# A word with punctuation attached (e.g. "cat!") is not equal to any key,
# so it is left untouched without a dedicated punctuation check.
INFILE = "mytextfile.txt"
OUTFILE = "my_new_file.txt"

mydict = {
    "cat": "dog",
    "water": "milk",
    "dance": "sit",
}

# Create the sample input; print() terminates each line with a newline.
with open(INFILE, "w") as txt:
    print("I love my cat!", file=txt)
    print("I love to drink milk", file=txt)
    print("Do I really want to dance?", file=txt)

with open(INFILE) as txt, open(OUTFILE, 'w') as newtext:
    for line in map(str.strip, txt):
        # dict.get(word, word) falls back to the word itself when it is not a key.
        words = [mydict.get(word, word) for word in line.split()]
        print(' '.join(words), file=newtext)
Output (file content):
I love my cat!
I love to drink milk
Do I really want to dance?
Note:
Of course, given the dictionary as shown in the question, this won’t modify the file because none of the keys (words) will match
I have a text file with multiple lines. I need to use a dictionary to replace words in that text file with new ones.
I need to exclude words with punctuation on them (e.g. cat! dance?).
I need to save the results in a new text file and also keep the formatting of the original one.
Here is what I have been trying to do so far in Python.
The result I want is a new text file with the following three lines:
I love my cat!
I love to drink water
Do I really want to dance?
#--NOTE(review): this is the asker's original, non-working attempt, kept exactly as posted.
#--Block indentation and the backslash of each newline escape ("n" / 'n') appear to have
#--been lost when the page was extracted -- TODO confirm against the original post.
#--create my custom dictionary
mydict = {
"cat": "dog",
"milk": "water", #--mistakenly had these switched in original post
"dance": "sit"
}
#--import list of punctuations
from string import punctuation
#--create sample data
f=open("mytextfile.txt", "w+")
#--NOTE(review): as written, "n" is the letter n; presumably a newline escape -- confirm
f.write("I love my cat!" + "n" + "I love to drink milk" + "n" + "Do I really want to dance?" + "n")
f.close()
#--read in sample data
i = open("mytextfile.txt", "r")
#--read() returns the whole file content as one single string
txtfile = i.read()
#--NOTE(review): `i.close` only references the method; without () it is never called
i.close
#print(txtfile)
#--create an empty list for new textfile
new_txtfile = []
#--for loop through each sentence
#--NOTE(review): txtfile is one string (see i.read() above), so iterating it yields
#--single characters rather than lines
for line in txtfile:
words = line.split()
for word in words:
new_word = []
#--NOTE(review): this is a substring test against the punctuation string, so a word
#--such as "cat!" that merely ends in punctuation does not match
if word in punctuation:
new_word = word #--do not replace word if it is punctuated
else:
#--dict.get() returns None for any word that is not a key of mydict
new_word = mydict.get(word) #--replace word using dictionary
new_line= 'n'.join(new_word)
#--NOTE(review): appends to `new_textfile`, but the list created above is `new_txtfile`
new_textfile.append(new_line)
print(new_txtfile)
#--save the new textfile
#--make sure the new lines are preserved
with open("my_new_file.txt", 'w') as file:
#--NOTE(review): iterates `new_textfile`, which is never defined (see the note above)
for row in new_textfile:
s = " ".join(map(str, row))
file.write(s+'n')
You can just remove the extra characters (i.e. `\n`) and check whether the word matches an entry in the dict; if it does, map it across. If there is punctuation, the word will not match, so we do not need a separate rule for this: `"cat" != "dog"`, but also `"cat!" != "cat"`.
#--Replace whole words of a text file using a dictionary, in both directions
#--(key -> value and value -> key), and write the result to a new file.
#--A punctuated word such as "cat!" is left alone simply because it is not
#--equal to any dictionary entry.

#--import list of punctuations (kept from the question; not needed by this approach)
from string import punctuation

#--create my custom dictionary
my_dict = {
    "cat": "dog",
    "water": "milk",
    "dance": "sit",
}
#--reverse lookup (value -> key); if two keys share a value the last key wins
reverse_dict = {value: key for key, value in my_dict.items()}

#--create sample data
with open("mytextfile.txt", "w+") as f:
    f.write("I love my cat!" + "\n" + "I love to drink milk" + "\n" +
            "Do I really want to dance?" + "\n")

#--read in sample data
with open("mytextfile.txt", "r") as i:
    txtfile = i.readlines()

#--create an empty list for new textfile
new_txtfile = []

#--for loop through each sentence
for line in txtfile:
    print(line)
    #--drop the line break before splitting; it is added back after the loop
    words = line.replace("\n", "").split(" ")
    print(words)
    for index, word in enumerate(words):
        if word in my_dict:
            print(f'replacing {word} with {my_dict[word]}')
            words[index] = my_dict[word]
        elif word in reverse_dict:
            words[index] = reverse_dict[word]
    #--split(" ") always yields at least one item, so words[-1] is safe
    words[-1] = words[-1] + "\n"
    new_txtfile.append(" ".join(words))

#--save the new textfile
#--make sure the new lines are preserved
with open("my_new_file.txt", 'w') as file:
    file.writelines(new_txtfile)
You forgot the parentheses in your close call: it should be `i.close()` instead of `i.close`. The best practice is to use a context manager instead of explicit open/close calls. Read more.
Also, you try to append to a non-existent list `new_textfile` instead of `new_txtfile`.
Below you will find a solution with comments:
# Replace whole words of a text file using a dictionary and write the result
# to a new file. The bracketed markers refer to the numbered notes that
# follow this snippet.
from string import punctuation  # [1]

mydict = {
    "cat": "dog",
    "milk": "water",  # [2]
    "dance": "sit",
}

with open("mytextfile.txt", "w+") as f:  # [3]
    f.write(
        "I love my cat!" + "\n"
        + "I love to drink milk" + "\n"
        + "Do I really want to dance?" + "\n"
    )

new_txtfile = []
with open("mytextfile.txt", "r") as file:
    for line in file:
        # Strip the line break so it does not stick to the last word.
        words = line.replace("\n", "").split()
        for index, word in enumerate(words):
            replacement = mydict.get(word)
            # Only replace exact dictionary matches that do not end in punctuation.
            if replacement and word[-1] not in punctuation:  # [4]
                words[index] = replacement
        new_txtfile.append(" ".join(words))

# Join with real line breaks so the original line structure is preserved.
with open("my_new_file.txt", 'w') as file:
    file.write("\n".join(new_txtfile))
- [1] All imports should be at the top of the file. Read more.
- [2] I swapped the key and the value here. It makes more sense in this case.
- [3] Replaced open/close with a context manager to avoid errors (like missing brackets 😉)
- [4] I check whether the word is in the dict. If it is, and its last character is not punctuation, I replace the word at that index in the `words` list with the value from the dict.
The final file output:
I love my cat!
I love to drink water
Do I really want to dance?
You don’t really need to consider punctuation because, for example, ‘cat!’ is not equal to ‘cat’. So, all you need is this:
# Copy INFILE to OUTFILE line by line, replacing every whitespace-separated
# word that is exactly equal to a key of `mydict` with the mapped value.
# A word with punctuation attached (e.g. "cat!") is not equal to any key,
# so it is left untouched without a dedicated punctuation check.
INFILE = "mytextfile.txt"
OUTFILE = "my_new_file.txt"

mydict = {
    "cat": "dog",
    "water": "milk",
    "dance": "sit",
}

# Create the sample input; print() terminates each line with a newline.
with open(INFILE, "w") as txt:
    print("I love my cat!", file=txt)
    print("I love to drink milk", file=txt)
    print("Do I really want to dance?", file=txt)

with open(INFILE) as txt, open(OUTFILE, 'w') as newtext:
    for line in map(str.strip, txt):
        # dict.get(word, word) falls back to the word itself when it is not a key.
        words = [mydict.get(word, word) for word in line.split()]
        print(' '.join(words), file=newtext)
Output (file content):
I love my cat!
I love to drink milk
Do I really want to dance?
Note:
Of course, given the dictionary as shown in the question, this won’t modify the file because none of the keys (words) will match