How to search for a string in text files?
Question:
I want to check if a string is in a text file. If it is, do X. If it’s not, do Y. However, this code always returns True
for some reason. Can anyone see what is wrong?
def check():
datafile = file('example.txt')
found = False
for line in datafile:
if blabla in line:
found = True
break
check()
if True:
print "true"
else:
print "false"
Answers:
if True:
print "true"
This always happens because True is always True.
You want something like this:
if check():
print "true"
else:
print "false"
Good luck!
As Jeffrey Said, you are not checking the value of check()
. In addition, your check()
function is not returning anything. Note the difference:
def check():
with open('example.txt') as f:
datafile = f.readlines()
found = False # This isn't really necessary
for line in datafile:
if blabla in line:
# found = True # Not necessary
return True
return False # Because you finished the search without finding
Then you can test the output of check()
:
if check():
print('True')
else:
print('False')
Your check
function should return the found
boolean and use that to determine what to print.
def check():
datafile = file('example.txt')
found = False
for line in datafile:
if blabla in line:
found = True
break
return found
found = check()
if found:
print "true"
else:
print "false"
the second block could also be condensed to:
if check():
print "true"
else:
print "false"
found = False
def check():
datafile = file('example.txt')
for line in datafile:
if blabla in line:
found = True
break
return found
if check():
print "true"
else:
print "false"
Two problems:
-
Your function does not return anything; a function that does not explicitly return anything returns None (which is falsy)
-
True is always True – you are not checking the result of your function
.
def check(fname, txt):
with open(fname) as dataf:
return any(txt in line for line in dataf)
if check('example.txt', 'blabla'):
print "true"
else:
print "false"
The reason why you always got True
has already been given, so I’ll just offer another suggestion:
If your file is not too large, you can read it into a string, and just use that (easier and often faster than reading and checking line per line):
with open('example.txt') as f:
if 'blabla' in f.read():
print("true")
Another trick: you can alleviate the possible memory problems by using mmap.mmap()
to create a “string-like” object that uses the underlying file (instead of reading the whole file in memory):
import mmap
with open('example.txt') as f:
s = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
if s.find('blabla') != -1:
print('true')
NOTE: in python 3, mmaps behave like bytearray
objects rather than strings, so the subsequence you look for with find()
has to be a bytes
object rather than a string as well, eg. s.find(b'blabla')
:
#!/usr/bin/env python3
import mmap
with open('example.txt', 'rb', 0) as file,
mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
if s.find(b'blabla') != -1:
print('true')
You could also use regular expressions on mmap
e.g., case-insensitive search: if re.search(br'(?i)blabla', s):
Here’s another way to possibly answer your question using the find function which gives you a literal numerical value of where something truly is
open('file', 'r').read().find('')
in find write the word you want to find
and 'file'
stands for your file name
How to search the text in the file and Returns an file path in which the word is found
(Как искать часть текста в файле и возвращять путь к файлу в котором это слово найдено)
import os
import re
class Searcher:
def __init__(self, path, query):
self.path = path
if self.path[-1] != '/':
self.path += '/'
self.path = self.path.replace('/', '\')
self.query = query
self.searched = {}
def find(self):
for root, dirs, files in os.walk( self.path ):
for file in files:
if re.match(r'.*?.txt$', file) is not None:
if root[-1] != '\':
root += '\'
f = open(root + file, 'rt')
txt = f.read()
f.close()
count = len( re.findall( self.query, txt ) )
if count > 0:
self.searched[root + file] = count
def getResults(self):
return self.searched
In Main()
# -*- coding: UTF-8 -*-
import sys
from search import Searcher
path = 'c:\temp\'
search = 'search string'
if __name__ == '__main__':
if len(sys.argv) == 3:
# создаем объект поисковика и передаем ему аргументы
Search = Searcher(sys.argv[1], sys.argv[2])
else:
Search = Searcher(path, search)
# начать поиск
Search.find()
# получаем результат
results = Search.getResults()
# выводим результат
print 'Found ', len(results), ' files:'
for file, count in results.items():
print 'File: ', file, ' Found entries:' , count
I made a little function for this purpose. It searches for a word in the input file and then adds it to the output file.
def searcher(outf, inf, string):
with open(outf, 'a') as f1:
if string in open(inf).read():
f1.write(string)
- outf is the output file
- inf is the input file
- string is of course, the desired string that you wish to find and add to outf.
If user wants to search for the word in given text file.
fopen = open('logfile.txt',mode='r+')
fread = fopen.readlines()
x = input("Enter the search string: ")
for line in fread:
if x in line:
print(line)
found = False
def check():
datafile = file('example.txt')
for line in datafile:
if "blabla" in line:
found = True
break
return found
if check():
print "found"
else:
print "not found"
Here’s another. Takes an absolute file path and a given string and passes it to word_find(), uses readlines() method on the given file within the enumerate() method which gives an iterable count as it traverses line by line, in the end giving you the line with the matching string, plus the given line number. Cheers.
def word_find(file, word):
with open(file, 'r') as target_file:
for num, line in enumerate(target_file.readlines(), 1):
if str(word) in line:
print(f'<Line {num}> {line}')
else:
print(f'> {word} not found.')
if __name__ == '__main__':
file_to_process = '/path/to/file'
string_to_find = input()
word_find(file_to_process, string_to_find)
"found" needs to be created as global variable in the function as "if else" statement is out of the function. You also don’t need to use "break" to break the loop code.
The following should work to find out if the text file has desired string.
with open('text_text.txt') as f:
datafile = f.readlines()
def check():
global found
found = False
for line in datafile:
if 'the' in line:
found = True
check()
if found == True:
print("True")
else:
print("False")
I want to check if a string is in a text file. If it is, do X. If it’s not, do Y. However, this code always returns True
for some reason. Can anyone see what is wrong?
def check():
datafile = file('example.txt')
found = False
for line in datafile:
if blabla in line:
found = True
break
check()
if True:
print "true"
else:
print "false"
if True:
print "true"
This always happens because True is always True.
You want something like this:
if check():
print "true"
else:
print "false"
Good luck!
As Jeffrey Said, you are not checking the value of check()
. In addition, your check()
function is not returning anything. Note the difference:
def check():
with open('example.txt') as f:
datafile = f.readlines()
found = False # This isn't really necessary
for line in datafile:
if blabla in line:
# found = True # Not necessary
return True
return False # Because you finished the search without finding
Then you can test the output of check()
:
if check():
print('True')
else:
print('False')
Your check
function should return the found
boolean and use that to determine what to print.
def check():
datafile = file('example.txt')
found = False
for line in datafile:
if blabla in line:
found = True
break
return found
found = check()
if found:
print "true"
else:
print "false"
the second block could also be condensed to:
if check():
print "true"
else:
print "false"
found = False
def check():
datafile = file('example.txt')
for line in datafile:
if blabla in line:
found = True
break
return found
if check():
print "true"
else:
print "false"
Two problems:
-
Your function does not return anything; a function that does not explicitly return anything returns None (which is falsy)
-
True is always True – you are not checking the result of your function
.
def check(fname, txt):
with open(fname) as dataf:
return any(txt in line for line in dataf)
if check('example.txt', 'blabla'):
print "true"
else:
print "false"
The reason why you always got True
has already been given, so I’ll just offer another suggestion:
If your file is not too large, you can read it into a string, and just use that (easier and often faster than reading and checking line per line):
with open('example.txt') as f:
if 'blabla' in f.read():
print("true")
Another trick: you can alleviate the possible memory problems by using mmap.mmap()
to create a “string-like” object that uses the underlying file (instead of reading the whole file in memory):
import mmap
with open('example.txt') as f:
s = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
if s.find('blabla') != -1:
print('true')
NOTE: in python 3, mmaps behave like bytearray
objects rather than strings, so the subsequence you look for with find()
has to be a bytes
object rather than a string as well, eg. s.find(b'blabla')
:
#!/usr/bin/env python3
import mmap
with open('example.txt', 'rb', 0) as file,
mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
if s.find(b'blabla') != -1:
print('true')
You could also use regular expressions on mmap
e.g., case-insensitive search: if re.search(br'(?i)blabla', s):
Here’s another way to possibly answer your question using the find function which gives you a literal numerical value of where something truly is
open('file', 'r').read().find('')
in find write the word you want to find
and 'file'
stands for your file name
How to search the text in the file and Returns an file path in which the word is found
(Как искать часть текста в файле и возвращять путь к файлу в котором это слово найдено)
import os
import re
class Searcher:
def __init__(self, path, query):
self.path = path
if self.path[-1] != '/':
self.path += '/'
self.path = self.path.replace('/', '\')
self.query = query
self.searched = {}
def find(self):
for root, dirs, files in os.walk( self.path ):
for file in files:
if re.match(r'.*?.txt$', file) is not None:
if root[-1] != '\':
root += '\'
f = open(root + file, 'rt')
txt = f.read()
f.close()
count = len( re.findall( self.query, txt ) )
if count > 0:
self.searched[root + file] = count
def getResults(self):
return self.searched
In Main()
# -*- coding: UTF-8 -*-
import sys
from search import Searcher
path = 'c:\temp\'
search = 'search string'
if __name__ == '__main__':
if len(sys.argv) == 3:
# создаем объект поисковика и передаем ему аргументы
Search = Searcher(sys.argv[1], sys.argv[2])
else:
Search = Searcher(path, search)
# начать поиск
Search.find()
# получаем результат
results = Search.getResults()
# выводим результат
print 'Found ', len(results), ' files:'
for file, count in results.items():
print 'File: ', file, ' Found entries:' , count
I made a little function for this purpose. It searches for a word in the input file and then adds it to the output file.
def searcher(outf, inf, string):
with open(outf, 'a') as f1:
if string in open(inf).read():
f1.write(string)
- outf is the output file
- inf is the input file
- string is of course, the desired string that you wish to find and add to outf.
If user wants to search for the word in given text file.
fopen = open('logfile.txt',mode='r+')
fread = fopen.readlines()
x = input("Enter the search string: ")
for line in fread:
if x in line:
print(line)
found = False
def check():
datafile = file('example.txt')
for line in datafile:
if "blabla" in line:
found = True
break
return found
if check():
print "found"
else:
print "not found"
Here’s another. Takes an absolute file path and a given string and passes it to word_find(), uses readlines() method on the given file within the enumerate() method which gives an iterable count as it traverses line by line, in the end giving you the line with the matching string, plus the given line number. Cheers.
def word_find(file, word):
with open(file, 'r') as target_file:
for num, line in enumerate(target_file.readlines(), 1):
if str(word) in line:
print(f'<Line {num}> {line}')
else:
print(f'> {word} not found.')
if __name__ == '__main__':
file_to_process = '/path/to/file'
string_to_find = input()
word_find(file_to_process, string_to_find)
"found" needs to be created as global variable in the function as "if else" statement is out of the function. You also don’t need to use "break" to break the loop code.
The following should work to find out if the text file has desired string.
with open('text_text.txt') as f:
datafile = f.readlines()
def check():
global found
found = False
for line in datafile:
if 'the' in line:
found = True
check()
if found == True:
print("True")
else:
print("False")