A script that searches through the text of all sub-directories' files for a string then prints to a created file
Question:
I’m a scripting noob and I’m stuck on this problem.
I would like the code to do a few things:
- Ask for user input for a string to search for.
- Iterate through the sub-directories from a given file path.
- Open files that have one of the extension types listed.
- Open the file and search for the user inputted string.
- Print the results from the query to a text file.
The code seems to take some time to run but turns up nothing.
import os.path
# Ask the user to enter string to search
search_str = input("Keyword or phrase:n")
# Store file names for later printing
file_names = []
# Path to search
path = os.path.dirname(os.path.realpath(__file__))
# Acceptable file extensions
extensions = {".xlsx", ".txt", ".pdf", ".doc", ".docx", ".mb", ".xlsm", ".xltx", ".xltm"}
# Create file to store search results
search_files = open('search.txt', 'w')
search_files.write(f'I searched for "{search_str}" in your files.nnHere is what I found:nn')
# Program to search files for keyword
def search_all_files_by_keyword(path):
# Store file count number
file_count = 0
for root, dirs, files in os.walk(path):
for file in files:
try:
# Apply file type filter, search for acceptable ext in extension
ext = os.path.splitext(file)
if ext in extensions:
# Define file pathway
file_path = os.path.join(root, file)
# Open file for reading
with open(file, 'r') as f:
# Read file and search for keyword or phrase
if search_str in f.read():
# Add file path to file_names and increase file_count, then close file
file_names.append(file_path)
file_count += 1
f.close()
# If keyword or phrase is not found, do nothing and close file
else:
f.close()
except:
pass
# Print search results to file
if file_count >= 1:
search_files.write(f"{file_names}n")
else:
search_files.write(f'No results found for "{search_str}".')
# Run program
search_all_files_by_keyword(path)
Answers:
Try the path module to search all folder/sub-folders this way:
import re
from pathlib import Path
# Ask the user to enter string to search
search_str = input("Keyword or phrase:n")
# Store file names for later printing
file_names = []
# Path to search
path = Path("path/to/directory") # Replace with your actual file path
# Acceptable file extensions
extensions = {".xlsx", ".txt", ".pdf", ".doc", ".docx", ".mb"]
# to search for string in a file
def search_in_file(file_path, search_str):
with open(file_path, 'r', encoding='utf-8') as f:
file_content = f.read()
matches = re.findall(search_str, file_content)
if matches:
return matches
else:
return None
# Iterate through the sub-directories
for file_path in path.glob("**/*"):
if file_path.suffix in extensions:
matches = search_in_file(file_path, search_str)
if matches:
file_names.append(str(file_path))
with open("results.txt", "a", encoding='utf-8') as results_file:
results_file.write(f"{file_path}n")
results_file.write(f"{matches}nn")
I’m a scripting noob and I’m stuck on this problem.
I would like the code to do a few things:
- Ask for user input for a string to search for.
- Iterate through the sub-directories from a given file path.
- Open files that have one of the extension types listed.
- Open the file and search for the user inputted string.
- Print the results from the query to a text file.
The code seems to take some time to run but turns up nothing.
import os.path
# Ask the user to enter string to search
search_str = input("Keyword or phrase:n")
# Store file names for later printing
file_names = []
# Path to search
path = os.path.dirname(os.path.realpath(__file__))
# Acceptable file extensions
extensions = {".xlsx", ".txt", ".pdf", ".doc", ".docx", ".mb", ".xlsm", ".xltx", ".xltm"}
# Create file to store search results
search_files = open('search.txt', 'w')
search_files.write(f'I searched for "{search_str}" in your files.nnHere is what I found:nn')
# Program to search files for keyword
def search_all_files_by_keyword(path):
# Store file count number
file_count = 0
for root, dirs, files in os.walk(path):
for file in files:
try:
# Apply file type filter, search for acceptable ext in extension
ext = os.path.splitext(file)
if ext in extensions:
# Define file pathway
file_path = os.path.join(root, file)
# Open file for reading
with open(file, 'r') as f:
# Read file and search for keyword or phrase
if search_str in f.read():
# Add file path to file_names and increase file_count, then close file
file_names.append(file_path)
file_count += 1
f.close()
# If keyword or phrase is not found, do nothing and close file
else:
f.close()
except:
pass
# Print search results to file
if file_count >= 1:
search_files.write(f"{file_names}n")
else:
search_files.write(f'No results found for "{search_str}".')
# Run program
search_all_files_by_keyword(path)
Try the path module to search all folder/sub-folders this way:
import re
from pathlib import Path
# Ask the user to enter string to search
search_str = input("Keyword or phrase:n")
# Store file names for later printing
file_names = []
# Path to search
path = Path("path/to/directory") # Replace with your actual file path
# Acceptable file extensions
extensions = {".xlsx", ".txt", ".pdf", ".doc", ".docx", ".mb"]
# to search for string in a file
def search_in_file(file_path, search_str):
with open(file_path, 'r', encoding='utf-8') as f:
file_content = f.read()
matches = re.findall(search_str, file_content)
if matches:
return matches
else:
return None
# Iterate through the sub-directories
for file_path in path.glob("**/*"):
if file_path.suffix in extensions:
matches = search_in_file(file_path, search_str)
if matches:
file_names.append(str(file_path))
with open("results.txt", "a", encoding='utf-8') as results_file:
results_file.write(f"{file_path}n")
results_file.write(f"{matches}nn")