A script that searches through the text of all sub-directories' files for a string then prints to a created file

Question:

I’m a scripting noob and I’m stuck on this problem.

I would like the code to do a few things:

  1. Ask for user input for a string to search for.
  2. Iterate through the sub-directories from a given file path.
  3. Open files that have one of the extension types listed.
  4. Open the file and search for the user inputted string.
  5. Print the results from the query to a text file.

The code seems to take some time to run but turns up nothing.

import os.path

# Ask the user to enter string to search
search_str = input("Keyword or phrase:\n")

# Store file names for later printing
file_names = []

# Path to search: the directory that contains this script
path = os.path.dirname(os.path.realpath(__file__))

# Acceptable file extensions (each entry includes the leading dot)
extensions = {".xlsx", ".txt", ".pdf", ".doc", ".docx", ".mb", ".xlsm", ".xltx", ".xltm"}

# Create file to store search results; the handle stays open because the
# search function below writes its findings to it.
search_files = open('search.txt', 'w')
search_files.write(f'I searched for "{search_str}" in your files.\n\nHere is what I found:\n\n')


# Program to search files for keyword
def search_all_files_by_keyword(path):
    """Search the files under *path* for the module-level ``search_str``.

    Walks *path* recursively and reads, as text, every file whose extension
    is in the module-level ``extensions`` set. The full path of each file
    that contains ``search_str`` is appended to the module-level
    ``file_names`` list. When the walk is done, a summary is written to the
    already-open ``search_files`` handle: either the list of matching paths
    or a "No results found" message.

    Files that cannot be opened or decoded as text are skipped, so binary
    formats (.xlsx, .pdf, .docx, ...) will usually not produce matches.
    """
    # Number of matching files found during this call
    file_count = 0

    for root, _dirs, files in os.walk(path):
        for file in files:
            # splitext returns a (stem, extension) tuple; only the extension
            # can be compared against the set of accepted extensions.
            ext = os.path.splitext(file)[1]
            if ext not in extensions:
                continue

            # os.walk yields bare file names, so the file must be opened via
            # its path joined with the directory currently being walked.
            file_path = os.path.join(root, file)

            try:
                # The with-block closes the file; no explicit close() needed
                with open(file_path, 'r') as f:
                    found = search_str in f.read()
            except (OSError, UnicodeDecodeError):
                # Best effort: skip unreadable or non-text files rather than
                # aborting the whole search.
                continue

            if found:
                file_names.append(file_path)
                file_count += 1

    # Print search results to file
    if file_count >= 1:
        search_files.write(f"{file_names}\n")
    else:
        search_files.write(f'No results found for "{search_str}".')

# Run program, then close the results file so everything written is flushed to disk
search_all_files_by_keyword(path)
search_files.close()
Asked By: sssucksatCoding

||

Answers:

Try the `pathlib` module to search all folders and sub-folders, like this:


import re
from pathlib import Path

# Ask the user to enter string to search
search_str = input("Keyword or phrase:\n")

# Store file names for later printing
file_names = []

# Path to search
path = Path("path/to/directory") # Replace with your actual file path

# Acceptable file extensions (each entry includes the leading dot)
extensions = {".xlsx", ".txt", ".pdf", ".doc", ".docx", ".mb"}

# to search for string in a file
def search_in_file(file_path, search_str):
    """Return every occurrence of *search_str* in the file at *file_path*.

    The file is read as UTF-8 text and *search_str* is matched literally.

    Args:
        file_path: Path of the file to read.
        search_str: The exact text to look for.

    Returns:
        A list with one entry per occurrence of *search_str*, or None when
        there is no occurrence or the file cannot be read as UTF-8 text
        (for example a binary document).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            file_content = f.read()
    except (OSError, UnicodeDecodeError):
        # Unreadable or binary file: report "no match" instead of aborting
        # the caller's loop over many files.
        return None

    # Escape the input so characters such as "." or "(" are searched for
    # literally instead of being interpreted as regex syntax.
    matches = re.findall(re.escape(search_str), file_content)
    return matches or None

# Iterate through the sub-directories
for file_path in path.glob("**/*"):
    # glob("**/*") yields directories as well; only regular files with an
    # accepted extension are searched.
    if file_path.is_file() and file_path.suffix in extensions:
        matches = search_in_file(file_path, search_str)

        if matches:
            file_names.append(str(file_path))

            # Append mode keeps the results written for earlier matches
            with open("results.txt", "a", encoding='utf-8') as results_file:
                results_file.write(f"{file_path}\n")
                results_file.write(f"{matches}\n\n")
Answered By: Daniel Hao
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.