Iterate through zip files in a folder and move them based on a string found in a specific file inside each archive (Python)
Question:
I have multiple zipped files, and I need to look for a string within one specific .html file inside each of them. The names of all the .html files I need to read end with the 7 characters ‘bb.html’.
My goal is to move the whole .zip file if the html within contains the string/word.
I have this code written which works on the file that is listed but I need to iterate through thousands of zipped files. It doesn’t have to be written as a function.
import os
import zipfile
def check_files(zip_path=None, target_folder=None, needle=b'word', suffix='bb.html'):
    """Copy a zip archive to another folder when its ``*bb.html`` member contains a word.

    Args:
        zip_path: Path of the archive to inspect. Defaults to the
            module-level ``source_file3`` so existing ``check_files()``
            calls keep working.
        target_folder: Folder the archive is copied into on a match.
            Defaults to the module-level ``source_folder2``.
        needle: Bytes to search for inside the HTML member.
        suffix: Ending of the member name(s) to read inside the archive.

    Returns:
        True if the needle was found (and the archive copied), else False.
    """
    # shutil is used below but is not imported at the top of the snippet.
    import shutil

    if zip_path is None:
        zip_path = source_file3
    if target_folder is None:
        target_folder = source_folder2

    # The context manager closes the archive even if reading fails, and the
    # name `archive` avoids shadowing the builtin zip().
    with zipfile.ZipFile(zip_path) as archive:
        found = any(
            needle in archive.read(member)
            for member in archive.namelist()
            # Member names only *end* with 'bb.html', so match by suffix
            # instead of requiring a member called exactly "bb.html".
            if member.endswith(suffix)
        )

    if found:
        # NOTE: this copies the archive; use shutil.move() if the original
        # should no longer remain in the source folder.
        shutil.copy(zip_path, target_folder)
        print('word found-file moved')
    else:
        print('word not found')
    return found
Most of the help I find iterates over the files inside a single archive; I need to iterate over ALL the .zip files and read only the bb.html file inside each one.
I am new to Python so I have that as a challenge as well.
Thanks in advance.
Thanks so much for the answers!!!!
FINAL CODE:
# Single archive used while testing; the folder loop rebinds this name per archive.
source_file3 = 'C:/Users/SMITH/Desktop/zipped/Message/testzip.zip'
# Folder whose .zip files are scanned.
source_folder3 = r'J:/server/zippedMessages'
# Folder that matching archives are copied into.
dest_folder = 'L:/_Mine/Zipped Messages Moved'
def check_files(zip_path=None, target_folder=None, needle=b'Health', suffix='bb.html'):
    """Copy a zip archive to another folder when its ``*bb.html`` member contains a word.

    Args:
        zip_path: Path of the archive to inspect. Defaults to the
            module-level ``source_file3`` so existing ``check_files()``
            calls keep working.
        target_folder: Folder the archive is copied into on a match.
            Defaults to the module-level ``dest_folder``.
        needle: Bytes to search for inside the HTML member.
        suffix: Ending of the member name(s) to read inside the archive.

    Returns:
        True if the needle was found (and the archive copied), else False.
    """
    # shutil is used below but is not imported anywhere in the snippet.
    import shutil

    if zip_path is None:
        zip_path = source_file3
    if target_folder is None:
        target_folder = dest_folder

    # The context manager closes the archive even if reading fails, and the
    # name `archive` avoids shadowing the builtin zip().
    with zipfile.ZipFile(zip_path) as archive:
        found = any(
            needle in archive.read(member)
            for member in archive.namelist()
            # Member names only *end* with 'bb.html', so match by suffix
            # instead of requiring a member called exactly "bb.html".
            if member.endswith(suffix)
        )

    if found:
        # NOTE: this copies the archive; use shutil.move() if the original
        # should no longer remain in the source folder.
        shutil.copy(zip_path, target_folder)
        print('word found-file moved')
    else:
        print('word not found')
    return found
# Run check_files() once for every .zip archive in the source folder.
folderdir = source_folder3
for filename in os.listdir(folderdir):
    # Compare case-insensitively so archives named *.ZIP are not skipped.
    if filename.lower().endswith(".zip"):
        # check_files() reads the module-level source_file3, so rebind it
        # to the current archive before each call.
        source_file3 = os.path.join(folderdir, filename)
        # No ZipFile is opened here: check_files() opens the archive itself,
        # and an extra unused handle per file would never be closed.
        check_files()
Answers:
There are many examples showing how to iterate over the files in a folder (as opposed to the files inside a ZIP archive).
You should use a `for` loop with `os.listdir()` or `glob.glob()`:
# Visit every .zip file in source_folder by listing the directory.
for filename in os.listdir(source_folder):
    if filename.endswith(".zip"):
        source_file3 = os.path.join(source_folder, filename)
        # The context manager closes each archive when the block ends;
        # `archive` avoids shadowing the builtin zip().
        with zipfile.ZipFile(source_file3) as archive:
            pass  # ... code ...
# Visit every .zip file in source_folder by pattern matching.
for source_file3 in glob.glob(f'{source_folder}/*.zip'):
    # The context manager closes each archive when the block ends;
    # `archive` avoids shadowing the builtin zip().
    with zipfile.ZipFile(source_file3) as archive:
        pass  # ... code ...
EDIT:
If you need to iterate over the files inside a ZIP archive, use `ZipFile.namelist()` or `ZipFile.infolist()`:
# Read every member of the archive, addressing members by name.
# The context manager closes the archive; `archive` avoids shadowing zip().
with zipfile.ZipFile(source_file3) as archive:
    for inner_filename in archive.namelist():
        file = archive.read(inner_filename)
        # ... code ...
or
# Read every member of the archive, addressing members by ZipInfo object.
# The context manager closes the archive; `archive` avoids shadowing zip().
with zipfile.ZipFile(source_file3) as archive:
    for inner_fileobject in archive.infolist():
        file = archive.read(inner_fileobject)
        # ... code ...
I have multiple zipped files, and I need to look for a string within one specific .html file inside each of them. The names of all the .html files I need to read end with the 7 characters ‘bb.html’.
My goal is to move the whole .zip file if the html within contains the string/word.
I have this code written which works on the file that is listed but I need to iterate through thousands of zipped files. It doesn’t have to be written as a function.
import os
import zipfile
def check_files(zip_path=None, target_folder=None, needle=b'word', suffix='bb.html'):
    """Copy a zip archive to another folder when its ``*bb.html`` member contains a word.

    Args:
        zip_path: Path of the archive to inspect. Defaults to the
            module-level ``source_file3`` so existing ``check_files()``
            calls keep working.
        target_folder: Folder the archive is copied into on a match.
            Defaults to the module-level ``source_folder2``.
        needle: Bytes to search for inside the HTML member.
        suffix: Ending of the member name(s) to read inside the archive.

    Returns:
        True if the needle was found (and the archive copied), else False.
    """
    # shutil is used below but is not imported at the top of the snippet.
    import shutil

    if zip_path is None:
        zip_path = source_file3
    if target_folder is None:
        target_folder = source_folder2

    # The context manager closes the archive even if reading fails, and the
    # name `archive` avoids shadowing the builtin zip().
    with zipfile.ZipFile(zip_path) as archive:
        found = any(
            needle in archive.read(member)
            for member in archive.namelist()
            # Member names only *end* with 'bb.html', so match by suffix
            # instead of requiring a member called exactly "bb.html".
            if member.endswith(suffix)
        )

    if found:
        # NOTE: this copies the archive; use shutil.move() if the original
        # should no longer remain in the source folder.
        shutil.copy(zip_path, target_folder)
        print('word found-file moved')
    else:
        print('word not found')
    return found
Most of the help I find iterates over the files inside a single archive; I need to iterate over ALL the .zip files and read only the bb.html file inside each one.
I am new to Python so I have that as a challenge as well.
Thanks in advance.
Thanks so much for the answers!!!!
FINAL CODE:
# Single archive used while testing; the folder loop rebinds this name per archive.
source_file3 = 'C:/Users/SMITH/Desktop/zipped/Message/testzip.zip'
# Folder whose .zip files are scanned.
source_folder3 = r'J:/server/zippedMessages'
# Folder that matching archives are copied into.
dest_folder = 'L:/_Mine/Zipped Messages Moved'
def check_files(zip_path=None, target_folder=None, needle=b'Health', suffix='bb.html'):
    """Copy a zip archive to another folder when its ``*bb.html`` member contains a word.

    Args:
        zip_path: Path of the archive to inspect. Defaults to the
            module-level ``source_file3`` so existing ``check_files()``
            calls keep working.
        target_folder: Folder the archive is copied into on a match.
            Defaults to the module-level ``dest_folder``.
        needle: Bytes to search for inside the HTML member.
        suffix: Ending of the member name(s) to read inside the archive.

    Returns:
        True if the needle was found (and the archive copied), else False.
    """
    # shutil is used below but is not imported anywhere in the snippet.
    import shutil

    if zip_path is None:
        zip_path = source_file3
    if target_folder is None:
        target_folder = dest_folder

    # The context manager closes the archive even if reading fails, and the
    # name `archive` avoids shadowing the builtin zip().
    with zipfile.ZipFile(zip_path) as archive:
        found = any(
            needle in archive.read(member)
            for member in archive.namelist()
            # Member names only *end* with 'bb.html', so match by suffix
            # instead of requiring a member called exactly "bb.html".
            if member.endswith(suffix)
        )

    if found:
        # NOTE: this copies the archive; use shutil.move() if the original
        # should no longer remain in the source folder.
        shutil.copy(zip_path, target_folder)
        print('word found-file moved')
    else:
        print('word not found')
    return found
# Run check_files() once for every .zip archive in the source folder.
folderdir = source_folder3
for filename in os.listdir(folderdir):
    # Compare case-insensitively so archives named *.ZIP are not skipped.
    if filename.lower().endswith(".zip"):
        # check_files() reads the module-level source_file3, so rebind it
        # to the current archive before each call.
        source_file3 = os.path.join(folderdir, filename)
        # No ZipFile is opened here: check_files() opens the archive itself,
        # and an extra unused handle per file would never be closed.
        check_files()
There are many examples showing how to iterate over the files in a folder (as opposed to the files inside a ZIP archive).
You should use a `for` loop with `os.listdir()` or `glob.glob()`:
# Visit every .zip file in source_folder by listing the directory.
for filename in os.listdir(source_folder):
    if filename.endswith(".zip"):
        source_file3 = os.path.join(source_folder, filename)
        # The context manager closes each archive when the block ends;
        # `archive` avoids shadowing the builtin zip().
        with zipfile.ZipFile(source_file3) as archive:
            pass  # ... code ...
# Visit every .zip file in source_folder by pattern matching.
for source_file3 in glob.glob(f'{source_folder}/*.zip'):
    # The context manager closes each archive when the block ends;
    # `archive` avoids shadowing the builtin zip().
    with zipfile.ZipFile(source_file3) as archive:
        pass  # ... code ...
EDIT:
If you need to iterate over the files inside a ZIP archive, use `ZipFile.namelist()` or `ZipFile.infolist()`:
# Read every member of the archive, addressing members by name.
# The context manager closes the archive; `archive` avoids shadowing zip().
with zipfile.ZipFile(source_file3) as archive:
    for inner_filename in archive.namelist():
        file = archive.read(inner_filename)
        # ... code ...
or
# Read every member of the archive, addressing members by ZipInfo object.
# The context manager closes the archive; `archive` avoids shadowing zip().
with zipfile.ZipFile(source_file3) as archive:
    for inner_fileobject in archive.infolist():
        file = archive.read(inner_fileobject)
        # ... code ...