Use fnmatch.filter to filter files by more than one possible file extension
Question:
Given the following piece of python code:
for root, dirs, files in os.walk(directory):
for filename in fnmatch.filter(files, '*.png'):
pass
How can I filter for more than one extension? In this special case I want to get all files ending with *.png, *.gif, *.jpg or *.jpeg.
For now I came up with
for root, dirs, files in os.walk(directory):
for extension in ['jpg', 'jpeg', 'gif', 'png']:
for filename in fnmatch.filter(files, '*.' + extension):
pass
But I think this is neither very elegant nor very performant.
Does someone have a better idea?
Answers:
This isn’t really elegant either, but it works:
for root, dirs, files in os.walk(directory):
for filename in fnmatch.filter(files, '*.png') + fnmatch.filter(files, '*.jpg') + fnmatch.filter(files, '*.jpeg') + fnmatch.filter(files, '*.gif'):
pass
This would be a better way, perhaps, because you are not calling `+` repeatedly and you are using a `tuple` instead of a `list`.
for root, dirs, files in os.walk(directory):
for extension in ('*.jpg', '*.jpeg', '*.gif', '*.png'):
for filename in fnmatch.filter(files, extension):
pass
A `tuple` is better because you are not going to modify the extensions once you have created them; you are only iterating over them.
I think your code is actually fine. If you want to touch every filename only once, define your own filtering function:
def is_image_file(filename, extensions=('.jpg', '.jpeg', '.gif', '.png')):
    """Return True if *filename* ends with one of *extensions*.

    :param filename: File name (or path) to test.
    :param extensions: Iterable of suffixes, each including the leading dot.
        The comparison is case-sensitive. Defaults to common image suffixes.
    :return: True when the name ends with any of the suffixes, else False.
    """
    # The default is an immutable tuple rather than a shared mutable list.
    # str.endswith accepts a tuple of suffixes, so one call covers them all;
    # tuple() keeps lists and other iterables working for existing callers.
    return filename.endswith(tuple(extensions))
for root, dirs, files in os.walk(directory):
for filename in filter(is_image_file, files):
pass
If you only need to check extensions (i.e. no further wildcards), why don’t you simply use basic string operations?
for root, dirs, files in os.walk(directory):
for filename in files:
if filename.endswith(('.jpg', '.jpeg', '.gif', '.png')):
pass
I’ve been using this with a lot of success.
import fnmatch
import functools
import itertools
import os
# Remove the annotations if you're not on Python3
def find_files(dir_path: str=None, patterns: [str]=None) -> [str]:
    """
    Returns a generator yielding files matching the given patterns

    The directory tree is walked with os.walk and each file name is tested
    against every pattern with fnmatch. A file is yielded at most once, even
    when it matches several patterns.

    :type dir_path: str
    :type patterns: [str]
    :rtype : [str]
    :param dir_path: Directory to search for files under. Defaults to current dir.
    :param patterns: Patterns of files to search for. Defaults to ["*"]. Example: ["*.json", "*.xml"]
        A single pattern string is accepted as well.
    """
    path = dir_path or "."
    path_patterns = patterns or ["*"]
    if isinstance(path_patterns, str):
        # A bare string would otherwise be iterated character by character,
        # turning "*.json" into the patterns "*", ".", "j", ...
        path_patterns = [path_patterns]
    for root_dir, _dir_names, file_names in os.walk(path):
        # Names already yielded for this directory, so overlapping patterns
        # (e.g. ["*.json", "*"]) do not produce the same path twice.
        seen = set()
        for pattern in path_patterns:
            for file_name in fnmatch.filter(file_names, pattern):
                if file_name in seen:
                    continue
                seen.add(file_name)
                yield os.path.join(root_dir, file_name)
Examples:
for f in find_files(test_directory):
print(f)
yields:
.test.json
.test.xml
.test.ini
.test_helpers.py
.__init__.py
Testing with multiple patterns:
for f in find_files(test_directory, ["*.xml", "*.json", "*.ini"]):
print(f)
yields:
.test.json
.test.xml
.test.ini
Here is what I am using to filter files in apache log directories.
Here I exclude the error files:
rep_filters = [now.strftime("%Y%m%d")]
def files_filter(liste_fic, filters=rep_filters):
    """Return a generator over the names in *liste_fic* that pass the filter.

    A name is kept when it does not contain the substring 'error' and it
    contains every string listed in *filters*.

    :param liste_fic: Iterable of file names to examine.
    :param filters: Substrings that must all be present in a kept name.
        Defaults to the module-level ``rep_filters``.
    :return: A lazy generator of the matching names, in input order.
    """
    # Snapshot the filters at call time so that later changes to the caller's
    # list cannot affect a generator that has not been consumed yet.
    required = tuple(filters)
    # Plain substring tests instead of assembling Python source and passing it
    # to eval(): filter text containing quotes or backslashes is matched
    # literally and can never be executed as code.
    return (
        fic
        for fic in liste_fic
        if 'error' not in fic and all(token in fic for token in required)
    )
Please try this:
# pattern_list = ['*.jpg', '__.*']
def checkFilepatter(filename, pattern_list):
    """Return True if *filename* matches at least one pattern in *pattern_list*.

    :param filename: File name to test.
    :param pattern_list: Iterable of shell-style patterns understood by
        fnmatch.fnmatch, e.g. ['*.jpg', '__.*'].
    :return: True on the first matching pattern, False when none match
        (including when *pattern_list* is empty).
    """
    # any() stops at the first match, exactly like the explicit loop it replaces.
    return any(fnmatch.fnmatch(filename, pattern) for pattern in pattern_list)
You can use a list comprehension to check whether `my_file` matches any of the file masks defined in `patterns`:
import fnmatch
my_file = 'my_precious.txt'
patterns = ('*.txt', '*.html', '*.mp3')
if [pat for pat in patterns if fnmatch.fnmatch(my_file, pat)]:
print('We have a match!')
else:
print('No match')
Internally, `fnmatch` uses regular expressions, and there is a function that makes a regex from an fnmatch pattern — `fnmatch.translate`. Compiling the combined pattern once may also give a little speed-up.
import fnmatch
import os
import re
image_exts = ['jpg', 'jpeg', 'gif', 'png']
image_re = re.compile('|'.join(fnmatch.translate('*.' + e) for e in image_exts))
for root, dirs, files in os.walk(directory):
for filename in files:
if image_re.match(filename):
...
The clearest solution is:
import os
for root, dirs, files in os.walk(directory):
for filename in files:
_, ext = os.path.splitext(filename)
if ext in ['.jpg', '.jpeg', '.gif', '.png']:
...
or, using `pathlib`:
for path in pathlib.Path(directory).glob('**/*'):
if path.suffix in ['.jpg', '.jpeg', '.gif', '.png']:
...
Given the following piece of python code:
for root, dirs, files in os.walk(directory):
for filename in fnmatch.filter(files, '*.png'):
pass
How can I filter for more than one extension? In this special case I want to get all files ending with *.png, *.gif, *.jpg or *.jpeg.
For now I came up with
for root, dirs, files in os.walk(directory):
for extension in ['jpg', 'jpeg', 'gif', 'png']:
for filename in fnmatch.filter(files, '*.' + extension):
pass
But I think this is neither very elegant nor very performant.
Does someone have a better idea?
This isn’t really elegant either, but it works:
for root, dirs, files in os.walk(directory):
for filename in fnmatch.filter(files, '*.png') + fnmatch.filter(files, '*.jpg') + fnmatch.filter(files, '*.jpeg') + fnmatch.filter(files, '*.gif'):
pass
This would be a better way, perhaps, because you are not calling `+` repeatedly and you are using a `tuple` instead of a `list`.
for root, dirs, files in os.walk(directory):
for extension in ('*.jpg', '*.jpeg', '*.gif', '*.png'):
for filename in fnmatch.filter(files, extension):
pass
A `tuple` is better because you are not going to modify the extensions once you have created them; you are only iterating over them.
I think your code is actually fine. If you want to touch every filename only once, define your own filtering function:
def is_image_file(filename, extensions=('.jpg', '.jpeg', '.gif', '.png')):
    """Return True if *filename* ends with one of *extensions*.

    :param filename: File name (or path) to test.
    :param extensions: Iterable of suffixes, each including the leading dot.
        The comparison is case-sensitive. Defaults to common image suffixes.
    :return: True when the name ends with any of the suffixes, else False.
    """
    # The default is an immutable tuple rather than a shared mutable list.
    # str.endswith accepts a tuple of suffixes, so one call covers them all;
    # tuple() keeps lists and other iterables working for existing callers.
    return filename.endswith(tuple(extensions))
for root, dirs, files in os.walk(directory):
for filename in filter(is_image_file, files):
pass
If you only need to check extensions (i.e. no further wildcards), why don’t you simply use basic string operations?
for root, dirs, files in os.walk(directory):
for filename in files:
if filename.endswith(('.jpg', '.jpeg', '.gif', '.png')):
pass
I’ve been using this with a lot of success.
import fnmatch
import functools
import itertools
import os
# Remove the annotations if you're not on Python3
def find_files(dir_path: str=None, patterns: [str]=None) -> [str]:
    """
    Returns a generator yielding files matching the given patterns

    The directory tree is walked with os.walk and each file name is tested
    against every pattern with fnmatch. A file is yielded at most once, even
    when it matches several patterns.

    :type dir_path: str
    :type patterns: [str]
    :rtype : [str]
    :param dir_path: Directory to search for files under. Defaults to current dir.
    :param patterns: Patterns of files to search for. Defaults to ["*"]. Example: ["*.json", "*.xml"]
        A single pattern string is accepted as well.
    """
    path = dir_path or "."
    path_patterns = patterns or ["*"]
    if isinstance(path_patterns, str):
        # A bare string would otherwise be iterated character by character,
        # turning "*.json" into the patterns "*", ".", "j", ...
        path_patterns = [path_patterns]
    for root_dir, _dir_names, file_names in os.walk(path):
        # Names already yielded for this directory, so overlapping patterns
        # (e.g. ["*.json", "*"]) do not produce the same path twice.
        seen = set()
        for pattern in path_patterns:
            for file_name in fnmatch.filter(file_names, pattern):
                if file_name in seen:
                    continue
                seen.add(file_name)
                yield os.path.join(root_dir, file_name)
Examples:
for f in find_files(test_directory):
print(f)
yields:
.test.json
.test.xml
.test.ini
.test_helpers.py
.__init__.py
Testing with multiple patterns:
for f in find_files(test_directory, ["*.xml", "*.json", "*.ini"]):
print(f)
yields:
.test.json
.test.xml
.test.ini
Here is what I am using to filter files in apache log directories.
Here I exclude the error files:
rep_filters = [now.strftime("%Y%m%d")]
def files_filter(liste_fic, filters=rep_filters):
    """Return a generator over the names in *liste_fic* that pass the filter.

    A name is kept when it does not contain the substring 'error' and it
    contains every string listed in *filters*.

    :param liste_fic: Iterable of file names to examine.
    :param filters: Substrings that must all be present in a kept name.
        Defaults to the module-level ``rep_filters``.
    :return: A lazy generator of the matching names, in input order.
    """
    # Snapshot the filters at call time so that later changes to the caller's
    # list cannot affect a generator that has not been consumed yet.
    required = tuple(filters)
    # Plain substring tests instead of assembling Python source and passing it
    # to eval(): filter text containing quotes or backslashes is matched
    # literally and can never be executed as code.
    return (
        fic
        for fic in liste_fic
        if 'error' not in fic and all(token in fic for token in required)
    )
Please try this:
# pattern_list = ['*.jpg', '__.*']
def checkFilepatter(filename, pattern_list):
    """Return True if *filename* matches at least one pattern in *pattern_list*.

    :param filename: File name to test.
    :param pattern_list: Iterable of shell-style patterns understood by
        fnmatch.fnmatch, e.g. ['*.jpg', '__.*'].
    :return: True on the first matching pattern, False when none match
        (including when *pattern_list* is empty).
    """
    # any() stops at the first match, exactly like the explicit loop it replaces.
    return any(fnmatch.fnmatch(filename, pattern) for pattern in pattern_list)
You can use a list comprehension to check whether `my_file` matches any of the file masks defined in `patterns`:
import fnmatch
my_file = 'my_precious.txt'
patterns = ('*.txt', '*.html', '*.mp3')
if [pat for pat in patterns if fnmatch.fnmatch(my_file, pat)]:
print('We have a match!')
else:
print('No match')
Internally, `fnmatch` uses regular expressions, and there is a function that makes a regex from an fnmatch pattern — `fnmatch.translate`. Compiling the combined pattern once may also give a little speed-up.
import fnmatch
import os
import re
image_exts = ['jpg', 'jpeg', 'gif', 'png']
image_re = re.compile('|'.join(fnmatch.translate('*.' + e) for e in image_exts))
for root, dirs, files in os.walk(directory):
for filename in files:
if image_re.match(filename):
...
The clearest solution is:
import os
for root, dirs, files in os.walk(directory):
for filename in files:
_, ext = os.path.splitext(filename)
if ext in ['.jpg', '.jpeg', '.gif', '.png']:
...
or, using `pathlib`:
for path in pathlib.Path(directory).glob('**/*'):
if path.suffix in ['.jpg', '.jpeg', '.gif', '.png']:
...