How to check whether a directory is a sub directory of another directory
Question:
I would like to write a template system in Python that allows files to be included.
e.g.
This is a template
You can safely include files with safe_include`othertemplate.rst`
As you know, including files might be dangerous. For example, if I use the template system in a web application which allows users to create their own templates, they might do something like
I want your passwords: safe_include`/etc/password`
So therefore, I have to restrict the inclusion of files to files which are for example in a certain subdirectory (e.g. /home/user/templates
)
The question is now: How can I check, whether /home/user/templates/includes/inc1.rst
is in a subdirectory of /home/user/templates
?
Would the following code work and be secure?
import os.path
def in_directory(file, directory, allow_symlink = False):
    """Report whether ``file`` appears to lie under ``directory``.

    Both arguments are made absolute with ``os.path.abspath`` and the result
    is true when ``os.path.commonprefix`` of the two equals the directory.

    :param file: path to test
    :param directory: directory that is expected to contain ``file``
    :param allow_symlink: when false, a ``file`` that is itself a symbolic
        link is rejected
    :return: True if the common prefix of both paths equals ``directory``
    """
    #make both absolute
    directory = os.path.abspath(directory)
    file = os.path.abspath(file)
    #check whether file is a symbolic link, if yes, return false if they are not allowed
    # NOTE(review): islink() only inspects the final path component, so a
    # symbolic link in an intermediate directory is not detected here.
    if not allow_symlink and os.path.islink(file):
        return False
    #return true, if the common prefix of both is equal to directory
    #e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
    # NOTE(review): commonprefix() compares character by character, so a
    # sibling such as /a/bc/d.rst also has the common prefix /a/b.
    return os.path.commonprefix([file, directory]) == directory
As long, as allow_symlink
is False, it should be secure, I think. Allowing symlinks of course would make it insecure if the user is able to create such links.
UPDATE – Solution
The code above does not work, if intermediate directories are symbolic links.
To prevent this, you have to use realpath
instead of abspath
.
UPDATE: adding a trailing / to directory to solve the problem with commonprefix() Reorx pointed out.
This also makes allow_symlink
unnecessary as symlinks are expanded to their real destination
import os.path
def in_directory(file, directory):
    """Return True if ``file`` resolves to a location inside ``directory``.

    Both paths are canonicalised with ``os.path.realpath``, so symbolic links
    (including links in intermediate directories) and ``..`` segments are
    resolved before the comparison.

    :param file: path of the file to test
    :param directory: directory that must contain ``file``
    :return: True if the resolved file lies below the resolved directory;
        False otherwise, including when both resolve to the same path
    """
    # join(..., '') appends exactly one trailing separator, so /a/b becomes
    # /a/b/ and a sibling such as /a/bc can no longer match the prefix.
    directory = os.path.join(os.path.realpath(directory), '')
    file = os.path.realpath(file)
    return file.startswith(directory)
Answers:
os.path.realpath(path): Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path (if they are supported by the operating system).
Use it on directory and subdirectory name, then check latter starts with former.
I would test the result from commonprefix against the filename to get a better answer, something like this:
def is_in_folder(filename, folder='/tmp/'):
    """Return True if ``filename`` lies below ``folder``.

    The comparison is purely lexical: both paths are normalised with
    ``os.path.normpath`` (which collapses ``..`` and duplicate separators),
    but relative paths are not made absolute and symbolic links are not
    resolved.

    :param filename: path to test
    :param folder: folder that should contain ``filename``
    :return: True if ``filename`` is strictly inside ``folder``; False
        otherwise, including when both normalise to the same path
    """
    # normalize both parameters
    fn = os.path.normpath(filename)
    fd = os.path.normpath(folder)
    # Compare against the folder plus one trailing separator so that a
    # sibling like /tmp2 is not mistaken for a child of /tmp. The root
    # folder already ends with the separator.
    prefix = fd if fd.endswith(os.sep) else fd + os.sep
    return fn != fd and fn.startswith(prefix)
So, I needed this, and due to the criticisms of commonprefix, I went a different way:
def os_path_split_asunder(path, debug=False):
    """
    Split ``path`` into the list of its components.

    http://stackoverflow.com/a/4580931/171094

    :param path: path string to split
    :param debug: when true, print every intermediate ``os.path.split`` step
    :return: list of components in order; an absolute path starts with its
        root (e.g. ``['/', 'var', 'test']``) and an empty path gives ``[]``
    """
    parts = []
    while True:
        newpath, tail = os.path.split(path)
        if debug:
            print(repr(path), (newpath, tail))
        if newpath == path:
            # split() made no progress: what is left is the root or nothing.
            assert not tail
            if path:
                parts.append(path)
            break
        parts.append(tail)
        path = newpath
    # Components were collected from the end of the path towards the root.
    parts.reverse()
    return parts
def is_subdirectory(potential_subdirectory, expected_parent_directory):
    """
    Is the first argument a sub-directory of the second argument?

    Both paths are canonicalised with ``os.path.realpath`` (absolute, with
    symbolic links and ``..`` resolved) and compared component by component.

    :param potential_subdirectory: path that may lie under the parent
    :param expected_parent_directory: path of the supposed parent directory
    :return: True if the potential_subdirectory is the expected parent directory itself or lies below it

    >>> is_subdirectory('/var/test2', '/var/test')
    False
    >>> is_subdirectory('/var/test', '/var/test2')
    False
    >>> is_subdirectory('var/test2', 'var/test')
    False
    >>> is_subdirectory('var/test', 'var/test2')
    False
    >>> is_subdirectory('/var/test/sub', '/var/test')
    True
    >>> is_subdirectory('/var/test', '/var/test/sub')
    False
    >>> is_subdirectory('var/test/sub', 'var/test')
    True
    >>> is_subdirectory('var/test', 'var/test')
    True
    >>> is_subdirectory('var/test', 'var/test/fake_sub/..')
    True
    >>> is_subdirectory('var/test/sub/sub2/sub3/../..', 'var/test')
    True
    >>> is_subdirectory('var/test/sub', 'var/test/fake_sub/..')
    True
    >>> is_subdirectory('var/test', 'var/test/sub')
    False
    """

    def _get_normalized_parts(path):
        # realpath() alone returns an absolute, normalised path. Running
        # normpath() first would collapse "link/.." lexically, before the
        # link is resolved, and could point at a different location.
        return os_path_split_asunder(os.path.realpath(path))

    # make absolute and handle symbolic links, split into components
    sub_parts = _get_normalized_parts(potential_subdirectory)
    parent_parts = _get_normalized_parts(expected_parent_directory)

    if len(parent_parts) > len(sub_parts):
        # a parent directory never has more path segments than its child
        return False

    # the parent's components must be the leading components of the child
    return sub_parts[:len(parent_parts)] == parent_parts
def is_subdir(path, directory):
    """Return True if ``path`` is ``directory`` itself or lies below it.

    Both arguments are canonicalised with ``os.path.realpath`` before the
    relative path from ``directory`` to ``path`` is computed.

    :param path: path to test
    :param directory: candidate parent directory
    :return: True if ``path`` is inside (or equal to) ``directory``
    """
    path = os.path.realpath(path)
    directory = os.path.realpath(directory)
    try:
        relative = os.path.relpath(path, directory)
    except ValueError:
        # relpath() raises on Windows when the paths are on different
        # drives; such paths can never be nested.
        return False
    # A relative path of exactly ".." (path is the direct parent of
    # directory) has no trailing separator and must be rejected as well.
    return not (relative == os.pardir
                or relative.startswith(os.pardir + os.sep))
Based on another answer here, with correction, and with a user-friendlier name:
def isA_subdirOfB_orAisB(A, B):
    """Return True if directory ``A`` is ``B`` itself or lies below ``B``.

    It is assumed that A is a directory. Both paths are canonicalised with
    ``os.path.realpath`` first.

    :param A: candidate sub-directory
    :param B: candidate parent directory
    :return: True if ``A`` equals ``B`` or is located under it
    """
    try:
        relative = os.path.relpath(os.path.realpath(A),
                                   os.path.realpath(B))
    except ValueError:
        # Raised on Windows when A and B are on different drives, in which
        # case A cannot be inside B.
        return False
    return not (relative == os.pardir
                or relative.startswith(os.pardir + os.sep))
Python 3’s pathlib
module makes this straightforward with its Path.parents attribute. For example:
from pathlib import Path
root = Path('/path/to/root')
child = root / 'some' / 'child' / 'dir'
other = Path('/some/other/path')
Then:
>>> root in child.parents
True
>>> other in child.parents
False
Problems with many of the suggested methods
If you’re going to test for directory parentage with string comparison or os.path.commonprefix
methods, these are prone to errors with similarly-named paths or relative paths. For example:
/path/to/files/myfile
would be shown as a child path of /path/to/file
using many of the methods.
/path/to/files/../../myfiles
would not be shown as a parent of /path/myfiles/myfile
by many of the methods. In fact, it is.
The previous answer by Rob Dennis provides a good way to compare path parentage without encountering these problems. Python 3.4 added the pathlib
module which can perform these kind of path operations in a more sophisticated way, optionally without referencing the underlying OS. jme has described in another previous answer how to use pathlib
for the purpose of accurately determining if one path is a child of another. If you prefer not to use pathlib
(not sure why, it’s pretty great) then Python 3.5 introduced a new OS-based method in os.path
that allows you to perform path parent-child checks in a similarly accurate and error-free manner with a lot less code.
New for Python 3.5
Python 3.5 introduced the function os.path.commonpath
. This is a method that is specific to the OS that the code is running on. You can use commonpath
in the following way to accurately determine path parentage:
def path_is_parent(parent_path, child_path):
    """Return True if ``parent_path`` is ``child_path`` or one of its ancestors.

    The comparison is lexical: paths are made absolute and normalised with
    ``os.path.abspath``. If you are concerned about symbolic links, resolve
    both arguments with ``os.path.realpath`` before calling.

    :param parent_path: candidate parent directory
    :param child_path: path expected to lie under ``parent_path``
    :return: True if ``child_path`` equals or is below ``parent_path``
    """
    # Smooth out relative path names
    parent_path = os.path.abspath(parent_path)
    child_path = os.path.abspath(child_path)

    try:
        # commonpath() on the parent alone regularises it the same way as
        # the two-path call, so the two results are directly comparable.
        return os.path.commonpath([parent_path]) == os.path.commonpath([parent_path, child_path])
    except ValueError:
        # commonpath() raises when the paths are on different drives
        # (Windows); such paths are never parent and child.
        return False
Accurate one-liner
You can combine the whole lot into a one-line if statement in Python 3.5. It’s ugly, it includes unnecessary duplicate calls to os.path.abspath
and it definitely won’t fit in the PEP 8 79-character line-length guidelines, but if you like that kind of thing, here goes:
if os.path.commonpath([os.path.abspath(parent_path_to_test)]) == os.path.commonpath([os.path.abspath(parent_path_to_test), os.path.abspath(child_path_to_test)]):
# Yes, the child path is under the parent path
New for Python 3.9
pathlib
has a new method on PurePath
called is_relative_to
which performs this function directly. You can read the python documentation on how is_relative_to
works if you need to see how to use it. Or you can see my other answer for a more full description of how to use it.
I like the “path in other_path.parents” approach mentioned in another answer because I’m a big fan of pathlib, BUT I feel that approach is a bit heavy (it creates one Path instance for each parent up to the root of the path). Also, the case where path == other_path will fail with that approach, whereas os.path.commonpath would succeed in that case.
The following is a different approach, with its own set of pros and cons compared to other methods identified in the various answers:
try:
other_path.relative_to(path)
except ValueError:
...no common path...
else:
...common path...
which is a little more verbose but can easily be added as a function in your application’s common utilities module or even add the method to Path at startup time.
def is_in_directory(filepath, directory):
    """Return True if ``filepath`` resolves to a location below ``directory``.

    Both paths are canonicalised with ``os.path.realpath``.

    :param filepath: path to test
    :param directory: directory that should contain ``filepath``
    :return: True if the resolved file path is strictly inside the resolved
        directory; False when both resolve to the same path
    """
    # join(..., '') adds the trailing separator only when it is missing.
    # Appending os.sep unconditionally turns the root "/" into "//", which
    # no resolved path starts with.
    prefix = os.path.join(os.path.realpath(directory), '')
    return os.path.realpath(filepath).startswith(prefix)
I used below function for similar problem:
def is_subdir(p1, p2):
    """returns true if p1 is p2 or its subdirectory

    :param str p1: subdirectory candidate
    :param str p2: parent directory
    :returns True if the resolved p1 equals the resolved p2 or lies below it"""
    p1, p2 = os.path.realpath(p1), os.path.realpath(p2)
    # join(p2, '') adds a trailing separator only if p2 lacks one, so the
    # root directory "/" is not turned into the never-matching "//".
    return p1 == p2 or p1.startswith(os.path.join(p2, ''))
After running into problems with symbolic link I’ve modified the function. Now it checks if both paths are directories.
def is_subdir(p1, p2):
    """check if p1 is p2 or its subdirectory

    :param str p1: subdirectory candidate
    :param str p2: parent directory
    :returns True if p1,p2 are directories and p1 is p2 or its subdirectory"""
    if not (os.path.isdir(p1) and os.path.isdir(p2)):
        return False
    p1, p2 = os.path.realpath(p1), os.path.realpath(p2)
    # join(p2, '') adds a trailing separator only if p2 lacks one, so the
    # root directory "/" is not turned into the never-matching "//".
    return p1 == p2 or p1.startswith(os.path.join(p2, ''))
with your inspirations, this method has been added to my utils:
def is_in_basefolder(path_to_check: PosixPath, basefolder: PosixPath):
    """
    check if a given path is in base folder

    Both paths are resolved (made absolute, with symbolic links and ``..``
    segments eliminated) before they are compared.

    parameters:
        path_to_check: a path to match with base folder
        basefolder: the base folder

    returns:
        True if the resolved path is the base folder itself or lies below it
    """
    path = path_to_check.resolve()
    base = basefolder.resolve()
    # The base folder must be an actual ancestor of the path. Looking for
    # the base folder's name among the path's components would also accept
    # unrelated paths that merely contain a component with the same name.
    return path == base or base in path.parents
New for Python 3.9
pathlib
has a new method on PurePath
called is_relative_to
which performs this function directly. You can read the python documentation on how is_relative_to
works, or use this example:
from pathlib import Path
child_path = Path("/path/to/file")
if child_path.is_relative_to("/path"):
print("/path/to/file is a child of /path") # This prints
if child_path.is_relative_to("/anotherpath"):
print("/path/to/file is a child of /anotherpath") # This does not print
import os
from typing import Union
def equals_or_contained(path: Union[str, os.PathLike], directory: Union[str, os.PathLike]) -> bool:
    """checks whether the path is inside the directory or is equal to it

    Both arguments are made absolute and normalised lexically with
    ``os.path.abspath``; symbolic links are not resolved.

    :param path: path to test
    :param directory: directory that should contain (or equal) ``path``
    :return: True if ``path`` equals ``directory`` or lies below it
    """
    # abspath() collapses ".." segments. Path.absolute() keeps them, so
    # "/a/b/../../etc" would otherwise count as being inside "/a/b".
    p = Path(os.path.abspath(path))
    d = Path(os.path.abspath(directory))
    try:
        # relative_to() also succeeds when both paths are equal
        p.relative_to(d)
    except ValueError:
        return False
    return True
I’m quite late to the party here, but would os.path.relpath
not solve the problem? The following should work, operating under the assumption that os.path.relpath
computes the minimum relative path (so eg. ..
is not then followed by going back into the same directory).
import os
def is_child_path(parentPath, childPath):
    """Return True if ``childPath`` is ``parentPath`` itself or lies below it.

    The check is purely lexical (``os.path.relpath``): the paths need not
    exist and symbolic links are not resolved.

    :param parentPath: candidate parent directory
    :param childPath: path expected to be inside ``parentPath``
    :return: True if ``childPath`` equals or is located under ``parentPath``
    """
    try:
        relative = os.path.relpath(childPath, parentPath)
    except ValueError:
        # Raised on Windows if the drives are different.
        return False
    # If the first component of the relative path is ".." it goes above the
    # parent, so it cannot be a child. Only a whole ".." component counts:
    # a child named e.g. "..hidden" also starts with two dots.
    return not (relative == os.pardir
                or relative.startswith(os.pardir + os.sep))
Examples of this function being called:
>>> is_child_path("/a", "/a")
True
>>> is_child_path("/a", "/a/b")
True
>>> is_child_path("/a", "/b")
False
>>> is_child_path("/a", "/b/../a")
True
>>> is_child_path("/a", "/a/../b")
False
>>> is_child_path("a", "a/b")
True
>>> is_child_path("a", "a/b/..")
True
>>> is_child_path("a", "a/b/../..")
False
This function depends only on os
, and I believe relies on a path string-only computation, meaning the path does not have to exist on the filesystem. This does mean that it does not know (or care) about symlinks or the existence of files or directories, but you can easily use os.path.isfile
or os.path.isdir
to check for existence, and os.path.realpath
to resolve symlinks, before calling is_child_path
itself.
I would like to write a template system in Python that allows files to be included.
e.g.
This is a template You can safely include files with safe_include`othertemplate.rst`
As you know, including files might be dangerous. For example, if I use the template system in a web application which allows users to create their own templates, they might do something like
I want your passwords: safe_include`/etc/password`
So therefore, I have to restrict the inclusion of files to files which are for example in a certain subdirectory (e.g. /home/user/templates
)
The question is now: How can I check, whether /home/user/templates/includes/inc1.rst
is in a subdirectory of /home/user/templates
?
Would the following code work and be secure?
import os.path
def in_directory(file, directory, allow_symlink = False):
    """Report whether ``file`` appears to lie under ``directory``.

    Both arguments are made absolute with ``os.path.abspath`` and the result
    is true when ``os.path.commonprefix`` of the two equals the directory.

    :param file: path to test
    :param directory: directory that is expected to contain ``file``
    :param allow_symlink: when false, a ``file`` that is itself a symbolic
        link is rejected
    :return: True if the common prefix of both paths equals ``directory``
    """
    #make both absolute
    directory = os.path.abspath(directory)
    file = os.path.abspath(file)
    #check whether file is a symbolic link, if yes, return false if they are not allowed
    # NOTE(review): islink() only inspects the final path component, so a
    # symbolic link in an intermediate directory is not detected here.
    if not allow_symlink and os.path.islink(file):
        return False
    #return true, if the common prefix of both is equal to directory
    #e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
    # NOTE(review): commonprefix() compares character by character, so a
    # sibling such as /a/bc/d.rst also has the common prefix /a/b.
    return os.path.commonprefix([file, directory]) == directory
As long, as allow_symlink
is False, it should be secure, I think. Allowing symlinks of course would make it insecure if the user is able to create such links.
UPDATE – Solution
The code above does not work, if intermediate directories are symbolic links.
To prevent this, you have to use realpath
instead of abspath
.
UPDATE: adding a trailing / to directory to solve the problem with commonprefix() Reorx pointed out.
This also makes allow_symlink
unnecessary as symlinks are expanded to their real destination
import os.path
def in_directory(file, directory):
    """Return True if ``file`` resolves to a location inside ``directory``.

    Both paths are canonicalised with ``os.path.realpath``, so symbolic links
    (including links in intermediate directories) and ``..`` segments are
    resolved before the comparison.

    :param file: path of the file to test
    :param directory: directory that must contain ``file``
    :return: True if the resolved file lies below the resolved directory;
        False otherwise, including when both resolve to the same path
    """
    # join(..., '') appends exactly one trailing separator, so /a/b becomes
    # /a/b/ and a sibling such as /a/bc can no longer match the prefix.
    directory = os.path.join(os.path.realpath(directory), '')
    file = os.path.realpath(file)
    return file.startswith(directory)
os.path.realpath(path): Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path (if they are supported by the operating system).
Use it on directory and subdirectory name, then check latter starts with former.
I would test the result from commonprefix against the filename to get a better answer, something like this:
def is_in_folder(filename, folder='/tmp/'):
    """Return True if ``filename`` lies below ``folder``.

    The comparison is purely lexical: both paths are normalised with
    ``os.path.normpath`` (which collapses ``..`` and duplicate separators),
    but relative paths are not made absolute and symbolic links are not
    resolved.

    :param filename: path to test
    :param folder: folder that should contain ``filename``
    :return: True if ``filename`` is strictly inside ``folder``; False
        otherwise, including when both normalise to the same path
    """
    # normalize both parameters
    fn = os.path.normpath(filename)
    fd = os.path.normpath(folder)
    # Compare against the folder plus one trailing separator so that a
    # sibling like /tmp2 is not mistaken for a child of /tmp. The root
    # folder already ends with the separator.
    prefix = fd if fd.endswith(os.sep) else fd + os.sep
    return fn != fd and fn.startswith(prefix)
So, I needed this, and due to the criticisms of commonprefix, I went a different way:
def os_path_split_asunder(path, debug=False):
    """
    Split ``path`` into the list of its components.

    http://stackoverflow.com/a/4580931/171094

    :param path: path string to split
    :param debug: when true, print every intermediate ``os.path.split`` step
    :return: list of components in order; an absolute path starts with its
        root (e.g. ``['/', 'var', 'test']``) and an empty path gives ``[]``
    """
    parts = []
    while True:
        newpath, tail = os.path.split(path)
        if debug:
            print(repr(path), (newpath, tail))
        if newpath == path:
            # split() made no progress: what is left is the root or nothing.
            assert not tail
            if path:
                parts.append(path)
            break
        parts.append(tail)
        path = newpath
    # Components were collected from the end of the path towards the root.
    parts.reverse()
    return parts
def is_subdirectory(potential_subdirectory, expected_parent_directory):
    """
    Is the first argument a sub-directory of the second argument?

    Both paths are canonicalised with ``os.path.realpath`` (absolute, with
    symbolic links and ``..`` resolved) and compared component by component.

    :param potential_subdirectory: path that may lie under the parent
    :param expected_parent_directory: path of the supposed parent directory
    :return: True if the potential_subdirectory is the expected parent directory itself or lies below it

    >>> is_subdirectory('/var/test2', '/var/test')
    False
    >>> is_subdirectory('/var/test', '/var/test2')
    False
    >>> is_subdirectory('var/test2', 'var/test')
    False
    >>> is_subdirectory('var/test', 'var/test2')
    False
    >>> is_subdirectory('/var/test/sub', '/var/test')
    True
    >>> is_subdirectory('/var/test', '/var/test/sub')
    False
    >>> is_subdirectory('var/test/sub', 'var/test')
    True
    >>> is_subdirectory('var/test', 'var/test')
    True
    >>> is_subdirectory('var/test', 'var/test/fake_sub/..')
    True
    >>> is_subdirectory('var/test/sub/sub2/sub3/../..', 'var/test')
    True
    >>> is_subdirectory('var/test/sub', 'var/test/fake_sub/..')
    True
    >>> is_subdirectory('var/test', 'var/test/sub')
    False
    """

    def _get_normalized_parts(path):
        # realpath() alone returns an absolute, normalised path. Running
        # normpath() first would collapse "link/.." lexically, before the
        # link is resolved, and could point at a different location.
        return os_path_split_asunder(os.path.realpath(path))

    # make absolute and handle symbolic links, split into components
    sub_parts = _get_normalized_parts(potential_subdirectory)
    parent_parts = _get_normalized_parts(expected_parent_directory)

    if len(parent_parts) > len(sub_parts):
        # a parent directory never has more path segments than its child
        return False

    # the parent's components must be the leading components of the child
    return sub_parts[:len(parent_parts)] == parent_parts
def is_subdir(path, directory):
    """Return True if ``path`` is ``directory`` itself or lies below it.

    Both arguments are canonicalised with ``os.path.realpath`` before the
    relative path from ``directory`` to ``path`` is computed.

    :param path: path to test
    :param directory: candidate parent directory
    :return: True if ``path`` is inside (or equal to) ``directory``
    """
    path = os.path.realpath(path)
    directory = os.path.realpath(directory)
    try:
        relative = os.path.relpath(path, directory)
    except ValueError:
        # relpath() raises on Windows when the paths are on different
        # drives; such paths can never be nested.
        return False
    # A relative path of exactly ".." (path is the direct parent of
    # directory) has no trailing separator and must be rejected as well.
    return not (relative == os.pardir
                or relative.startswith(os.pardir + os.sep))
Based on another answer here, with correction, and with a user-friendlier name:
def isA_subdirOfB_orAisB(A, B):
    """Return True if directory ``A`` is ``B`` itself or lies below ``B``.

    It is assumed that A is a directory. Both paths are canonicalised with
    ``os.path.realpath`` first.

    :param A: candidate sub-directory
    :param B: candidate parent directory
    :return: True if ``A`` equals ``B`` or is located under it
    """
    try:
        relative = os.path.relpath(os.path.realpath(A),
                                   os.path.realpath(B))
    except ValueError:
        # Raised on Windows when A and B are on different drives, in which
        # case A cannot be inside B.
        return False
    return not (relative == os.pardir
                or relative.startswith(os.pardir + os.sep))
Python 3’s pathlib
module makes this straightforward with its Path.parents attribute. For example:
from pathlib import Path
root = Path('/path/to/root')
child = root / 'some' / 'child' / 'dir'
other = Path('/some/other/path')
Then:
>>> root in child.parents
True
>>> other in child.parents
False
Problems with many of the suggested methods
If you’re going to test for directory parentage with string comparison or os.path.commonprefix
methods, these are prone to errors with similarly-named paths or relative paths. For example:
/path/to/files/myfile
would be shown as a child path of /path/to/file
using many of the methods.
/path/to/files/../../myfiles
would not be shown as a parent of /path/myfiles/myfile
by many of the methods. In fact, it is.
The previous answer by Rob Dennis provides a good way to compare path parentage without encountering these problems. Python 3.4 added the pathlib
module which can perform these kind of path operations in a more sophisticated way, optionally without referencing the underlying OS. jme has described in another previous answer how to use pathlib
for the purpose of accurately determining if one path is a child of another. If you prefer not to use pathlib
(not sure why, it’s pretty great) then Python 3.5 introduced a new OS-based method in os.path
that allows you to perform path parent-child checks in a similarly accurate and error-free manner with a lot less code.
New for Python 3.5
Python 3.5 introduced the function os.path.commonpath
. This is a method that is specific to the OS that the code is running on. You can use commonpath
in the following way to accurately determine path parentage:
def path_is_parent(parent_path, child_path):
    """Return True if ``parent_path`` is ``child_path`` or one of its ancestors.

    The comparison is lexical: paths are made absolute and normalised with
    ``os.path.abspath``. If you are concerned about symbolic links, resolve
    both arguments with ``os.path.realpath`` before calling.

    :param parent_path: candidate parent directory
    :param child_path: path expected to lie under ``parent_path``
    :return: True if ``child_path`` equals or is below ``parent_path``
    """
    # Smooth out relative path names
    parent_path = os.path.abspath(parent_path)
    child_path = os.path.abspath(child_path)

    try:
        # commonpath() on the parent alone regularises it the same way as
        # the two-path call, so the two results are directly comparable.
        return os.path.commonpath([parent_path]) == os.path.commonpath([parent_path, child_path])
    except ValueError:
        # commonpath() raises when the paths are on different drives
        # (Windows); such paths are never parent and child.
        return False
Accurate one-liner
You can combine the whole lot into a one-line if statement in Python 3.5. It’s ugly, it includes unnecessary duplicate calls to os.path.abspath
and it definitely won’t fit in the PEP 8 79-character line-length guidelines, but if you like that kind of thing, here goes:
if os.path.commonpath([os.path.abspath(parent_path_to_test)]) == os.path.commonpath([os.path.abspath(parent_path_to_test), os.path.abspath(child_path_to_test)]):
# Yes, the child path is under the parent path
New for Python 3.9
pathlib
has a new method on PurePath
called is_relative_to
which performs this function directly. You can read the python documentation on how is_relative_to
works if you need to see how to use it. Or you can see my other answer for a more full description of how to use it.
I like the “path in other_path.parents” approach mentioned in another answer because I’m a big fan of pathlib, BUT I feel that approach is a bit heavy (it creates one Path instance for each parent up to the root of the path). Also, the case where path == other_path will fail with that approach, whereas os.path.commonpath would succeed in that case.
The following is a different approach, with its own set of pros and cons compared to other methods identified in the various answers:
try:
other_path.relative_to(path)
except ValueError:
...no common path...
else:
...common path...
which is a little more verbose but can easily be added as a function in your application’s common utilities module or even add the method to Path at startup time.
def is_in_directory(filepath, directory):
    """Return True if ``filepath`` resolves to a location below ``directory``.

    Both paths are canonicalised with ``os.path.realpath``.

    :param filepath: path to test
    :param directory: directory that should contain ``filepath``
    :return: True if the resolved file path is strictly inside the resolved
        directory; False when both resolve to the same path
    """
    # join(..., '') adds the trailing separator only when it is missing.
    # Appending os.sep unconditionally turns the root "/" into "//", which
    # no resolved path starts with.
    prefix = os.path.join(os.path.realpath(directory), '')
    return os.path.realpath(filepath).startswith(prefix)
I used below function for similar problem:
def is_subdir(p1, p2):
    """returns true if p1 is p2 or its subdirectory

    :param str p1: subdirectory candidate
    :param str p2: parent directory
    :returns True if the resolved p1 equals the resolved p2 or lies below it"""
    p1, p2 = os.path.realpath(p1), os.path.realpath(p2)
    # join(p2, '') adds a trailing separator only if p2 lacks one, so the
    # root directory "/" is not turned into the never-matching "//".
    return p1 == p2 or p1.startswith(os.path.join(p2, ''))
After running into problems with symbolic link I’ve modified the function. Now it checks if both paths are directories.
def is_subdir(p1, p2):
    """check if p1 is p2 or its subdirectory

    :param str p1: subdirectory candidate
    :param str p2: parent directory
    :returns True if p1,p2 are directories and p1 is p2 or its subdirectory"""
    if not (os.path.isdir(p1) and os.path.isdir(p2)):
        return False
    p1, p2 = os.path.realpath(p1), os.path.realpath(p2)
    # join(p2, '') adds a trailing separator only if p2 lacks one, so the
    # root directory "/" is not turned into the never-matching "//".
    return p1 == p2 or p1.startswith(os.path.join(p2, ''))
with your inspirations, this method has been added to my utils:
def is_in_basefolder(path_to_check: PosixPath, basefolder: PosixPath):
    """
    check if a given path is in base folder

    Both paths are resolved (made absolute, with symbolic links and ``..``
    segments eliminated) before they are compared.

    parameters:
        path_to_check: a path to match with base folder
        basefolder: the base folder

    returns:
        True if the resolved path is the base folder itself or lies below it
    """
    path = path_to_check.resolve()
    base = basefolder.resolve()
    # The base folder must be an actual ancestor of the path. Looking for
    # the base folder's name among the path's components would also accept
    # unrelated paths that merely contain a component with the same name.
    return path == base or base in path.parents
New for Python 3.9
pathlib
has a new method on PurePath
called is_relative_to
which performs this function directly. You can read the python documentation on how is_relative_to
works, or use this example:
from pathlib import Path
child_path = Path("/path/to/file")
if child_path.is_relative_to("/path"):
print("/path/to/file is a child of /path") # This prints
if child_path.is_relative_to("/anotherpath"):
print("/path/to/file is a child of /anotherpath") # This does not print
import os
from typing import Union
def equals_or_contained(path: Union[str, os.PathLike], directory: Union[str, os.PathLike]) -> bool:
    """checks whether the path is inside the directory or is equal to it

    Both arguments are made absolute and normalised lexically with
    ``os.path.abspath``; symbolic links are not resolved.

    :param path: path to test
    :param directory: directory that should contain (or equal) ``path``
    :return: True if ``path`` equals ``directory`` or lies below it
    """
    # abspath() collapses ".." segments. Path.absolute() keeps them, so
    # "/a/b/../../etc" would otherwise count as being inside "/a/b".
    p = Path(os.path.abspath(path))
    d = Path(os.path.abspath(directory))
    try:
        # relative_to() also succeeds when both paths are equal
        p.relative_to(d)
    except ValueError:
        return False
    return True
I’m quite late to the party here, but would os.path.relpath
not solve the problem? The following should work, operating under the assumption that os.path.relpath
computes the minimum relative path (so eg. ..
is not then followed by going back into the same directory).
import os
def is_child_path(parentPath, childPath):
    """Return True if ``childPath`` is ``parentPath`` itself or lies below it.

    The check is purely lexical (``os.path.relpath``): the paths need not
    exist and symbolic links are not resolved.

    :param parentPath: candidate parent directory
    :param childPath: path expected to be inside ``parentPath``
    :return: True if ``childPath`` equals or is located under ``parentPath``
    """
    try:
        relative = os.path.relpath(childPath, parentPath)
    except ValueError:
        # Raised on Windows if the drives are different.
        return False
    # If the first component of the relative path is ".." it goes above the
    # parent, so it cannot be a child. Only a whole ".." component counts:
    # a child named e.g. "..hidden" also starts with two dots.
    return not (relative == os.pardir
                or relative.startswith(os.pardir + os.sep))
Examples of this function being called:
>>> is_child_path("/a", "/a")
True
>>> is_child_path("/a", "/a/b")
True
>>> is_child_path("/a", "/b")
False
>>> is_child_path("/a", "/b/../a")
True
>>> is_child_path("/a", "/a/../b")
False
>>> is_child_path("a", "a/b")
True
>>> is_child_path("a", "a/b/..")
True
>>> is_child_path("a", "a/b/../..")
False
This function depends only on os
, and I believe relies on a path string-only computation, meaning the path does not have to exist on the filesystem. This does mean that it does not know (or care) about symlinks or the existence of files or directories, but you can easily use os.path.isfile
or os.path.isdir
to check for existence, and os.path.realpath
to resolve symlinks, before calling is_child_path
itself.