How to textually find an imported name in a module
Question:
I wrote a method called buildRegex
that, given a name (of type str
), returns a regex
object that finds a from ... import ... name
statement in a Python
module.
For example, here is the expected behaviour of buildRegex
:
>>> regObj = buildRegex('foo')
>>> regObj.search('from a import fool') is None
True
>>> regObj.search('from a import foo') is not None
True
>>> regObj.search('from a.b.c import foo as food') is None
True
>>> regObj.search('from a.b.c import fool, bar as foo') is not None
True
What I have so far works for all the examples above (and more):
def buildRegex(name):
    """Return a compiled regex that finds ``from ... import ... name``.

    The pattern matches a ``from <module> import <names>`` statement in
    which ``name`` is one of the names bound by the import, i.e. it
    appears as a whole word and is not immediately followed by ``as``.

    Groups of a successful match:
      1. the dotted module path,
      2. the last ``.segment`` of that path (or None),
      3. the text between ``import`` and the matched name.

    :param name: the imported (or alias) name to look for.
    :returns: a compiled regular expression object.
    """
    # Escape the name so it is always matched literally, then require word
    # boundaries on both sides and forbid a following "as" clause.
    singleImportedName = r'(\b{0}\b(?!\s+as\s+))'.format(re.escape(name))
    # The name itself is only asserted with a lookahead; everything before
    # it on the same statement (up to a comment or newline) is consumed.
    importStatement = (
        r'from\s+(\w+(\.\w+)*)\s+import\s+([^#\n]*)(?={0})'.format(singleImportedName)
    )
    return re.compile(importStatement)
buildRegex
assumes that the searched module has no SyntaxError
s which is OK.
My problem is, when looking for the imported name foo
, I also need to know if it is an alias to a different name. I.e. if a module has the statement:
from a.b.c import bar as foo
I want to know what foo
is aliasing, in this case, that would be bar
. Currently, due to asserted lookaheads
in the regex, that is not possible. So, finally my question:
How can I refactor the regex so that this information is not lost, i.e., if the given name is an alias, then the name it is aliasing is in one of the regex
‘s groups?
Answers:
I’d recommend that instead of writing complicated regular expressions to parse imports, one would actually use the ast.parse
to parse the source code into abstract syntax tree and find the names from there, as ast.parse
is guaranteed to parse Python correctly. Something like:
import ast
class ImportFinder(ast.NodeVisitor):
    """AST visitor that records every import statement it encounters.

    After ``visit(tree)``, ``imports`` holds one record per statement, in
    the order the statements were visited:

    * ``['import', names]`` for ``import ...``
    * ``('from', level, module, names)`` for ``from ... import ...``

    ``names`` is a list of ``(name, asname)`` pairs; ``asname`` is ``None``
    when the name was imported without an alias.
    """

    def __init__(self):
        self.imports = []

    def visit_Import(self, node):
        """Record an ``import a, b as c`` statement."""
        names = [(alias.name, alias.asname) for alias in node.names]
        # Stored as a list (not a tuple) so records keep the shape shown in
        # the example output that accompanies this snippet.
        self.imports.append(['import', names])

    def visit_ImportFrom(self, node):
        """Record a ``from m import a, b as c`` statement."""
        names = [(alias.name, alias.asname) for alias in node.names]
        # node.level is the number of leading dots; node.module is None for
        # statements such as "from .. import x".
        self.imports.append(('from', node.level, node.module, names))
def parse_imports(source):
    """Return the import records found in a piece of Python source code.

    :param source: Python source text to analyse.
    :returns: the ``imports`` list collected by an ``ImportFinder`` run
        over the parsed tree.
    :raises SyntaxError: propagated from ``ast.parse`` when ``source`` is
        not valid Python.
    """
    finder = ImportFinder()
    finder.visit(ast.parse(source))
    return finder.imports
Example usage:
import pprint
# Run parse_imports on a sample containing absolute, relative, and aliased
# imports, and pretty-print the resulting records.
pprint.pprint(parse_imports('''
from foo import bar, baz, frob
from .. import bar as spam, baz as ham, frob
import bar.baz
import bar.foo as baf
'''))
Prints out:
[('from', 0, 'foo', [('bar', None), ('baz', None), ('frob', None)]),
('from', 2, None, [('bar', 'spam'), ('baz', 'ham'), ('frob', None)]),
['import', [('bar.baz', None)]],
['import', [('bar.foo', 'baf')]]]
The integer on the from
lines gives the number of .
before the module name.
import inspect
import importlib
import ast
class Imports(ast.NodeVisitor):
    """AST visitor that prints a description of every import it visits."""

    def visit_Import(self, node):
        """Print each module named by an ``import ...`` statement."""
        print("In Import")
        for imp in node.names:
            if imp.asname is not None:
                print("module name = {}, alias = {}".format(imp.name, imp.asname))
            else:
                print("module name = {}".format(imp.name))
        # Blank line to separate this statement's report from the next one.
        print()

    def visit_ImportFrom(self, node):
        """Print each name bound by a ``from ... import ...`` statement."""
        print("In ImportFrom")
        for imp in node.names:
            # One field per line; the trailing "\n" leaves a blank line
            # after each imported name.
            if imp.asname is not None:
                print("module = {}\nname = {}\nalias = {}\nlevel = {}\n".
                      format(node.module, imp.name, imp.asname, node.level))
            else:
                print("module = {}\nname = {}\nlevel = {}\n".
                      format(node.module, imp.name, node.level))
        print()
# Name of the module to inspect.
mod = "temp_test"
# Rebind mod from the module name to the imported module object.
mod = importlib.import_module(mod)
# Fetch the module's source text and parse it into an AST.
p = ast.parse(inspect.getsource(mod))
# Walk the tree, printing every import statement that is visited.
Imports().visit(p)
Input:
from bisect import bisect_left as bs
import datetime
import time
import numpy as np
def foo():
    """Sample input: a function whose body holds a function-local import."""
    from re import findall
class Foo():
    """Sample input: a class whose method performs aliased imports."""

    def test(self):
        """Import two names from ``re`` under aliases."""
        from re import compile as cp, finditer as ft
Output:
In ImportFrom
module = bisect
name = bisect_left
alias = bs
level = 0
In Import
module name = datetime
In Import
module name = time
In Import
module name = numpy, alias = np
In ImportFrom
module = re
name = findall
level = 0
In ImportFrom
module = re
name = compile
alias = cp
level = 0
module = re
name = finditer
alias = ft
level = 0
class Import(names)
An import statement. names is a list of alias nodes.
class ImportFrom(module, names, level)
Represents from x import y. module is a raw string of the ‘from’ name, without any leading dots, or None for statements such as from . import foo. level is an integer holding the level of the relative import (0 means absolute import).
The greentreesnakes documentation for me at least has a much better explanation of what all the nodes do and how to use the ast module than the actual ast documentation itself.
You can also pass the module directly, or open the .py file and pass its contents to ast.parse:
# Read the source text from the file and parse it, instead of importing the
# module first.
with open("temp_test.py") as f:
    p = ast.parse(f.read(), filename="<ast>", mode="exec")
# Walk the parsed tree, printing every import statement that is visited.
Imports().visit(p)
And passing the module:
import temp_test
# Get the source text of the already-imported module and parse it.
p = ast.parse(inspect.getsource(temp_test))
# Walk the parsed tree, printing every import statement that is visited.
Imports().visit(p)
I wrote a method called buildRegex
that, given a name (of type str
), returns a regex
object that finds a from ... import ... name
statement in a Python
module.
For example, here is the expected behaviour of buildRegex
:
>>> regObj = buildRegex('foo')
>>> regObj.search('from a import fool') is None
True
>>> regObj.search('from a import foo') is not None
True
>>> regObj.search('from a.b.c import foo as food') is None
True
>>> regObj.search('from a.b.c import fool, bar as foo') is not None
True
What I have so far works for all the examples above (and more):
def buildRegex(name):
    """Return a compiled regex that finds ``from ... import ... name``.

    The pattern matches a ``from <module> import <names>`` statement in
    which ``name`` is one of the names bound by the import, i.e. it
    appears as a whole word and is not immediately followed by ``as``.

    Groups of a successful match:
      1. the dotted module path,
      2. the last ``.segment`` of that path (or None),
      3. the text between ``import`` and the matched name.

    :param name: the imported (or alias) name to look for.
    :returns: a compiled regular expression object.
    """
    # Escape the name so it is always matched literally, then require word
    # boundaries on both sides and forbid a following "as" clause.
    singleImportedName = r'(\b{0}\b(?!\s+as\s+))'.format(re.escape(name))
    # The name itself is only asserted with a lookahead; everything before
    # it on the same statement (up to a comment or newline) is consumed.
    importStatement = (
        r'from\s+(\w+(\.\w+)*)\s+import\s+([^#\n]*)(?={0})'.format(singleImportedName)
    )
    return re.compile(importStatement)
buildRegex
assumes that the searched module has no SyntaxError
s which is OK.
My problem is, when looking for the imported name foo
, I also need to know if it is an alias to a different name. I.e. if a module has the statement:
from a.b.c import bar as foo
I want to know what foo
is aliasing, in this case, that would be bar
. Currently, due to asserted lookaheads
in the regex, that is not possible. So, finally my question:
How can I refactor the regex so that this information is not lost, i.e., if the given name is an alias, then the name it is aliasing is in one of the regex
‘s groups?
I’d recommend that instead of writing complicated regular expressions to parse imports, one would actually use the ast.parse
to parse the source code into abstract syntax tree and find the names from there, as ast.parse
is guaranteed to parse Python correctly. Something like:
import ast
class ImportFinder(ast.NodeVisitor):
    """AST visitor that records every import statement it encounters.

    After ``visit(tree)``, ``imports`` holds one record per statement, in
    the order the statements were visited:

    * ``['import', names]`` for ``import ...``
    * ``('from', level, module, names)`` for ``from ... import ...``

    ``names`` is a list of ``(name, asname)`` pairs; ``asname`` is ``None``
    when the name was imported without an alias.
    """

    def __init__(self):
        self.imports = []

    def visit_Import(self, node):
        """Record an ``import a, b as c`` statement."""
        names = [(alias.name, alias.asname) for alias in node.names]
        # Stored as a list (not a tuple) so records keep the shape shown in
        # the example output that accompanies this snippet.
        self.imports.append(['import', names])

    def visit_ImportFrom(self, node):
        """Record a ``from m import a, b as c`` statement."""
        names = [(alias.name, alias.asname) for alias in node.names]
        # node.level is the number of leading dots; node.module is None for
        # statements such as "from .. import x".
        self.imports.append(('from', node.level, node.module, names))
def parse_imports(source):
    """Return the import records found in a piece of Python source code.

    :param source: Python source text to analyse.
    :returns: the ``imports`` list collected by an ``ImportFinder`` run
        over the parsed tree.
    :raises SyntaxError: propagated from ``ast.parse`` when ``source`` is
        not valid Python.
    """
    finder = ImportFinder()
    finder.visit(ast.parse(source))
    return finder.imports
Example usage:
import pprint
# Run parse_imports on a sample containing absolute, relative, and aliased
# imports, and pretty-print the resulting records.
pprint.pprint(parse_imports('''
from foo import bar, baz, frob
from .. import bar as spam, baz as ham, frob
import bar.baz
import bar.foo as baf
'''))
Prints out:
[('from', 0, 'foo', [('bar', None), ('baz', None), ('frob', None)]),
('from', 2, None, [('bar', 'spam'), ('baz', 'ham'), ('frob', None)]),
['import', [('bar.baz', None)]],
['import', [('bar.foo', 'baf')]]]
The integer on the from
lines gives the number of .
before the module name.
import inspect
import importlib
import ast
class Imports(ast.NodeVisitor):
    """AST visitor that prints a description of every import it visits."""

    def visit_Import(self, node):
        """Print each module named by an ``import ...`` statement."""
        print("In Import")
        for imp in node.names:
            if imp.asname is not None:
                print("module name = {}, alias = {}".format(imp.name, imp.asname))
            else:
                print("module name = {}".format(imp.name))
        # Blank line to separate this statement's report from the next one.
        print()

    def visit_ImportFrom(self, node):
        """Print each name bound by a ``from ... import ...`` statement."""
        print("In ImportFrom")
        for imp in node.names:
            # One field per line; the trailing "\n" leaves a blank line
            # after each imported name.
            if imp.asname is not None:
                print("module = {}\nname = {}\nalias = {}\nlevel = {}\n".
                      format(node.module, imp.name, imp.asname, node.level))
            else:
                print("module = {}\nname = {}\nlevel = {}\n".
                      format(node.module, imp.name, node.level))
        print()
# Name of the module to inspect.
mod = "temp_test"
# Rebind mod from the module name to the imported module object.
mod = importlib.import_module(mod)
# Fetch the module's source text and parse it into an AST.
p = ast.parse(inspect.getsource(mod))
# Walk the tree, printing every import statement that is visited.
Imports().visit(p)
Input:
from bisect import bisect_left as bs
import datetime
import time
import numpy as np
def foo():
    """Sample input: a function whose body holds a function-local import."""
    from re import findall
class Foo():
    """Sample input: a class whose method performs aliased imports."""

    def test(self):
        """Import two names from ``re`` under aliases."""
        from re import compile as cp, finditer as ft
Output:
In ImportFrom
module = bisect
name = bisect_left
alias = bs
level = 0
In Import
module name = datetime
In Import
module name = time
In Import
module name = numpy, alias = np
In ImportFrom
module = re
name = findall
level = 0
In ImportFrom
module = re
name = compile
alias = cp
level = 0
module = re
name = finditer
alias = ft
level = 0
class Import(names)
An import statement. names is a list of alias nodes.
class ImportFrom(module, names, level)
Represents from x import y. module is a raw string of the ‘from’ name, without any leading dots, or None for statements such as from . import foo. level is an integer holding the level of the relative import (0 means absolute import).
The greentreesnakes documentation for me at least has a much better explanation of what all the nodes do and how to use the ast module than the actual ast documentation itself.
You can also pass the module directly, or open the .py file and pass its contents to ast.parse:
# Read the source text from the file and parse it, instead of importing the
# module first.
with open("temp_test.py") as f:
    p = ast.parse(f.read(), filename="<ast>", mode="exec")
# Walk the parsed tree, printing every import statement that is visited.
Imports().visit(p)
And passing the module:
import temp_test
# Get the source text of the already-imported module and parse it.
p = ast.parse(inspect.getsource(temp_test))
# Walk the parsed tree, printing every import statement that is visited.
Imports().visit(p)