Why is an exception raised on some Java files when using the javalang Python package?
Question:
I am aiming to obtain all method names in a Java file and count their lines of code.
I actually managed to obtain something using the javalang parser for Python. It identifies all methods but not the creator, which makes me think there is a specific property I should use to check whether the creator exists and to retrieve its name (any idea?).
It (the javalang Python package) works well most of the time, but it fails on some files like the following one, which raises an exception:
/*
* Decompiled with CFR 0.152.
*/
package com.vaadin.flow.di;
import com.vaadin.flow.component.Component;
import com.vaadin.flow.component.HasElement;
import com.vaadin.flow.component.UI;
import com.vaadin.flow.i18n.I18NProvider;
import com.vaadin.flow.router.NavigationEvent;
import com.vaadin.flow.server.BootstrapListener;
import com.vaadin.flow.server.DependencyFilter;
import com.vaadin.flow.server.VaadinService;
import com.vaadin.flow.server.VaadinServiceInitListener;
import com.vaadin.flow.server.VaadinSession;
import com.vaadin.flow.server.communication.IndexHtmlRequestListener;
import java.io.Serializable;
import java.util.stream.Stream;
/**
 * Instantiator interface as emitted by the CFR decompiler (see the file header).
 *
 * NOTE(review): this listing is decompiler output, not hand-written source.
 * The {@code 1.$assertionsDisabled} expressions below are not valid Java, so
 * neither javac nor a Java parser will accept this file as-is.
 */
public interface Instantiator
extends Serializable {
@Deprecated
public boolean init(VaadinService var1);
public Stream<VaadinServiceInitListener> getServiceInitListeners();
// Default implementation returns the stream it was given, unchanged.
@Deprecated
default public Stream<BootstrapListener> getBootstrapListeners(Stream<BootstrapListener> serviceInitListeners) {
return serviceInitListeners;
}
// Default implementation returns the stream it was given, unchanged.
default public Stream<IndexHtmlRequestListener> getIndexHtmlRequestListeners(Stream<IndexHtmlRequestListener> indexHtmlRequestListeners) {
return indexHtmlRequestListeners;
}
// Default implementation returns the stream it was given, unchanged.
default public Stream<DependencyFilter> getDependencyFilters(Stream<DependencyFilter> serviceInitFilters) {
return serviceInitFilters;
}
public <T> T getOrCreate(Class<T> var1);
// Default implementation delegates to getOrCreate and casts the result to T.
default public <T extends HasElement> T createRouteTarget(Class<T> routeTargetType, NavigationEvent event) {
return (T)((HasElement)this.getOrCreate(routeTargetType));
}
public <T extends Component> T createComponent(Class<T> var1);
// Returns the instantiator of the service that owns the session of the given UI.
public static Instantiator get(UI ui) {
// NOTE(review): "1.$assertionsDisabled" is a syntax error (the compiler reports
// "')' expected" here); presumably the decompiler's rendering of an
// "assert ui != null" statement - confirm against the real source.
if (!1.$assertionsDisabled && ui == null) {
throw new AssertionError();
}
VaadinSession session = ui.getSession();
// NOTE(review): same invalid construct as above; presumably "assert session != null".
if (!1.$assertionsDisabled && session == null) {
throw new AssertionError();
}
return session.getService().getInstantiator();
}
// Default implementation obtains the provider through getOrCreate.
default public I18NProvider getI18NProvider() {
return this.getOrCreate(I18NProvider.class);
}
// NOTE(review): static initializer blocks are not legal inside an interface in
// Java source; this block is a further decompiler artifact.
static {
if (1.$assertionsDisabled) {
// empty if block
}
}
}
which causes an exception:
Traceback (most recent call last):
File "/Users/irene/PycharmProjects/pythonProject/main.py", line 113, in <module>
main()
File "/Users/irene/PycharmProjects/pythonProject/main.py", line 109, in main
jlca.start()
File "/Users/irene/PycharmProjects/pythonProject/JLCodeAnalyzer.py", line 80, in start
tree = javalang.parse.parse(code_text)
File "/usr/local/lib/python3.10/site-packages/javalang/parse.py", line 53, in parse
return parser.parse()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 110, in parse
return self.parse_compilation_unit()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 302, in parse_compilation_unit
type_declaration = self.parse_type_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 347, in parse_type_declaration
return self.parse_class_or_interface_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 360, in parse_class_or_interface_declaration
type_declaration = self.parse_normal_interface_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 436, in parse_normal_interface_declaration
body = self.parse_interface_body()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 966, in parse_interface_body
declaration = self.parse_interface_body_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 981, in parse_interface_body_declaration
declaration = self.parse_interface_member_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1008, in parse_interface_member_declaration
declaration = self.parse_interface_method_or_field_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1018, in parse_interface_method_or_field_declaration
member = self.parse_interface_method_or_field_rest()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1035, in parse_interface_method_or_field_rest
rest = self.parse_interface_method_declarator_rest()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1082, in parse_interface_method_declarator_rest
body = self.parse_block()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1274, in parse_block
statement = self.parse_block_statement()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1339, in parse_block_statement
return self.parse_statement()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1389, in parse_statement
condition = self.parse_par_expression()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 2032, in parse_par_expression
self.accept(')')
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 131, in accept
self.illegal("Expected '%s'" % (accept,))
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 119, in illegal
raise JavaSyntaxError(description, at)
javalang.parser.JavaSyntaxError
And here is the JLCodeAnalyzer:
import javalang
import os
from javalang import tree
class JLCodeAnalyzer:
    '''
    Walk a directory tree, parse every .java file found with javalang and
    extract the source text of each method declaration.
    '''

    def __init__(self, code_path):
        '''
        :param code_path: root directory that is searched for .java files
        '''
        self.code_path = code_path
        self.codelines = None

    def get_method_start_end(self, method_node, tree):
        '''
        :param method_node: the method declaration node to locate
        :param tree: iterable yielding (path, node) pairs, e.g. a parsed javalang tree
        :return: (startpos, endpos, startline, endline); endpos/endline are
                 None when no node follows the method
        find the position of "method_node" and of the first node after it
        that does not have "method_node" on its path
        '''
        startpos = None
        endpos = None
        startline = None
        endline = None
        for path, node in tree:
            # first node visited after the method that is not nested in it
            if startpos is not None and method_node not in path:
                endpos = node.position
                endline = node.position.line if node.position is not None else None
                break
            if startpos is None and node == method_node:
                startpos = node.position
                startline = node.position.line if node.position is not None else None
        return startpos, endpos, startline, endline

    def get_method_text(self, startpos, endpos, startline, endline, last_endline_index, codelines, tree):
        '''
        :param startpos: position of the method, or None if it was not found
        :param endpos: position of the node following the method, or None
        :param startline: 1-based line on which the method starts
        :param endline: 1-based line of the node following the method
        :param last_endline_index: 0-based index of the last line of the
                                   previously extracted method, or None
        :param codelines: list of source lines (line endings kept)
        :param tree: unused; kept so existing callers keep working
        :return: (method text, 1-based start line, 1-based end line,
                 0-based index of the method's last line); ("", None, None, None)
                 when startpos is None
        cut the text of one method out of "codelines"
        '''
        if startpos is None:
            return "", None, None, None
        else:
            startline_index = startline - 1
            endline_index = endline - 1 if endpos is not None else None

            # 1. check for and fetch annotations
            # every line containing "@" between the previous method and this
            # one moves the start up by one line
            if last_endline_index is not None:
                for line in codelines[(last_endline_index + 1):(startline_index)]:
                    if "@" in line:
                        startline_index = startline_index - 1
            # "<ST>" marks the line boundaries so they can be recovered below
            meth_text = "<ST>".join(codelines[startline_index:endline_index])
            meth_text = meth_text[:meth_text.rfind("}") + 1]

            # 2. remove trailing rbrace for last methods & any external content/comments
            # if endpos is None and
            if not abs(meth_text.count("}") - meth_text.count("{")) == 0:
                # imbalanced braces
                brace_diff = abs(meth_text.count("}") - meth_text.count("{"))

                for _ in range(brace_diff):
                    # drop the last "}" and then everything after the one before it
                    meth_text = meth_text[:meth_text.rfind("}")]
                    meth_text = meth_text[:meth_text.rfind("}") + 1]

            meth_lines = meth_text.split("<ST>")
            meth_text = "".join(meth_lines)
            last_endline_index = startline_index + (len(meth_lines) - 1)

            return meth_text, (startline_index + 1), (last_endline_index + 1), last_endline_index

    def get_java_files(self, directory):
        '''
        :param directory: path to the main directory of java files
        :return: list of java files found
        search for all .java files recursively in "directory"
        '''
        java_files = []
        for root, _, files in os.walk(directory):
            for file in files:
                if file.endswith(".java"):
                    java_files.append(os.path.join(root, file))
        return java_files

    def start(self):
        '''
        parse every .java file below "self.code_path" and print the number
        of methods found in each one

        javalang.parse.parse raises JavaSyntaxError for input it cannot
        parse; the error is not caught here, so it stops the whole run
        '''
        java_files = self.get_java_files(self.code_path)
        for target_file in java_files:
            with open(target_file, 'r') as r:
                codelines = r.readlines()
                code_text = ''.join(codelines)

            lex = None
            print("working on ", target_file)
            tree = javalang.parse.parse(code_text)
            # NOTE: keyed by method name, so overloaded methods share one entry
            methods = {}
            for _, method_node in tree.filter(javalang.tree.MethodDeclaration):
                startpos, endpos, startline, endline = self.get_method_start_end(method_node, tree)
                method_text, startline, endline, lex = self.get_method_text(startpos, endpos, startline, endline, lex, codelines, tree)
                methods[method_node.name] = method_text
            print(f"total methods in {target_file} = {len(methods)}")
I tried to compile the code online and got some syntax errors.
For example:
error: ')' expected
if (!1.$assertionsDisabled && ui == null) {
^
But this code was downloaded from a Maven repository, so I would expect it to have correct syntax!
Or am I wrong?
Answers:
That is not valid Java code. For example:
if (!1.$assertionsDisabled && ui == null) {
I suspect that what you have actually done is download a .class
file and decompile it [1]. Unfortunately, the decompiler has not worked properly, and what you have there cannot be compiled.
My suggestions are to try to get the actual Java source code (if it exists) or just exclude it from your analysis.
There are hints (e.g. in the import statements) that the ".class" file you have decompiled was originally compiled from Vaadin rather than Java.
1 – Note that a Maven repository will typically hold compiled artifacts rather than source code. You would typically get source code from a source code repository.
I am aiming to obtain all method names in a Java file and count their lines of code.
I actually managed to obtain something using the javalang parser for Python. It identifies all methods but not the creator, which makes me think there is a specific property I should use to check whether the creator exists and to retrieve its name (any idea?).
It (the javalang Python package) works well most of the time, but it fails on some files like the following one, which raises an exception:
/*
* Decompiled with CFR 0.152.
*/
package com.vaadin.flow.di;
import com.vaadin.flow.component.Component;
import com.vaadin.flow.component.HasElement;
import com.vaadin.flow.component.UI;
import com.vaadin.flow.i18n.I18NProvider;
import com.vaadin.flow.router.NavigationEvent;
import com.vaadin.flow.server.BootstrapListener;
import com.vaadin.flow.server.DependencyFilter;
import com.vaadin.flow.server.VaadinService;
import com.vaadin.flow.server.VaadinServiceInitListener;
import com.vaadin.flow.server.VaadinSession;
import com.vaadin.flow.server.communication.IndexHtmlRequestListener;
import java.io.Serializable;
import java.util.stream.Stream;
/**
 * Instantiator interface as emitted by the CFR decompiler (see the file header).
 *
 * NOTE(review): this listing is decompiler output, not hand-written source.
 * The {@code 1.$assertionsDisabled} expressions below are not valid Java, so
 * neither javac nor a Java parser will accept this file as-is.
 */
public interface Instantiator
extends Serializable {
@Deprecated
public boolean init(VaadinService var1);
public Stream<VaadinServiceInitListener> getServiceInitListeners();
// Default implementation returns the stream it was given, unchanged.
@Deprecated
default public Stream<BootstrapListener> getBootstrapListeners(Stream<BootstrapListener> serviceInitListeners) {
return serviceInitListeners;
}
// Default implementation returns the stream it was given, unchanged.
default public Stream<IndexHtmlRequestListener> getIndexHtmlRequestListeners(Stream<IndexHtmlRequestListener> indexHtmlRequestListeners) {
return indexHtmlRequestListeners;
}
// Default implementation returns the stream it was given, unchanged.
default public Stream<DependencyFilter> getDependencyFilters(Stream<DependencyFilter> serviceInitFilters) {
return serviceInitFilters;
}
public <T> T getOrCreate(Class<T> var1);
// Default implementation delegates to getOrCreate and casts the result to T.
default public <T extends HasElement> T createRouteTarget(Class<T> routeTargetType, NavigationEvent event) {
return (T)((HasElement)this.getOrCreate(routeTargetType));
}
public <T extends Component> T createComponent(Class<T> var1);
// Returns the instantiator of the service that owns the session of the given UI.
public static Instantiator get(UI ui) {
// NOTE(review): "1.$assertionsDisabled" is a syntax error (the compiler reports
// "')' expected" here); presumably the decompiler's rendering of an
// "assert ui != null" statement - confirm against the real source.
if (!1.$assertionsDisabled && ui == null) {
throw new AssertionError();
}
VaadinSession session = ui.getSession();
// NOTE(review): same invalid construct as above; presumably "assert session != null".
if (!1.$assertionsDisabled && session == null) {
throw new AssertionError();
}
return session.getService().getInstantiator();
}
// Default implementation obtains the provider through getOrCreate.
default public I18NProvider getI18NProvider() {
return this.getOrCreate(I18NProvider.class);
}
// NOTE(review): static initializer blocks are not legal inside an interface in
// Java source; this block is a further decompiler artifact.
static {
if (1.$assertionsDisabled) {
// empty if block
}
}
}
which causes an exception:
Traceback (most recent call last):
File "/Users/irene/PycharmProjects/pythonProject/main.py", line 113, in <module>
main()
File "/Users/irene/PycharmProjects/pythonProject/main.py", line 109, in main
jlca.start()
File "/Users/irene/PycharmProjects/pythonProject/JLCodeAnalyzer.py", line 80, in start
tree = javalang.parse.parse(code_text)
File "/usr/local/lib/python3.10/site-packages/javalang/parse.py", line 53, in parse
return parser.parse()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 110, in parse
return self.parse_compilation_unit()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 302, in parse_compilation_unit
type_declaration = self.parse_type_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 347, in parse_type_declaration
return self.parse_class_or_interface_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 360, in parse_class_or_interface_declaration
type_declaration = self.parse_normal_interface_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 436, in parse_normal_interface_declaration
body = self.parse_interface_body()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 966, in parse_interface_body
declaration = self.parse_interface_body_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 981, in parse_interface_body_declaration
declaration = self.parse_interface_member_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1008, in parse_interface_member_declaration
declaration = self.parse_interface_method_or_field_declaration()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1018, in parse_interface_method_or_field_declaration
member = self.parse_interface_method_or_field_rest()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1035, in parse_interface_method_or_field_rest
rest = self.parse_interface_method_declarator_rest()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1082, in parse_interface_method_declarator_rest
body = self.parse_block()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1274, in parse_block
statement = self.parse_block_statement()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1339, in parse_block_statement
return self.parse_statement()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 1389, in parse_statement
condition = self.parse_par_expression()
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 2032, in parse_par_expression
self.accept(')')
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 131, in accept
self.illegal("Expected '%s'" % (accept,))
File "/usr/local/lib/python3.10/site-packages/javalang/parser.py", line 119, in illegal
raise JavaSyntaxError(description, at)
javalang.parser.JavaSyntaxError
And here is the JLCodeAnalyzer:
import javalang
import os
from javalang import tree
class JLCodeAnalyzer:
    '''
    Walk a directory tree, parse every .java file found with javalang and
    extract the source text of each method declaration.
    '''

    def __init__(self, code_path):
        '''
        :param code_path: root directory that is searched for .java files
        '''
        self.code_path = code_path
        self.codelines = None

    def get_method_start_end(self, method_node, tree):
        '''
        :param method_node: the method declaration node to locate
        :param tree: iterable yielding (path, node) pairs, e.g. a parsed javalang tree
        :return: (startpos, endpos, startline, endline); endpos/endline are
                 None when no node follows the method
        find the position of "method_node" and of the first node after it
        that does not have "method_node" on its path
        '''
        startpos = None
        endpos = None
        startline = None
        endline = None
        for path, node in tree:
            # first node visited after the method that is not nested in it
            if startpos is not None and method_node not in path:
                endpos = node.position
                endline = node.position.line if node.position is not None else None
                break
            if startpos is None and node == method_node:
                startpos = node.position
                startline = node.position.line if node.position is not None else None
        return startpos, endpos, startline, endline

    def get_method_text(self, startpos, endpos, startline, endline, last_endline_index, codelines, tree):
        '''
        :param startpos: position of the method, or None if it was not found
        :param endpos: position of the node following the method, or None
        :param startline: 1-based line on which the method starts
        :param endline: 1-based line of the node following the method
        :param last_endline_index: 0-based index of the last line of the
                                   previously extracted method, or None
        :param codelines: list of source lines (line endings kept)
        :param tree: unused; kept so existing callers keep working
        :return: (method text, 1-based start line, 1-based end line,
                 0-based index of the method's last line); ("", None, None, None)
                 when startpos is None
        cut the text of one method out of "codelines"
        '''
        if startpos is None:
            return "", None, None, None
        else:
            startline_index = startline - 1
            endline_index = endline - 1 if endpos is not None else None

            # 1. check for and fetch annotations
            # every line containing "@" between the previous method and this
            # one moves the start up by one line
            if last_endline_index is not None:
                for line in codelines[(last_endline_index + 1):(startline_index)]:
                    if "@" in line:
                        startline_index = startline_index - 1
            # "<ST>" marks the line boundaries so they can be recovered below
            meth_text = "<ST>".join(codelines[startline_index:endline_index])
            meth_text = meth_text[:meth_text.rfind("}") + 1]

            # 2. remove trailing rbrace for last methods & any external content/comments
            # if endpos is None and
            if not abs(meth_text.count("}") - meth_text.count("{")) == 0:
                # imbalanced braces
                brace_diff = abs(meth_text.count("}") - meth_text.count("{"))

                for _ in range(brace_diff):
                    # drop the last "}" and then everything after the one before it
                    meth_text = meth_text[:meth_text.rfind("}")]
                    meth_text = meth_text[:meth_text.rfind("}") + 1]

            meth_lines = meth_text.split("<ST>")
            meth_text = "".join(meth_lines)
            last_endline_index = startline_index + (len(meth_lines) - 1)

            return meth_text, (startline_index + 1), (last_endline_index + 1), last_endline_index

    def get_java_files(self, directory):
        '''
        :param directory: path to the main directory of java files
        :return: list of java files found
        search for all .java files recursively in "directory"
        '''
        java_files = []
        for root, _, files in os.walk(directory):
            for file in files:
                if file.endswith(".java"):
                    java_files.append(os.path.join(root, file))
        return java_files

    def start(self):
        '''
        parse every .java file below "self.code_path" and print the number
        of methods found in each one

        javalang.parse.parse raises JavaSyntaxError for input it cannot
        parse; the error is not caught here, so it stops the whole run
        '''
        java_files = self.get_java_files(self.code_path)
        for target_file in java_files:
            with open(target_file, 'r') as r:
                codelines = r.readlines()
                code_text = ''.join(codelines)

            lex = None
            print("working on ", target_file)
            tree = javalang.parse.parse(code_text)
            # NOTE: keyed by method name, so overloaded methods share one entry
            methods = {}
            for _, method_node in tree.filter(javalang.tree.MethodDeclaration):
                startpos, endpos, startline, endline = self.get_method_start_end(method_node, tree)
                method_text, startline, endline, lex = self.get_method_text(startpos, endpos, startline, endline, lex, codelines, tree)
                methods[method_node.name] = method_text
            print(f"total methods in {target_file} = {len(methods)}")
I tried to compile the code online and got some syntax errors.
For example:
error: ')' expected
if (!1.$assertionsDisabled && ui == null) {
^
But this code was downloaded from a Maven repository, so I would expect it to have correct syntax!
Or am I wrong?
That is not valid Java code. For example:
if (!1.$assertionsDisabled && ui == null) {
I suspect that what you have actually done is download a .class
file and decompile it [1]. Unfortunately, the decompiler has not worked properly, and what you have there cannot be compiled.
My suggestions are to try to get the actual Java source code (if it exists) or just exclude it from your analysis.
There are hints (e.g. in the import statements) that the ".class" file you have decompiled was originally compiled from Vaadin rather than Java.
1 – Note that a Maven repository will typically hold compiled artifacts rather than source code. You would typically get source code from a source code repository.