Why do two list comprehensions with an "if" condition run faster than one for loop with two conditions?
Question:
import time
from random import random
from typing import List
def test(arr: List[int] | None = None) -> None:
    """Time two ways of partitioning ``arr`` around its last element.

    The last element is popped from ``arr`` (so the caller's list is
    mutated) and used as the pivot ``opp``. Each method splits the remaining
    elements into those smaller and those greater than the pivot; elements
    equal to the pivot are dropped. The elapsed time of each method is
    printed to stdout.

    Args:
        arr: Non-empty list of integers to partition.

    Raises:
        TypeError: If ``arr`` is ``None`` or empty.
    """
    if not arr:
        raise TypeError("Variable arr must exist!")
    opp = arr.pop()

    def check_time(func, msg):
        """Call ``func`` once and print ``msg`` followed by the elapsed time."""
        t0 = time.perf_counter()
        func()
        # "\n" puts the timing on its own line, matching the posted output;
        # without the backslash this printed "first_methodntime - ...".
        print(f"{msg}\ntime - {time.perf_counter() - t0}")

    def first_method():
        """Partition with two list comprehensions (two passes over ``arr``)."""
        more_arr = [e for e in arr if e > opp]
        less_arr = [e for e in arr if e < opp]
        return less_arr, more_arr

    def second_method():
        """Partition with one explicit loop and ``list.append``."""
        more_arr, less_arr = [], []
        for e in arr:
            if e > opp:
                more_arr.append(e)
            elif e < opp:
                less_arr.append(e)
        return less_arr, more_arr

    check_time(first_method, "first_method")
    check_time(second_method, "second_method")
"""
[RESULT]
first_method
time - 0.1035286999976961
second_method
time - 0.12783881399809616
"""
def main() -> None:
    """Run the benchmark on one million integers built from ``random()``."""
    test([int(random() * 1000) for _ in range(1_000_000)])


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
RESULT:
first_method: time – 0.10790603799978271
second_method: time – 0.1264369229975273
————————————————————-
I want to know why first_method() is faster than second_method().
How does the "if" condition in a list comprehension work from an optimization point of view?
Answers:
Add this third method to see that looking up the append
methods takes a lot of time:
def third_method():
    """Partition ``arr`` around ``opp`` using pre-bound ``append`` methods.

    ``arr`` and ``opp`` are free names resolved from the surrounding scope.
    The bound ``append`` methods are looked up once, before the loop, so the
    loop body does not repeat the attribute lookup on every iteration.

    Returns:
        A ``(less_arr, more_arr)`` tuple; elements equal to ``opp`` are
        dropped.
    """
    more_arr, less_arr = [], []
    # Descriptive names instead of the ambiguous single letters "m" and "l".
    append_more = more_arr.append
    append_less = less_arr.append
    for e in arr:
        if e > opp:
            append_more(e)
        elif e < opp:
            append_less(e)
    return less_arr, more_arr
This method is usually even slightly faster than the first one.
You can use the cProfile module to see how many times each function is called and how long each one takes internally.
Run it once:
if __name__ == '__main__':
    # Profile the whole benchmark run; cProfile prints the per-function
    # call counts and timings when main() finishes.
    import cProfile

    cProfile.run('main()')
first_method
time - 0.06351780006662011
second_method
time - 0.1859593999106437
1999012 function calls in 0.493 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.493 0.493 <string>:1(<module>)
2 0.005 0.003 0.250 0.125 test.py:13(check_time)
1 0.000 0.000 0.061 0.061 test.py:18(first_method)
1 0.036 0.036 0.036 0.036 test.py:19(<listcomp>)
1 0.025 0.025 0.025 0.025 test.py:20(<listcomp>)
1 0.120 0.120 0.183 0.183 test.py:23(second_method)
1 0.006 0.006 0.493 0.493 test.py:44(main)
1 0.178 0.178 0.237 0.237 test.py:45(<listcomp>)
1 0.000 0.000 0.250 0.250 test.py:7(test)
1 0.000 0.000 0.493 0.493 {built-in method builtins.exec}
2 0.000 0.000 0.000 0.000 {built-in method builtins.print}
4 0.000 0.000 0.000 0.000 {built-in method time.perf_counter}
998993 0.064 0.000 0.064 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1 0.000 0.000 0.000 0.000 {method 'pop' of 'list' objects}
1000000 0.059 0.000 0.059 0.000 {method 'random' of '_random.Random' objects}
ncalls
for the number of calls.
tottime
for the total time spent in the given function (and excluding time made in calls to sub-functions)
percall
is the quotient of tottime divided by ncalls
cumtime
is the cumulative time spent in this and all subfunctions (from invocation till exit). This figure is accurate even for recursive functions.
percall
is the quotient of cumtime divided by primitive calls
filename:lineno(function)
provides the respective data of each function
998993 0.064 0.000 0.064 0.000 {method ‘append’ of ‘list’ objects}
The calls to list.append alone take 0.064 s, which is more than the two list comprehensions combined (0.036 s + 0.025 s).
Performance will vary depending on your hardware platform and Python version.
I have modified the original code to use randint() rather than the unnecessary calculation using random(). That has no effect on the timing.
On my machine first_method() is slower than second_method().
It’s worth noting that in first_method() the arr list is enumerated twice (once per comprehension) whereas in second_method() it’s only enumerated once
from random import randint
from time import perf_counter
def test(arr: list) -> None:
    """Time two ways of partitioning ``arr`` around its last element.

    The last element is popped from ``arr`` (so the caller's list is
    mutated) and used as the pivot ``opp``. Elements equal to the pivot are
    dropped by both methods. Each method's duration is printed to stdout.

    Args:
        arr: List of mutually comparable values; ``arr.pop()`` is called on
            it first.
    """
    opp = arr.pop()

    def check_time(func, msg):
        """Call ``func`` once and print ``msg`` with the elapsed seconds."""
        t0 = perf_counter()
        func()
        print(f"{msg} duration = {perf_counter() - t0:.4f}s")

    def first_method():
        """Partition with two list comprehensions (two passes over ``arr``)."""
        more_arr = [e for e in arr if e > opp]
        less_arr = [e for e in arr if e < opp]
        return less_arr, more_arr

    def second_method():
        """Partition with one explicit loop and ``list.append``."""
        more_arr, less_arr = [], []
        for e in arr:
            if e > opp:
                more_arr.append(e)
            elif e < opp:
                less_arr.append(e)
        return less_arr, more_arr

    check_time(first_method, 'First')
    check_time(second_method, 'Second')
def main() -> None:
    """Run the benchmark on one million ``randint(0, 1_000)`` values."""
    test([randint(0, 1_000) for _ in range(1_000_000)])


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
Output:
First duration = 0.0472s
Second duration = 0.0367s
Platform:
Python 3.11.2
macOS 13.2.1
CPU 3GHz 10-Core Intel Xeon W
RAM 32GB
import time
from random import random
from typing import List
def test(arr: List[int] | None = None) -> None:
    """Time two ways of partitioning ``arr`` around its last element.

    The last element is popped from ``arr`` (so the caller's list is
    mutated) and used as the pivot ``opp``. Each method splits the remaining
    elements into those smaller and those greater than the pivot; elements
    equal to the pivot are dropped. The elapsed time of each method is
    printed to stdout.

    Args:
        arr: Non-empty list of integers to partition.

    Raises:
        TypeError: If ``arr`` is ``None`` or empty.
    """
    if not arr:
        raise TypeError("Variable arr must exist!")
    opp = arr.pop()

    def check_time(func, msg):
        """Call ``func`` once and print ``msg`` followed by the elapsed time."""
        t0 = time.perf_counter()
        func()
        # "\n" puts the timing on its own line, matching the posted output;
        # without the backslash this printed "first_methodntime - ...".
        print(f"{msg}\ntime - {time.perf_counter() - t0}")

    def first_method():
        """Partition with two list comprehensions (two passes over ``arr``)."""
        more_arr = [e for e in arr if e > opp]
        less_arr = [e for e in arr if e < opp]
        return less_arr, more_arr

    def second_method():
        """Partition with one explicit loop and ``list.append``."""
        more_arr, less_arr = [], []
        for e in arr:
            if e > opp:
                more_arr.append(e)
            elif e < opp:
                less_arr.append(e)
        return less_arr, more_arr

    check_time(first_method, "first_method")
    check_time(second_method, "second_method")
"""
[RESULT]
first_method
time - 0.1035286999976961
second_method
time - 0.12783881399809616
"""
def main() -> None:
    """Run the benchmark on one million integers built from ``random()``."""
    test([int(random() * 1000) for _ in range(1_000_000)])


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
RESULT:
first_method: time – 0.10790603799978271
second_method: time – 0.1264369229975273
————————————————————-
I want to know why first_method() is faster than second_method().
How does the "if" condition in a list comprehension work from an optimization point of view?
Add this third method to see that looking up the append
methods takes a lot of time:
def third_method():
    """Partition ``arr`` around ``opp`` using pre-bound ``append`` methods.

    ``arr`` and ``opp`` are free names resolved from the surrounding scope.
    The bound ``append`` methods are looked up once, before the loop, so the
    loop body does not repeat the attribute lookup on every iteration.

    Returns:
        A ``(less_arr, more_arr)`` tuple; elements equal to ``opp`` are
        dropped.
    """
    more_arr, less_arr = [], []
    # Descriptive names instead of the ambiguous single letters "m" and "l".
    append_more = more_arr.append
    append_less = less_arr.append
    for e in arr:
        if e > opp:
            append_more(e)
        elif e < opp:
            append_less(e)
    return less_arr, more_arr
This method is usually even slightly faster than the first one.
You can use the cProfile module to see how many times each function is called and how long each one takes internally.
Run it once:
if __name__ == '__main__':
    # Profile the whole benchmark run; cProfile prints the per-function
    # call counts and timings when main() finishes.
    import cProfile

    cProfile.run('main()')
first_method
time - 0.06351780006662011
second_method
time - 0.1859593999106437
1999012 function calls in 0.493 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.493 0.493 <string>:1(<module>)
2 0.005 0.003 0.250 0.125 test.py:13(check_time)
1 0.000 0.000 0.061 0.061 test.py:18(first_method)
1 0.036 0.036 0.036 0.036 test.py:19(<listcomp>)
1 0.025 0.025 0.025 0.025 test.py:20(<listcomp>)
1 0.120 0.120 0.183 0.183 test.py:23(second_method)
1 0.006 0.006 0.493 0.493 test.py:44(main)
1 0.178 0.178 0.237 0.237 test.py:45(<listcomp>)
1 0.000 0.000 0.250 0.250 test.py:7(test)
1 0.000 0.000 0.493 0.493 {built-in method builtins.exec}
2 0.000 0.000 0.000 0.000 {built-in method builtins.print}
4 0.000 0.000 0.000 0.000 {built-in method time.perf_counter}
998993 0.064 0.000 0.064 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1 0.000 0.000 0.000 0.000 {method 'pop' of 'list' objects}
1000000 0.059 0.000 0.059 0.000 {method 'random' of '_random.Random' objects}
ncalls
for the number of calls.
tottime
for the total time spent in the given function (and excluding time made in calls to sub-functions)
percall
is the quotient of tottime divided by ncalls
cumtime
is the cumulative time spent in this and all subfunctions (from invocation till exit). This figure is accurate even for recursive functions.
percall
is the quotient of cumtime divided by primitive calls
filename:lineno(function)
provides the respective data of each function
998993 0.064 0.000 0.064 0.000 {method ‘append’ of ‘list’ objects}
The calls to list.append alone take 0.064 s, which is more than the two list comprehensions combined (0.036 s + 0.025 s).
Performance will vary depending on your hardware platform and Python version.
I have modified the original code to use randint() rather than the unnecessary calculation using random(). That has no effect on the timing.
On my machine first_method() is slower than second_method().
It’s worth noting that in first_method() the arr list is enumerated twice (once per comprehension) whereas in second_method() it’s only enumerated once
from random import randint
from time import perf_counter
def test(arr: list) -> None:
    """Time two ways of partitioning ``arr`` around its last element.

    The last element is popped from ``arr`` (so the caller's list is
    mutated) and used as the pivot ``opp``. Elements equal to the pivot are
    dropped by both methods. Each method's duration is printed to stdout.

    Args:
        arr: List of mutually comparable values; ``arr.pop()`` is called on
            it first.
    """
    opp = arr.pop()

    def check_time(func, msg):
        """Call ``func`` once and print ``msg`` with the elapsed seconds."""
        t0 = perf_counter()
        func()
        print(f"{msg} duration = {perf_counter() - t0:.4f}s")

    def first_method():
        """Partition with two list comprehensions (two passes over ``arr``)."""
        more_arr = [e for e in arr if e > opp]
        less_arr = [e for e in arr if e < opp]
        return less_arr, more_arr

    def second_method():
        """Partition with one explicit loop and ``list.append``."""
        more_arr, less_arr = [], []
        for e in arr:
            if e > opp:
                more_arr.append(e)
            elif e < opp:
                less_arr.append(e)
        return less_arr, more_arr

    check_time(first_method, 'First')
    check_time(second_method, 'Second')
def main() -> None:
    """Run the benchmark on one million ``randint(0, 1_000)`` values."""
    test([randint(0, 1_000) for _ in range(1_000_000)])


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
Output:
First duration = 0.0472s
Second duration = 0.0367s
Platform:
Python 3.11.2
macOS 13.2.1
CPU 3GHz 10-Core Intel Xeon W
RAM 32GB