Transform list to list of consecutive elements
Question:
Any idea how I could do the following in Python > 3.7?
a = ["a","b","c","c","a","a","a"]
b = F(a) => [[0],[1],[2,3],[4,5,6]]
The use case is a list with roughly 10e6 elements.
Thx
Answers:
IIUC, one approach:
from itertools import groupby, count
# Example input; each run of equal adjacent values becomes one group.
a = ["a", "b", "c", "c", "a", "a", "a"]
# A single shared counter hands out the running position across all groups.
counter = count()
res = [
    [next(counter) for _member in members]
    for _key, members in groupby(a)
]
print(res)
Output
[[0], [1], [2, 3], [4, 5, 6]]
An alternative using enumerate:
from itertools import groupby
from operator import itemgetter
# Example input; each run of equal adjacent values becomes one group.
a = ["a", "b", "c", "c", "a", "a", "a"]
# enumerate() yields (index, value) pairs: group them by the value
# (position 1) and keep only the index (position 0) of every pair.
res = [
    [index for index, _value in pairs]
    for _key, pairs in groupby(enumerate(a), key=itemgetter(1))
]
print(res)
# Plain-loop version: walk the list once and extend the current group of
# indices while the value stays the same as the previous one.
a = ["a", "b", "c", "c", "a", "a", "a"]
res = []
prev_item = None
for i, item in enumerate(a):
    # `res` is empty only on the first iteration; checking it first keeps a
    # leading None in `a` from matching the initial `prev_item` placeholder
    # and indexing into an empty list.
    if res and item == prev_item:
        res[-1].append(i)
    else:
        res.append([i])
    prev_item = item
I was interested in comparing the three approaches (loop vs. functional vs. functional with enumerate). It seemed to me that the functional approach effectively traverses the list twice: once when groupby forms the groups, and once more when each group is consumed to build its list of indices (not the whole list at once, but piece by piece). So, for reference, I am posting the results here; I think they are interesting for educational purposes.
Here is the code for comparison:
from itertools import groupby, count
from operator import itemgetter
import random
# Fixed seed so that every run benchmarks exactly the same input data.
random.seed(0)
input_letter = ["a", "b", "c"]
# Benchmark input: 100,000 letters drawn uniformly from `input_letter`.
a = [
    random.choice(input_letter)
    for _ in range(100_000)
]
def loop_approach(a: list) -> list:
    """Group the indices of ``a`` by runs of equal consecutive elements.

    Args:
        a: The list to scan; neighbouring elements are compared with ``==``.

    Returns:
        A list of index lists, one per run of equal adjacent elements,
        e.g. ``["a", "b", "b"]`` gives ``[[0], [1, 2]]``. An empty input
        gives ``[]``.
    """
    res = []
    prev_item = None
    for i, item in enumerate(a):
        # `res` is empty only on the first iteration; checking it first
        # keeps a leading None in `a` from matching the initial `prev_item`
        # placeholder, which would index into an empty list.
        if res and item == prev_item:
            res[-1].append(i)
        else:
            res.append([i])
        prev_item = item
    return res
def functional_approach(a: list) -> list:
    """Group the indices of ``a`` by runs of equal consecutive elements.

    Uses ``itertools.groupby`` to split ``a`` into runs and a shared
    ``itertools.count`` to number the elements as each run is consumed.

    Args:
        a: The list to scan.

    Returns:
        A list of index lists, one per run of equal adjacent elements,
        e.g. ``["a", "b", "b"]`` gives ``[[0], [1, 2]]``.
    """
    # One counter shared by all groups: every element consumed from a group
    # advances it by one, so the values are the positions in `a`.
    counter = count()
    return [[next(counter) for _ in vs] for _, vs in groupby(a)]
def functional_enumerate_approach(a: list) -> list:
    """Group the indices of ``a`` by runs of equal consecutive elements.

    Pairs every element with its index via ``enumerate``, groups the pairs
    by the element value and keeps only the indices of each group.

    Args:
        a: The list to scan.

    Returns:
        A list of index lists, one per run of equal adjacent elements,
        e.g. ``["a", "b", "b"]`` gives ``[[0], [1, 2]]``.
    """
    # itemgetter(1) selects the value from each (index, value) pair as the
    # grouping key; the group key itself is not needed afterwards.
    return [
        [v for v, _ in vs]
        for _, vs in groupby(enumerate(a), key=itemgetter(1))
    ]
# Sanity check: all three implementations must return the same result.
assert loop_approach(a) == functional_approach(a)
assert loop_approach(a) == functional_enumerate_approach(a)
# `%timeit` is an IPython/Jupyter line magic, so the lines below only run
# in an IPython shell or a notebook, not in the plain interpreter.
%timeit loop_approach(a)
%timeit functional_approach(a)
%timeit functional_enumerate_approach(a)
Output:
7.23 ms ± 93.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
12.7 ms ± 63.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
13 ms ± 150 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In other words, although the functional approaches are no doubt more concise and beautiful, the plain loop is roughly twice as fast (7.23 ms vs. about 13 ms).
Any idea how I could do the following in Python > 3.7?
a = ["a","b","c","c","a","a","a"]
b = F(a) => [[0],[1],[2,3],[4,5,6]]
The use case is a list with roughly 10e6 elements.
Thx
IIUC, one approach:
from itertools import groupby, count
# Example input; each run of equal adjacent values becomes one group.
a = ["a", "b", "c", "c", "a", "a", "a"]
# A single shared counter hands out the running position across all groups.
counter = count()
res = [
    [next(counter) for _member in members]
    for _key, members in groupby(a)
]
print(res)
Output
[[0], [1], [2, 3], [4, 5, 6]]
An alternative using enumerate:
from itertools import groupby
from operator import itemgetter
# Example input; each run of equal adjacent values becomes one group.
a = ["a", "b", "c", "c", "a", "a", "a"]
# enumerate() yields (index, value) pairs: group them by the value
# (position 1) and keep only the index (position 0) of every pair.
res = [
    [index for index, _value in pairs]
    for _key, pairs in groupby(enumerate(a), key=itemgetter(1))
]
print(res)
# Plain-loop version: walk the list once and extend the current group of
# indices while the value stays the same as the previous one.
a = ["a", "b", "c", "c", "a", "a", "a"]
res = []
prev_item = None
for i, item in enumerate(a):
    # `res` is empty only on the first iteration; checking it first keeps a
    # leading None in `a` from matching the initial `prev_item` placeholder
    # and indexing into an empty list.
    if res and item == prev_item:
        res[-1].append(i)
    else:
        res.append([i])
    prev_item = item
I was interested in comparing the three approaches (loop vs. functional vs. functional with enumerate). It seemed to me that the functional approach effectively traverses the list twice: once when groupby forms the groups, and once more when each group is consumed to build its list of indices (not the whole list at once, but piece by piece). So, for reference, I am posting the results here; I think they are interesting for educational purposes.
Here is the code for comparison:
from itertools import groupby, count
from operator import itemgetter
import random
# Fixed seed so that every run benchmarks exactly the same input data.
random.seed(0)
input_letter = ["a", "b", "c"]
# Benchmark input: 100,000 letters drawn uniformly from `input_letter`.
a = [
    random.choice(input_letter)
    for _ in range(100_000)
]
def loop_approach(a: list) -> list:
    """Group the indices of ``a`` by runs of equal consecutive elements.

    Args:
        a: The list to scan; neighbouring elements are compared with ``==``.

    Returns:
        A list of index lists, one per run of equal adjacent elements,
        e.g. ``["a", "b", "b"]`` gives ``[[0], [1, 2]]``. An empty input
        gives ``[]``.
    """
    res = []
    prev_item = None
    for i, item in enumerate(a):
        # `res` is empty only on the first iteration; checking it first
        # keeps a leading None in `a` from matching the initial `prev_item`
        # placeholder, which would index into an empty list.
        if res and item == prev_item:
            res[-1].append(i)
        else:
            res.append([i])
        prev_item = item
    return res
def functional_approach(a: list) -> list:
    """Group the indices of ``a`` by runs of equal consecutive elements.

    Uses ``itertools.groupby`` to split ``a`` into runs and a shared
    ``itertools.count`` to number the elements as each run is consumed.

    Args:
        a: The list to scan.

    Returns:
        A list of index lists, one per run of equal adjacent elements,
        e.g. ``["a", "b", "b"]`` gives ``[[0], [1, 2]]``.
    """
    # One counter shared by all groups: every element consumed from a group
    # advances it by one, so the values are the positions in `a`.
    counter = count()
    return [[next(counter) for _ in vs] for _, vs in groupby(a)]
def functional_enumerate_approach(a: list) -> list:
    """Group the indices of ``a`` by runs of equal consecutive elements.

    Pairs every element with its index via ``enumerate``, groups the pairs
    by the element value and keeps only the indices of each group.

    Args:
        a: The list to scan.

    Returns:
        A list of index lists, one per run of equal adjacent elements,
        e.g. ``["a", "b", "b"]`` gives ``[[0], [1, 2]]``.
    """
    # itemgetter(1) selects the value from each (index, value) pair as the
    # grouping key; the group key itself is not needed afterwards.
    return [
        [v for v, _ in vs]
        for _, vs in groupby(enumerate(a), key=itemgetter(1))
    ]
# Sanity check: all three implementations must return the same result.
assert loop_approach(a) == functional_approach(a)
assert loop_approach(a) == functional_enumerate_approach(a)
# `%timeit` is an IPython/Jupyter line magic, so the lines below only run
# in an IPython shell or a notebook, not in the plain interpreter.
%timeit loop_approach(a)
%timeit functional_approach(a)
%timeit functional_enumerate_approach(a)
Output:
7.23 ms ± 93.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
12.7 ms ± 63.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
13 ms ± 150 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In other words, although the functional approaches are no doubt more concise and beautiful, the plain loop is roughly twice as fast (7.23 ms vs. about 13 ms).