Python list: Group by most often seen
Question:
I have a 2D list of [index, value] pairs in which indexes are repeated. For each unique index I need to select the value that occurs most often or, if several values occur the same number of times, the one seen last.
[
[0,-1],
[1, 0],
[1, 1],
[2, 1],
[2,-1],
[2, 1],
]
=>
[
[0,-1],
[1, 1], # last seen
[2, 1], # most often seen
]
I can use numpy
or any other popular lib instead if it makes it easier
Answers:
You can do it like this:
from itertools import groupby
from collections import Counter

# `l` is the input list of [index, value] pairs (defined by the caller).
result = []
# groupby() only merges *adjacent* rows with the same key, so sort by index
# first.  sorted() is stable, so the original order of values within each
# index is preserved, which the "last seen" rule depends on.
for index, lst in groupby(sorted(l, key=lambda x: x[0]), key=lambda x: x[0]):
    lst = [i[1] for i in lst]
    if len(lst) == len(set(lst)) or len(set(Counter(lst).values())) == 1:
        # Every value occurs equally often: take the last one seen.
        item = lst[-1]
    else:
        # Several values may still share the highest count.  Scan from the
        # end so that, among the most frequent values, the one seen last wins
        # (max(set(lst), key=lst.count) would pick by set iteration order).
        counts = Counter(lst)
        top = max(counts.values())
        item = next(v for v in reversed(lst) if counts[v] == top)
    result.append([index, item])
In [160]: result
Out[160]: [[0, -1], [1, 1], [2, 1]]
len(lst) == len(set(lst))
-> Identifies whether the list has no repeated values.
len(set(Counter(lst).values())) == 1
-> Handles the special condition mentioned by @sajad.
# Remove exact duplicate [index, value] pairs, keeping first-seen order.
# NOTE: this only drops repeated pairs; it does not reduce the data to one
# value per index.
long_list = [[0, -1], [0, -1], [1, 0], [1, 1], [2, 1], [2, -1], [2, 1]]
short_list = []
seen = set()  # pairs already emitted, stored as hashable tuples
for element in long_list:
    key = tuple(element)
    if key not in seen:
        seen.add(key)
        short_list.append(element)
print(short_list)
Output: [[0, -1], [1, 0], [1, 1], [2, 1], [2, -1]]
from collections import defaultdict

# Input rows are [index, value] pairs; indexes may repeat.
# (Named `pairs` rather than `input` to avoid shadowing the builtin.)
pairs = [
    [0, -1],
    [1, 0],
    [1, 1],
    [2, 1],
    [2, -1],
    [2, 1],
]
out = []
# counter_map[index][value] -> how many times `value` was seen for `index`
counter_map = defaultdict(lambda: defaultdict(int))
# last_seen[(index, value)] -> position of the most recent occurrence
last_seen = {}
for position, (index, value) in enumerate(pairs):
    counter_map[index][value] += 1
    last_seen[index, value] = position
for index, value_count_maps in counter_map.items():
    # Rank candidates by frequency first; break ties by whichever value
    # occurred most recently in the input.
    best = max(
        value_count_maps,
        key=lambda value: (value_count_maps[value], last_seen[index, value]),
    )
    out.append([index, best])
print(out)  # [[0, -1], [1, 1], [2, 1]]
I would keep it simple instead, and note that the ordering in the answer matters; so it’s necessary to preserve it along with other bookkeeping structures.
from collections import defaultdict

# Example input: [index, value] pairs where three values tie for index 2.
l = [
    [2, 1],
    [2, 1],
    [2, 2],
    [2, 2],
    [2, -1],
    [2, -1],
]
result = []
groups = defaultdict(list)
for index, value in l:
    groups[index].append(value)  # keep the ordered series of values for each index
for index, group in groups.items():
    best_count = 0
    best_value = None
    counts = defaultdict(int)
    for value in group:
        counts[value] += 1  # running count of each value for this index
        # We look for the most frequent value and, in case of ties, prefer
        # the one whose last occurrence comes latest in the series.  Using
        # ">=" lets a later value that reaches the current best count take
        # over from an earlier one.
        if counts[value] >= best_count:
            best_value = value
            best_count = counts[value]
    result.append([index, best_value])
print(result)  # [[2, -1]]
I have a 2D list of [index, value] pairs in which indexes are repeated. For each unique index I need to select the value that occurs most often or, if several values occur the same number of times, the one seen last.
[
[0,-1],
[1, 0],
[1, 1],
[2, 1],
[2,-1],
[2, 1],
]
=>
[
[0,-1],
[1, 1], # last seen
[2, 1], # most often seen
]
I can use numpy
or any other popular lib instead if it makes it easier
You can do it like this:
from itertools import groupby
from collections import Counter

# `l` is the input list of [index, value] pairs (defined by the caller).
result = []
# groupby() only merges *adjacent* rows with the same key, so sort by index
# first.  sorted() is stable, so the original order of values within each
# index is preserved, which the "last seen" rule depends on.
for index, lst in groupby(sorted(l, key=lambda x: x[0]), key=lambda x: x[0]):
    lst = [i[1] for i in lst]
    if len(lst) == len(set(lst)) or len(set(Counter(lst).values())) == 1:
        # Every value occurs equally often: take the last one seen.
        item = lst[-1]
    else:
        # Several values may still share the highest count.  Scan from the
        # end so that, among the most frequent values, the one seen last wins
        # (max(set(lst), key=lst.count) would pick by set iteration order).
        counts = Counter(lst)
        top = max(counts.values())
        item = next(v for v in reversed(lst) if counts[v] == top)
    result.append([index, item])
In [160]: result
Out[160]: [[0, -1], [1, 1], [2, 1]]
len(lst) == len(set(lst))
-> Identifies whether the list has no repeated values.
len(set(Counter(lst).values())) == 1
-> Handles the special condition mentioned by @sajad.
# Remove exact duplicate [index, value] pairs, keeping first-seen order.
# NOTE: this only drops repeated pairs; it does not reduce the data to one
# value per index.
long_list = [[0, -1], [0, -1], [1, 0], [1, 1], [2, 1], [2, -1], [2, 1]]
short_list = []
seen = set()  # pairs already emitted, stored as hashable tuples
for element in long_list:
    key = tuple(element)
    if key not in seen:
        seen.add(key)
        short_list.append(element)
print(short_list)
Output: [[0, -1], [1, 0], [1, 1], [2, 1], [2, -1]]
from collections import defaultdict

# Input rows are [index, value] pairs; indexes may repeat.
# (Named `pairs` rather than `input` to avoid shadowing the builtin.)
pairs = [
    [0, -1],
    [1, 0],
    [1, 1],
    [2, 1],
    [2, -1],
    [2, 1],
]
out = []
# counter_map[index][value] -> how many times `value` was seen for `index`
counter_map = defaultdict(lambda: defaultdict(int))
# last_seen[(index, value)] -> position of the most recent occurrence
last_seen = {}
for position, (index, value) in enumerate(pairs):
    counter_map[index][value] += 1
    last_seen[index, value] = position
for index, value_count_maps in counter_map.items():
    # Rank candidates by frequency first; break ties by whichever value
    # occurred most recently in the input.
    best = max(
        value_count_maps,
        key=lambda value: (value_count_maps[value], last_seen[index, value]),
    )
    out.append([index, best])
print(out)  # [[0, -1], [1, 1], [2, 1]]
I would keep it simple instead, and note that the ordering in the answer matters; so it’s necessary to preserve it along with other bookkeeping structures.
from collections import defaultdict

# Example input: [index, value] pairs where three values tie for index 2.
l = [
    [2, 1],
    [2, 1],
    [2, 2],
    [2, 2],
    [2, -1],
    [2, -1],
]
result = []
groups = defaultdict(list)
for index, value in l:
    groups[index].append(value)  # keep the ordered series of values for each index
for index, group in groups.items():
    best_count = 0
    best_value = None
    counts = defaultdict(int)
    for value in group:
        counts[value] += 1  # running count of each value for this index
        # We look for the most frequent value and, in case of ties, prefer
        # the one whose last occurrence comes latest in the series.  Using
        # ">=" lets a later value that reaches the current best count take
        # over from an earlier one.
        if counts[value] >= best_count:
            best_value = value
            best_count = counts[value]
    result.append([index, best_value])
print(result)  # [[2, -1]]