Count similar values in a list of dictionaries
Question:
I have a list of dictionaries and I need to count how many times each unique entry occurs.
Then I need to append that count to the tuple stored under the key "corrected_word" and sort the entries by the number in that tuple (2 < 3 < 33).
mylist = [
{'original_word': 'test1', 'corrected_word': ('test12', 3)},
{'original_word': 'test1', 'corrected_word': ('test12', 3)},
{'original_word': 'test2', 'corrected_word': ('test22', 2)},
{'original_word': 'test3', 'corrected_word': ('test3', 33)},
{'original_word': 'test3', 'corrected_word': ('test3', 33)},
{'original_word': 'test3', 'corrected_word': ('test3', 33)}
]
Expected Output:
mylist = [
{'original_word': 'test2', 'corrected_word': ('test22', 2, 1)},
{'original_word': 'test1', 'corrected_word': ('test12', 3, 2)},
{'original_word': 'test3', 'corrected_word': ('test3', 33, 3)}
]
I have tried this:
from collections import Counter
# Stringifying each dict makes it hashable, but the resulting Counter is keyed
# by those strings rather than by the dicts themselves.
Counter([str(i) for i in mylist])
But it does not return the list of dictionaries.
Answers:
You can use a combination of `defaultdict`, `set`, and `sorted` to achieve the desired result.
First, create a `defaultdict` with `set` as the default factory to count the unique entries based on the `original_word` and `corrected_word` fields:
from collections import defaultdict
# Map each distinct (original_word, corrected_word) pair to the set of list
# positions at which it occurs.  The position is what gets stored because it
# is different for every entry, so the size of each set equals the number of
# occurrences.  (Storing corrected_word[1] instead would always leave exactly
# one element per set, since that value is already part of the key.)
unique_entries = defaultdict(set)
for index, entry in enumerate(mylist):
    unique_entries[(entry['original_word'], entry['corrected_word'])].add(index)
Then, use a list comprehension to sort the unique entries based on the second element of the corrected_word tuple:
# Walk the (key, collected set) pairs ordered by the number held second in the
# corrected_word tuple, and rebuild one dict per distinct key with the size of
# its collected set appended to that tuple.
sorted_entries = [
    {'original_word': word, 'corrected_word': (*corrected, len(members))}
    for (word, corrected), members in sorted(
        unique_entries.items(),
        key=lambda pair: pair[0][1][1],
    )
]
The key argument of the sorted function specifies that the items should be sorted based on the second element of the corrected_word tuple.
The final result will be a list of dictionaries, where each dictionary contains the `original_word` and `corrected_word` fields, with the number of times that unique entry appears in the original list appended to the `corrected_word` tuple.
Full code:
from collections import defaultdict

# Sample input: each entry pairs a word with a (correction, number) tuple.
mylist = [
    {'original_word': 'test1', 'corrected_word': ('test12', 3)},
    {'original_word': 'test1', 'corrected_word': ('test12', 3)},
    {'original_word': 'test2', 'corrected_word': ('test22', 2)},
    {'original_word': 'test3', 'corrected_word': ('test3', 33)},
    {'original_word': 'test3', 'corrected_word': ('test3', 33)},
    {'original_word': 'test3', 'corrected_word': ('test3', 33)},
]

# Map each distinct (original_word, corrected_word) pair to the set of list
# positions at which it occurs.  The position is what gets stored because it
# is different for every entry, so the size of each set equals the number of
# occurrences.  (Storing corrected_word[1] instead would always leave exactly
# one element per set, since that value is already part of the key.)
unique_entries = defaultdict(set)
for index, entry in enumerate(mylist):
    unique_entries[(entry['original_word'], entry['corrected_word'])].add(index)

# Sort the distinct entries by the number in the corrected_word tuple and
# append each entry's occurrence count to that tuple.
sorted_entries = [{'original_word': k[0], 'corrected_word': (*k[1], len(v))}
                  for k, v in sorted(unique_entries.items(), key=lambda x: x[0][1][1])]
print(sorted_entries)
Output:
[{'original_word': 'test2', 'corrected_word': ('test22', 2, 1)}, {'original_word': 'test1', 'corrected_word': ('test12', 3, 2)}, {'original_word': 'test3', 'corrected_word': ('test3', 33, 3)}]
- convert to tuples before counter
- convert back to dicts and add the count
- sort based on number
def dict_and_add_count(item):
    """Rebuild a dict from a counted ``(items_tuple, count)`` pair.

    Args:
        item: A two-element pair ``(original_data, count)``.
            ``original_data`` is an iterable of ``(key, value)`` pairs that
            includes a ``'corrected_word'`` key whose value is iterable.

    Returns:
        A new dict built from ``original_data`` whose ``'corrected_word'``
        value is a tuple of the original elements followed by ``count``.

    Raises:
        KeyError: If ``original_data`` has no ``'corrected_word'`` key.
    """
    original_data, count = item
    original_dict = dict(original_data)
    # Append the count as the last element of the corrected_word tuple.
    original_dict['corrected_word'] = (*original_dict['corrected_word'], count)
    return original_dict
# Dicts are unhashable, so turn each one into a tuple of its (key, value)
# pairs before handing it to Counter.
counted_unique_tuples = Counter(tuple(entry.items()) for entry in mylist)
# Lazily turn every (items_tuple, count) pair back into a dict with the count
# appended to its corrected_word tuple.
dict_with_count = (dict_and_add_count(pair) for pair in counted_unique_tuples.items())
# Order the rebuilt dicts by the number held second in corrected_word.
sorted_dicts = sorted(dict_with_count, key=lambda rebuilt: rebuilt['corrected_word'][1])
sorted_dicts will be
[{'original_word': 'test2', 'corrected_word': ('test22', 2, 1)},
{'original_word': 'test1', 'corrected_word': ('test12', 3, 2)},
{'original_word': 'test3', 'corrected_word': ('test3', 33, 3)}]
Create a list of tuples, where the first element of each tuple is the original word, and the remaining elements are the elements in the corresponding `corrected_word` tuple. Then put this list through `Counter`:
from collections import Counter
# Flatten every entry into one hashable tuple, (original_word, *corrected_word),
# and count how often each tuple occurs.
ctr = Counter(
    (entry['original_word'], *entry['corrected_word'])
    for entry in mylist
)
This gives:
Counter({('test3', 'test3', 33): 3, ('test1', 'test12', 3): 2, ('test2', 'test22', 2): 1})
Then, build your result list and sort it by the value you want:
# Split each counted tuple back into the original word and the corrected_word
# elements, appending the count to the latter.
result = [
    {'original_word': ow, 'corrected_word': (*cw, count)}
    for (ow, *cw), count in ctr.items()
]
# Order the rebuilt dicts by the number held second in corrected_word.
result.sort(key=lambda item: item['corrected_word'][1])
Which gives the desired result:
[
{'original_word': 'test2', 'corrected_word': ('test22', 2, 1)},
{'original_word': 'test1', 'corrected_word': ('test12', 3, 2)},
{'original_word': 'test3', 'corrected_word': ('test3', 33, 3)}
]
I have a list of dictionaries and I need to count how many times each unique entry occurs.
Then I need to append that count to the tuple stored under the key "corrected_word" and sort the entries by the number in that tuple (2 < 3 < 33).
mylist = [
{'original_word': 'test1', 'corrected_word': ('test12', 3)},
{'original_word': 'test1', 'corrected_word': ('test12', 3)},
{'original_word': 'test2', 'corrected_word': ('test22', 2)},
{'original_word': 'test3', 'corrected_word': ('test3', 33)},
{'original_word': 'test3', 'corrected_word': ('test3', 33)},
{'original_word': 'test3', 'corrected_word': ('test3', 33)}
]
Expected Output:
mylist = [
{'original_word': 'test2', 'corrected_word': ('test22', 2, 1)},
{'original_word': 'test1', 'corrected_word': ('test12', 3, 2)},
{'original_word': 'test3', 'corrected_word': ('test3', 33, 3)}
]
I have tried this:
from collections import Counter
# Stringifying each dict makes it hashable, but the resulting Counter is keyed
# by those strings rather than by the dicts themselves.
Counter([str(i) for i in mylist])
But it does not return the list of dictionaries.
You can use a combination of `defaultdict`, `set`, and `sorted` to achieve the desired result.
First, create a `defaultdict` with `set` as the default factory to count the unique entries based on the `original_word` and `corrected_word` fields:
from collections import defaultdict
# Map each distinct (original_word, corrected_word) pair to the set of list
# positions at which it occurs.  The position is what gets stored because it
# is different for every entry, so the size of each set equals the number of
# occurrences.  (Storing corrected_word[1] instead would always leave exactly
# one element per set, since that value is already part of the key.)
unique_entries = defaultdict(set)
for index, entry in enumerate(mylist):
    unique_entries[(entry['original_word'], entry['corrected_word'])].add(index)
Then, use a list comprehension to sort the unique entries based on the second element of the corrected_word tuple:
# Walk the (key, collected set) pairs ordered by the number held second in the
# corrected_word tuple, and rebuild one dict per distinct key with the size of
# its collected set appended to that tuple.
sorted_entries = [
    {'original_word': word, 'corrected_word': (*corrected, len(members))}
    for (word, corrected), members in sorted(
        unique_entries.items(),
        key=lambda pair: pair[0][1][1],
    )
]
The key argument of the sorted function specifies that the items should be sorted based on the second element of the corrected_word tuple.
The final result will be a list of dictionaries, where each dictionary contains the `original_word` and `corrected_word` fields, with the number of times that unique entry appears in the original list appended to the `corrected_word` tuple.
Full code:
from collections import defaultdict

# Sample input: each entry pairs a word with a (correction, number) tuple.
mylist = [
    {'original_word': 'test1', 'corrected_word': ('test12', 3)},
    {'original_word': 'test1', 'corrected_word': ('test12', 3)},
    {'original_word': 'test2', 'corrected_word': ('test22', 2)},
    {'original_word': 'test3', 'corrected_word': ('test3', 33)},
    {'original_word': 'test3', 'corrected_word': ('test3', 33)},
    {'original_word': 'test3', 'corrected_word': ('test3', 33)},
]

# Map each distinct (original_word, corrected_word) pair to the set of list
# positions at which it occurs.  The position is what gets stored because it
# is different for every entry, so the size of each set equals the number of
# occurrences.  (Storing corrected_word[1] instead would always leave exactly
# one element per set, since that value is already part of the key.)
unique_entries = defaultdict(set)
for index, entry in enumerate(mylist):
    unique_entries[(entry['original_word'], entry['corrected_word'])].add(index)

# Sort the distinct entries by the number in the corrected_word tuple and
# append each entry's occurrence count to that tuple.
sorted_entries = [{'original_word': k[0], 'corrected_word': (*k[1], len(v))}
                  for k, v in sorted(unique_entries.items(), key=lambda x: x[0][1][1])]
print(sorted_entries)
Output:
[{'original_word': 'test2', 'corrected_word': ('test22', 2, 1)}, {'original_word': 'test1', 'corrected_word': ('test12', 3, 2)}, {'original_word': 'test3', 'corrected_word': ('test3', 33, 3)}]
- convert to tuples before counter
- convert back to dicts and add the count
- sort based on number
def dict_and_add_count(item):
    """Rebuild a dict from a counted ``(items_tuple, count)`` pair.

    Args:
        item: A two-element pair ``(original_data, count)``.
            ``original_data`` is an iterable of ``(key, value)`` pairs that
            includes a ``'corrected_word'`` key whose value is iterable.

    Returns:
        A new dict built from ``original_data`` whose ``'corrected_word'``
        value is a tuple of the original elements followed by ``count``.

    Raises:
        KeyError: If ``original_data`` has no ``'corrected_word'`` key.
    """
    original_data, count = item
    original_dict = dict(original_data)
    # Append the count as the last element of the corrected_word tuple.
    original_dict['corrected_word'] = (*original_dict['corrected_word'], count)
    return original_dict
# Dicts are unhashable, so turn each one into a tuple of its (key, value)
# pairs before handing it to Counter.
counted_unique_tuples = Counter(tuple(entry.items()) for entry in mylist)
# Lazily turn every (items_tuple, count) pair back into a dict with the count
# appended to its corrected_word tuple.
dict_with_count = (dict_and_add_count(pair) for pair in counted_unique_tuples.items())
# Order the rebuilt dicts by the number held second in corrected_word.
sorted_dicts = sorted(dict_with_count, key=lambda rebuilt: rebuilt['corrected_word'][1])
sorted_dicts will be
[{'original_word': 'test2', 'corrected_word': ('test22', 2, 1)},
{'original_word': 'test1', 'corrected_word': ('test12', 3, 2)},
{'original_word': 'test3', 'corrected_word': ('test3', 33, 3)}]
Create a list of tuples, where the first element of each tuple is the original word, and the remaining elements are the elements in the corresponding `corrected_word` tuple. Then put this list through `Counter`:
from collections import Counter
# Flatten every entry into one hashable tuple, (original_word, *corrected_word),
# and count how often each tuple occurs.
ctr = Counter(
    (entry['original_word'], *entry['corrected_word'])
    for entry in mylist
)
This gives:
Counter({('test3', 'test3', 33): 3, ('test1', 'test12', 3): 2, ('test2', 'test22', 2): 1})
Then, build your result list and sort it by the value you want:
# Split each counted tuple back into the original word and the corrected_word
# elements, appending the count to the latter.
result = [
    {'original_word': ow, 'corrected_word': (*cw, count)}
    for (ow, *cw), count in ctr.items()
]
# Order the rebuilt dicts by the number held second in corrected_word.
result.sort(key=lambda item: item['corrected_word'][1])
Which gives the desired result:
[
{'original_word': 'test2', 'corrected_word': ('test22', 2, 1)},
{'original_word': 'test1', 'corrected_word': ('test12', 3, 2)},
{'original_word': 'test3', 'corrected_word': ('test3', 33, 3)}
]