Create a unique id from each id in a list of dictionaries
Question:
I have a list of dictionaries with a key named `id`; however, some of the ids are duplicates. This is a sample of my dataset; my actual dicts have multiple keys, but I'm filtering by `id`. Essentially, I do not want to remove the duplicate ids; instead, I want to create a new id value by adding an increment to the duplicate. The new id must not already exist in the current list of ids. However, I find that not just one but all of them are transformed.
For example:
import copy
ids = [{'id': 44},{'id': 49},{'id': 48},{'id': 53},{'id': 46},{'id': 51},{'id': 45},{'id': 50},{'id': 47},{'id': 52},{'id': 5091},{'id': 5060},{'id': 5002},{'id': 5071},{'id': 5011},{'id': 5027},{'id': 26},{'id': 29},{'id': 5034},{'id': 5086},{'id': 5063},{'id': 5022},{'id': 5014},{'id': 74},{'id': 5061},{'id': 4},{'id': 5013},{'id': 5076},{'id': 5055},{'id': 5006},{'id': 5051},{'id': 5032},{'id': 5008},{'id': 14},{'id': 35},{'id': 5},{'id': 7},{'id': 64},{'id': 5049},{'id': 5021},{'id': 5059},{'id': 5029},{'id': 6},{'id': 30},{'id': 23},{'id': 31},{'id': 5017},{'id': 8},{'id': 17},{'id': 24},{'id': 5007},{'id': 5033},{'id': 5065},{'id': 5020},{'id': 5085},{'id': 5025},{'id': 5068},{'id': 5041},{'id': 5048},{'id': 5056},{'id': 5080},{'id': 5070},{'id': 5072},{'id': 5077},{'id': 5073},{'id': 5067},{'id': 5088},{'id': 5010},{'id': 5040},{'id': 5075},{'id': 5035},{'id': 5043},{'id': 5012},{'id': 5052},{'id': 5081},{'id': 5004},{'id': 57},{'id': 56},{'id': 63},{'id': 62},{'id': 55},{'id': 54},{'id': 22},{'id': 59},{'id': 58},{'id': 61},{'id': 60},{'id': 21},{'id': 5046},{'id': 5024},{'id': 5036},{'id': 5058},{'id': 5053},{'id': 5044},{'id': 38},{'id': 36},{'id': 5050},{'id': 5047},{'id': 5079},{'id': 5062},{'id': 37},{'id': 13},{'id': 3},{'id': 27},{'id': 5078},{'id': 5009},{'id': 5069},{'id': 5092},{'id': 5090},{'id': 66},{'id': 81},{'id': 82},{'id': 70},{'id': 67},{'id': 75},{'id': 78},{'id': 76},{'id': 5001},{'id': 68},{'id': 69},{'id': 79},{'id': 65},{'id': 71},{'id': 77},{'id': 73},{'id': 72},{'id': 5031},{'id': 5083},{'id': 5037},{'id': 5003},{'id': 15},{'id': 16},{'id': 25},{'id': 32},{'id': 5023},{'id': 2},{'id': 5038},{'id': 5030},{'id': 5019},{'id': 5087},{'id': 5089},{'id': 5082},{'id': 5028},{'id': 5054},{'id': 5074},{'id': 5018},{'id': 5015},{'id': 5064},{'id': 5045},{'id': 5057},{'id': 5084},{'id': 5026},{'id': 5016},{'id': 12},{'id': 11},{'id': 10},{'id': 5066},{'id': 5042},{'id': 5005},{'id': 28},{'id': 80},{'id': 17131},{'id': 6646},{'id': 6440},{'id': 
11253},{'id': 6254},{'id': 6240},{'id': 10547},{'id': 10495},{'id': 8179},{'id': 8139},{'id': 10726},{'id': 17285},{'id': 6566},{'id': 10760},{'id': 16521},{'id': 10732},{'id': 17627},{'id': 10179},{'id': 17433},{'id': 17437},{'id': 17435},{'id': 6554},{'id': 6560},{'id': 6562},{'id': 6664},{'id': 12507},{'id': 12509},{'id': 11275},{'id': 6606},{'id': 17287},{'id': 17289},{'id': 12511},{'id': 12221},{'id': 8705},{'id': 17129},{'id': 8691},{'id': 11078},{'id': 11697},{'id': 6604},{'id': 6590},{'id': 17413},{'id': 17217},{'id': 11076},{'id': 10724},{'id': 11487},{'id': 5188},{'id': 6049},{'id': 6556},{'id': 6558},{'id': 6700},{'id': 6548},{'id': 5437},{'id': 4},{'id': 6244},{'id': 5061},{'id': 10085},{'id': 12707},{'id': 35},{'id': 64},{'id': 18003},{'id': 6442},{'id': 6710},{'id': 12709},{'id': 11255},{'id': 11273},{'id': 17279},{'id': 17277},{'id': 17975},{'id': 16981},{'id': 6676},{'id': 6550},{'id': 6842},{'id': 37},{'id': 11054},{'id': 5444},{'id': 6426},{'id': 70},{'id': 67},{'id': 75},{'id': 6234},{'id': 8880},{'id': 8899},{'id': 13835},{'id': 14759},{'id': 7112},{'id': 5017},{'id': 6236},{'id': 9923},{'id': 16817},{'id': 5228},{'id': 5029}]
# Asker's original attempt, with the block indentation restored.
# The logic is intentionally left exactly as posted.
aID = copy.deepcopy(ids)  # work on a deep copy so `ids` itself is not mutated
uniques = set([ x.get('id') for x in ids ])  # every id value present in the input
listOf = [ x.get('id') for x in ids ]  # ids as a list, so occurrences can be counted
G = True
d= 0
for items in aID:
    # Only entries whose id occurs more than once in the input are touched.
    if items.get('id') in uniques and listOf.count(items.get('id')) > 1:
        G = True
        while(G):
            d += 1
            # Accept the first offset whose resulting id is not in `uniques`.
            if(items.get('id') + d not in uniques):
                items.update({'id': items.get('id')+ d})
                G = False
            else:
                continue
When I check for any duplicate ids I get a few:
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Where did I go wrong in my loop?
I have attempted the following, which works fine on the data above but may produce duplicates on my actual dataset:
aID = copy.deepcopy(ids)  # deep copy so the original `ids` list is left unchanged
uniques = set()  # ids that have already been assigned to an entry
for item in aID:
    d = 0  # offset is restarted for every entry
    # Advance the offset until id + d has not been assigned yet.
    while item['id'] + d in uniques:
        d += 1
    item['id'] += d
    # Record the id just assigned so later entries cannot reuse it.
    uniques.add(item['id'])
while items.get('id') + d not in uniques:
d+=1
items.update({'id':items.get('id')+d})
break
Answers:
- You need to add the new ID to `uniques`.
- You should initialize `uniques` to an empty set, otherwise you’ll find a match for the first instance of each ID and increment it unnecessarily.
- You should restart `d` at 0 for each ID.

`else: continue` is unnecessary. Loops continue automatically unless you break out of them. `continue` is generally only needed if you want to go to the next iteration from the middle of the loop body.
aID = copy.deepcopy(ids)  # deep copy so the original `ids` list is left unchanged
uniques = set()  # starts empty: the first occurrence of each id keeps its value
for item in aID:
    d = 0  # restart the offset for every entry
    # Advance the offset until id + d has not been assigned yet.
    while item['id'] + d in uniques:
        d += 1
    item['id'] += d
    # Record the id just assigned so later entries cannot reuse it.
    uniques.add(item['id'])
I have a list of dictionaries with a key named `id`; however, some of the ids are duplicates. This is a sample of my dataset; my actual dicts have multiple keys, but I'm filtering by `id`. Essentially, I do not want to remove the duplicate ids; instead, I want to create a new id value by adding an increment to the duplicate. The new id must not already exist in the current list of ids. However, I find that not just one but all of them are transformed.
For example:
import copy
ids = [{'id': 44},{'id': 49},{'id': 48},{'id': 53},{'id': 46},{'id': 51},{'id': 45},{'id': 50},{'id': 47},{'id': 52},{'id': 5091},{'id': 5060},{'id': 5002},{'id': 5071},{'id': 5011},{'id': 5027},{'id': 26},{'id': 29},{'id': 5034},{'id': 5086},{'id': 5063},{'id': 5022},{'id': 5014},{'id': 74},{'id': 5061},{'id': 4},{'id': 5013},{'id': 5076},{'id': 5055},{'id': 5006},{'id': 5051},{'id': 5032},{'id': 5008},{'id': 14},{'id': 35},{'id': 5},{'id': 7},{'id': 64},{'id': 5049},{'id': 5021},{'id': 5059},{'id': 5029},{'id': 6},{'id': 30},{'id': 23},{'id': 31},{'id': 5017},{'id': 8},{'id': 17},{'id': 24},{'id': 5007},{'id': 5033},{'id': 5065},{'id': 5020},{'id': 5085},{'id': 5025},{'id': 5068},{'id': 5041},{'id': 5048},{'id': 5056},{'id': 5080},{'id': 5070},{'id': 5072},{'id': 5077},{'id': 5073},{'id': 5067},{'id': 5088},{'id': 5010},{'id': 5040},{'id': 5075},{'id': 5035},{'id': 5043},{'id': 5012},{'id': 5052},{'id': 5081},{'id': 5004},{'id': 57},{'id': 56},{'id': 63},{'id': 62},{'id': 55},{'id': 54},{'id': 22},{'id': 59},{'id': 58},{'id': 61},{'id': 60},{'id': 21},{'id': 5046},{'id': 5024},{'id': 5036},{'id': 5058},{'id': 5053},{'id': 5044},{'id': 38},{'id': 36},{'id': 5050},{'id': 5047},{'id': 5079},{'id': 5062},{'id': 37},{'id': 13},{'id': 3},{'id': 27},{'id': 5078},{'id': 5009},{'id': 5069},{'id': 5092},{'id': 5090},{'id': 66},{'id': 81},{'id': 82},{'id': 70},{'id': 67},{'id': 75},{'id': 78},{'id': 76},{'id': 5001},{'id': 68},{'id': 69},{'id': 79},{'id': 65},{'id': 71},{'id': 77},{'id': 73},{'id': 72},{'id': 5031},{'id': 5083},{'id': 5037},{'id': 5003},{'id': 15},{'id': 16},{'id': 25},{'id': 32},{'id': 5023},{'id': 2},{'id': 5038},{'id': 5030},{'id': 5019},{'id': 5087},{'id': 5089},{'id': 5082},{'id': 5028},{'id': 5054},{'id': 5074},{'id': 5018},{'id': 5015},{'id': 5064},{'id': 5045},{'id': 5057},{'id': 5084},{'id': 5026},{'id': 5016},{'id': 12},{'id': 11},{'id': 10},{'id': 5066},{'id': 5042},{'id': 5005},{'id': 28},{'id': 80},{'id': 17131},{'id': 6646},{'id': 6440},{'id': 
11253},{'id': 6254},{'id': 6240},{'id': 10547},{'id': 10495},{'id': 8179},{'id': 8139},{'id': 10726},{'id': 17285},{'id': 6566},{'id': 10760},{'id': 16521},{'id': 10732},{'id': 17627},{'id': 10179},{'id': 17433},{'id': 17437},{'id': 17435},{'id': 6554},{'id': 6560},{'id': 6562},{'id': 6664},{'id': 12507},{'id': 12509},{'id': 11275},{'id': 6606},{'id': 17287},{'id': 17289},{'id': 12511},{'id': 12221},{'id': 8705},{'id': 17129},{'id': 8691},{'id': 11078},{'id': 11697},{'id': 6604},{'id': 6590},{'id': 17413},{'id': 17217},{'id': 11076},{'id': 10724},{'id': 11487},{'id': 5188},{'id': 6049},{'id': 6556},{'id': 6558},{'id': 6700},{'id': 6548},{'id': 5437},{'id': 4},{'id': 6244},{'id': 5061},{'id': 10085},{'id': 12707},{'id': 35},{'id': 64},{'id': 18003},{'id': 6442},{'id': 6710},{'id': 12709},{'id': 11255},{'id': 11273},{'id': 17279},{'id': 17277},{'id': 17975},{'id': 16981},{'id': 6676},{'id': 6550},{'id': 6842},{'id': 37},{'id': 11054},{'id': 5444},{'id': 6426},{'id': 70},{'id': 67},{'id': 75},{'id': 6234},{'id': 8880},{'id': 8899},{'id': 13835},{'id': 14759},{'id': 7112},{'id': 5017},{'id': 6236},{'id': 9923},{'id': 16817},{'id': 5228},{'id': 5029}]
# Asker's original attempt, with the block indentation restored.
# The logic is intentionally left exactly as posted.
aID = copy.deepcopy(ids)  # work on a deep copy so `ids` itself is not mutated
uniques = set([ x.get('id') for x in ids ])  # every id value present in the input
listOf = [ x.get('id') for x in ids ]  # ids as a list, so occurrences can be counted
G = True
d= 0
for items in aID:
    # Only entries whose id occurs more than once in the input are touched.
    if items.get('id') in uniques and listOf.count(items.get('id')) > 1:
        G = True
        while(G):
            d += 1
            # Accept the first offset whose resulting id is not in `uniques`.
            if(items.get('id') + d not in uniques):
                items.update({'id': items.get('id')+ d})
                G = False
            else:
                continue
When I check for any duplicate ids I get a few:
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Where did I go wrong in my loop?
I have attempted the following, which works fine on the data above but may produce duplicates on my actual dataset:
aID = copy.deepcopy(ids)  # deep copy so the original `ids` list is left unchanged
uniques = set()  # ids that have already been assigned to an entry
for item in aID:
    d = 0  # offset is restarted for every entry
    # Advance the offset until id + d has not been assigned yet.
    while item['id'] + d in uniques:
        d += 1
    item['id'] += d
    # Record the id just assigned so later entries cannot reuse it.
    uniques.add(item['id'])
while items.get('id') + d not in uniques:
d+=1
items.update({'id':items.get('id')+d})
break
- You need to add the new ID to `uniques`.
- You should initialize `uniques` to an empty set, otherwise you’ll find a match for the first instance of each ID and increment it unnecessarily.
- You should restart `d` at 0 for each ID.

`else: continue` is unnecessary. Loops continue automatically unless you break out of them. `continue` is generally only needed if you want to go to the next iteration from the middle of the loop body.
aID = copy.deepcopy(ids)  # deep copy so the original `ids` list is left unchanged
uniques = set()  # starts empty: the first occurrence of each id keeps its value
for item in aID:
    d = 0  # restart the offset for every entry
    # Advance the offset until id + d has not been assigned yet.
    while item['id'] + d in uniques:
        d += 1
    item['id'] += d
    # Record the id just assigned so later entries cannot reuse it.
    uniques.add(item['id'])