python merge tuples elements with index/key
Question:
I’m trying to merge the column values of tuples that share the same index (key).
The source is a list of tuples covering many timestamps (about 1440):
tuples = [('2022-10-15 01:16:00', '5', '', '', 'hdd1', '1234'),
('2022-10-15 01:16:00', '', '4', '', 'hdd1', '1234'),
('2022-10-15 01:17:00', '10', '', '', 'hdd1', '1234'),
('2022-10-15 01:17:00', '', '25', '', 'hdd1', '1234'),
('2022-10-15 01:18:00', '1', '', '', 'hdd1', '1234'),
('2022-10-15 01:18:00', '', '2', '', 'hdd1', '1234'),
...]
the index is the first element.
desired tuples output:
[('2022-10-15 01:16:00', '5', '4', '', 'hdd1', '1234'),
('2022-10-15 01:17:00', '10', '25', '', 'hdd1', '1234'),
('2022-10-15 01:18:00', '1', '2', '', 'hdd1', '1234')]
my code:
# Sample input: two rows per timestamp, each carrying one of the values.
tuples = [
    ('2022-10-15 01:16:00', '5', '', '', 'hdd1', '1234'),
    ('2022-10-15 01:16:00', '', '4', '', 'hdd1', '1234'),
    ('2022-10-15 01:17:00', '10', '', '', 'hdd1', '1234'),
    ('2022-10-15 01:17:00', '', '25', '', 'hdd1', '1234'),
    ('2022-10-15 01:18:00', '1', '', '', 'hdd1', '1234'),
    ('2022-10-15 01:18:00', '', '2', '', 'hdd1', '1234'),
]

key = lambda t: t[0]
result = []
# groupby only groups adjacent rows, hence the sort on the same key.
for _stamp, group in itertools.groupby(sorted(tuples, key=key), key):
    rows = list(group)
    # A lone row is padded with two integer zeros; otherwise the second row
    # (minus its timestamp) is appended to the first. Note that this
    # concatenates the rows rather than merging them column by column.
    tail = (0, 0) if len(rows) == 1 else rows[1][1:]
    result.append(rows[0] + tail)
print(result)
many thanks for any help
Answers:
I think something like this is what you want:
from itertools import groupby

# Collapse all rows that share a timestamp (field 0) into a single row.
result = []
key = lambda t: t[0]
# groupby only groups adjacent rows, so sort by the same key first.
for _, items in groupby(sorted(tuples, key=key), key):
    item = None
    for it in items:
        if item is None:
            # First row of the group: copy it into a list so it can be edited.
            item = list(it)
            continue
        # Later rows: take every non-empty field, whichever column it is in.
        # Indexing by the row's position within the group would only work
        # when the k-th row happens to carry its value in column k + 1.
        for col, value in enumerate(it):
            if value:
                item[col] = value
    # Convert back to tuple and save.
    result.append(tuple(item))
for item in result:
    print(item)
Output:
('2022-10-15 01:16:00', '5', '4', '', 'hdd1', '1234')
('2022-10-15 01:17:00', '10', '25', '', 'hdd1', '1234')
('2022-10-15 01:18:00', '1', '2', '', 'hdd1', '1234')
Here is a solution that uses a dictionary keyed by date to collect the fields while iterating over the tuples.
# Start every date off with five empty placeholder fields.
by_date = {}
for row in tuples:
    by_date[row[0]] = ["", "", "", "", ""]

# Second pass: drop each non-empty field into its column for that date.
for row in tuples:
    date, *other_fields = row
    slots = by_date[date]
    for position, value in enumerate(other_fields):
        if not value:
            continue  # empty strings never overwrite a stored value
        slots[position] = value

# Flatten the dictionary back into a list of (date, field, ...) tuples.
r = [(date, *fields) for date, fields in by_date.items()]
print(r)
#[('2022-10-15 01:16:00', '5', '4', '', 'hdd1', '1234'), ('2022-10-15 01:17:00', '10', '25', '', 'hdd1', '1234'), ('2022-10-15 01:18:00', '1', '2', '', 'hdd1', '1234')]
I’m trying to merge the column values of tuples that share the same index (key).
The source is a list of tuples covering many timestamps (about 1440):
tuples = [('2022-10-15 01:16:00', '5', '', '', 'hdd1', '1234'),
('2022-10-15 01:16:00', '', '4', '', 'hdd1', '1234'),
('2022-10-15 01:17:00', '10', '', '', 'hdd1', '1234'),
('2022-10-15 01:17:00', '', '25', '', 'hdd1', '1234'),
('2022-10-15 01:18:00', '1', '', '', 'hdd1', '1234'),
('2022-10-15 01:18:00', '', '2', '', 'hdd1', '1234'),
...]
the index is the first element.
desired tuples output:
[('2022-10-15 01:16:00', '5', '4', '', 'hdd1', '1234'),
('2022-10-15 01:17:00', '10', '25', '', 'hdd1', '1234'),
('2022-10-15 01:18:00', '1', '2', '', 'hdd1', '1234')]
my code:
# Sample input: two rows per timestamp, each carrying one of the values.
tuples = [
    ('2022-10-15 01:16:00', '5', '', '', 'hdd1', '1234'),
    ('2022-10-15 01:16:00', '', '4', '', 'hdd1', '1234'),
    ('2022-10-15 01:17:00', '10', '', '', 'hdd1', '1234'),
    ('2022-10-15 01:17:00', '', '25', '', 'hdd1', '1234'),
    ('2022-10-15 01:18:00', '1', '', '', 'hdd1', '1234'),
    ('2022-10-15 01:18:00', '', '2', '', 'hdd1', '1234'),
]

key = lambda t: t[0]
result = []
# groupby only groups adjacent rows, hence the sort on the same key.
for _stamp, group in itertools.groupby(sorted(tuples, key=key), key):
    rows = list(group)
    # A lone row is padded with two integer zeros; otherwise the second row
    # (minus its timestamp) is appended to the first. Note that this
    # concatenates the rows rather than merging them column by column.
    tail = (0, 0) if len(rows) == 1 else rows[1][1:]
    result.append(rows[0] + tail)
print(result)
many thanks for any help
I think something like this is what you want:
from itertools import groupby

# Collapse all rows that share a timestamp (field 0) into a single row.
result = []
key = lambda t: t[0]
# groupby only groups adjacent rows, so sort by the same key first.
for _, items in groupby(sorted(tuples, key=key), key):
    item = None
    for it in items:
        if item is None:
            # First row of the group: copy it into a list so it can be edited.
            item = list(it)
            continue
        # Later rows: take every non-empty field, whichever column it is in.
        # Indexing by the row's position within the group would only work
        # when the k-th row happens to carry its value in column k + 1.
        for col, value in enumerate(it):
            if value:
                item[col] = value
    # Convert back to tuple and save.
    result.append(tuple(item))
for item in result:
    print(item)
Output:
('2022-10-15 01:16:00', '5', '4', '', 'hdd1', '1234')
('2022-10-15 01:17:00', '10', '25', '', 'hdd1', '1234')
('2022-10-15 01:18:00', '1', '2', '', 'hdd1', '1234')
Here is a solution that uses a dictionary keyed by date to collect the fields while iterating over the tuples.
# Start every date off with five empty placeholder fields.
by_date = {}
for row in tuples:
    by_date[row[0]] = ["", "", "", "", ""]

# Second pass: drop each non-empty field into its column for that date.
for row in tuples:
    date, *other_fields = row
    slots = by_date[date]
    for position, value in enumerate(other_fields):
        if not value:
            continue  # empty strings never overwrite a stored value
        slots[position] = value

# Flatten the dictionary back into a list of (date, field, ...) tuples.
r = [(date, *fields) for date, fields in by_date.items()]
print(r)
#[('2022-10-15 01:16:00', '5', '4', '', 'hdd1', '1234'), ('2022-10-15 01:17:00', '10', '25', '', 'hdd1', '1234'), ('2022-10-15 01:18:00', '1', '2', '', 'hdd1', '1234')]