Python: Merge two lists of different objects by attribute
Question:
I’m trying to find an efficient way to merge two lists of Python objects (classes) with different structures into a new list of new objects. The code:
from datetime import datetime
class StructureOne:
    """Named record keyed by a timestamp, with an optional numeric value.

    Args:
        date_time: Timestamp identifying the record.
        name: Label of the record.
        value: Optional initial value. Defaults to ``None``, meaning no
            value has been assigned yet.
    """

    def __init__(self, date_time: datetime, name: str, value: "float | None" = None):
        self.date_time: datetime = date_time
        self.name: str = name
        # None means "no value assigned yet".
        self.value: "float | None" = value

    def set_value(self, value: float):
        """Store *value* on the record (kept for existing callers)."""
        self.value = value

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(date_time={self.date_time!r}, "
            f"name={self.name!r}, value={self.value!r})"
        )
class StructureTwo:
    """Timestamped measurement: a ``date_time`` paired with a ``value``."""

    def __init__(self, date_time: datetime, value: float):
        self.date_time: datetime = date_time
        self.value: float = value
def merge_lists(list_one: list[StructureOne], list_two: list[StructureTwo]) -> list[StructureOne]:
    """Fill missing values in *list_one* from *list_two* entries with equal ``date_time``.

    Elements of *list_one* whose ``value`` is already set are left untouched.
    For the others, the first entry of *list_two* (in list order) that has the
    same ``date_time`` and a non-None ``value`` supplies the value.

    Args:
        list_one: Records to complete; they are modified in place.
        list_two: Records providing the values; ``date_time`` must be hashable.

    Returns:
        The same *list_one* object, after the update.
    """
    # Index list_two once so every lookup is O(1) instead of a full rescan.
    # setdefault keeps the first non-None value seen for each timestamp.
    value_by_date_time = {}
    for element_two in list_two:
        if element_two.value is not None:
            value_by_date_time.setdefault(element_two.date_time, element_two.value)

    for element_one in list_one:
        if element_one.value is None and element_one.date_time in value_by_date_time:
            element_one.set_value(value=value_by_date_time[element_one.date_time])
    return list_one
# Sample input: one record per hour on 2022-01-01, values not yet assigned.
list_one: list[StructureOne] = [
    StructureOne(date_time=datetime(2022, 1, 1, hour), name=name)
    for hour, name in enumerate(('zero', 'one', 'two', 'three'))
]
# Matching measurements: the value of each entry equals its hour.
list_two: list[StructureTwo] = [
    StructureTwo(date_time=datetime(2022, 1, 1, hour), value=hour)
    for hour in range(4)
]
merged_list: list[StructureOne] = merge_lists(list_one=list_one, list_two=list_two)
The desired result is
list_one: list[StructureOne] = [
StructureOne(date_time=datetime(2022, 1, 1, 0), name='zero', value=0),
StructureOne(date_time=datetime(2022, 1, 1, 1), name='one', value=1),
StructureOne(date_time=datetime(2022, 1, 1, 2), name='two', value=2),
StructureOne(date_time=datetime(2022, 1, 1, 3), name='three', value=3),
]
We are trying not to use external libraries such as py-linq.
Answers:
Have you tried py_linq or some other library?
Thanks
Assumptions:
- The lists might not be sorted.
- The
date_time
field is unique within each list.
- It is okay if the output is sorted.
- You want to populate the
value
of the objects in list_one
with the value
from the objects in list_two
on matching date_time
.
- You only want to populate the
value
of an object in list_one
if it is None
.
- In your code, you only populate the
value
if it is not None
; I assume that you want to do the opposite.
def merge_lists(list_one: list[StructureOne], list_two: list[StructureTwo]) -> list[StructureOne]:
    """Return *list_one* sorted by ``date_time`` with missing values filled from *list_two*.

    Sorted copies of both lists are walked in lockstep. The input lists keep
    their order, but the elements are shared, so values are written onto the
    original objects. Only elements whose ``value`` is ``None`` are populated.

    Args:
        list_one: Records to complete.
        list_two: Records providing the values.

    Returns:
        A new list holding the elements of *list_one* ordered by ``date_time``.
    """
    # sort lists by date_time
    list_1 = sorted(list_one, key=lambda x: x.date_time)
    list_2 = sorted(list_two, key=lambda x: x.date_time)

    # two-pointer merge join over the sorted lists
    i, j = 0, 0
    while i < len(list_1) and j < len(list_2):
        if list_1[i].date_time < list_2[j].date_time:
            i += 1
        elif list_1[i].date_time > list_2[j].date_time:
            j += 1
        else:
            # date_time matches
            # populate value from list_two into list_one if value is None
            if list_1[i].value is None:
                list_1[i].value = list_2[j].value
            # Advance only i: leaving j in place lets further list_one entries
            # with this same date_time match too. The ">" branch moves j on
            # once list_one has passed this timestamp.
            i += 1
    return list_1
If all you want is to change the value
attribute of StructureOne
to the value
of StructureTwo
in the order of each list then you can do this to modify the elements in list_one
:
# Pair the two lists by position and copy each value across; date_time is not compared.
for target, source in zip(list_one, list_two):
    target.value = source.value
You don’t even need to have the value
attribute in StructureOne
, it will work the same:
class StructureOne:
    """Timestamped, named record; no ``value`` attribute is declared up front."""

    def __init__(self, date_time: datetime, name: str):
        self.date_time: datetime = date_time
        self.name: str = name
If by merging you mean to get rid of list_two
you can delete the variable so it will get discarded.
del list_two
Printing the elements after the merge will have your value members updated:
# Print every record as "date_time name value", one per line.
for record in list_one:
    print(record.date_time, record.name, record.value)
# Output:
2022-01-01 00:00:00 zero 0
2022-01-01 01:00:00 one 1
2022-01-01 02:00:00 two 2
2022-01-01 03:00:00 three 3
Your __init__
function has a lot of duplication in names, that’s why it can be a good idea to use dataclasses
, it will work the same, but it’s easier to read and maintain:
from dataclasses import dataclass
@dataclass
class StructureOne:
    """Timestamped, named record declared as a dataclass.

    ``__init__``, ``__repr__`` and ``__eq__`` are generated from the fields.
    """

    # ``datetime`` is the class from ``from datetime import datetime``;
    # ``datetime.datetime`` would raise AttributeError with that import.
    date_time: datetime
    name: str
@dataclass
class StructureTwo:
    """Timestamped measurement declared as a dataclass.

    ``__init__``, ``__repr__`` and ``__eq__`` are generated from the fields.
    """

    # ``datetime`` is the class from ``from datetime import datetime``;
    # ``datetime.datetime`` would raise AttributeError with that import.
    date_time: datetime
    value: float
I’m trying to find an efficient way to merge two lists of Python objects (classes) with different structures into a new list of new objects. The code:
from datetime import datetime
class StructureOne:
    """Named record keyed by a timestamp, with an optional numeric value.

    Args:
        date_time: Timestamp identifying the record.
        name: Label of the record.
        value: Optional initial value. Defaults to ``None``, meaning no
            value has been assigned yet.
    """

    def __init__(self, date_time: datetime, name: str, value: "float | None" = None):
        self.date_time: datetime = date_time
        self.name: str = name
        # None means "no value assigned yet".
        self.value: "float | None" = value

    def set_value(self, value: float):
        """Store *value* on the record (kept for existing callers)."""
        self.value = value

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(date_time={self.date_time!r}, "
            f"name={self.name!r}, value={self.value!r})"
        )
class StructureTwo:
    """Timestamped measurement: a ``date_time`` paired with a ``value``."""

    def __init__(self, date_time: datetime, value: float):
        self.date_time: datetime = date_time
        self.value: float = value
def merge_lists(list_one: list[StructureOne], list_two: list[StructureTwo]) -> list[StructureOne]:
    """Fill missing values in *list_one* from *list_two* entries with equal ``date_time``.

    Elements of *list_one* whose ``value`` is already set are left untouched.
    For the others, the first entry of *list_two* (in list order) that has the
    same ``date_time`` and a non-None ``value`` supplies the value.

    Args:
        list_one: Records to complete; they are modified in place.
        list_two: Records providing the values; ``date_time`` must be hashable.

    Returns:
        The same *list_one* object, after the update.
    """
    # Index list_two once so every lookup is O(1) instead of a full rescan.
    # setdefault keeps the first non-None value seen for each timestamp.
    value_by_date_time = {}
    for element_two in list_two:
        if element_two.value is not None:
            value_by_date_time.setdefault(element_two.date_time, element_two.value)

    for element_one in list_one:
        if element_one.value is None and element_one.date_time in value_by_date_time:
            element_one.set_value(value=value_by_date_time[element_one.date_time])
    return list_one
# Sample input: one record per hour on 2022-01-01, values not yet assigned.
list_one: list[StructureOne] = [
    StructureOne(date_time=datetime(2022, 1, 1, hour), name=name)
    for hour, name in enumerate(('zero', 'one', 'two', 'three'))
]
# Matching measurements: the value of each entry equals its hour.
list_two: list[StructureTwo] = [
    StructureTwo(date_time=datetime(2022, 1, 1, hour), value=hour)
    for hour in range(4)
]
merged_list: list[StructureOne] = merge_lists(list_one=list_one, list_two=list_two)
The desired result is
list_one: list[StructureOne] = [
StructureOne(date_time=datetime(2022, 1, 1, 0), name='zero', value=0),
StructureOne(date_time=datetime(2022, 1, 1, 1), name='one', value=1),
StructureOne(date_time=datetime(2022, 1, 1, 2), name='two', value=2),
StructureOne(date_time=datetime(2022, 1, 1, 3), name='three', value=3),
]
We are trying not to use external libraries such as py-linq.
Have you tried py_linq or some other library?
Thanks
Assumptions:
- The lists might not be sorted.
- The
date_time
field is unique within each list.
- It is okay if the output is sorted.
- You want to populate the
value
of the objects in list_one
with the value
from the objects in list_two
on matching date_time
.
- You only want to populate the
value
of an object in list_one
if it is None
.
- In your code, you only populate the
value
if it is not None
; I assume that you want to do the opposite.
def merge_lists(list_one: list[StructureOne], list_two: list[StructureTwo]) -> list[StructureOne]:
    """Return *list_one* sorted by ``date_time`` with missing values filled from *list_two*.

    Sorted copies of both lists are walked in lockstep. The input lists keep
    their order, but the elements are shared, so values are written onto the
    original objects. Only elements whose ``value`` is ``None`` are populated.

    Args:
        list_one: Records to complete.
        list_two: Records providing the values.

    Returns:
        A new list holding the elements of *list_one* ordered by ``date_time``.
    """
    # sort lists by date_time
    list_1 = sorted(list_one, key=lambda x: x.date_time)
    list_2 = sorted(list_two, key=lambda x: x.date_time)

    # two-pointer merge join over the sorted lists
    i, j = 0, 0
    while i < len(list_1) and j < len(list_2):
        if list_1[i].date_time < list_2[j].date_time:
            i += 1
        elif list_1[i].date_time > list_2[j].date_time:
            j += 1
        else:
            # date_time matches
            # populate value from list_two into list_one if value is None
            if list_1[i].value is None:
                list_1[i].value = list_2[j].value
            # Advance only i: leaving j in place lets further list_one entries
            # with this same date_time match too. The ">" branch moves j on
            # once list_one has passed this timestamp.
            i += 1
    return list_1
If all you want is to change the value
attribute of StructureOne
to the value
of StructureTwo
in the order of each list then you can do this to modify the elements in list_one
:
# Pair the two lists by position and copy each value across; date_time is not compared.
for target, source in zip(list_one, list_two):
    target.value = source.value
You don’t even need to have the value
attribute in StructureOne
, it will work the same:
class StructureOne:
    """Timestamped, named record; no ``value`` attribute is declared up front."""

    def __init__(self, date_time: datetime, name: str):
        self.date_time: datetime = date_time
        self.name: str = name
If by merging you mean to get rid of list_two
you can delete the variable so it will get discarded.
del list_two
Printing the elements after the merge will have your value members updated:
# Print every record as "date_time name value", one per line.
for record in list_one:
    print(record.date_time, record.name, record.value)
# Output:
2022-01-01 00:00:00 zero 0
2022-01-01 01:00:00 one 1
2022-01-01 02:00:00 two 2
2022-01-01 03:00:00 three 3
Your __init__
function has a lot of duplication in names, that’s why it can be a good idea to use dataclasses
, it will work the same, but it’s easier to read and maintain:
from dataclasses import dataclass
@dataclass
class StructureOne:
    """Timestamped, named record declared as a dataclass.

    ``__init__``, ``__repr__`` and ``__eq__`` are generated from the fields.
    """

    # ``datetime`` is the class from ``from datetime import datetime``;
    # ``datetime.datetime`` would raise AttributeError with that import.
    date_time: datetime
    name: str
@dataclass
class StructureTwo:
    """Timestamped measurement declared as a dataclass.

    ``__init__``, ``__repr__`` and ``__eq__`` are generated from the fields.
    """

    # ``datetime`` is the class from ``from datetime import datetime``;
    # ``datetime.datetime`` would raise AttributeError with that import.
    date_time: datetime
    value: float