How to make a nested dictionary from an SqlAlchemy output (Python)?
Question:
I have the following output from a SQLAlchemy `select` statement:
| device_id | event_name | event_count |
| :--------:| :-----------------: |:-----------:|
| 123456 | speed more than 100 | 3 |
| 123456 | speed less than 12 | 0 |
| 334455 | out of NYC | 5 |
| 111111 | in UCSD campus | 1 |
Now I want to save this result into a nested dictionary with the following format, and I don’t know how to do it efficiently.
I need a dictionary whose keys are the `device_id`s and whose values are dictionaries in which the keys are `event_name`s and the values are `event_count`s.
{'123456' : {'speed more than 100' : 3,
'speed less than 12': 0},
'334455' : {'out of NYC' : 5},
'111111' : {'in UCSD campus' : 1}
}
Here is my code.
def count_per_event_json(self, count_per_event_query_result):
    """Group ``(device_id, event_name, event_count)`` rows into a nested dict.

    Args:
        count_per_event_query_result: Any iterable of 3-item rows in the
            order ``(device_id, event_name, event_count)``. Plain tuples
            work, as does anything that unpacks like a tuple.

    Returns:
        ``{device_id: {event_name: event_count}}``. Keys keep whatever type
        the rows carry (no ``str()`` conversion is applied). An empty input
        yields ``{}``. If the same ``(device_id, event_name)`` pair occurs
        more than once, the last row wins.
    """
    result = {}
    # Unpack each row directly. Wrapping the rows in enumerate() would make
    # the first element the loop index rather than the device id.
    for device_id, event_name, event_count in count_per_event_query_result:
        # setdefault creates the inner dict the first time a device is seen.
        result.setdefault(device_id, {})[event_name] = event_count
    return result
I also calculate the input argument and pass it to the function like this:
def count_per_event(self):
    """Build the SELECT that counts events per device inside the time window.

    Selects ``device_id``, ``event_name`` and ``count(rule_table_id)`` from
    ``EventsModel`` for rows whose ``timestamp`` lies between
    ``self.min_utc_timestamp`` and ``self.max_utc_timestamp`` (both bounds
    inclusive).

    Returns:
        The statement object; it is not executed here.
    """
    # The whole chain is parenthesised so that the continuation lines, which
    # start with a dot, parse as one expression.
    count_per_event_query = (
        select(
            EventsModel.device_id,
            EventsModel.event_name,
            func.count(EventsModel.rule_table_id),
        )
        .where(EventsModel.timestamp <= self.max_utc_timestamp)
        .where(EventsModel.timestamp >= self.min_utc_timestamp)
        # NOTE(review): event_name is selected but not listed in GROUP BY;
        # stricter databases may reject this -- confirm whether event_name
        # should be grouped on as well.
        .group_by(EventsModel.device_id, EventsModel.rule_table_id)
        # NOTE(review): synchronize_session is documented for ORM
        # UPDATE/DELETE; it presumably has no effect on a SELECT -- confirm
        # and drop if so.
        .execution_options(synchronize_session="fetch")
    )
    return count_per_event_query
async def cube_calculator(self):
    """Execute the per-event count query and convert its rows to a nested dict.

    The converted dict is bound to a local name only; nothing is returned.
    """
    async with async_session() as session:
        statement = self.count_per_event()
        query_result = await session.execute(statement)
        # The asker reports that the executed result is a
        # sqlalchemy.engine.result.ChunkedIteratorResult.
        nested_counts = self.count_per_event_json(query_result)
Answers:
You could do this with pandas:
import pandas as pd
# Read the query results into a DataFrame (one row per device/event pair).
# NOTE(review): the code below expects columns named device_id, event_name and
# event_count; the count column in the question's query carries no explicit
# label, so confirm the column name or add one.
df = pd.read_sql(select_query, con=your_db_connection)
# Group by device_id, keeping only the two columns the inner dict needs.
# Selecting them explicitly keeps the grouping column out of apply(), which
# pandas >= 2.2 otherwise warns about.
grouped_df = df.groupby('device_id')[['event_name', 'event_count']]
# Build {event_name: event_count} per device, then collect the per-device
# dicts into {device_id: {...}}.
result_dict = grouped_df.apply(
    lambda x: x.set_index('event_name')['event_count'].to_dict()
).to_dict()
I have the following output from a SQLAlchemy `select` statement:
| device_id | event_name | event_count |
| :--------:| :-----------------: |:-----------:|
| 123456 | speed more than 100 | 3 |
| 123456 | speed less than 12 | 0 |
| 334455 | out of NYC | 5 |
| 111111 | in UCSD campus | 1 |
Now I want to save this result into a nested dictionary with the following format, and I don’t know how to do it efficiently.
I need a dictionary whose keys are the `device_id`s and whose values are dictionaries in which the keys are `event_name`s and the values are `event_count`s.
{'123456' : {'speed more than 100' : 3,
'speed less than 12': 0},
'334455' : {'out of NYC' : 5},
'111111' : {'in UCSD campus' : 1}
}
Here is my code.
def count_per_event_json(self, count_per_event_query_result):
    """Group ``(device_id, event_name, event_count)`` rows into a nested dict.

    Args:
        count_per_event_query_result: Any iterable of 3-item rows in the
            order ``(device_id, event_name, event_count)``. Plain tuples
            work, as does anything that unpacks like a tuple.

    Returns:
        ``{device_id: {event_name: event_count}}``. Keys keep whatever type
        the rows carry (no ``str()`` conversion is applied). An empty input
        yields ``{}``. If the same ``(device_id, event_name)`` pair occurs
        more than once, the last row wins.
    """
    result = {}
    # Unpack each row directly. Wrapping the rows in enumerate() would make
    # the first element the loop index rather than the device id.
    for device_id, event_name, event_count in count_per_event_query_result:
        # setdefault creates the inner dict the first time a device is seen.
        result.setdefault(device_id, {})[event_name] = event_count
    return result
I also calculate the input argument and pass it to the function like this:
def count_per_event(self):
    """Build the SELECT that counts events per device inside the time window.

    Selects ``device_id``, ``event_name`` and ``count(rule_table_id)`` from
    ``EventsModel`` for rows whose ``timestamp`` lies between
    ``self.min_utc_timestamp`` and ``self.max_utc_timestamp`` (both bounds
    inclusive).

    Returns:
        The statement object; it is not executed here.
    """
    # The whole chain is parenthesised so that the continuation lines, which
    # start with a dot, parse as one expression.
    count_per_event_query = (
        select(
            EventsModel.device_id,
            EventsModel.event_name,
            func.count(EventsModel.rule_table_id),
        )
        .where(EventsModel.timestamp <= self.max_utc_timestamp)
        .where(EventsModel.timestamp >= self.min_utc_timestamp)
        # NOTE(review): event_name is selected but not listed in GROUP BY;
        # stricter databases may reject this -- confirm whether event_name
        # should be grouped on as well.
        .group_by(EventsModel.device_id, EventsModel.rule_table_id)
        # NOTE(review): synchronize_session is documented for ORM
        # UPDATE/DELETE; it presumably has no effect on a SELECT -- confirm
        # and drop if so.
        .execution_options(synchronize_session="fetch")
    )
    return count_per_event_query
async def cube_calculator(self):
    """Execute the per-event count query and convert its rows to a nested dict.

    The converted dict is bound to a local name only; nothing is returned.
    """
    async with async_session() as session:
        statement = self.count_per_event()
        query_result = await session.execute(statement)
        # The asker reports that the executed result is a
        # sqlalchemy.engine.result.ChunkedIteratorResult.
        nested_counts = self.count_per_event_json(query_result)
You could do this with pandas:
import pandas as pd
# Read the query results into a DataFrame (one row per device/event pair).
# NOTE(review): the code below expects columns named device_id, event_name and
# event_count; the count column in the question's query carries no explicit
# label, so confirm the column name or add one.
df = pd.read_sql(select_query, con=your_db_connection)
# Group by device_id, keeping only the two columns the inner dict needs.
# Selecting them explicitly keeps the grouping column out of apply(), which
# pandas >= 2.2 otherwise warns about.
grouped_df = df.groupby('device_id')[['event_name', 'event_count']]
# Build {event_name: event_count} per device, then collect the per-device
# dicts into {device_id: {...}}.
result_dict = grouped_df.apply(
    lambda x: x.set_index('event_name')['event_count'].to_dict()
).to_dict()