Return value of key in nested dict when key not present in all nested dicts
Question:
I am trying to get the values of a particular key from nested dictionaries, but the key is not always present. The key in question is ‘action’. I tried several ways but can’t get it right: I either get an error saying the key doesn’t exist, or I get only a partial result. My latest attempts are as follows.
def events_query():
    """Print and return the ``event.action`` value of every hit in a sample response.

    The function works on a hard-coded sample search response. The individual
    documents are the dictionaries in the list ``query_res['hits']['hits']``;
    each one carries its fields under ``['_source']['event']``. The ``action``
    key is optional, so it is looked up with a default instead of being
    indexed directly.

    Returns:
        list: One entry per hit, in order: the hit's ``action`` string, or
        ``'not found'`` when the hit's ``event`` dictionary has no ``action``.
    """
    query_res = {
        'took': 52,
        'timed_out': False,
        '_shards': {
            'total': 3,
            'successful': 3,
            'skipped': 1,
            'failed': 0,
        },
        'hits': {
            'total': {'value': 10000, 'relation': 'gte'},
            'max_score': None,
            'hits': [
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': '_LrxCYYBiABa0UinUkYt',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDCMI'},
                        'event': {
                            'code': '7036',
                            'provider': 'Service Control Manager',
                            'created': '2023-01-31T22:27:34.585Z',
                            'kind': 'event',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': '_brxCYYBiABa0UinUkYt',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDCMI'},
                        'event': {
                            'code': '7036',
                            'provider': 'Service Control Manager',
                            'kind': 'event',
                            'created': '2023-01-31T22:27:34.585Z',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': '_rrxCYYBiABa0UinUkYt',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDCMI'},
                        'event': {
                            'code': '7036',
                            'provider': 'Service Control Manager',
                            'kind': 'event',
                            'created': '2023-01-31T22:27:34.585Z',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': '_7rxCYYBiABa0UinUkZI',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDC01'},
                        'event': {
                            'code': '4624',
                            'provider': 'Microsoft-Windows-Security-Auditing',
                            'created': '2023-01-31T22:27:34.622Z',
                            'kind': 'event',
                            'module': 'security',
                            'action': 'logged-in',
                            'category': ['authentication'],
                            'type': ['start'],
                            'outcome': 'success',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': 'ALrxCYYBiABa0UinUkdI',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDC01'},
                        'event': {
                            'code': '4776',
                            'provider': 'Microsoft-Windows-Security-Auditing',
                            'created': '2023-01-31T22:27:34.622Z',
                            'kind': 'event',
                            'module': 'security',
                            'action': 'credential-validated',
                            'category': ['authentication'],
                            'type': ['start'],
                            'outcome': 'success',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
            ],
        },
    }

    # Iterate over the list of hits itself. Looping over `query_res` would
    # only walk its four top-level keys and never reach the documents.
    hits = query_res['hits']['hits']

    for hit in hits:
        event = hit['_source']['event']
        if 'action' in event:
            print(event['action'])
        else:
            print('not found')

    # or
    # dict.get supplies a default so hits without 'action' do not raise
    # KeyError and the resulting list still has one entry per hit.
    action_list = [
        hit['_source']['event'].get('action', 'not found') for hit in hits
    ]
    print(action_list)
    return action_list


events_query()
Any help is appreciated.
Answers:
The `query_res` dictionary has a nested dictionary under the key `hits`, which in turn holds a list under the key `hits`. That’s the list you want to loop over. The dictionaries in that list contain `_source` keys that point to dictionaries with the `event` key. That’s where you need to look for `action`.
# Walk the list of hit dictionaries; `query_res` is the response dict from
# the question and must be in scope where this loop runs.
for hh in query_res['hits']['hits']:
    # 'action' is optional, so test for it before indexing to avoid KeyError.
    if 'action' in hh['_source']['event']:
        print(hh['_source']['event']['action'])
    else:
        print('not found')
Output:
not found
not found
not found
logged-in
credential-validated
Right now `action` is the only key that’s being tested before it’s being accessed. If any of the other keys are optional, you might want to test for those as well, or use Python’s `dict.get` method to provide a default.
You could write a little helper function that will recursively go through the hierarchy, validating the presence of keys as it drills down:
def findKey(D, key, *more, default=None):
    """Follow a path of keys/indexes through nested containers.

    Starting from ``D``, each of ``key, *more`` is applied in turn with
    ``[]`` subscripting, so the path may mix dictionary keys and list indexes.

    Args:
        D: The outermost container (for example a dict or a list).
        key: The first key or index to look up in ``D``.
        *more: Further keys or indexes, applied to each successive result.
        default: Value returned when any step of the path cannot be resolved.

    Returns:
        The object found at the end of the path, or ``default`` if a step
        raises ``KeyError``, ``IndexError`` or ``TypeError``.
    """
    node = D
    for step in (key, *more):
        try:
            node = node[step]
        except (LookupError, TypeError):
            # LookupError covers a missing dict key and an out-of-range list
            # index; TypeError covers a non-subscriptable value or a wrong
            # index type. Anything else (e.g. KeyboardInterrupt) propagates.
            return default
    return node
# Example usage; `query_res` is the response dict from the question and
# `findKey` is the helper defined above the calls.
V = findKey(query_res, "hits", "hits", 0, "_source", "event", "action", default="Not Found")
print(V)
# Not Found
V = findKey(query_res, "hits", "hits", 0, "_source", "event", default="Not Found")
print(V)
# {'code': '7036', 'provider': 'Service Control Manager',
#  'created': '2023-01-31T22:27:34.585Z', 'kind': 'event'}
I am trying to get the values of a particular key from nested dictionaries, but the key is not always present. The key in question is ‘action’. I tried several ways but can’t get it right: I either get an error saying the key doesn’t exist, or I get only a partial result. My latest attempts are as follows.
def events_query():
    """Print and return the ``event.action`` value of every hit in a sample response.

    The function works on a hard-coded sample search response. The individual
    documents are the dictionaries in the list ``query_res['hits']['hits']``;
    each one carries its fields under ``['_source']['event']``. The ``action``
    key is optional, so it is looked up with a default instead of being
    indexed directly.

    Returns:
        list: One entry per hit, in order: the hit's ``action`` string, or
        ``'not found'`` when the hit's ``event`` dictionary has no ``action``.
    """
    query_res = {
        'took': 52,
        'timed_out': False,
        '_shards': {
            'total': 3,
            'successful': 3,
            'skipped': 1,
            'failed': 0,
        },
        'hits': {
            'total': {'value': 10000, 'relation': 'gte'},
            'max_score': None,
            'hits': [
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': '_LrxCYYBiABa0UinUkYt',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDCMI'},
                        'event': {
                            'code': '7036',
                            'provider': 'Service Control Manager',
                            'created': '2023-01-31T22:27:34.585Z',
                            'kind': 'event',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': '_brxCYYBiABa0UinUkYt',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDCMI'},
                        'event': {
                            'code': '7036',
                            'provider': 'Service Control Manager',
                            'kind': 'event',
                            'created': '2023-01-31T22:27:34.585Z',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': '_rrxCYYBiABa0UinUkYt',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDCMI'},
                        'event': {
                            'code': '7036',
                            'provider': 'Service Control Manager',
                            'kind': 'event',
                            'created': '2023-01-31T22:27:34.585Z',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': '_7rxCYYBiABa0UinUkZI',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDC01'},
                        'event': {
                            'code': '4624',
                            'provider': 'Microsoft-Windows-Security-Auditing',
                            'created': '2023-01-31T22:27:34.622Z',
                            'kind': 'event',
                            'module': 'security',
                            'action': 'logged-in',
                            'category': ['authentication'],
                            'type': ['start'],
                            'outcome': 'success',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
                {
                    '_index': 'winlogbeat-dc-2023.01.16-000195',
                    '_type': '_doc',
                    '_id': 'ALrxCYYBiABa0UinUkdI',
                    '_score': None,
                    '_source': {
                        'agent': {'hostname': 'SRVDC01'},
                        'event': {
                            'code': '4776',
                            'provider': 'Microsoft-Windows-Security-Auditing',
                            'created': '2023-01-31T22:27:34.622Z',
                            'kind': 'event',
                            'module': 'security',
                            'action': 'credential-validated',
                            'category': ['authentication'],
                            'type': ['start'],
                            'outcome': 'success',
                        },
                    },
                    'sort': [-9223372036854775808],
                },
            ],
        },
    }

    # Iterate over the list of hits itself. Looping over `query_res` would
    # only walk its four top-level keys and never reach the documents.
    hits = query_res['hits']['hits']

    for hit in hits:
        event = hit['_source']['event']
        if 'action' in event:
            print(event['action'])
        else:
            print('not found')

    # or
    # dict.get supplies a default so hits without 'action' do not raise
    # KeyError and the resulting list still has one entry per hit.
    action_list = [
        hit['_source']['event'].get('action', 'not found') for hit in hits
    ]
    print(action_list)
    return action_list


events_query()
Any help is appreciated.
The `query_res` dictionary has a nested dictionary under the key `hits`, which in turn holds a list under the key `hits`. That’s the list you want to loop over. The dictionaries in that list contain `_source` keys that point to dictionaries with the `event` key. That’s where you need to look for `action`.
# Walk the list of hit dictionaries; `query_res` is the response dict from
# the question and must be in scope where this loop runs.
for hh in query_res['hits']['hits']:
    # 'action' is optional, so test for it before indexing to avoid KeyError.
    if 'action' in hh['_source']['event']:
        print(hh['_source']['event']['action'])
    else:
        print('not found')
Output:
not found
not found
not found
logged-in
credential-validated
Right now `action` is the only key that’s being tested before it’s being accessed. If any of the other keys are optional, you might want to test for those as well, or use Python’s `dict.get` method to provide a default.
You could write a little helper function that will recursively go through the hierarchy, validating the presence of keys as it drills down:
def findKey(D, key, *more, default=None):
    """Follow a path of keys/indexes through nested containers.

    Starting from ``D``, each of ``key, *more`` is applied in turn with
    ``[]`` subscripting, so the path may mix dictionary keys and list indexes.

    Args:
        D: The outermost container (for example a dict or a list).
        key: The first key or index to look up in ``D``.
        *more: Further keys or indexes, applied to each successive result.
        default: Value returned when any step of the path cannot be resolved.

    Returns:
        The object found at the end of the path, or ``default`` if a step
        raises ``KeyError``, ``IndexError`` or ``TypeError``.
    """
    node = D
    for step in (key, *more):
        try:
            node = node[step]
        except (LookupError, TypeError):
            # LookupError covers a missing dict key and an out-of-range list
            # index; TypeError covers a non-subscriptable value or a wrong
            # index type. Anything else (e.g. KeyboardInterrupt) propagates.
            return default
    return node
# Example usage; `query_res` is the response dict from the question and
# `findKey` is the helper defined above the calls.
V = findKey(query_res, "hits", "hits", 0, "_source", "event", "action", default="Not Found")
print(V)
# Not Found
V = findKey(query_res, "hits", "hits", 0, "_source", "event", default="Not Found")
print(V)
# {'code': '7036', 'provider': 'Service Control Manager',
#  'created': '2023-01-31T22:27:34.585Z', 'kind': 'event'}