Python nested dictionary updating all dicts rather than specified
Question:
I am scraping results from a web table (link can be found in the code if needed) using BeautifulSoup. I then iterate through the table rows and subsequently iterate through the cells to produce a list of that row’s data. That list is indexed to pull out the name of a driver and their lap time which I want to add to a set of nested dictionaries.
However instead of just updating the values (two-levels down) for the dictionary I have specified (one-level down) it updates them for all the dictionaries.
My code (shortened and generalised the races and drivers):
#Imports
import requests
from bs4 import BeautifulSoup as bsoup
dataurls = {
    'race1': 'https://fiaresultsandstatistics.motorsportstats.com/results/2022-bahrain-grand-prix/classification/2c65d50d-606e-4988-81c5-7a4976ef0e6f',
}

# Setup the dictionary
races = ['race1', 'race2']
drivers = ['driver1', 'driver2']
# NOTE(review): dict.fromkeys stores the *same* {} object under every key.
mainDict = dict.fromkeys(races, {})


# Scraping elements
def webscraper(mDict, race):
    """Scrape the results table for `race` and merge it into `mDict[race]`.

    Every matching table row contributes one entry: the text of its third
    cell (driver name) mapped to the text of its seventh cell (lap time).
    """
    page = requests.get(dataurls[race])
    soup = bsoup(page.content, "html.parser")
    table = soup.find("table", class_="_2Q90P")

    # Dict for the race: driver name -> lap time.
    rDict = {}
    for tr in table.find_all("tr", class_="_3AoAU"):
        cells = [td.text for td in tr.find_all('td')]
        rDict[cells[2]] = cells[6]

    # In-place update of whatever dict is currently stored under `race`.
    mDict[race].update(rDict)


webscraper(mainDict, races[0])
print(mainDict)
Expected output:
{
'race1': {'driver1': '1:30:100', 'driver2': '1:30.200'},
'race2': {}
}
Actual output:
{
'race1': {'driver1': '1:30:100', 'driver2': '1:30.200'},
'race2': {'driver1': '1:30:100', 'driver2': '1:30.200'}
}
I have tried many different ways to get around this; the only other result I have managed to get is that just the last driver’s time is added, although then it is added to the correct race only.
Answers:
To solve this problem, replace the `update()` call on `mDict[race]` inside the `webscraper` function with a plain assignment (`=`):
#Imports
import requests
from bs4 import BeautifulSoup as bsoup
dataurls = {
    'race1': 'https://fiaresultsandstatistics.motorsportstats.com/results/2022-bahrain-grand-prix/classification/2c65d50d-606e-4988-81c5-7a4976ef0e6f',
}

# Setup the dictionary
races = ['race1', 'race2']
drivers = ['driver1', 'driver2']
# Give every race its OWN empty dict. dict.fromkeys(races, {}) would store one
# shared dict object under all keys, so any in-place change made through one
# race would show up under every race.
mainDict = {race: {} for race in races}


# Scraping elements
def webscraper(mDict, race):
    """Scrape the results table for `race` and store it under `mDict[race]`.

    Args:
        mDict: Mapping of race name -> {driver name: lap time}; modified in
            place by replacing the entry for `race`.
        race: Key into both `dataurls` (page to fetch) and `mDict`.

    Every matching table row contributes one entry: the text of its third
    cell (driver name) mapped to the text of its seventh cell (lap time).
    """
    soup = bsoup(requests.get(dataurls[race]).content, "html.parser")

    # Dict for the race: driver name -> lap time.
    rDict = {}
    # Parse the table by row, extract driver name and lap time.
    for tr in soup.find("table", class_="_2Q90P").find_all("tr", class_="_3AoAU"):
        row = [td.text for td in tr.find_all('td')]
        rDict[row[2]] = row[6]

    # Rebind only this race's entry to the freshly built dict instead of
    # mutating whatever dict was stored there before.
    mDict[race] = rDict


webscraper(mainDict, races[0])
print(mainDict)
or change the way you are using the update:
mDict.update({race: rDict})
https://www.w3schools.com/python/ref_dictionary_update.asp
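The reason the original code changes every race is not `update` itself: `dict.fromkeys(races, {})` stores the same dictionary object under every key, so `mDict[race].update(rDict)` mutates that one shared dictionary and the change is visible under all keys. Assigning with `mDict[race] = rDict` (or calling `mDict.update({race: rDict})`) rebinds only the chosen key to a new dictionary. Building the outer dictionary with `{race: {} for race in races}` avoids the sharing altogether.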
I am scraping results from a web table (link can be found in the code if needed) using BeautifulSoup. I then iterate through the table rows and subsequently iterate through the cells to produce a list of that row’s data. That list is indexed to pull out the name of a driver and their lap time which I want to add to a set of nested dictionaries.
However instead of just updating the values (two-levels down) for the dictionary I have specified (one-level down) it updates them for all the dictionaries.
My code (shortened and generalised the races and drivers):
#Imports
import requests
from bs4 import BeautifulSoup as bsoup
dataurls = {
    'race1': 'https://fiaresultsandstatistics.motorsportstats.com/results/2022-bahrain-grand-prix/classification/2c65d50d-606e-4988-81c5-7a4976ef0e6f',
}

# Setup the dictionary
races = ['race1', 'race2']
drivers = ['driver1', 'driver2']
# NOTE(review): dict.fromkeys stores the *same* {} object under every key.
mainDict = dict.fromkeys(races, {})


# Scraping elements
def webscraper(mDict, race):
    """Scrape the results table for `race` and merge it into `mDict[race]`.

    Every matching table row contributes one entry: the text of its third
    cell (driver name) mapped to the text of its seventh cell (lap time).
    """
    page = requests.get(dataurls[race])
    soup = bsoup(page.content, "html.parser")
    table = soup.find("table", class_="_2Q90P")

    # Dict for the race: driver name -> lap time.
    rDict = {}
    for tr in table.find_all("tr", class_="_3AoAU"):
        cells = [td.text for td in tr.find_all('td')]
        rDict[cells[2]] = cells[6]

    # In-place update of whatever dict is currently stored under `race`.
    mDict[race].update(rDict)


webscraper(mainDict, races[0])
print(mainDict)
Expected output:
{
'race1': {'driver1': '1:30:100', 'driver2': '1:30.200'},
'race2': {}
}
Actual output:
{
'race1': {'driver1': '1:30:100', 'driver2': '1:30.200'},
'race2': {'driver1': '1:30:100', 'driver2': '1:30.200'}
}
I have tried many different ways to get around this; the only other result I have managed to get is that just the last driver’s time is added, although then it is added to the correct race only.
To solve this problem, replace the `update()` call on `mDict[race]` inside the `webscraper` function with a plain assignment (`=`):
#Imports
import requests
from bs4 import BeautifulSoup as bsoup
dataurls = {
    'race1': 'https://fiaresultsandstatistics.motorsportstats.com/results/2022-bahrain-grand-prix/classification/2c65d50d-606e-4988-81c5-7a4976ef0e6f',
}

# Setup the dictionary
races = ['race1', 'race2']
drivers = ['driver1', 'driver2']
# Give every race its OWN empty dict. dict.fromkeys(races, {}) would store one
# shared dict object under all keys, so any in-place change made through one
# race would show up under every race.
mainDict = {race: {} for race in races}


# Scraping elements
def webscraper(mDict, race):
    """Scrape the results table for `race` and store it under `mDict[race]`.

    Args:
        mDict: Mapping of race name -> {driver name: lap time}; modified in
            place by replacing the entry for `race`.
        race: Key into both `dataurls` (page to fetch) and `mDict`.

    Every matching table row contributes one entry: the text of its third
    cell (driver name) mapped to the text of its seventh cell (lap time).
    """
    soup = bsoup(requests.get(dataurls[race]).content, "html.parser")

    # Dict for the race: driver name -> lap time.
    rDict = {}
    # Parse the table by row, extract driver name and lap time.
    for tr in soup.find("table", class_="_2Q90P").find_all("tr", class_="_3AoAU"):
        row = [td.text for td in tr.find_all('td')]
        rDict[row[2]] = row[6]

    # Rebind only this race's entry to the freshly built dict instead of
    # mutating whatever dict was stored there before.
    mDict[race] = rDict


webscraper(mainDict, races[0])
print(mainDict)
or change the way you are using the update:
mDict.update({race: rDict})
https://www.w3schools.com/python/ref_dictionary_update.asp
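The reason the original code changes every race is not `update` itself: `dict.fromkeys(races, {})` stores the same dictionary object under every key, so `mDict[race].update(rDict)` mutates that one shared dictionary and the change is visible under all keys. Assigning with `mDict[race] = rDict` (or calling `mDict.update({race: rDict})`) rebinds only the chosen key to a new dictionary. Building the outer dictionary with `{race: {} for race in races}` avoids the sharing altogether.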