Object has no attribute Python web scraping error
Question:
I’m looking to scrape a set of URLs – I want to visit each link on the given URL, and return the player’s pos1 pos2 and profile details.
I have two sets of URLs I’m looking at, G League players (which is working perfectly) and International Players (which I’m completely stuck on).
The sites seem to be almost identical, but not sure what’s going on.
WORKING G LEAGUE SCRIPT:
import requests
from bs4 import BeautifulSoup
import gspread
gc = gspread.service_account(filename='creds.json')
sh = gc.open_by_key('SSID')
worksheet = sh.get_worksheet(0)
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)
def get_links(url):
data = []
req_url = requests.get(url)
soup = BeautifulSoup(req_url.content, "html.parser")
for td in soup.find_all('td', {'data-th': 'Player'}):
a_tag = td.a
name = a_tag.text
player_url = a_tag['href']
pos = td.find_next_sibling('td').text
print(f"Getting {name}")
req_player_url = requests.get(
f"https://basketball.realgm.com{player_url}")
soup_player = BeautifulSoup(req_player_url.content, "html.parser")
div_profile_box = soup_player.find("div", class_="profile-box")
row = {"Name": name, "URL": player_url, "pos_option1": pos}
row['pos_option2'] = div_profile_box.h2.span.text
for p in div_profile_box.find_all("p"):
try:
key, value = p.get_text(strip=True).split(':', 1)
row[key.strip()] = value.strip()
except: # not all entries have values
pass
data.append(row)
return data
urls = [
'https://basketball.realgm.com/dleague/players/2022',
'https://basketball.realgm.com/dleague/players/2021',
'https://basketball.realgm.com/dleague/players/2020',
'https://basketball.realgm.com/dleague/players/2019',
'https://basketball.realgm.com/dleague/players/2018',
]
res = []
for url in urls:
print(f"Getting: {url}")
data = get_links(url)
res = [*res, *data]
if res != []:
header = list(res[0].keys())
values = [
header, *[[e[k] if e.get(k) else "" for k in header] for e in res]]
worksheet.append_rows(values, value_input_option="USER_ENTERED")
Like I stated, this prints the positions along with the rest of the profile details. I’m trying to recreate for a different set of URLs, but hitting the error:
This is the script I’m stuck on, any thoughts?
import requests
from bs4 import BeautifulSoup
import gspread
gc = gspread.service_account(filename='creds.json')
sh = gc.open_by_key('1DpasSS8yC1UX6WqAbkQ515BwEEjdDL-x74T0eTW8hLM')
worksheet = sh.get_worksheet(0)
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)
def get_links2(url):
data = []
req_url = requests.get(url)
soup = BeautifulSoup(req_url.content, "html.parser")
for td in soup.select('td.nowrap'):
a_tag = td.a
if a_tag:
name = a_tag.text
player_url = a_tag['href']
pos = td.find_next_sibling('td').text
print(f"Getting {name}")
req_player_url = requests.get(
f"https://basketball.realgm.com{player_url}")
soup_player = BeautifulSoup(req_player_url.content, "html.parser")
div_profile_box = soup_player.find("div", class_="profile-box")
row = {"Name": name, "URL": player_url, "pos_option1": pos}
row['pos_option2'] = div_profile_box.h2.span.text
for p in div_profile_box.find_all("p"):
try:
key, value = p.get_text(strip=True).split(':', 1)
row[key.strip()] = value.strip()
except: # not all entries have values
pass
data.append(row)
return data
urls2 = ["https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/player/All/desc","https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/2",
"https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/3",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/4",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/5",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/6",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/7",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/8",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/9",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/10",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/11",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/12",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/13",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/14",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/15",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/16",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/17",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/18",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/19",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/20",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/21",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/22",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/23",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/24",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/25",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/26",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/27",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/28",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/29",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/30",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/31",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/32",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/33",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/34",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/35",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/36",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/37",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/38",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/39",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/40",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/41",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/42",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/43",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/44",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/45",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/46",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/47",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/48",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/49",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/50",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/51",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/52",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/53",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/54",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/55",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/56",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/57",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/58",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/59",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/60",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/61",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/62",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/63",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/64",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/65",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/66",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/67",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/68",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/69",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/70",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/71",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/72",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/73",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/74",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/75",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/76",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/77",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/78",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/79",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/80",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/81",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/82",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/83",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/84",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/85",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/86",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/87",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/88",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/89",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/90",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/91",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/92",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/93",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/94",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/95",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/96"]
]
res2 = []
for url in urls2:
data = get_links2(url)
res2 = [*res2, *data]
# print(res2)
if res2 != []:
header = list(res2[0].keys())
values = [
header, *[[e[k] if e.get(k) else "" for k in header] for e in res2]]
worksheet.append_rows(values, value_input_option="USER_ENTERED")
Answers:
As mentioned there are differences in the HTML so be aware:
-
pos = td.find_next_sibling('td').text
will lead to wrong information, cause there is no position column in these tables of the new url set.
-
To get the position from the profile check if the element that holds the information is available before calling .text
row['pos_option2'] = div_profile_box.h2.span.text if div_profile_box.h2.span else None
So you would get:
Used this url https://basketball.realgm.com/international/league/119/VTB-Youth-United-League/team/1952/Avtodor-2/stats to start start the get_links2(url)
, because there was no indicator in your question, where the issue appears
{'Name': 'Klim Adaykin',
'URL': '/player/Klim-Adaykin/Summary/207122',
'pos_option1': 'AV2',
'pos_option2': None,
'Current Team': 'Avtodor-2',
'Nationality': 'Russia',
'Current NBA Status': 'Draft Eligible in 2023',
'Draft Entry': '2023 NBA Draft',
'Pre-Draft Team': 'Avtodor-2 (Russia)'}
I’m looking to scrape a set of URLs – I want to visit each link on the given URL, and return the player’s pos1 pos2 and profile details.
I have two sets of URLs I’m looking at, G League players (which is working perfectly) and International Players (which I’m completely stuck on).
The sites seem to be almost identical, but not sure what’s going on.
WORKING G LEAGUE SCRIPT:
import requests
from bs4 import BeautifulSoup
import gspread
gc = gspread.service_account(filename='creds.json')
sh = gc.open_by_key('SSID')
worksheet = sh.get_worksheet(0)
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)
def get_links(url):
data = []
req_url = requests.get(url)
soup = BeautifulSoup(req_url.content, "html.parser")
for td in soup.find_all('td', {'data-th': 'Player'}):
a_tag = td.a
name = a_tag.text
player_url = a_tag['href']
pos = td.find_next_sibling('td').text
print(f"Getting {name}")
req_player_url = requests.get(
f"https://basketball.realgm.com{player_url}")
soup_player = BeautifulSoup(req_player_url.content, "html.parser")
div_profile_box = soup_player.find("div", class_="profile-box")
row = {"Name": name, "URL": player_url, "pos_option1": pos}
row['pos_option2'] = div_profile_box.h2.span.text
for p in div_profile_box.find_all("p"):
try:
key, value = p.get_text(strip=True).split(':', 1)
row[key.strip()] = value.strip()
except: # not all entries have values
pass
data.append(row)
return data
urls = [
'https://basketball.realgm.com/dleague/players/2022',
'https://basketball.realgm.com/dleague/players/2021',
'https://basketball.realgm.com/dleague/players/2020',
'https://basketball.realgm.com/dleague/players/2019',
'https://basketball.realgm.com/dleague/players/2018',
]
res = []
for url in urls:
print(f"Getting: {url}")
data = get_links(url)
res = [*res, *data]
if res != []:
header = list(res[0].keys())
values = [
header, *[[e[k] if e.get(k) else "" for k in header] for e in res]]
worksheet.append_rows(values, value_input_option="USER_ENTERED")
Like I stated, this prints the positions along with the rest of the profile details. I’m trying to recreate for a different set of URLs, but hitting the error:
This is the script I’m stuck on, any thoughts?
import requests
from bs4 import BeautifulSoup
import gspread
gc = gspread.service_account(filename='creds.json')
sh = gc.open_by_key('1DpasSS8yC1UX6WqAbkQ515BwEEjdDL-x74T0eTW8hLM')
worksheet = sh.get_worksheet(0)
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)
def get_links2(url):
data = []
req_url = requests.get(url)
soup = BeautifulSoup(req_url.content, "html.parser")
for td in soup.select('td.nowrap'):
a_tag = td.a
if a_tag:
name = a_tag.text
player_url = a_tag['href']
pos = td.find_next_sibling('td').text
print(f"Getting {name}")
req_player_url = requests.get(
f"https://basketball.realgm.com{player_url}")
soup_player = BeautifulSoup(req_player_url.content, "html.parser")
div_profile_box = soup_player.find("div", class_="profile-box")
row = {"Name": name, "URL": player_url, "pos_option1": pos}
row['pos_option2'] = div_profile_box.h2.span.text
for p in div_profile_box.find_all("p"):
try:
key, value = p.get_text(strip=True).split(':', 1)
row[key.strip()] = value.strip()
except: # not all entries have values
pass
data.append(row)
return data
urls2 = ["https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/player/All/desc","https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/2",
"https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/3",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/4",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/5",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/6",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/7",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/8",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/9",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/10",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/11",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/12",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/13",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/14",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/15",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/16",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/17",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/18",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/19",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/20",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/21",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/22",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/23",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/24",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/25",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/26",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/27",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/28",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/29",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/30",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/31",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/32",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/33",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/34",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/35",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/36",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/37",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/38",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/39",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/40",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/41",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/42",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/43",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/44",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/45",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/46",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/47",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/48",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/49",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/50",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/51",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/52",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/53",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/54",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/55",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/56",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/57",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/58",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/59",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/60",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/61",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/62",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/63",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/64",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/65",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/66",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/67",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/68",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/69",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/70",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/71",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/72",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/73",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/74",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/75",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/76",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/77",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/78",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/79",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/80",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/81",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/82",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/83",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/84",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/85",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/86",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/87",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/88",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/89",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/90",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/91",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/92",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/93",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/94",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/95",
# # "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/96"]
]
res2 = []
for url in urls2:
data = get_links2(url)
res2 = [*res2, *data]
# print(res2)
if res2 != []:
header = list(res2[0].keys())
values = [
header, *[[e[k] if e.get(k) else "" for k in header] for e in res2]]
worksheet.append_rows(values, value_input_option="USER_ENTERED")
As mentioned there are differences in the HTML so be aware:
-
pos = td.find_next_sibling('td').text
will lead to wrong information, cause there is no position column in these tables of the new url set. -
To get the position from the profile check if the element that holds the information is available before calling
.text
row['pos_option2'] = div_profile_box.h2.span.text if div_profile_box.h2.span else None
So you would get:
Used this url https://basketball.realgm.com/international/league/119/VTB-Youth-United-League/team/1952/Avtodor-2/stats to start start the get_links2(url)
, because there was no indicator in your question, where the issue appears
{'Name': 'Klim Adaykin',
'URL': '/player/Klim-Adaykin/Summary/207122',
'pos_option1': 'AV2',
'pos_option2': None,
'Current Team': 'Avtodor-2',
'Nationality': 'Russia',
'Current NBA Status': 'Draft Eligible in 2023',
'Draft Entry': '2023 NBA Draft',
'Pre-Draft Team': 'Avtodor-2 (Russia)'}