Object has no attribute Python web scraping error

Question:

I’m looking to scrape a set of URLs – I want to visit each link on the given URL, and return the player’s pos1 pos2 and profile details.

I have two sets of URLs I’m looking at, G League players (which is working perfectly) and International Players (which I’m completely stuck on).

The sites seem to be almost identical, but not sure what’s going on.

WORKING G LEAGUE SCRIPT:

import requests
from bs4 import BeautifulSoup

import gspread
gc = gspread.service_account(filename='creds.json')
sh = gc.open_by_key('SSID')
worksheet = sh.get_worksheet(0)
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)


def get_links(url):
    data = []
    req_url = requests.get(url)
    soup = BeautifulSoup(req_url.content, "html.parser")

    for td in soup.find_all('td', {'data-th': 'Player'}):
        a_tag = td.a
        name = a_tag.text
        player_url = a_tag['href']
        pos = td.find_next_sibling('td').text
        print(f"Getting {name}")

        req_player_url = requests.get(
            f"https://basketball.realgm.com{player_url}")
        soup_player = BeautifulSoup(req_player_url.content, "html.parser")
        div_profile_box = soup_player.find("div", class_="profile-box")
        row = {"Name": name, "URL": player_url, "pos_option1": pos}
        row['pos_option2'] = div_profile_box.h2.span.text

        for p in div_profile_box.find_all("p"):
            try:
                key, value = p.get_text(strip=True).split(':', 1)
                row[key.strip()] = value.strip()
            except:     # not all entries have values
                pass

        data.append(row)

    return data


urls = [
    'https://basketball.realgm.com/dleague/players/2022',
    'https://basketball.realgm.com/dleague/players/2021',
    'https://basketball.realgm.com/dleague/players/2020',
    'https://basketball.realgm.com/dleague/players/2019',
    'https://basketball.realgm.com/dleague/players/2018',
]


res = []
for url in urls:
    print(f"Getting: {url}")
    data = get_links(url)
    res = [*res, *data]

if res != []:
    header = list(res[0].keys())
    values = [
        header, *[[e[k] if e.get(k) else "" for k in header] for e in res]]
    worksheet.append_rows(values, value_input_option="USER_ENTERED")

Like I stated, this prints the positions along with the rest of the profile details. I’m trying to recreate for a different set of URLs, but hitting the error:

error

This is the script I’m stuck on, any thoughts?

import requests
from bs4 import BeautifulSoup

import gspread
gc = gspread.service_account(filename='creds.json')
sh = gc.open_by_key('1DpasSS8yC1UX6WqAbkQ515BwEEjdDL-x74T0eTW8hLM')
worksheet = sh.get_worksheet(0)
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)


def get_links2(url):
    data = []
    req_url = requests.get(url)
    soup = BeautifulSoup(req_url.content, "html.parser")

    for td in soup.select('td.nowrap'):
        a_tag = td.a
        if a_tag:
            name = a_tag.text
            player_url = a_tag['href']
            pos = td.find_next_sibling('td').text
            print(f"Getting {name}")

            req_player_url = requests.get(
                f"https://basketball.realgm.com{player_url}")
            soup_player = BeautifulSoup(req_player_url.content, "html.parser")
            div_profile_box = soup_player.find("div", class_="profile-box")
            row = {"Name": name, "URL": player_url, "pos_option1": pos}
            row['pos_option2'] = div_profile_box.h2.span.text

            for p in div_profile_box.find_all("p"):
                try:
                    key, value = p.get_text(strip=True).split(':', 1)
                    row[key.strip()] = value.strip()
                except:     # not all entries have values
                    pass

            data.append(row)

    return data


urls2 = ["https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/player/All/desc","https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/2",
         "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/3",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/4",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/5",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/6",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/7",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/8",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/9",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/10",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/11",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/12",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/13",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/14",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/15",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/16",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/17",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/18",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/19",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/20",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/21",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/22",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/23",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/24",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/25",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/26",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/27",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/28",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/29",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/30",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/31",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/32",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/33",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/34",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/35",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/36",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/37",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/38",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/39",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/40",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/41",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/42",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/43",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/44",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/45",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/46",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/47",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/48",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/49",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/50",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/51",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/52",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/53",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/54",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/55",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/56",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/57",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/58",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/59",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/60",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/61",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/62",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/63",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/64",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/65",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/66",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/67",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/68",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/69",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/70",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/71",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/72",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/73",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/74",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/75",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/76",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/77",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/78",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/79",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/80",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/81",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/82",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/83",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/84",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/85",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/86",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/87",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/88",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/89",
         #  "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/90",
         #  #   "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/91",
         #  #   "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/92",
         #  #   "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/93",
         #  #   "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/94",
         #  #   "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/95",
         #  #   "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/96"]
         ]


res2 = []
for url in urls2:
    data = get_links2(url)
    res2 = [*res2, *data]

# print(res2)

if res2 != []:
    header = list(res2[0].keys())
    values = [
        header, *[[e[k] if e.get(k) else "" for k in header] for e in res2]]
    worksheet.append_rows(values, value_input_option="USER_ENTERED")
Asked By: Anthony Madle

||

Answers:

As mentioned there are differences in the HTML so be aware:

  • pos = td.find_next_sibling('td').text will lead to wrong information, cause there is no position column in these tables of the new url set.

  • To get the position from the profile check if the element that holds the information is available before calling .text

    row['pos_option2'] = div_profile_box.h2.span.text if div_profile_box.h2.span else None
    

So you would get:

Used this url https://basketball.realgm.com/international/league/119/VTB-Youth-United-League/team/1952/Avtodor-2/stats to start start the get_links2(url), because there was no indicator in your question, where the issue appears

{'Name': 'Klim Adaykin',
  'URL': '/player/Klim-Adaykin/Summary/207122',
  'pos_option1': 'AV2',
  'pos_option2': None,
  'Current Team': 'Avtodor-2',
  'Nationality': 'Russia',
  'Current NBA Status': 'Draft Eligible in 2023',
  'Draft Entry': '2023 NBA Draft',
  'Pre-Draft Team': 'Avtodor-2 (Russia)'}
Answered By: HedgeHog