How to fix this error during scraping using BeautifulSoup?


I am trying to do web scraping using the BeautifulSoup and requests Python libraries. I want to extract the news titles from the Hacker News website, but I get an error when I run my code.

import requests
from bs4 import BeautifulSoup

# Download the Hacker News front page and parse the returned HTML.
res = requests.get('https://news.ycombinator.com/news')
soup = BeautifulSoup(res.text, 'html.parser')
# Two separate CSS selections; create_custom_hn pairs them up by position.
links = soup.select('.titleline a')
subtext = soup.select('.subtext')

def create_custom_hn(links, subtext):
    """Build a list of ``{'title', 'href'}`` dicts from two parallel sequences.

    ``links`` and ``subtext`` are indexed with the same position, so entry
    ``i`` of ``subtext`` is taken to describe entry ``i`` of ``links``.
    Only entries whose subtext contains a ``.score`` element are kept.

    Raises:
        IndexError: when ``subtext`` is a list with fewer entries than
            ``links``.
    """
    hn = []
    for index, item in enumerate(links):
        title = links[index].getText()
        href = links[index].get('href', None)
        # Same index into both sequences; this lookup fails once index
        # runs past the end of subtext.
        votes = subtext[index].select('.score')
        if len(votes):
            # Strips the ' points' suffix to get the number; the value is
            # computed but not stored in the result dict.
            points = int(votes[0].getText().replace(' points', ''))
            hn.append({'title': title, 'href': href})
    return hn

# Build the story list from the two selections and print it.
print(create_custom_hn(links, subtext))

The error says

votes = subtext[index].select('.score')
IndexError: list index out of range
Asked By: Hetarth7



Try to select your elements more specifically: your selection `soup.select('.titleline a')` matches more elements (60) than you actually want to select (30):

[<a href="">Urllib3 in 2022</a>,
 <a href="from?"><span class="sitestr"></span></a>,...]

I would also recommend iterating over the elements in a different way, so that you are able to handle missing values.


import requests
from bs4 import BeautifulSoup

res = requests.get('https://news.ycombinator.com/news')
soup = BeautifulSoup(res.text, 'html.parser')

data = []

# Walk the story rows one at a time so a missing value only affects that row.
for e in soup.select('tr.athing'):
    title_link = e.select_one('.titleline a')
    # NOTE(review): assumes the score sits in the table row that follows the
    # story row -- confirm against the live markup.
    detail_row = e.find_next_sibling('tr')
    score = detail_row.select_one('.score') if detail_row is not None else None
    data.append({
        'title': title_link.get_text(),
        'url': title_link.get('href'),
        # Keep only the number from e.g. "93 points"; None when no score exists.
        'votes': score.get_text().split()[0] if score is not None else None,
    })


[{'title': 'Urllib3 in 2022', 'url': '', 'votes': '93'}, {'title': 'First public release of Pushup: a new compiler for making web apps in Go', 'url': '', 'votes': '16'}, {'title': 'Intelligence – A good collection of great OSINT Resources', 'url': '', 'votes': '109'}, {'title': 'Microsoft is preparing to add ChatGPT to Bing', 'url': '', 'votes': '755'}, {'title': 'Juan Tamariz, the godfather of close-up card magic', 'url': '', 'votes': '31'}, {'title': 'The Expanding Dark Forest and Generative AI', 'url': '', 'votes': '223'}, {'title': 'Irreconcilable differences between local and distributed computing (1994)', 'url': '', 'votes': '29'},...]
Answered By: HedgeHog

Here is a fixed version of the code from the question:

import requests
from bs4 import BeautifulSoup

# Download the Hacker News front page and parse the returned HTML.
res = requests.get("https://news.ycombinator.com/news")
soup = BeautifulSoup(res.text, "html.parser")
# The child combinator keeps only anchors that are direct children of
# .titleline.
links = soup.select(".titleline > a")

def create_custom_hn(links):
    """Build a list of ``{"title", "href", "points"}`` dicts from title links.

    Args:
        links: Iterable of title anchor elements. Each must provide
            ``getText()``, ``get(name, default)`` and ``find_next(class_=...)``.

    Returns:
        One dict per link, in input order. ``points`` is the leading integer
        of the text of the next ``.score`` element, or ``0`` when no such
        element is found or its text is empty.
    """
    hn = []
    for link in links:
        title = link.getText()
        href = link.get("href", None)
        # NOTE(review): find_next searches forward through the document, so a
        # story without its own score may pick up a later story's score --
        # confirm against the page structure.
        votes = link.find_next(class_="score")
        # Take the leading number so both "1 point" and "97 points" parse,
        # and fall back to 0 instead of crashing when there is no score.
        score_words = votes.getText().split() if votes is not None else []
        points = int(score_words[0]) if score_words else 0

        hn.append({"title": title, "href": href, "points": points})
    return hn



        "title": "Urllib3 in 2022",
        "href": "",
        "points": 97,
        "title": "First public release of Pushup: a new compiler for making web apps in Go",
        "href": "",
        "points": 18,
        "title": "Intelligence – A good collection of great OSINT Resources",
        "href": "",
        "points": 113,
        "title": "Microsoft is preparing to add ChatGPT to Bing",
        "href": "",
        "points": 760,

...and so on.
Answered By: Andrej Kesely