Python Selenium Pagination with a Button on www.dub.de

Question

Im trying to get used to Python selenium because of a data literacy class at my university. I figured out a project for my class where i need some data of www.dub.de, no worries I won’t make lots of requests in a short period of time, I don’t want spam their or anyone elses servers. I’ve already figured out how to scrape alle the data i need, how to use proxies and stuff like that. But there is one problem i can’t solve for days now. There is a pagination button that just isn’t clickable for selenium.
I get the "selenium.common.exceptions.ElementNotInteractableException" exception.
I’ve researched a lot about this problem and tested anything, I’ve got rid of the cookie popup and the email notification popup which could cause that. I’ve identified the right element, which gets highlighted in the dom and grab it through its xpath, when I try to wait for it to get clickable it just times out…
The element i speak of is this one

 <button type="submit" name="tx_enterprisermarket_searchoffer[@widget_0][currentPage]" value="2">;</button>

And this is my code to click it.

def getNextOffers(proxy=None):

    # Erstellen Sie ein Proxy-Objekt und konfigurieren Sie es mit dem übergebenen Proxy-Server oder verwenden Sie keinen Proxy
    if proxy:
        ip_address, port = proxy.split(':')

        proxy_object = Proxy()
        proxy_object.proxy_type = ProxyType.MANUAL
        proxy_object.http_proxy = proxy
        proxy_object.ssl_proxy = proxy

        # capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        # proxy_object.to_capabilities(capabilities) 

        # driver = webdriver.Chrome(desired_capabilities=capabilities)

        # Konfigurieren Sie die Chrome-Optionen mit dem Proxy
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("start-maximized")
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--disable-extensions")

        # Fügen Sie das Proxy-Objekt zu den Chrome-Optionen hinzu
        chrome_options.add_argument(f'--proxy-server={ip_address}')

        # Initialisieren Sie den Webdriver mit den konfigurierten Optionen
        driver = webdriver.Chrome(options=chrome_options)
        # # Webseite öffnen
        driver.get(url_udb_offers)



    else:
        # Wenn kein Proxy übergeben wurde, verwenden Sie den Webdriver ohne Proxy
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("start-maximized")
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--disable-extensions")
        driver = webdriver.Chrome(options=chrome_options)
        # Webseite öffnen
        driver.get(url_udb_offers)

    try:
        # Warte bis das Cookie-Popup geladen ist und die "Alle akzeptieren" Schaltfläche sichtbar ist
        wait = WebDriverWait(driver, 10)
        cookie_popup = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'om-cookie-panel.active')))

        # Hier kannst du das JavaScript-Script ausführen, um die Cookies zu akzeptieren
        with open('accept_cookies.js', 'r') as script_file:
            cookie_script = script_file.read()
            driver.execute_script(cookie_script)
        
        time.sleep(5)
        print("----------0-----------")
        email_notification_popup = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'simplemodal-container')))
        with open('reject_email_notification.js', 'r') as script_file:
            reject_email_notification_script = script_file.read()
            print("----------1-----------")

            driver.execute_script(reject_email_notification_script)
            print("----------2-----------")

        # Scrollen Sie zu dem "Nächste Seite" Button, um sicherzustellen, dass er sichtbar ist
        wait = WebDriverWait(driver, 3)
        # Maximale Anzahl von Versuchen, den Button zu finden
        max_attempts = 6
        attempts = 0
        next_button = None
        
        # xpath videokurs -> waytogo um elemente zu identifizieren
        # https://www.youtube.com/watch?v=jraDTvKLLvY
        
        xpath_query = "//ul[@class='f3-widget-paginator']/li[@class='next']/button"
        next_button = driver.find_element("xpath", xpath_query)
        next_button_visible = False
        # mit dem attribut value kann die anzahl der seiten über element last ermittelt werden
        print("Gefundenes Element:", next_button.get_attribute("value"))
        wait = WebDriverWait(driver, 1)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)

        time.sleep(3)
        next_button = WebDriverWait(driver, 20).until((EC.element_to_be_clickable((By.XPATH, xpath_query))))
        next_button = driver.find_element("xpath", xpath_query)


        # while attempts < max_attempts and next_button_visible == False:
            
                
        #         next_button = driver.find_element("xpath", xpath_query)
        #         next_button_visible = driver.find_element("xpath", xpath_query).is_displayed()
        #         time.sleep(2)
            
        #         if next_button_visible == False:
        #             print("Element ist nicht sichtbar")
        #             driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        #             attempts += 1
                    
                

        
        next_button.click()
        print('finished')

    finally:
        # Den Webdriver schließen
        driver.quit()

It is my second webscraper (the first one was a real simple one) and I’m completely new to selenium, the code i commented out is the approach i want to go further with when I get the click done in a simple manner, which is why i simply made as many scrolls down the page that the element is visible in the browser. I couldn’t find any information if I really have to move the sight so that the element I want selenium to click on has to be visible, so I just did it. Also I don’t know if it will be placed on other systems with different screen sizes in the same way as on mine, so keep that in mind. Any ideas how I could get the pagination done for this website are really appreciated. I have to check all offers. Let me know if you need to see more of my code, but I guess the only one that matters is in this function. If you want to test this you can use the code down below.

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.proxy import Proxy, ProxyType

import requests
import numbers
import time
import random
import asyncio
from pyppeteer import launch

# Basis-URL
base_url = "https://www.dub.de"
url_udb_offers = 'https://www.dub.de/unternehmensboerse/unternehmen-kaufen//?tx_enterprisermarket_searchoffer%5Baction%5D=index&tx_enterprisermarket_searchoffer%5Bcontroller%5D=SearchOffer&cHash=ee9be169a1727becf83ca3e7fcb8e8bf'
filename = 'website.html'

# Proxy-Server Liste
PROXY_FILE = 'valid_proxies.txt'  # Datei mit den Proxy-Servern
PROXY_VALIDATION_TIMEOUT = 2



def choose_random_proxy():
    """
    Wählt zufällig einen Proxy-Server aus der Datei 'valid_proxies.txt' aus und gibt ihn zurück.

    :return: Der ausgewählte Proxy-Server.
    """
    # Die Liste der Proxy-Server aus der Datei 'valid_proxies.txt' laden
    with open(PROXY_FILE, 'r') as proxy_file:
        proxies = proxy_file.read().split('n')
    selected_proxy = random.choice(proxies)

   
    start_time = time.time()  # Startzeit für die Zeitmessung
    response = requests.get("http://ipinfo.io/json", proxies={'http': selected_proxy, "https": selected_proxy}, timeout=PROXY_VALIDATION_TIMEOUT)
    end_time = time.time()  # Endzeit für die Zeitmessung
    elapsed_time = end_time - start_time  # Gemessene Zeitdauer
    try:
        while response.status_code != 200 and elapsed_time > PROXY_VALIDATION_TIMEOUT:
            # Zufällig einen Proxy auswählen
            selected_proxy = random.choice(proxies)
            print(selected_proxy)
            start_time = time.time()  # Startzeit für die Zeitmessung
            response = requests.get("http://ipinfo.io/json", proxies={'http': selected_proxy, "https": selected_proxy}, timeout=PROXY_VALIDATION_TIMEOUT)
            end_time = time.time()  # Endzeit für die Zeitmessung
            elapsed_time = end_time - start_time  # Gemessene Zeitdauer
    except (requests.RequestException, requests.Timeout):



        return selected_proxy
def getNextOffers(proxy=None):

    # Erstellen Sie ein Proxy-Objekt und konfigurieren Sie es mit dem übergebenen Proxy-Server oder verwenden Sie keinen Proxy
    if proxy:
        ip_address, port = proxy.split(':')

        proxy_object = Proxy()
        proxy_object.proxy_type = ProxyType.MANUAL
        proxy_object.http_proxy = proxy
        proxy_object.ssl_proxy = proxy

        # capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        # proxy_object.to_capabilities(capabilities) 

        # driver = webdriver.Chrome(desired_capabilities=capabilities)

        # Konfigurieren Sie die Chrome-Optionen mit dem Proxy
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("start-maximized")
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--disable-extensions")

        # Fügen Sie das Proxy-Objekt zu den Chrome-Optionen hinzu
        chrome_options.add_argument(f'--proxy-server={ip_address}')

        # Initialisieren Sie den Webdriver mit den konfigurierten Optionen
        driver = webdriver.Chrome(options=chrome_options)
        # # Webseite öffnen
        driver.get(url_udb_offers)



    else:
        # Wenn kein Proxy übergeben wurde, verwenden Sie den Webdriver ohne Proxy
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("start-maximized")
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--disable-extensions")
        driver = webdriver.Chrome(options=chrome_options)
        # Webseite öffnen
        driver.get(url_udb_offers)

    try:
        # Warte bis das Cookie-Popup geladen ist und die "Alle akzeptieren" Schaltfläche sichtbar ist
        wait = WebDriverWait(driver, 10)
        cookie_popup = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'om-cookie-panel.active')))

        # Hier kannst du das JavaScript-Script ausführen, um die Cookies zu akzeptieren
        with open('accept_cookies.js', 'r') as script_file:
            cookie_script = script_file.read()
            driver.execute_script(cookie_script)
        
        time.sleep(5)
        print("----------0-----------")
        email_notification_popup = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'simplemodal-container')))
        with open('reject_email_notification.js', 'r') as script_file:
            reject_email_notification_script = script_file.read()
            print("----------1-----------")

            driver.execute_script(reject_email_notification_script)
            print("----------2-----------")

        # Scrollen Sie zu dem "Nächste Seite" Button, um sicherzustellen, dass er sichtbar ist
        wait = WebDriverWait(driver, 3)
        # Maximale Anzahl von Versuchen, den Button zu finden
        max_attempts = 6
        attempts = 0
        next_button = None
        
        # xpath videokurs -> waytogo um elemente zu identifizieren
        # https://www.youtube.com/watch?v=jraDTvKLLvY
        
        xpath_query = "//ul[@class='f3-widget-paginator']/li[@class='next']/button"
        next_button = driver.find_element("xpath", xpath_query)
        next_button_visible = False
        # mit dem attribut value kann die anzahl der seiten über element last ermittelt werden
        print("Gefundenes Element:", next_button.get_attribute("value"))
        wait = WebDriverWait(driver, 1)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)

        time.sleep(3)
        next_button = WebDriverWait(driver, 20).until((EC.element_to_be_clickable((By.XPATH, xpath_query))))
        next_button = driver.find_element("xpath", xpath_query)


        # while attempts < max_attempts and next_button_visible == False:
            
                
        #         next_button = driver.find_element("xpath", xpath_query)
        #         next_button_visible = driver.find_element("xpath", xpath_query).is_displayed()
        #         time.sleep(2)
            
        #         if next_button_visible == False:
        #             print("Element ist nicht sichtbar")
        #             driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
        #             attempts += 1
                    
                

        
        next_button.click()
        print('finished')

    finally:
        # Den Webdriver schließen
        driver.quit()


def wait_random_time(min_time, max_time):
    """
    Erzeugt eine zufällige Wartezeit zwischen min_time und max_time in Sekunden
    und wartet für diese Zeit.
    
    :param min_time: Das minimale Wartezeitintervall in Sekunden
    :param max_time: Das maximale Wartezeitintervall in Sekunden
    """
    # Generieren einer zufälligen Wartezeit zwischen min_time und max_time
    random_time = random.uniform(min_time, max_time)
    
    # Warten für die zufällige Wartezeit
    time.sleep(random_time)

def main():
    result = getNextOffers(choose_random_proxy())
    





if __name__ == "__main__":
    main()

Beside this code you need a textfile "valid_proxies.txt" which contain proxy servers in this approach:

192.168.0.0:80
192.168.0.0:80
192.168.0.0:80
192.168.0.0:80

Asked By: Thanatos-Delta

||

Source

Answer 1

Apparently there are 2 elements with the xpath you posted.
I would bet the first element is the one which is not clickable, have you tried to click the 2nd element too?

this xpath looks more accurante: //div[@class='searchResponsiveResults']//ul[@class='f3-widget-paginator']/li[@class='next']/button

Answered By: alez21

Python Selenium Pagination with a Button on www.dub.de

Question:

Answers: