Page navigation click without using current_url python selenium

Question

How can I navigate through each page without using driver.current_url? In my full code, I get a bunch of errors once I navigate through the pages in a loop. Without it, the code runs fine but can only go through one page. I want to navigate through as many pages as possible. Any help is appreciated, thanks.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service

# Start Chrome through a chromedriver service and open the first results page.
# NOTE(review): the path below contains no separators -- presumably its
# backslashes were lost when the snippet was pasted; confirm the real location.
driver_service = Service(executable_path="C:Program Files (x86)chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()  # load web driver
wait = WebDriverWait(driver, 5)  # not referenced again in this snippet

url_test = driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')  # url_test is never used below
# Build a "?page=N" template from whatever URL the browser reports after loading.
url_template = driver.current_url
template = url_template+ '?page={}'
for page in range(2,5):
    # Read the job hrefs as plain strings from the results page that is
    # currently loaded, before navigating away from it.
    link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")]
    for job in link_job:
        driver.get(job)
        try:
            # Wait up to 3 seconds for the apply link that has target='_self', then click it.
            quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")))
            quick_apply.click()
            #sleep(3)
        except:
            # Any failure of the wait or the click is reported and the loop moves on.
            print("No records found " + job)
            pass
        sleep(3)  # NOTE: `sleep` is not imported by this snippet's import lines
    # Load results page `page`; its links are read at the top of the next
    # iteration, so the page loaded on the final iteration (page 4) is never scraped.
    driver.get(template.format(page))

Asked By: jhgjhgkk

||

Source

Answer 1

Navigating to each individual job page while following the pagination of the search results:

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Search-results URL template; {p} is the 1-based results page number.
url = 'https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA?page={p}'
base_url = 'https://www.seek.com.au'  # job hrefs on the results page are site-relative
timeout = 30  # seconds; without a timeout a stalled server would hang the scrape

data = []
# A single Session reuses the underlying connection for every request.
with requests.Session() as session:
    for p in range(1, 20):
        listing = session.get(url.format(p=p), timeout=timeout)
        soup = BeautifulSoup(listing.text, 'lxml')

        job_anchors = soup.select('div[class="yvsb870 _14uh9944u _14uh9944s"] a')
        if not job_anchors:
            # No job cards on this page: we ran past the last results page
            # (or the site's generated class names changed), so stop asking.
            break

        for a in job_anchors:
            href = a.get('href')
            if not href:
                continue  # anchor without a target; nothing to visit
            u = base_url + href
            #print(u)
            soup2 = BeautifulSoup(session.get(u, timeout=timeout).text, 'lxml')
            heading = soup2.h1
            if heading is None:
                # Job page without an <h1> (e.g. an error page): skip it
                # instead of aborting the whole run with AttributeError.
                continue
            data.append({'Title': heading.get_text(strip=True)})

df = pd.DataFrame(data)
print(df)

Output:

                      Title
0                Software Business Analyst
1                         Technical Writer
2             Specialist: Data & Analytics
3                             Data Analyst
4                             Data Analyst
..                                     ...
413               Analyst, Falcon Complete
414         Manufacturing Systems Engineer
415                   Consultant Technical
416  Senior Advisor Technology Integration
417            Senior Consultant Technical

[418 rows x 1 columns]

Answered By: F.Hoque

Answer 2

It seems your problem is a StaleElementReferenceException that is raised when you go back from a job page to the job search results page.
The simplest way to overcome this problem is to keep the URL of the job search results page.
Actually, this is the only point I changed in your code, and it works.
I also replaced driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']") with wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[@data-automation='jobTitle']"))) for better reliability: the links are read only once they are actually rendered.
The code below works, but the web site itself responds badly.

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("start-maximized")

# Raw string so the backslashes of the Windows path are kept literally.
# NOTE(review): path reconstructed as an example location -- point it at your own chromedriver.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 10)

# Search-results URL template; {p} is the 1-based results page number.
url = 'https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA?page={p}'
job_links_locator = (By.XPATH, "//a[@data-automation='jobTitle']")
apply_locator = (By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")

try:
    for p in range(1, 20):
        # Fill in the page number; loading the bare template would request
        # the literal "?page={p}" on every iteration.
        driver.get(url.format(p=p))
        try:
            anchors = wait.until(EC.visibility_of_all_elements_located(job_links_locator))
        except TimeoutException:
            # No job links showed up in time: past the last results page.
            break

        # Copy the hrefs out as plain strings *before* leaving the page, so
        # no WebElement is touched after navigation (that is what goes stale).
        link_job = [x.get_attribute('href') for x in anchors]
        for job in link_job:
            driver.get(job)
            try:
                wait.until(EC.element_to_be_clickable(apply_locator)).click()
                print("applied")
            except WebDriverException:
                # Covers the wait timing out as well as a failed click, but
                # lets KeyboardInterrupt and programming errors through.
                print("No records found " + job)
        # No need to reload the results page here: the next iteration
        # navigates straight to the next page's URL.
finally:
    # End the browser session and the chromedriver process, even on error.
    driver.quit()

Answered By: Prophet

Answer 3

If I understand you correctly, you want to determine dynamically how many pages there are and loop over each of them.
I have managed to achieve this by using a while loop and checking on each page whether the "Next" button at the bottom is present. If it is not, the last page was reached and you can exit the loop.

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from time import sleep

# Raw string: in a normal string literal "\U" starts a unicode escape, which
# makes this Windows path a SyntaxError.
driver_service = Service(executable_path=r"C:\Users\Stefan\bin\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()  # load web driver
wait = WebDriverWait(driver, 5)        # for the job links on a results page
apply_wait = WebDriverWait(driver, 3)  # shorter wait for the apply link on a job page

# The results URL is known up front, so the page template is built from it
# directly instead of reading driver.current_url.
base_url = 'https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA'
template = base_url + '?page={}'
job_links_locator = (By.XPATH, "//a[@data-automation='jobTitle']")
next_locator = (By.XPATH, "//a[@title='Next']")
apply_locator = (By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")

page = 1
try:
    while True:
        driver.get(template.format(page))

        try:
            anchors = wait.until(EC.presence_of_all_elements_located(job_links_locator))
        except TimeoutException:
            # No job links on this page: nothing (more) to process.
            break

        # Read everything needed from the results page while it is still
        # loaded: the hrefs as plain strings, and whether a "Next" link exists.
        # find_elements returns an empty list when nothing matches.
        link_job = [x.get_attribute('href') for x in anchors]
        has_next = bool(driver.find_elements(*next_locator))

        for job in link_job:
            driver.get(job)
            try:
                quick_apply = apply_wait.until(EC.element_to_be_clickable(apply_locator))
                quick_apply.click()
            except WebDriverException:
                # Covers the wait timing out as well as a failed click.
                print("No records found " + job)
            sleep(3)  # pause between job pages

        # Decide *after* processing, so the last page (the one without a
        # "Next" link) is handled too instead of being skipped.
        if not has_next:
            break
        page += 1
finally:
    # quit() ends the whole session and the chromedriver process, even on error;
    # close() would only close the current window.
    driver.quit()

Answered By: Stefan

Page navigation click without using current_url python selenium

Question:

Answers: