Multi-threading with Selenium and Python: background processing
Question:
I am currently trying to log in to a website automatically and fill in a form.
I'm using Selenium in Python and am trying to use multithreading so that each username runs in its own driver.
The issue is that the drivers do open in parallel in the background, yet they do not seem to process the data unless they are brought to the foreground, which delays the process a lot because each one has to finish before the next is processed.
For confidentiality reasons I cannot share the website URL, but here is the code and the functions used.
import undetected_chromedriver as uc
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException,ElementClickInterceptedException,TimeoutException
from threading import Thread
import time
import pandas as pd
Chrome options:-
# Build the Chrome options object used when launching the drivers.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--headless')
chrome_options.add_argument('--enable-javascript')
chrome_options.add_argument('--disable-gpu')
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Safari/605.1.15'
# Chrome only applies a custom user agent when it is passed as the
# ``--user-agent=<value>`` switch; a bare ``User-Agent=...`` token is not a
# command-line switch, so the override was never taking effect.
chrome_options.add_argument(f'--user-agent={user_agent}')
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
# NOTE(review): anti-detection setups usually set this to False - confirm that
# True is intended here.
chrome_options.add_experimental_option('useAutomationExtension', True)
Functions used:-
def login_hbd(driver, username, password):
    """Fill in the login form shown in *driver* and submit it.

    Waits up to 20 seconds for the ``username`` field to become visible and
    up to 20 seconds for the ``submitBtn`` element to become clickable.

    Args:
        driver: Selenium WebDriver that is already showing the login page.
        username: Text typed into the field named ``username``.
        password: Text typed into the field named ``password``.

    Raises:
        TimeoutException: If the username field or the submit button is not
            ready within 20 seconds.
        NoSuchElementException: If no field named ``password`` is present.
    """
    wait = WebDriverWait(driver, 20)
    # The wait returns the located element, so a second lookup is not needed.
    username_field = wait.until(
        EC.visibility_of_element_located((By.NAME, "username"))
    )
    # Clear before typing: get_ss_hbd retries this function on the same
    # driver, and send_keys appends to any text already in the field.
    username_field.clear()
    username_field.send_keys(username)
    password_field = driver.find_element(By.NAME, "password")
    password_field.clear()
    password_field.send_keys(password)
    submit_btn = wait.until(EC.element_to_be_clickable((By.ID, "submitBtn")))
    # Click via JavaScript rather than WebElement.click(); presumably to avoid
    # the click being intercepted by an overlapping element.
    driver.execute_script("arguments[0].click();", submit_btn)
def get_ss_hbd(driver, username_list, password_list, max_attempts=None):
    """Log in with one account, save a screenshot, and shut the driver down.

    The login is retried whenever a lookup, click or wait fails. After a
    successful login the target page is opened and a screenshot named
    ``<username_list>.png`` is written to the current directory.

    Args:
        driver: Selenium WebDriver already showing the login page. It is
            always quit before this function returns or raises.
        username_list: A single username (despite the name); converted with
            ``str()`` before being typed and used as the screenshot file name.
        password_list: A single password (despite the name).
        max_attempts: Maximum number of login attempts. ``None`` (the
            default) retries indefinitely, which is the original behaviour.

    Raises:
        NoSuchElementException, ElementClickInterceptedException,
        TimeoutException: Re-raised from the last attempt once
            ``max_attempts`` is exhausted.
    """
    attempt = 0
    try:
        while True:
            attempt += 1
            try:
                login_hbd(driver, str(username_list), password_list)
                # Presumably this navigation link only exists after a
                # successful login - TODO confirm. The absolute XPath is
                # brittle and will break if the page layout changes.
                WebDriverWait(driver, 1).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[1]/div/div/header/div/div[3]/nav/ul/li[7]/a")))
                driver.get("*****")
                driver.save_screenshot(f"{username_list}.png")
                break
            except (NoSuchElementException, ElementClickInterceptedException, TimeoutException):
                if max_attempts is not None and attempt >= max_attempts:
                    raise
                continue
    finally:
        # quit() ends the whole browser session, not just the current
        # window, and runs even when an unexpected error escapes the loop,
        # so no browser or driver process is left behind.
        driver.quit()
Threading code:-
def _open_and_capture(driver, start_url, username, password):
    """Load *start_url* in *driver*, then run the login/screenshot flow."""
    driver.get(start_url)
    get_ss_hbd(driver, username, password)


# get the start time
st = time.time()
number_of_threads = len(df)
threads = []
# One driver and one thread per row of df; column 0 holds the username and
# column 1 the password. Positional .iloc avoids mixing label-based row
# lookup with positional access on the resulting row.
for username, password in zip(df.iloc[:, 0], df.iloc[:, 1]):
    # NOTE(review): undetected_chromedriver documents this parameter as
    # ``options``; confirm that ``chrome_options=`` is honoured by the
    # installed version, otherwise the options built above are ignored.
    driver = uc.Chrome(chrome_options=chrome_options,service_args=['--quiet'])
    # Drivers are still created one at a time here, but the initial page load
    # now happens inside the worker so it no longer blocks the next driver.
    t = Thread(target=_open_and_capture, args=(driver, "****", username, password))
    t.start()
    threads.append(t)
for t in threads:
    t.join()
et = time.time()
# get the execution time
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')
Answers:
The issues with undetected_chromedriver were fixed by following this issue: https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/1051
and running the commands below.
Code:
# Remove the currently installed undetected-chromedriver package.
python -m pip uninstall undetected-chromedriver
# Install undetected-chromedriver from the fix-multiple-instance branch on GitHub.
python -m pip install git+https://github.com/ultrafunkamsterdam/undetected-chromedriver@fix-multiple-instance
# Upgrade Selenium to the newest available release.
python -m pip install --upgrade selenium
I am currently trying to log in to a website automatically and fill in a form.
I'm using Selenium in Python and am trying to use multithreading so that each username runs in its own driver.
The issue is that the drivers do open in parallel in the background, yet they do not seem to process the data unless they are brought to the foreground, which delays the process a lot because each one has to finish before the next is processed.
For confidentiality reasons I cannot share the website URL, but here is the code and the functions used.
import undetected_chromedriver as uc
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException,ElementClickInterceptedException,TimeoutException
from threading import Thread
import time
import pandas as pd
Chrome options:-
# Build the Chrome options object used when launching the drivers.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--headless')
chrome_options.add_argument('--enable-javascript')
chrome_options.add_argument('--disable-gpu')
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Safari/605.1.15'
# Chrome only applies a custom user agent when it is passed as the
# ``--user-agent=<value>`` switch; a bare ``User-Agent=...`` token is not a
# command-line switch, so the override was never taking effect.
chrome_options.add_argument(f'--user-agent={user_agent}')
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
# NOTE(review): anti-detection setups usually set this to False - confirm that
# True is intended here.
chrome_options.add_experimental_option('useAutomationExtension', True)
Functions used:-
def login_hbd(driver, username, password):
    """Fill in the login form shown in *driver* and submit it.

    Waits up to 20 seconds for the ``username`` field to become visible and
    up to 20 seconds for the ``submitBtn`` element to become clickable.

    Args:
        driver: Selenium WebDriver that is already showing the login page.
        username: Text typed into the field named ``username``.
        password: Text typed into the field named ``password``.

    Raises:
        TimeoutException: If the username field or the submit button is not
            ready within 20 seconds.
        NoSuchElementException: If no field named ``password`` is present.
    """
    wait = WebDriverWait(driver, 20)
    # The wait returns the located element, so a second lookup is not needed.
    username_field = wait.until(
        EC.visibility_of_element_located((By.NAME, "username"))
    )
    # Clear before typing: get_ss_hbd retries this function on the same
    # driver, and send_keys appends to any text already in the field.
    username_field.clear()
    username_field.send_keys(username)
    password_field = driver.find_element(By.NAME, "password")
    password_field.clear()
    password_field.send_keys(password)
    submit_btn = wait.until(EC.element_to_be_clickable((By.ID, "submitBtn")))
    # Click via JavaScript rather than WebElement.click(); presumably to avoid
    # the click being intercepted by an overlapping element.
    driver.execute_script("arguments[0].click();", submit_btn)
def get_ss_hbd(driver, username_list, password_list, max_attempts=None):
    """Log in with one account, save a screenshot, and shut the driver down.

    The login is retried whenever a lookup, click or wait fails. After a
    successful login the target page is opened and a screenshot named
    ``<username_list>.png`` is written to the current directory.

    Args:
        driver: Selenium WebDriver already showing the login page. It is
            always quit before this function returns or raises.
        username_list: A single username (despite the name); converted with
            ``str()`` before being typed and used as the screenshot file name.
        password_list: A single password (despite the name).
        max_attempts: Maximum number of login attempts. ``None`` (the
            default) retries indefinitely, which is the original behaviour.

    Raises:
        NoSuchElementException, ElementClickInterceptedException,
        TimeoutException: Re-raised from the last attempt once
            ``max_attempts`` is exhausted.
    """
    attempt = 0
    try:
        while True:
            attempt += 1
            try:
                login_hbd(driver, str(username_list), password_list)
                # Presumably this navigation link only exists after a
                # successful login - TODO confirm. The absolute XPath is
                # brittle and will break if the page layout changes.
                WebDriverWait(driver, 1).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[1]/div/div/header/div/div[3]/nav/ul/li[7]/a")))
                driver.get("*****")
                driver.save_screenshot(f"{username_list}.png")
                break
            except (NoSuchElementException, ElementClickInterceptedException, TimeoutException):
                if max_attempts is not None and attempt >= max_attempts:
                    raise
                continue
    finally:
        # quit() ends the whole browser session, not just the current
        # window, and runs even when an unexpected error escapes the loop,
        # so no browser or driver process is left behind.
        driver.quit()
Threading code:-
def _open_and_capture(driver, start_url, username, password):
    """Load *start_url* in *driver*, then run the login/screenshot flow."""
    driver.get(start_url)
    get_ss_hbd(driver, username, password)


# get the start time
st = time.time()
number_of_threads = len(df)
threads = []
# One driver and one thread per row of df; column 0 holds the username and
# column 1 the password. Positional .iloc avoids mixing label-based row
# lookup with positional access on the resulting row.
for username, password in zip(df.iloc[:, 0], df.iloc[:, 1]):
    # NOTE(review): undetected_chromedriver documents this parameter as
    # ``options``; confirm that ``chrome_options=`` is honoured by the
    # installed version, otherwise the options built above are ignored.
    driver = uc.Chrome(chrome_options=chrome_options,service_args=['--quiet'])
    # Drivers are still created one at a time here, but the initial page load
    # now happens inside the worker so it no longer blocks the next driver.
    t = Thread(target=_open_and_capture, args=(driver, "****", username, password))
    t.start()
    threads.append(t)
for t in threads:
    t.join()
et = time.time()
# get the execution time
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')
The issues with undetected_chromedriver were fixed by following this issue: https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/1051
and running the commands below.
Code:
# Remove the currently installed undetected-chromedriver package.
python -m pip uninstall undetected-chromedriver
# Install undetected-chromedriver from the fix-multiple-instance branch on GitHub.
python -m pip install git+https://github.com/ultrafunkamsterdam/undetected-chromedriver@fix-multiple-instance
# Upgrade Selenium to the newest available release.
python -m pip install --upgrade selenium