Xpath wrong using selenium
Question:
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer
# Raised by find_element when the XPath matches nothing; imported here so the
# loop below only swallows that one expected failure.
from selenium.common.exceptions import NoSuchElementException

# Browser configuration for the scraping session.
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects the size as "width,height"; the "1920x1080" form is not parsed.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# Hand the options to the driver; previously they were built but never used.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
wait = WebDriverWait(driver, 20)
URL = 'https://www.askgamblers.com/online-casinos/reviews/yukon-gold-casino-casino'
driver.get(URL)

# Inspect every review-text section found on the page.
data = driver.find_elements(By.XPATH, "//section[@class='review-text richtext']")
for row in data:
    try:
        # NOTE: contains(.,'Games') also matches headings such as
        # "Virtual Games" -- this is the behaviour the question is about.
        para0 = row.find_element(By.XPATH, "//h2[text()[contains(.,'Games')]]/following-sibling::p[following::h2[text()[contains(.,'Support')]]]").text
    except NoSuchElementException:
        # Nothing matched for this section: skip it instead of printing an
        # undefined or stale para0.
        continue
    print(para0)
I want to collect only the text under the Games heading, but my XPath also returns the text under the Virtual Games heading.
How can I restrict the contains() condition so that it matches only the Games heading?
Please recommend a solution. This is the page link: https://www.askgamblers.com/online-casinos/reviews/yukon-gold-casino-casino
Want these only
Answers:
`[contains(.,'Games')]` will match both `Games` and `Virtual Games`.
What you can do here is:
- Use equals instead of contains, like this: "[text()='Games']"
- or use starts-with: "[starts-with(text(), 'Games')]"
So this line para0= row.find_element(By.XPATH,"//h2[text()[contains(.,'Games')]]/following-sibling::p[following::h2[text()[contains(.,'Support')]]]").text
can be changed to
para0= row.find_element(By.XPATH,"//h2[text()='Games']/following-sibling::p[following::h2[contains(.,'Support')]]").text
or
para0= row.find_element(By.XPATH,"//h2[starts-with(text(), 'Games')]/following-sibling::p[following::h2[contains(.,'Support')]]").text
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer
# Raised by find_element when the XPath matches nothing; imported here so the
# loop below only swallows that one expected failure.
from selenium.common.exceptions import NoSuchElementException

# Browser configuration for the scraping session.
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects the size as "width,height"; the "1920x1080" form is not parsed.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# Hand the options to the driver; previously they were built but never used.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
wait = WebDriverWait(driver, 20)
URL = 'https://www.askgamblers.com/online-casinos/reviews/yukon-gold-casino-casino'
driver.get(URL)

# Inspect every review-text section found on the page.
data = driver.find_elements(By.XPATH, "//section[@class='review-text richtext']")
for row in data:
    try:
        # NOTE: contains(.,'Games') also matches headings such as
        # "Virtual Games" -- this is the behaviour the question is about.
        para0 = row.find_element(By.XPATH, "//h2[text()[contains(.,'Games')]]/following-sibling::p[following::h2[text()[contains(.,'Support')]]]").text
    except NoSuchElementException:
        # Nothing matched for this section: skip it instead of printing an
        # undefined or stale para0.
        continue
    print(para0)
I want to collect only the text under the Games heading, but my XPath also returns the text under the Virtual Games heading.
How can I restrict the contains() condition so that it matches only the Games heading?
Please recommend a solution. This is the page link: https://www.askgamblers.com/online-casinos/reviews/yukon-gold-casino-casino
Want these only
`[contains(.,'Games')]` will match both `Games` and `Virtual Games`.
What you can do here is:
- Use equals instead of contains, like this: "[text()='Games']"
- or use starts-with: "[starts-with(text(), 'Games')]"
So this line para0= row.find_element(By.XPATH,"//h2[text()[contains(.,'Games')]]/following-sibling::p[following::h2[text()[contains(.,'Support')]]]").text
can be changed to
para0= row.find_element(By.XPATH,"//h2[text()='Games']/following-sibling::p[following::h2[contains(.,'Support')]]").text
or
para0= row.find_element(By.XPATH,"//h2[starts-with(text(), 'Games')]/following-sibling::p[following::h2[contains(.,'Support')]]").text