How to scrape data from a page that lazy-loads its content: it initially shows only 10 items and appends more each time you scroll down
Question:
https://www.fynd.com/brands/
I am trying to scrape the data from this page and get all the data in the title div tag, but there are many title tags as you scroll down. Initially, when we load the page it will only show a few brands and then when we manually scroll down it will keep on adding brands. The below code is what I am using :
# Question code: load the brands page, scroll once, then parse the brand
# titles out of the rendered HTML.  As asked in the question, a single
# scroll only triggers one extra batch of lazy-loaded brands, so most
# titles never appear in driver.page_source.
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import pandas as pd
import time
from selenium.common.exceptions import ElementClickInterceptedException

url = "https://www.fynd.com/brands/"
# Raw string so the backslashes are real path separators; the original
# "D:\chromedriver_win32chromedriver.exe" lost the separator before the
# executable name ("\c" is not an escape, so the dir and exe names fused).
driver = webdriver.Chrome(executable_path=r"D:\chromedriver_win32\chromedriver.exe")
driver.get(url)
# Jump to the bottom once to trigger the first lazy-load batch.
driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")

soup = BeautifulSoup(driver.page_source, "html.parser")
title = soup.find_all('span', class_="ukt-title clrWhite")
# Collect the brand names that are currently present in the DOM.
all_titles = [jelly.text.strip() for jelly in title]
print(all_titles)
Answers:
Try the below code :
It is an infinite `while` loop over the list of web elements.
Make sure you keep the indentation exactly as shown:
# Scroll each brand card into view so the page keeps lazy-loading the next
# batch, printing every card's text as we go.  The snippet as originally
# posted used By, ActionChains and sleep without importing them.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep

driver.get("https://www.fynd.com/brands/")
while True:  # NOTE: runs forever; interrupt manually once all brands are printed
    for item in driver.find_elements(By.XPATH, "//div[@data-cardtype='BRANDS']"):
        # Moving to the element scrolls it into view, which triggers the
        # page's infinite-scroll loading.
        ActionChains(driver).move_to_element(item).perform()
        sleep(0.1)  # purely cosmetic pause; safe to remove
        print(item.text)
You can speed this script up by removing the 0.1-second sleep;
I only added it so you can watch the scrolling happen.
I think you should try this.
# Same approach with a longer pause: hover each brand card so the page
# lazy-loads the next batch, printing each card's text.  The snippet as
# originally posted used By, ActionChains and sleep without importing them.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep

driver.get("https://www.fynd.com/brands/")
while True:  # NOTE: runs forever; interrupt manually once all brands are printed
    for item in driver.find_elements(By.XPATH, "//div[@data-cardtype='BRANDS']"):
        # Scrolling the element into view triggers the infinite-scroll load.
        ActionChains(driver).move_to_element(item).perform()
        sleep(0.5)  # slow enough to watch; shrink or remove to go faster
        print(item.text)
This one is a little different, but it works:
from selenium import webdriver
import chromedriver_autoinstaller
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.common.action_chains import ActionChains

# Install a chromedriver that matches the local Chrome, then crawl the
# site's infinite scroller, writing every question link to yeshivaLinks.txt.
chromedriver_autoinstaller.install()
driver = webdriver.Chrome()

# Hoist the XPath used for every lookup below.
LINK_XPATH = '//*[@id="myLessonsScroller"]/ul/content-preview/a'

i = 0  # number of links written so far
# Context manager guarantees the file is flushed and closed even when the
# endless loop below is interrupted (the original never closed it).
with open('yeshivaLinks.txt', 'w') as f:
    driver.get("https://www.yeshiva.org.il/ask/filter")
    print(len(driver.find_elements(By.XPATH, LINK_XPATH)))
    for a in driver.find_elements(By.XPATH, LINK_XPATH):
        print(a.get_attribute('href'))
    while True:  # NOTE: intentionally endless; stop it manually
        for a in driver.find_elements(By.XPATH, LINK_XPATH):
            # Hovering the link scrolls it into view, keeping lazy loading alive.
            ActionChains(driver).move_to_element(a).perform()
            print(a.get_attribute('href'))
            f.write(a.get_attribute('href') + '\n')  # newline escape was mangled to 'n' in the original
            i = i + 1
            # When we are within 15 links of the end of the currently loaded
            # list, nudge the page a few times to load the next batch.
            if i == (len(driver.find_elements(By.XPATH, LINK_XPATH)[i:]) - 15):
                # Separate loop variable: the original reused `i` here,
                # clobbering the link counter.
                for _ in range(10):
                    lastHeight = driver.execute_script("return document.body.scrollHeight")
                    print(lastHeight)
                    driver.execute_script('window.scrollTo(0, document.body.scrollHeight-50);')
                    time.sleep(1)
https://www.fynd.com/brands/
I am trying to scrape the data from this page and get all the data in the title div tag, but there are many title tags as you scroll down. Initially, when we load the page it will only show a few brands and then when we manually scroll down it will keep on adding brands. The below code is what I am using :
# Question code: load the brands page, scroll once, then parse the brand
# titles out of the rendered HTML.  As asked in the question, a single
# scroll only triggers one extra batch of lazy-loaded brands, so most
# titles never appear in driver.page_source.
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import pandas as pd
import time
from selenium.common.exceptions import ElementClickInterceptedException

url = "https://www.fynd.com/brands/"
# Raw string so the backslashes are real path separators; the original
# "D:\chromedriver_win32chromedriver.exe" lost the separator before the
# executable name ("\c" is not an escape, so the dir and exe names fused).
driver = webdriver.Chrome(executable_path=r"D:\chromedriver_win32\chromedriver.exe")
driver.get(url)
# Jump to the bottom once to trigger the first lazy-load batch.
driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")

soup = BeautifulSoup(driver.page_source, "html.parser")
title = soup.find_all('span', class_="ukt-title clrWhite")
# Collect the brand names that are currently present in the DOM.
all_titles = [jelly.text.strip() for jelly in title]
print(all_titles)
Try the below code :
It is an infinite `while` loop over the list of web elements.
Make sure you keep the indentation exactly as shown:
# Scroll each brand card into view so the page keeps lazy-loading the next
# batch, printing every card's text as we go.  The snippet as originally
# posted used By, ActionChains and sleep without importing them.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep

driver.get("https://www.fynd.com/brands/")
while True:  # NOTE: runs forever; interrupt manually once all brands are printed
    for item in driver.find_elements(By.XPATH, "//div[@data-cardtype='BRANDS']"):
        # Moving to the element scrolls it into view, which triggers the
        # page's infinite-scroll loading.
        ActionChains(driver).move_to_element(item).perform()
        sleep(0.1)  # purely cosmetic pause; safe to remove
        print(item.text)
You can speed this script up by removing the 0.1-second sleep;
I only added it so you can watch the scrolling happen.
I think you should try this.
# Same approach with a longer pause: hover each brand card so the page
# lazy-loads the next batch, printing each card's text.  The snippet as
# originally posted used By, ActionChains and sleep without importing them.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep

driver.get("https://www.fynd.com/brands/")
while True:  # NOTE: runs forever; interrupt manually once all brands are printed
    for item in driver.find_elements(By.XPATH, "//div[@data-cardtype='BRANDS']"):
        # Scrolling the element into view triggers the infinite-scroll load.
        ActionChains(driver).move_to_element(item).perform()
        sleep(0.5)  # slow enough to watch; shrink or remove to go faster
        print(item.text)
This one is a little different, but it works:
from selenium import webdriver
import chromedriver_autoinstaller
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.common.action_chains import ActionChains

# Install a chromedriver that matches the local Chrome, then crawl the
# site's infinite scroller, writing every question link to yeshivaLinks.txt.
chromedriver_autoinstaller.install()
driver = webdriver.Chrome()

# Hoist the XPath used for every lookup below.
LINK_XPATH = '//*[@id="myLessonsScroller"]/ul/content-preview/a'

i = 0  # number of links written so far
# Context manager guarantees the file is flushed and closed even when the
# endless loop below is interrupted (the original never closed it).
with open('yeshivaLinks.txt', 'w') as f:
    driver.get("https://www.yeshiva.org.il/ask/filter")
    print(len(driver.find_elements(By.XPATH, LINK_XPATH)))
    for a in driver.find_elements(By.XPATH, LINK_XPATH):
        print(a.get_attribute('href'))
    while True:  # NOTE: intentionally endless; stop it manually
        for a in driver.find_elements(By.XPATH, LINK_XPATH):
            # Hovering the link scrolls it into view, keeping lazy loading alive.
            ActionChains(driver).move_to_element(a).perform()
            print(a.get_attribute('href'))
            f.write(a.get_attribute('href') + '\n')  # newline escape was mangled to 'n' in the original
            i = i + 1
            # When we are within 15 links of the end of the currently loaded
            # list, nudge the page a few times to load the next batch.
            if i == (len(driver.find_elements(By.XPATH, LINK_XPATH)[i:]) - 15):
                # Separate loop variable: the original reused `i` here,
                # clobbering the link counter.
                for _ in range(10):
                    lastHeight = driver.execute_script("return document.body.scrollHeight")
                    print(lastHeight)
                    driver.execute_script('window.scrollTo(0, document.body.scrollHeight-50);')
                    time.sleep(1)