Having difficulty printing items from different pages together
Question:
I’ve written a script in Python in combination with Selenium to parse the links of different restaurants from the site’s landing page and then scrape the name and address of each restaurant after navigating to its target page. A few restaurants have a green Featured icon attached to their links, as in the image below.
What I want to do is scrape that information (whether a restaurant is featured) from the landing page, but print it along with the name and address when my browser is on the target page.
How can I print the name, address and whether a restaurant is Featured at the same time in my current print command?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def fetch_info(driver, link):
    """Print the name and address of every restaurant linked from *link*.

    Loads the landing page, collects the ``href`` of every
    ``a.restaurant-header`` element, then opens each target page in turn
    and prints its name and address on separate lines.

    Relies on the module-level ``wait`` object created in the
    ``__main__`` section of this script.
    """
    driver.get(link)
    anchors = wait.until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a.restaurant-header"))
    )
    # Keep plain href strings: the loop below navigates away from the
    # landing page, so only the URLs are carried forward.
    itemlinks = [item.get_attribute("href") for item in anchors]
    for itemlink in itemlinks:
        driver.get(itemlink)
        name = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.name"))
        ).text
        address = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".address-text-rest-menu span"))
        ).text
        # A real newline escape, so name and address land on separate lines
        # instead of being joined by a literal "n".
        print(f'{name}\n{address}')
if __name__ == '__main__':
    driver = webdriver.Chrome()
    # Kept at module level on purpose: fetch_info() reads ``wait`` as a global.
    wait = WebDriverWait(driver, 10)
    url = "https://eatstreet.com/madison-wi/restaurants"
    try:
        fetch_info(driver, url)
    finally:
        # Shut the browser down whether or not the scrape succeeded.
        driver.quit()
Expected results (the Featured flag is available on the landing page):
Doughboy's Pizza - Cottage Grove
447 W. Cottage Grove Rd Cottage Grove WI, 53527
Not Featured
Silver Mine Subs - Beltline
2601 W Beltline Hwy Madison WI, 53713
Not Featured
Adamah Neighborhood Table
611 Langdon St Madison WI, 53703
Featured
One such Featured icon, attached to some links on the landing page.
Answers:
Something like the following? I have parsed the required info into a list which you can then loop over and navigate to as required. Print at the page if you want, etc.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import re
url = 'https://eatstreet.com/madison-wi/restaurants'
d = webdriver.Chrome()
d.get(url)

# Wait until the restaurant containers are present on the landing page.
containers = WebDriverWait(d, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".rest-list-information"))
)
# The marker is a literal attribute string, so a substring test is enough;
# used as a regex pattern, its "." would match any character.
featured = [
    'featured' if 'ng-if="::restaurant.featured"' in ad.get_attribute('innerHTML') else 'No'
    for ad in containers
]
# find_elements(By.CSS_SELECTOR, ...) replaces find_elements_by_css_selector,
# which was removed in Selenium 4.3.
titles = [
    [title.text, title.get_attribute('href')]
    for title in d.find_elements(By.CSS_SELECTOR, ".rest-list-information a")
]
# Pairs each [name, href] with its flag by position.
# NOTE(review): assumes exactly one <a> per container - confirm against the page.
results = list(zip(titles, featured))
for result in results:
    # if result[1] == 'featured':
    #     print(result[0][1])  # navigate if required etc
    print(result[0][0], result[1])
    # d.get(result[0][1])  ## do what you want here
You should look for the div that contains both the restaurant link and the related Featured button, instead of just the restaurant link:
<div class="rest-list-information">
<a href="/madison-wi/restaurants/adamah-neighborhood-table-madison">Adamah Neighborhood Table</a>
<div class="featured-border featured-border--green featured-border-left" style="">
<span>Featured</span>
</div>
</div>
This way you can get the two related items, restaurant name and Featured button.
Note: This is not tested. I don’t remember Selenium/Python syntax very well, but it should give you a start.
from selenium.common.exceptions import NoSuchElementException

# find_elements() already returns a list, one container per restaurant.
restaurants = driver.find_elements(By.CLASS_NAME, "rest-list-information")
for restaurant in restaurants:
    # The name is the text of the link inside the container; the container
    # div itself carries no href.
    restaurant_name = restaurant.find_element(By.TAG_NAME, "a").text
    try:
        # Search inside this restaurant's container only.
        featured = restaurant.find_element(
            By.CSS_SELECTOR, "div[class*='featured-border--green']"
        ).text
    except NoSuchElementException:
        # No green badge in this container: the restaurant is not featured.
        featured = "No"
If you want to print name along with Featured (if it’s found), try
def fetch_info(driver, link):
    """Print each restaurant's name together with its Featured status.

    The Featured status is read from the landing page *link* (a
    ``Featured`` span in a div following the restaurant link); the name is
    read from each restaurant's own page. Both are printed together, on
    separate lines, while the browser is on the target page.

    Relies on a module-level ``wait`` object defined by the surrounding
    script.
    """
    driver.get(link)
    items = wait.until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a.restaurant-header"))
    )
    featured_xpath = './following-sibling::div//span[.="Featured"]'
    # find_elements() returns an empty list when nothing matches, so no
    # try/except is needed; it also replaces find_element_by_xpath, which
    # was removed in Selenium 4.3.
    featured = [
        'Featured' if item.find_elements(By.XPATH, featured_xpath) else 'Not featured'
        for item in items
    ]
    # Read the hrefs while still on the landing page, before navigating.
    itemlinks = [item.get_attribute("href") for item in items]
    for itemlink, is_featured in zip(itemlinks, featured):
        driver.get(itemlink)
        name = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.name"))
        ).text
        # A real newline escape rather than a literal "n" between the values.
        print(f'{name}\n{is_featured}')
I would do it the following way instead:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def fetch_info(driver, link):
    """Print name, address and Featured status for every listed restaurant.

    First pass (landing page *link*): for each
    ``.rest-list-information > .information`` container, record the
    restaurant link and the text of its featured badge, or
    ``"Not Featured"`` when the container has no badge.

    Second pass: open each recorded link and print the name and address
    found there together with the status recorded in the first pass.
    """
    wait = WebDriverWait(driver, 10)  # one wait object reused for every lookup
    driver.get(link)
    all_items = []
    containers = wait.until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, ".rest-list-information > .information")
        )
    )
    for item in containers:
        item_link = item.find_element(By.CSS_SELECTOR, "a.restaurant-header").get_attribute("href")
        # find_elements() yields an empty list when there is no badge, so a
        # missing badge needs no blanket ``except Exception`` that could also
        # hide unrelated failures.
        badges = item.find_elements(
            By.CSS_SELECTOR, ".featured-container[ng-if='::restaurant.featured']"
        )
        featured = badges[0].text if badges else "Not Featured"
        all_items.append((item_link, featured))
    for item_link, featured in all_items:
        driver.get(item_link)
        name = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.name"))
        ).text
        address = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".address-text-rest-menu span"))
        ).text
        print(name, address, featured)
if __name__ == '__main__':
    driver = webdriver.Chrome()
    url = "https://eatstreet.com/madison-wi/restaurants"
    try:
        fetch_info(driver, url)
    finally:
        # Shut the browser down whether or not the scrape succeeded.
        driver.quit()
I’ve written a script in Python in combination with Selenium to parse the links of different restaurants from the site’s landing page and then scrape the name and address of each restaurant after navigating to its target page. A few restaurants have a green Featured icon attached to their links, as in the image below.
What I want to do is scrape that information (whether a restaurant is featured) from the landing page, but print it along with the name and address when my browser is on the target page.
How can I print the name, address and whether a restaurant is Featured at the same time in my current print command?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def fetch_info(driver, link):
    """Print the name and address of every restaurant linked from *link*.

    Loads the landing page, collects the ``href`` of every
    ``a.restaurant-header`` element, then opens each target page in turn
    and prints its name and address on separate lines.

    Relies on the module-level ``wait`` object created in the
    ``__main__`` section of this script.
    """
    driver.get(link)
    anchors = wait.until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a.restaurant-header"))
    )
    # Keep plain href strings: the loop below navigates away from the
    # landing page, so only the URLs are carried forward.
    itemlinks = [item.get_attribute("href") for item in anchors]
    for itemlink in itemlinks:
        driver.get(itemlink)
        name = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.name"))
        ).text
        address = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".address-text-rest-menu span"))
        ).text
        # A real newline escape, so name and address land on separate lines
        # instead of being joined by a literal "n".
        print(f'{name}\n{address}')
if __name__ == '__main__':
    driver = webdriver.Chrome()
    # Kept at module level on purpose: fetch_info() reads ``wait`` as a global.
    wait = WebDriverWait(driver, 10)
    url = "https://eatstreet.com/madison-wi/restaurants"
    try:
        fetch_info(driver, url)
    finally:
        # Shut the browser down whether or not the scrape succeeded.
        driver.quit()
Expected results (the Featured flag is available on the landing page):
Doughboy's Pizza - Cottage Grove
447 W. Cottage Grove Rd Cottage Grove WI, 53527
Not Featured
Silver Mine Subs - Beltline
2601 W Beltline Hwy Madison WI, 53713
Not Featured
Adamah Neighborhood Table
611 Langdon St Madison WI, 53703
Featured
One such Featured icon, attached to some links on the landing page.
Something like the following? I have parsed the required info into a list which you can then loop over and navigate to as required. Print at the page if you want, etc.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import re
url = 'https://eatstreet.com/madison-wi/restaurants'
d = webdriver.Chrome()
d.get(url)

# Wait until the restaurant containers are present on the landing page.
containers = WebDriverWait(d, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".rest-list-information"))
)
# The marker is a literal attribute string, so a substring test is enough;
# used as a regex pattern, its "." would match any character.
featured = [
    'featured' if 'ng-if="::restaurant.featured"' in ad.get_attribute('innerHTML') else 'No'
    for ad in containers
]
# find_elements(By.CSS_SELECTOR, ...) replaces find_elements_by_css_selector,
# which was removed in Selenium 4.3.
titles = [
    [title.text, title.get_attribute('href')]
    for title in d.find_elements(By.CSS_SELECTOR, ".rest-list-information a")
]
# Pairs each [name, href] with its flag by position.
# NOTE(review): assumes exactly one <a> per container - confirm against the page.
results = list(zip(titles, featured))
for result in results:
    # if result[1] == 'featured':
    #     print(result[0][1])  # navigate if required etc
    print(result[0][0], result[1])
    # d.get(result[0][1])  ## do what you want here
You should look for the div that contains both the restaurant link and the related Featured button, instead of just the restaurant link:
<div class="rest-list-information">
<a href="/madison-wi/restaurants/adamah-neighborhood-table-madison">Adamah Neighborhood Table</a>
<div class="featured-border featured-border--green featured-border-left" style="">
<span>Featured</span>
</div>
</div>
This way you can get the two related items, restaurant name and Featured button.
Note: This is not tested. I don’t remember Selenium/Python syntax very well, but it should give you a start.
from selenium.common.exceptions import NoSuchElementException

# find_elements() already returns a list, one container per restaurant.
restaurants = driver.find_elements(By.CLASS_NAME, "rest-list-information")
for restaurant in restaurants:
    # The name is the text of the link inside the container; the container
    # div itself carries no href.
    restaurant_name = restaurant.find_element(By.TAG_NAME, "a").text
    try:
        # Search inside this restaurant's container only.
        featured = restaurant.find_element(
            By.CSS_SELECTOR, "div[class*='featured-border--green']"
        ).text
    except NoSuchElementException:
        # No green badge in this container: the restaurant is not featured.
        featured = "No"
If you want to print name along with Featured (if it’s found), try
def fetch_info(driver, link):
    """Print each restaurant's name together with its Featured status.

    The Featured status is read from the landing page *link* (a
    ``Featured`` span in a div following the restaurant link); the name is
    read from each restaurant's own page. Both are printed together, on
    separate lines, while the browser is on the target page.

    Relies on a module-level ``wait`` object defined by the surrounding
    script.
    """
    driver.get(link)
    items = wait.until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a.restaurant-header"))
    )
    featured_xpath = './following-sibling::div//span[.="Featured"]'
    # find_elements() returns an empty list when nothing matches, so no
    # try/except is needed; it also replaces find_element_by_xpath, which
    # was removed in Selenium 4.3.
    featured = [
        'Featured' if item.find_elements(By.XPATH, featured_xpath) else 'Not featured'
        for item in items
    ]
    # Read the hrefs while still on the landing page, before navigating.
    itemlinks = [item.get_attribute("href") for item in items]
    for itemlink, is_featured in zip(itemlinks, featured):
        driver.get(itemlink)
        name = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.name"))
        ).text
        # A real newline escape rather than a literal "n" between the values.
        print(f'{name}\n{is_featured}')
I would do it the following way instead:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def fetch_info(driver, link):
    """Print name, address and Featured status for every listed restaurant.

    First pass (landing page *link*): for each
    ``.rest-list-information > .information`` container, record the
    restaurant link and the text of its featured badge, or
    ``"Not Featured"`` when the container has no badge.

    Second pass: open each recorded link and print the name and address
    found there together with the status recorded in the first pass.
    """
    wait = WebDriverWait(driver, 10)  # one wait object reused for every lookup
    driver.get(link)
    all_items = []
    containers = wait.until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, ".rest-list-information > .information")
        )
    )
    for item in containers:
        item_link = item.find_element(By.CSS_SELECTOR, "a.restaurant-header").get_attribute("href")
        # find_elements() yields an empty list when there is no badge, so a
        # missing badge needs no blanket ``except Exception`` that could also
        # hide unrelated failures.
        badges = item.find_elements(
            By.CSS_SELECTOR, ".featured-container[ng-if='::restaurant.featured']"
        )
        featured = badges[0].text if badges else "Not Featured"
        all_items.append((item_link, featured))
    for item_link, featured in all_items:
        driver.get(item_link)
        name = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.name"))
        ).text
        address = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".address-text-rest-menu span"))
        ).text
        print(name, address, featured)
if __name__ == '__main__':
    driver = webdriver.Chrome()
    url = "https://eatstreet.com/madison-wi/restaurants"
    try:
        fetch_info(driver, url)
    finally:
        # Shut the browser down whether or not the scrape succeeded.
        driver.quit()