Having difficulty printing items from different page together

Question

I’ve written a script in Python in combination with Selenium to parse the links of different restaurants from a landing page and then scrape the name and address of each restaurant after navigating to its target page. A few restaurants have a green Featured icon attached to their links, as in the image below.

Link to the landing page

What I want to do is scrape that information (whether a restaurant is featured) from the landing page, but print it along with the name and address once my browser is on the target page.

How can I print the name, the address and whether a restaurant is Featured at the same time in my current print command?

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def fetch_info(driver,link):
    """Print the name and address of every restaurant linked from ``link``.

    Loads the landing page at ``link``, collects the ``href`` of each
    ``a.restaurant-header`` anchor, then opens every restaurant page in
    turn and prints its name and address on separate lines.

    Relies on a module-level ``wait`` (a ``WebDriverWait`` bound to
    ``driver``) being defined before this function is called.
    """
    driver.get(link)
    # Extract plain href strings while still on the landing page; the loop
    # below navigates away, after which only the strings are needed.
    itemlinks = [item.get_attribute("href") for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,"a.restaurant-header")))]

    for itemlink in itemlinks:
        driver.get(itemlink)
        name = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,"h1.name"))).text
        address = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,".address-text-rest-menu span"))).text

        # "\n" (not a literal "n") puts name and address on separate lines.
        print(f'{name}\n{address}')

# Script entry point: open Chrome, scrape the listing, and always close the browser.
if __name__ == '__main__':
    url = "https://eatstreet.com/madison-wi/restaurants"
    driver = webdriver.Chrome()
    # Module-level explicit wait (timeout argument 10); fetch_info() reads it as a global.
    wait = WebDriverWait(driver,10)
    try:
        fetch_info(driver,url)
    finally:  
        # Runs even if scraping raises, so the browser process is not left behind.
        driver.quit()

Expected results (Featured is available in the landing page):

Doughboy's Pizza - Cottage Grove
447 W. Cottage Grove Rd Cottage Grove WI, 53527
Not Featured

Silver Mine Subs - Beltline
2601 W Beltline Hwy Madison WI, 53713
Not Featured

Adamah Neighborhood Table
611 Langdon St Madison WI, 53703
Featured

One such Featured icon, as attached to some of the links on the landing page.

Asked By: SIM

||

Source

Answer 1

Something like the following? I have parsed the required info into a list that you can then loop over, navigating to each page as required. Print at the target page if you want, etc.

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import re

url = 'https://eatstreet.com/madison-wi/restaurants'
d  = webdriver.Chrome()
try:
    d.get(url)
    # Wait until the restaurant cards are present on the landing page.
    cards = WebDriverWait(d,10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".rest-list-information")))
    # A card is 'featured' when its markup carries the Angular featured marker.
    # A plain substring test is enough; no regular expression is needed.
    featured = ['featured' if 'ng-if="::restaurant.featured"' in card.get_attribute('innerHTML') else 'No' for card in cards]
    # [name, href] for each restaurant link. find_elements(By...) replaces the
    # find_elements_by_* helpers, which no longer exist in Selenium 4.
    titles = [[title.text, title.get_attribute('href')] for title in d.find_elements(By.CSS_SELECTOR, ".rest-list-information a")]
    results = list(zip(titles, featured))
    for result in results:
        # result is ([name, href], featured_flag).
        print(result[0][0], result[1])
        # To visit a restaurant page, call d.get(result[0][1]) here and scrape
        # what you need; restrict it to result[1] == 'featured' if only the
        # featured restaurants are of interest.
finally:
    # Close the browser even if scraping fails.
    d.quit()

Answered By: QHarr

Answer 2

You should look for the div that contains both the restaurant link and the related Featured button, instead of just the restaurant link:

<div class="rest-list-information">
  <a href="/madison-wi/restaurants/adamah-neighborhood-table-madison">Adamah Neighborhood Table</a>
  <div class="featured-border featured-border--green featured-border-left" style="">
    <span>Featured</span>
  </div>
</div>

This way you can get the two related items, restaurant name and Featured button.

Note: This is not tested. I don’t remember the Selenium/Python syntax very well, but it should give you a start.

from selenium.common.exceptions import NoSuchElementException

# One container per restaurant; each holds the link and, optionally, the
# Featured badge. find_elements already returns a list, so it is not wrapped.
restaurants = driver.find_elements(By.CLASS_NAME, "rest-list-information")

for restaurant in restaurants:
    # The restaurant name is the text of the anchor inside the container;
    # its href is the target page to navigate to later.
    link = restaurant.find_element(By.CSS_SELECTOR, "a")
    restaurant_name = link.text
    restaurant_link = link.get_attribute("href")
    try:
        # Search inside this container only, so the badge belongs to this restaurant.
        featured = restaurant.find_element(By.CSS_SELECTOR, "div[class*='featured-border--green']").text
    except NoSuchElementException:
        # No badge in this container: the restaurant is not featured.
        featured = "No"

Answered By: return

Answer 3

If you want to print name along with Featured (if it’s found), try

def fetch_info(driver,link):
    """Print each restaurant's name together with its Featured status.

    Loads the landing page at ``link`` and, for every ``a.restaurant-header``
    anchor, records its ``href`` and whether a "Featured" span follows it.
    Each restaurant page is then opened and its name is printed, followed on
    the next line by ``Featured`` or ``Not featured``.

    Relies on a module-level ``wait`` (a ``WebDriverWait`` bound to
    ``driver``) being defined before this function is called.
    """
    driver.get(link)
    items = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,"a.restaurant-header")))

    # Collect href and featured flag together while still on the landing page.
    # find_elements returns an empty list when nothing matches, so no
    # exception handling is needed to detect a missing badge.
    entries = []
    for item in items:
        badge = item.find_elements(By.XPATH, './following-sibling::div//span[.="Featured"]')
        entries.append((item.get_attribute("href"), 'Featured' if badge else 'Not featured'))

    for itemlink, is_featured in entries:
        driver.get(itemlink)
        name = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,"h1.name"))).text

        # "\n" (not a literal "n") puts the status on its own line.
        print(f'{name}\n{is_featured}')

Answered By: Andersson

Answer 4

I would do it the following way instead:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def fetch_info(driver,link):
    """Print name, address and featured status for every restaurant on ``link``.

    First pass (landing page): for each ``.rest-list-information > .information``
    card, store the restaurant's href and the text of its featured container,
    or ``"Not Featured"`` when looking that container up fails.

    Second pass: open each stored href and print the page's name and address
    followed by the status captured on the landing page.
    """
    driver.get(link)
    wait = WebDriverWait(driver, 10)
    cards = wait.until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, ".rest-list-information > .information")
        )
    )

    collected = []
    for card in cards:
        anchor = card.find_element(By.CSS_SELECTOR, "a.restaurant-header")
        href = anchor.get_attribute("href")
        try:
            status = card.find_element(
                By.CSS_SELECTOR, ".featured-container[ng-if='::restaurant.featured']"
            ).text
        except Exception:
            # Lookup failed, so treat the restaurant as not featured.
            status = "Not Featured"
        collected.append((href, status))

    for href, status in collected:
        driver.get(href)
        name = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.name"))
        ).text
        address = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".address-text-rest-menu span"))
        ).text
        print(name, address, status)


# Script entry point: scrape the Madison listing with a fresh Chrome session.
if __name__ == "__main__":
    driver = webdriver.Chrome()
    url = "https://eatstreet.com/madison-wi/restaurants"
    try:
        fetch_info(driver, url)
    finally:
        # Always shut the browser down, whether or not scraping succeeded.
        driver.quit()

Answered By: MITHU

Having difficulty printing items from different page together

Question:

Answers: