Playwright times out in while-loop

Question:

I am using Playwright with Python to scrape data from an apartment leasing website. Inside a while-loop, the following code handles a pop-up that asks to "Change Desired Move-in Date." The pop-up appears only when a unit's availability date is some time in the future, not for units that are immediately available, so I need my script to handle both cases (when the pop-up appears and when it doesn’t). This is my first time using Playwright.

Here’s the code snippet that handles the pop-up:

def click_change_date_request(page):
    """Click "Yes" inside the "Online Leasing" iframe, printing whether that worked.

    Prints the "found" message after the click returns, and the "not found"
    message if a TimeoutError is caught.
    """
    try:
        # This builds a locator for the pop-up heading, but the result is
        # discarded: nothing is waited on or asserted here.
        # NOTE(review): the heading text below has three spaces between
        # 'Move-in' and 'Date', unlike the full script further down - likely a
        # paste artifact; confirm.
        page.frame_locator('iframe[title="Online Leasing"]').locator(
            "xpath=//h2[contains(@class, 'MuiTypography-h6') and contains(text(), 'Change Desired Move-in   Date')]"
        )
        # The click on the "Yes" button is the only call in this block that
        # acts on the page.
        page.frame_locator('iframe[title="Online Leasing"]').get_by_role("button", name="Yes").click()
        print("Change date button found. Clicking 'Yes'")
    # NOTE(review): no import is shown in this excerpt; unless TimeoutError is
    # imported from playwright.sync_api, this names Python's builtin
    # TimeoutError - confirm which one is in scope.
    except TimeoutError:
        print("Change date button not found. Continuing...")

On the first iteration, the pop-up appears and the script clicks it. On the second iteration, the script looks for the pop-up again. The problem is that when the pop-up does not appear, the script times out and stops executing. I want the script to continue executing the rest of the code if the pop-up is not found. I am using a try-except block to catch the TimeoutError, but the script still stops after the timeout occurs.

Here’s the entire script:

from playwright.sync_api import sync_playwright, Error


# Click the "Start" button on the first page when the script executes, if found
def click_start_button(page):
    """Click the "Start" button inside the "Online Leasing" iframe.

    Builds a locator for the button, prints which branch was taken, and
    clicks the button in the "found" branch.
    """
    start_button = page.frame_locator('iframe[title="Online Leasing"]').get_by_role(
        "button", name="Start"
    )
    # NOTE(review): get_by_role() returns a locator object rather than an
    # element, so this truthiness test presumably always passes and the else
    # branch is never taken - confirm against the Playwright Locator docs.
    if start_button:
        print("Start button found. Clicking 'Start'")
        start_button.click()
    else:
        print("Start button not found. Continuing...")


# fmt: off
# Check if the "Change Desired Move-in Date" pop-up window is present, then click "Yes"
def click_change_date_request(page):
    """Click "Yes" inside the "Online Leasing" iframe, printing whether that worked.

    Prints the "found" message after the click returns, and the "not found"
    message if a TimeoutError is caught.
    """
    try:
        # This builds a locator for the pop-up heading, but the result is
        # discarded: nothing is waited on or asserted here.
        page.frame_locator('iframe[title="Online Leasing"]').locator(
            "xpath=//h2[contains(@class, 'MuiTypography-h6') and contains(text(), 'Change Desired Move-in Date')]"
        )
        # The click on the "Yes" button is the only call in this block that
        # acts on the page.
        page.frame_locator('iframe[title="Online Leasing"]').get_by_role("button", name="Yes").click()
        print("Change date button found. Clicking 'Yes'")
    # This script imports only sync_playwright and Error from
    # playwright.sync_api, so the name below is Python's builtin TimeoutError.
    # NOTE(review): Playwright exports its own TimeoutError class; verify that
    # a click timeout raises that one, which this handler would not catch.
    except TimeoutError:
        print("Change date button not found. Continuing...")


# Locate the target element on the page
def target_element(page, label):
    """Return a locator for the bold value cell of the summary row containing *label*.

    The row is looked up inside the "Online Leasing" iframe; only the locator
    is built here, nothing is read from the page.
    """
    leasing_frame = page.frame_locator('iframe[title="Online Leasing"]')
    value_selector = (
        "div.d-flex.pb-1.mb-1.border-bottom.border-white.px-2.justify-content-between"
        f":has-text('{label}') >> div.fw-bold"
    )
    return leasing_frame.locator(value_selector)


def start_requests(url):
    """Open *url* in a visible Chromium window and print the data for each leasing button.

    For every button matched inside the "Online Leasing" iframe, the loop
    either skips it (when its text is "Check Availability") or clicks through
    to the detail page, prints the extracted values and clicks "Back" twice.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        page = browser.new_page()
        page.goto(url)
        click_start_button(page)

        # Locator for the leasing buttons inside the iframe (despite its name,
        # `frame` is a button locator, not the iframe itself)
        frame = page.frame_locator('iframe[title="Online Leasing"]').locator(
            "css=button.primary.w-100"
        )

        # Get the total number of buttons in the frame (counted once, before the loop)
        total_buttons = len(frame.all())

        # Index of the next button to handle
        processed_buttons = 0

        # Enter the iframe, continue to the next page, then extract the data
        while processed_buttons < total_buttons:
            try:
                page.wait_for_selector('iframe[title="Online Leasing"]')

                button = frame.nth(processed_buttons)

                button_text = button.inner_text()

                if button_text == "Check Availability":
                    print("Check availability button found. Skipping to next...")
                else:
                    button.click()
                    page.frame_locator('iframe[title="Online Leasing"]').get_by_role(
                        "button", name="Continue"
                    ).click()

                    # If a request to change the move-in date is made, click "Yes"
                    # (any exception that helper does not catch propagates to
                    # the `except Error` clause at the bottom of this loop)
                    click_change_date_request(page)

                    # fmt: off
                    data = {
                        "floor plan": page.frame_locator('iframe[title="Online Leasing"]').locator("h2[role='button']:is([aria-label*='collapse details for floor plan'])").inner_text(),
                        "unit": target_element(page, "Apartment").inner_text(),
                        "floor": target_element(page, "Floor Level").inner_text(),
                        "bedrooms": target_element(page, "Bedrooms").inner_text(),
                        "bathrooms": target_element(page, "Bathrooms").inner_text(),
                        "sqft": target_element(page, "Square Feet").inner_text(),
                        "term": target_element(page, "Term").inner_text(),
                        "availability": target_element(page, "Move-In").inner_text(),
                        "price": target_element(page, "Rent").inner_text(),
                    }

                    print(data)

                    # Click the "Back" button twice to return to the floor plan page
                    page.frame_locator(
                        'iframe[title="Online Leasing"]'
                    ).get_by_role("button", name="Back").click()

                    page.frame_locator(
                        'iframe[title="Online Leasing"]'
                    ).get_by_role("button", name="Back").click()

                # Advance to the next button and start it from a freshly loaded page
                processed_buttons += 1
                page.reload()

            # NOTE(review): the counter above is only advanced when the whole
            # try body succeeds, so after an Error the same button index is
            # attempted again without a reload - this can repeat indefinitely
            # if the error persists.
            except Error as e:
                print(e)


# Script entry point: run the scraper against the floor-plans URL below
# when this file is executed directly.
if __name__ == "__main__":
    url = "https://broadwaytowers.com/floor-plans/"
    start_requests(url)

Update (2023-03-24 07:43:00): I tried the code below and found that the problem occurs at the ‘change_date_request’ locator on the second iteration: the element is not found and the script times out. The weird thing is that execution never reaches the ‘except TimeoutError’ block.

# fmt: off
# Check if the "Change Desired Move-in Date" pop-up window is present, then click "Yes"
def click_change_date_request(page):
    """Click "Yes" inside the "Online Leasing" iframe if the pop-up heading locator is not None.

    Prints which branch was taken, or the exception if a TimeoutError is caught.
    """
    try:
        change_date_request = page.frame_locator('iframe[title="Online Leasing"]').locator(
            "xpath=//h2[contains(@class, 'MuiTypography-h6') and contains(text(), 'Change Desired Move-in Date')]"
        )
        # NOTE(review): per the answer below, locator() returns a locator
        # object even when the element does not exist, so this check is
        # always true and the else branch is never taken.
        if change_date_request is not None:
            page.frame_locator('iframe[title="Online Leasing"]').get_by_role("button", name="Yes").click()
            print("Change date button found. Clicking 'Yes'")
        else:
            print("Change date button not found. Continuing...")
    # NOTE(review): no import is shown in this excerpt; unless TimeoutError is
    # imported from playwright.sync_api, this names Python's builtin
    # TimeoutError - confirm which one is in scope.
    except TimeoutError as e:
        print(e)
Asked By: iron502

||

Answers:

Update (2023-03-30 01:55:00): I figured out what the issue was. When you use Playwright’s ‘locator’ method, it returns a locator object, even if the specified element does not exist on the page. This is because the locator object is not the actual element itself, but an object representing a query for an element.

The reason it returns a locator object is that Playwright allows you to perform actions on the locator, like waiting for the element to appear or disappear. The actions are performed on the matching element when it appears on the page, or they will time out if the element does not appear within the specified timeout period.

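To check if the element actually exists on the page or not, I used the count() method on the locator object instead of checking whether the locator evaluated to True. Note that the updated code also imports TimeoutError from playwright.sync_api; the original script did not import it, so its ‘except TimeoutError’ clause referred to Python’s built-in TimeoutError rather than Playwright’s, which is why that handler was never reached. Here’s my updated code: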

from playwright.sync_api import sync_playwright, TimeoutError
import logging

# Module-level logger shared by every helper in this script
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter(
    "%(asctime)s - %(levelname)s - %(name)s - %(message)s - [Line %(lineno)d]"
)

# One handler per destination: the log file and the stream handler
file_handler = logging.FileHandler("scraper.log")
console_handler = logging.StreamHandler()

# The file keeps DEBUG and above, the stream handler INFO and above; both use
# the same format and are attached in this order (file first)
for _handler, _level in (
    (file_handler, logging.DEBUG),
    (console_handler, logging.INFO),
):
    _handler.setLevel(_level)
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)


def enter_iframe(page):
    """
    Waits for the "#rp-leasing-widget" element and returns its content frame

    Returns None (after logging the error) when the wait times out
    """
    try:
        widget = page.wait_for_selector("#rp-leasing-widget")
        frame = widget.content_frame()
    except TimeoutError as e:
        logger.error(f"Error getting iframe: {e}")
        frame = None
    return frame

def click_back_button(iframe):
    """
    Clicks the "Back" button inside the given iframe

    A timeout while clicking is logged instead of being raised
    """
    try:
        back_button = iframe.get_by_role("button", name="Back")
        back_button.click()
    except TimeoutError as e:
        logger.error(f"Error clicking back button: {e}")

def get_target_element(iframe, label):
    """
    Gets the target element that has the data we want

    iframe: frame of the leasing widget to search in
    label: text of the summary row whose bold value cell is wanted

    Returns a locator for that cell. Building a locator does not query the
    page, so no timeout can occur here; a missing element only surfaces when
    the caller acts on the locator (e.g. inner_text()).
    """
    selector = f"div.d-flex.pb-1.mb-1.border-bottom.border-white.px-2.justify-content-between:has-text('{label}') >> div.fw-bold"
    return iframe.locator(selector)

def get_leasing_buttons(iframe):
    """
    Gets leasing buttons on main page to click through

    Returns a locator matching every "button.primary.w-100" in the iframe.
    Building a locator does not query the page, so no timeout can occur
    here; callers use count() / nth() on the result to reach the buttons.
    """
    return iframe.locator("css=button.primary.w-100")

def handle_change_date_request(iframe):
    """
    Confirms the "Change Desired Move-in Date" pop-up when it is on the page

    Counts the elements matching the pop-up heading; when at least one is
    present the "Yes" button is clicked, otherwise the function only logs
    that nothing was found
    """
    heading_selector = "h2.MuiTypography-h6:has-text('Change Desired Move-in Date')"
    try:
        heading_matches = iframe.locator(heading_selector).count()
    except TimeoutError as e:
        logger.error(f"Error handling change date request: {e}")
        return

    # No heading on the page means there is no pop-up to confirm
    if heading_matches <= 0:
        logger.info("Change date request window not found. Continuing...")
        return

    iframe.get_by_role("button", name="Yes").click()
    logger.info("Change date request window found. Clicking 'Yes'")

def _require_iframe(page):
    """
    Returns the leasing iframe, failing loudly when it cannot be found
    """
    iframe = enter_iframe(page)
    if iframe is None:
        # enter_iframe() has already logged the timeout; stop here instead of
        # failing later with an AttributeError on None.
        raise RuntimeError("Leasing iframe could not be located")
    return iframe


def _read_unit_details(iframe):
    """
    Reads the floor plan heading and the labelled summary values for one unit
    """
    # (key in the result, row label shown in the widget)
    labelled_fields = (
        ("unit", "Apartment"),
        ("floor", "Floor Level"),
        ("bedrooms", "Bedrooms"),
        ("bathrooms", "Bathrooms"),
        ("sqft", "Square Feet"),
        ("term", "Term"),
        ("availability", "Move-In"),
        ("price", "Rent"),
    )
    details = {
        "floor plan": iframe.locator("h2[role='button']:is([aria-label*='collapse details for floor plan'])").inner_text(),
    }
    for key, label in labelled_fields:
        details[key] = get_target_element(iframe, label).inner_text()
    return details


def scrape_floor_plan_data(page, settle_ms=2000):
    """
    Iterates through leasing buttons and scrapes data

    page: Playwright page; the caller is expected to have navigated it to
        the floor plan URL already
    settle_ms: pause, in milliseconds, inserted after each step so the widget
        can re-render before it is queried (defaults to the 2000 ms this
        script has always used)

    Raises RuntimeError when the leasing iframe cannot be located.
    """
    iframe = _require_iframe(page)
    iframe.wait_for_timeout(settle_ms)

    # A locator object is always truthy, so the presence of the start button
    # has to be checked with count() rather than with `if start_button:`
    start_button = iframe.get_by_role("button", name="Start")
    if start_button.count() > 0:
        logger.info("Start button found. Clicking 'Start'")
        start_button.click()
    else:
        logger.info("Start button not found. Continuing...")

    # Number of leasing buttons on the main page, taken once up front
    total_buttons = get_leasing_buttons(iframe).count()

    for processed_buttons in range(total_buttons):
        # The page is reloaded at the end of every pass, so the iframe has to
        # be looked up again each time
        iframe = _require_iframe(page)
        iframe.wait_for_timeout(settle_ms)

        # Get current leasing button to click on
        leasing_buttons = get_leasing_buttons(iframe)
        iframe.wait_for_timeout(settle_ms)
        button = leasing_buttons.nth(processed_buttons)
        button_text = button.inner_text()

        # Check if button shows "Check Availability"
        if button_text == "Check Availability":
            logger.info("Check availability button found. Skipping to next or ending if no other leases available...")
        else:
            button.click()
            iframe.wait_for_timeout(settle_ms)

            # Click "Continue" button and go to next page
            iframe = _require_iframe(page)
            iframe.get_by_role("button", name="Continue").click()
            iframe.wait_for_timeout(settle_ms)
            logger.info("Continuing to next page...")

            iframe = _require_iframe(page)

            # Check if there is a change date request window
            handle_change_date_request(iframe)

            logger.info("Getting leasing data...")
            data = _read_unit_details(iframe)
            logger.info(f"Floor plan data: {data}")

            # Two "Back" clicks return to the floor plan page
            click_back_button(iframe)
            iframe.wait_for_timeout(settle_ms)
            click_back_button(_require_iframe(page))
            iframe.wait_for_timeout(settle_ms)

        page.reload()

def start_requests(url):
    """
    Launches headless Chromium, opens the url and scrapes the floor plan data

    url: address of the page that embeds the leasing widget
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            # Log before navigating so the message is present even if the
            # page fails to load
            logger.info(f"Loading page: {url}")
            page.goto(url)

            scrape_floor_plan_data(page)
        finally:
            # Close the browser explicitly, even when scraping raises
            browser.close()
        

# Script entry point: run the scraper against the floor-plans URL below
# when this file is executed directly.
if __name__ == "__main__":
    url = "https://broadwaytowers.com/floor-plans/"
    start_requests(url)
Answered By: iron502
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.