Python: unable to correctly calculate the remaining number of days with the schedule library and to build the related DataFrame

Question:

from time import sleep
import os
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import schedule
import time
import pandas as pd
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Chrome profile preferences handed to the browser below
# (presumably the value 2 means "block" for notifications -- confirm).
prefs = {"profile.default_content_setting_values.notifications": 2}

# Configure the undetected-chromedriver session before launching it.
options = uc.ChromeOptions()
options.add_argument('--blink-settings=imagesEnabled=false')  # skip images so pages load faster
options.add_argument('--disable-notifications')
options.add_experimental_option("prefs", prefs)

# Start the browser instance that the price lookups drive.
driver = uc.Chrome(options=options)
from math import ceil

# --- Tracking configuration ---
tracking_days = 1  # how many days the tracking should run
tracking_period = ['10:16', '10:17', '10:18']  # daily 'HH:MM' sampling times; add more here

# --- Shared mutable state (module level so the scheduled callbacks can update it) ---
completed_days = 0  # incremented once per job() run
price_data, day_price_data = [], []  # finished per-day DataFrames / rows of the day in progress

def get_price():
    """Read the product price from the page currently loaded in ``driver``.

    Waits up to 10 seconds for the element with id ``offering-price`` to
    become visible, then joins the text of its first and second ``<span>``
    children with a decimal point and converts the result to ``float``.

    Returns:
        The price as a ``float``, or ``None`` when the element does not
        appear in time, a span cannot be located, or the combined text is
        not a valid number.
    """
    # Imported locally so the function carries its own dependency.
    from selenium.common.exceptions import WebDriverException

    try:
        price_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, 'offering-price'))
        )
        # The find_element_by_* helpers were removed in Selenium 4.3; with the
        # old bare ``except`` the resulting AttributeError was swallowed and
        # the function always returned None.
        whole_part = price_element.find_element(By.XPATH, './span[1]').text
        fraction_part = price_element.find_element(By.XPATH, './span[2]').text
        return float(whole_part + '.' + fraction_part)
    except (WebDriverException, ValueError):
        # Best effort: lookup/timeout failures and unparsable text yield None,
        # while programming errors and KeyboardInterrupt still surface.
        return None

def get_time():
    """Return the current local date and time as a naive ``datetime``."""
    timestamp = datetime.now()
    return timestamp

def track_product():
    """Fetch the product price if the current minute is a tracking time.

    Prints the current time, and when its ``HH:MM`` form is listed in
    ``tracking_period`` loads the product page, stores the result of
    ``get_price()`` in the global ``current_price`` and, if a price was
    obtained, appends ``[timestamp, price]`` to ``day_price_data``.
    """
    global current_price, day_price_data

    now = datetime.now()
    print(f"Time just before tracking_period: {now}")

    # Nothing to do outside the configured minutes.
    if now.strftime('%H:%M') not in tracking_period:
        return

    driver.get('https://www.hepsiburada.com/pinar-tam-yagli-sut-4x1-lt-pm-zypinar153100004')
    current_price = get_price()

    # get_price() signals a failed lookup with None; skip recording then.
    if current_price is None:
        return

    day_price_data.append([get_time(), current_price])

def job():
    """Run one scheduled price check and update the tracking progress.

    Calls ``track_product()``, increments the run counter, prints how many
    days remain, and after the last run of each day moves the collected rows
    into ``price_data`` as a DataFrame with ``date`` and ``price`` columns.
    """
    global completed_days, day_price_data, price_data

    track_product()

    # NOTE: despite its name, completed_days counts finished runs, not days.
    completed_days += 1

    runs_per_day = len(tracking_period)
    total_runs = tracking_days * runs_per_day

    # Computed before branching: this name used to be bound only in the
    # else-branch, so the very last run raised UnboundLocalError at the
    # end-of-day check and the final day's data never reached price_data.
    day_in_tracking_period = completed_days % runs_per_day

    if completed_days == total_runs:
        remaining_days = 0
        print(f"Remaining days: {remaining_days}")
        print("Product tracking finished.")
    else:
        # Same value as the earlier periods/remainder arithmetic:
        # (days - done // n) * n - done % n == days * n - done.
        remaining_runs = total_runs - completed_days
        remaining_days = ceil(remaining_runs / runs_per_day)
        print(f"Waiting for the next tracking period... Remaining days: {remaining_days}")

    # A remainder of 0 means the last run of a day just finished: flush it.
    if day_in_tracking_period == 0:
        day_price_df = pd.DataFrame(day_price_data, columns=['date', 'price'])
        day_price_df['date'] = pd.to_datetime(day_price_df['date'])
        price_data.append(day_price_df)
        day_price_data = []

# Register job() once per configured time of day; an entry without a colon
# (e.g. '10') is treated as a full hour ('10:00').
for time_str in tracking_period:
    if ':' in time_str:
        hour, minute = time_str.split(':')
    else:
        hour, minute = time_str, '00'
    print("time_str:", time_str)
    schedule.every().day.at(f"{hour}:{minute}").do(job)

print("Schedule started...")
# Poll until every planned run (days x times per day) has been executed.
# The earlier time-of-day check (now >= tracking_period[-1]) left the loop at
# the start of the last tracking minute, before run_pending() could fire the
# last job, and also ended tracking on the first day regardless of
# tracking_days; the run counter alone decides when to stop.
while completed_days < tracking_days * len(tracking_period):
    schedule.run_pending()
    time.sleep(1)

Hello friends, I am trying to write a generic piece of code that fetches the price of a product on an e-commerce site and collects it in a data frame called price_data on the desired days and at the desired times. However, I cannot collect the prices in price_data because I cannot correctly calculate the number of days remaining in the program. To keep the code from getting too complicated, I want tracking_period to always consist of 3 fixed times (['hour1:min1', 'hour2:min2', 'hour3:min3']), so the program will only fetch the price at those 3 times and this will not change. The only parameter that can change is the number of days, for example tracking_days = 1, 2, 10, 365, etc. Whatever the value of tracking_days, the program should calculate the remaining days correctly and collect the prices correctly in price_data.
How can I get an accurate result like the image below? (Correct Result)

Comparison of the incorrect and correct results for tracking_days = 1:

enter image description here

Correct results I would expect if tracking days=2:

enter image description here

Asked By: murat taşçı

||

Answers:

Please leave a comment if anything is missing or wrong, or could be improved.

from time import sleep
import os
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import schedule
import time
import pandas as pd
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Preferences applied to the Chrome profile
# (presumably 2 = block notifications -- confirm).
prefs = {"profile.default_content_setting_values.notifications": 2}

# Assemble the browser options first, then start the session with them.
options = uc.ChromeOptions()
options.add_argument('--blink-settings=imagesEnabled=false')  # no images, so pages load faster
options.add_argument('--disable-notifications')
options.add_experimental_option("prefs", prefs)

driver = uc.Chrome(options=options)




# Configuration: number of days to track and the daily 'HH:MM' sampling times.
tracking_days = 2
tracking_period = ['20:55', '20:58', '21:00']

# Progress counters updated by job(): finished days and runs done today.
completed_days = tracking_count = 0

# Collected [timestamp, price] rows, and the list that receives the final DataFrame.
day_price_data, price_data = [], []

def get_price():
    """Return the price shown on the page currently open in ``driver``.

    Waits up to 10 seconds for the element with id ``offering-price`` to be
    visible, concatenates the text of its first and second ``<span>``
    children around a decimal point, and parses that as a ``float``.

    Returns:
        The parsed price, or ``None`` if the element never became visible,
        one of the spans could not be found, or the text is not numeric.
    """
    # Function-scope import so no change to the file's import block is needed.
    from selenium.common.exceptions import WebDriverException

    try:
        price_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, 'offering-price'))
        )
        # find_element_by_xpath no longer exists in Selenium >= 4.3; combined
        # with a bare ``except`` that made every call return None.
        integer_text = price_element.find_element(By.XPATH, './span[1]').text
        decimal_text = price_element.find_element(By.XPATH, './span[2]').text
        return float(integer_text + '.' + decimal_text)
    except (WebDriverException, ValueError):
        # Expected failures (timeout, missing node, bad number) stay
        # best-effort; anything else is a real bug and should propagate.
        return None

def get_time():
    """Give back the present local time as a naive ``datetime`` object."""
    current_moment = datetime.now()
    return current_moment
    
    
def remaining_days_str():
    """Return ``tracking_days - completed_days - 1`` formatted as a string.

    Both values are read from module level; the extra ``- 1`` presumably
    excludes the day currently in progress.
    """
    return f"{tracking_days - completed_days - 1}"


def job():
    """Perform one scheduled price check and advance the day counters.

    When the current ``HH:MM`` is listed in ``tracking_period`` the product
    page is loaded and a successfully read price is appended to
    ``day_price_data``. Every call counts as one run; once a day's runs are
    all done ``completed_days`` is incremented, and when it reaches
    ``tracking_days`` the function returns ``schedule.CancelJob``.
    """
    global completed_days, tracking_count

    now = datetime.now()
    print(f"Time just before tracking_period: {now}")

    in_tracking_minute = now.strftime('%H:%M') in tracking_period
    if in_tracking_minute:
        driver.get('https://www.hepsiburada.com/pinar-tam-yagli-sut-4x1-lt-pm-zypinar153100004')
        price = get_price()
        if price is not None:
            day_price_data.append([get_time(), price])
            print(f"Price added at {get_time()}: {price}")

    # The run is counted whether or not a price was captured.
    tracking_count = tracking_count + 1
    print(f"Remaining days: {remaining_days_str()}")

    day_finished = tracking_count == len(tracking_period)
    if day_finished:
        completed_days = completed_days + 1
        tracking_count = 0
        print(f"Day {completed_days} completed.")

    if completed_days != tracking_days:
        return None

    print("Product tracking finished.")
    return schedule.CancelJob



# Schedule job() at each configured time; entries without ':' get ':00' appended.
for time_str in tracking_period:
    hour, minute = time_str.split(':') if ':' in time_str else (time_str, '00')
    print("time_str:", time_str)
    schedule.every().day.at(f"{hour}:{minute}").do(job)

print("Schedule started...")
# Poll the scheduler until job() has marked every tracking day as completed.
while True:
    if completed_days >= tracking_days:
        break
    schedule.run_pending()
    print("Waiting for the next job...")
    time.sleep(50)

print("before df")
# Turn all collected rows into one DataFrame with a proper datetime column.
day_price_df = pd.DataFrame(day_price_data, columns=['date', 'price']).assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
price_data.append(day_price_df)

For 1 period over 2 days:

enter image description here

For 3 periods over 2 days (without waiting the full 2 days):
enter image description here

For 3 periods over 2 days (after waiting the full 2 days):
enter image description here

In addition, I ran the code for 1 day with 2 and with 3 periods; the results were correct.

Answered By: murat taşçı