Python – How to overcome cookie expiration during API data extraction?

Question:

I am scraping this website: https://www.woolworths.com.au/shop/browse/pet/dog-puppy. It is really more of an API data extraction, because all the data comes from a POST request. The script works, but I have to update the cookies manually every hour, which is not practical, so I am looking for a workaround that either avoids the cookie expiration or refreshes the cookies automatically. Here is the code:

import httpx
import pandas as pd
import math
import re
from datetime import datetime
from datetime import date

# Timestamp captured once, when the module is loaded; parse() stamps every
# scraped row with this same value.
now = datetime.now()
# Date captured at load time as well (not referenced elsewhere in the visible code).
today = date.today()

# Matches one markup tag: "<", one or more characters other than ">", then ">".
TAG_RE = re.compile(r"<[^>]+>")


def remove_tags(text):
    """Return *text* with every markup tag replaced by a single space."""
    cleaned = re.sub(TAG_RE, " ", text)
    return cleaned


class WoolsWorthScraper:
    """Scrape product listings from the Woolworths category API and export them to CSV.

    Category payloads are POSTed to ``base_url`` with the class-level
    ``headers`` and ``cookies``; ``parse`` flattens every returned product
    into a dict and ``to_csv`` writes the collected dicts to disk.
    """

    # One dict per parsed product: appended to by parse(), read by to_csv().
    # NOTE: this list lives on the class, so all instances share it.
    all_info = []

    # Cookie values sent with every request.  They were copied from a browser
    # session; the surrounding question reports that they stop working after
    # about an hour and then have to be replaced with fresh ones.
    cookies = {
        "_abck": "1AF9FA9968986E01D95DE635CE5CA49A~0~YAAQxKwwF4/SWjCGAQAAKl1xbwk/ATaJRzCaF4YKQ1lcfE4ZbDlWge8dxv5TNcUiQxDjsod+ZRyl0Z22ciftnXmNrmKNDj6gG7GkdAMvW7tJNTdSA3mpw3BJad78c8gWEi7xF7gmDdPmvDZcpBvT68TE8xi5YS+Y/o7+nnwbwmxRN6sHuTFQ3Mxr08gmypK0p7UXYKX5w8wigTHaIkUKl6GBTc3eVRBz87wXz6VSvhMgu3lsAyqX+hbQwmSPmlbnKeZHYRpnmcO7mK/apgy0lLtHJrISokAIBIPMT2Ocq/yuZG6zjGrzAMzoH8D2g4JhY6TUXLz94DMH/nMDX+4JnxAL+iqxWp1T5oAb/Z4YeH6l7x9ZARUzQfFGkHR3GC+XNzLWTaYYc2ernQF1WH6jyVo+HKkBcShBkUJ4VitieA==~-1~-1~-1",
        "AMCV_4353388057AC8D357F000101%40AdobeOrg": "870038026%7CMCIDTS%7C19408%7CMCMID%7C39487458205068416855238848785487881282%7CMCOPTOUT-1676913803s%7CNONE%7CvVersion%7C5.0.0",
        "ai_user": "fL4KLTsKGOkSYGQVYJwdIm|2023-02-17T10:51:04.008Z",
        "utag_main": "v_id:01865effc7310001995bec5e16700504600370090086e",
        "AKA_A2": "A",
        "akaalb_woolworths.com.au": "~op=www_woolworths_com_au_ZoneB:PROD-ZoneB|www_woolworths_com_au_BFF_MEL_Launch:WOW-BFF-MEL|~rv=57~m=PROD-ZoneB:0|WOW-BFF-MEL:0|~os=43eb3391333cc20efbd7f812851447e6~id=590a8239185204a5780d732b4c9292bd",
        "ak_bmsc": "CD2C640DE68D42B2754BD2302BC2A37C~000000000000000000000000000000~YAAQxKwwFxGlWjCGAQAAhixsbxJUZ8zXdaHVjTY1WO1sXuLqJXIDKRZL9aAbj0FinAr1ldLs7cMxdizMJT1xbbY5bO3780nEgbXnLd15tdyhxY+RSbZK9AyRmB6wNQO6MAykh9/POXagsbq1qC6ssFfWTYpB55o8FWbh5ksJoXWiYoHrgXpVDI1P6s4Sg6vvt5N123DjwHWKp5aaP9qsRApj8F5b+YMXM5LxWxhczsj4s2IttET1qFP7EZ6qG9N/5HPL6BfrSwY8XChmWWjgUw8tZO6fSp5tDn6gAi16kikTaZfIFPmHAt9MZPBae5i/kOrosUt5DOe7EQJko31jHinK55rPmC59QBF7JjO9kp4DkpQ15sMvWW8Lwvxj7KU7dnMAgmob3WoK0PZx3I4Fa8g1AAiBMB0IKbOGjU9qmDpW/cbwwdCE0NTTsIIm/CECEK0qbutJra1ufQDH95brcwf5phlpqOT+ioED+JjMPx2MAm8X/xJKnhvR2B5eY1REZUuTokKbBYKKDY8MuSABQ+c1iDu+fg==",
        "bm_sz": "4EFADCBD9227B61CBF2058518F420B83~YAAQxKwwFzSkWjCGAQAAZx9sbxL7XiBGlPH7LjomD0zmd16wBZq+n/3CeaS3Mfp3Y8afqdHE3/DXVElgVcJY2BgO5O7wRh09eQxdGkOWsb3W+H7vTLUsBlLp7tfJos+LD7CJEzFdjznn6Me849d8nOHeo1oXWh++oS1MQvbizyhVJljH8Dk+QfXDGmn1TT8bCiI6eoQsXD/NScXqTMoHgvzHTZVYgXNGiBku8YNdqisNqFEU7OKwr43tH41OZPv8qYAoKaN4rbiChlbM+ADUhxgW7Y+DO0jqC0vxlZqtc0iKKVmdhsNkbWTi~4538673~3753012",
        "dtCookie": "v_4_srv_-2D23_sn_4S74KMN57JFMT0RRTPLB6J1VIJGF2ARD",
        "rxVisitor": "1676906602556SGGP1KLPFDLV7SQNQEJDFR2CERF411V0",
        "dtPC": "-23$506602547_710h1vCHCJHUMUHKLVGLEONNHBUCFQJHBAIIGD-0e0",
        "rxvt": "1676908402560|1676906602560",
        "INGRESSCOOKIE": "1676906603.861.44.354018|37206e05370eb151ee9f1b6a1c80a538",
        "at_check": "true",
        "mbox": "session#503c621d6b474fe78bd8b03aefb53740#1676908464",
        "w-rctx": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJuYmYiOjE2NzY5MDY2MDIsImV4cCI6MTY3NjkxMDIwMiwiaWF0IjoxNjc2OTA2NjAyLCJpc3MiOiJXb29sd29ydGhzIiwiYXVkIjoid3d3Lndvb2x3b3J0aHMuY29tLmF1Iiwic2lkIjoiMCIsInVpZCI6IjdiMGI2NzBkLWNlNzQtNDRjOS1hNDZjLWNlNjA1NTdlOTgyYyIsIm1haWQiOiIwIiwiYXV0IjoiU2hvcHBlciIsImF1YiI6IjAiLCJhdWJhIjoiMCIsIm1mYSI6IjEifQ.PqCTLDVRHjNI6tIitPL2cMM49KKiGpTtndpKQnWdiQ9IBT-yt3R7TXRvy_mYD7Pwl1SeiIc_opXlK8Wz5X7Obiz6ZmyF4qLgCwZTrDmss8RXrEADSMdOSTrAfUh4fGvc71YOFJpXlxQDDCZJ0F69wK7ihd7gEBTC8gK3PoEJ8ZJukZ-AC27_23Y6ZsTgFqcMrObcJzxEmVOoLiRnJsgTnWe5Bn-bMF_IZ7k5cXlBZavB-nsVcu_WokOsmu3USnqiO6YhbtJSe6Xt7g7WqbY3o6-1AhdEkFwyTG_lOz1Ffu-NzIOozRp_Dmf0yXjgofRVgeMYC9bVipCUCH4MYq5G9A",
        "wow-auth-token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJuYmYiOjE2NzY5MDY2MDIsImV4cCI6MTY3NjkxMDIwMiwiaWF0IjoxNjc2OTA2NjAyLCJpc3MiOiJXb29sd29ydGhzIiwiYXVkIjoid3d3Lndvb2x3b3J0aHMuY29tLmF1Iiwic2lkIjoiMCIsInVpZCI6IjdiMGI2NzBkLWNlNzQtNDRjOS1hNDZjLWNlNjA1NTdlOTgyYyIsIm1haWQiOiIwIiwiYXV0IjoiU2hvcHBlciIsImF1YiI6IjAiLCJhdWJhIjoiMCIsIm1mYSI6IjEifQ.PqCTLDVRHjNI6tIitPL2cMM49KKiGpTtndpKQnWdiQ9IBT-yt3R7TXRvy_mYD7Pwl1SeiIc_opXlK8Wz5X7Obiz6ZmyF4qLgCwZTrDmss8RXrEADSMdOSTrAfUh4fGvc71YOFJpXlxQDDCZJ0F69wK7ihd7gEBTC8gK3PoEJ8ZJukZ-AC27_23Y6ZsTgFqcMrObcJzxEmVOoLiRnJsgTnWe5Bn-bMF_IZ7k5cXlBZavB-nsVcu_WokOsmu3USnqiO6YhbtJSe6Xt7g7WqbY3o6-1AhdEkFwyTG_lOz1Ffu-NzIOozRp_Dmf0yXjgofRVgeMYC9bVipCUCH4MYq5G9A",
        "prodwow-auth-token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJuYmYiOjE2NzY5MDY2MDIsImV4cCI6MTY3NjkxMDIwMiwiaWF0IjoxNjc2OTA2NjAyLCJpc3MiOiJXb29sd29ydGhzIiwiYXVkIjoid3d3Lndvb2x3b3J0aHMuY29tLmF1Iiwic2lkIjoiMCIsInVpZCI6IjdiMGI2NzBkLWNlNzQtNDRjOS1hNDZjLWNlNjA1NTdlOTgyYyIsIm1haWQiOiIwIiwiYXV0IjoiU2hvcHBlciIsImF1YiI6IjAiLCJhdWJhIjoiMCIsIm1mYSI6IjEifQ.PqCTLDVRHjNI6tIitPL2cMM49KKiGpTtndpKQnWdiQ9IBT-yt3R7TXRvy_mYD7Pwl1SeiIc_opXlK8Wz5X7Obiz6ZmyF4qLgCwZTrDmss8RXrEADSMdOSTrAfUh4fGvc71YOFJpXlxQDDCZJ0F69wK7ihd7gEBTC8gK3PoEJ8ZJukZ-AC27_23Y6ZsTgFqcMrObcJzxEmVOoLiRnJsgTnWe5Bn-bMF_IZ7k5cXlBZavB-nsVcu_WokOsmu3USnqiO6YhbtJSe6Xt7g7WqbY3o6-1AhdEkFwyTG_lOz1Ffu-NzIOozRp_Dmf0yXjgofRVgeMYC9bVipCUCH4MYq5G9A",
        "bm_sv": "6D916601877CE397455B41021E7D90B0~YAAQxKwwF8fPWjCGAQAA4i9xbxJMcZZX3SRiJc8H/2OWxVd6CQKk49gEl0O0wT3mv4+D6A9wsdylfS0Y+a8L3oq1HcOqqjcvYU2Q69nMyy5p47DMz3Y4LOTax4rtpeUyPdIBnepo4hvgW7IpZflzwEtZ7wGZlcGTt07hQYIq7y3h89qLI2WJI/qlneu5K86vwFZoo/ifvURyN/omDAT7B18VdC/VY2LOPr5OMuZ9zjcwqd19kpuDWiOUbduqD3HG2qyoscaDLQ==~1",
        "bm_mi": "7BC42F9FC8843FEAE717820962D55720~YAAQxKwwF3WkWjCGAQAAHSJsbxJWLCaVDLVlZUXANvKNhiit4WdPAGQCneOh1lvEi3vhSkbYt8C8J2AvBiXarO3BLab8YBDQBlZMRXGCuBFOr1a4kFeC1MFzs2YE7u60q8MitSBSNsJ3yQDL73Jr4Mxd8K48NuB5nETVz/tJ0zeLL2aIKTnPXwBCe93mrtR0VS2gRm7hw7JxVHB1R6Pvkph9Kt1H2TkIhpsHvMiG3JpPxoUjtFb1XKWcoXy0hahc+oyqRzS4M690sU6r9U8my5d3vy7WXpJR6Kk/ypsTh8f/RZCwAraNSJpCjdmDvZm2VQgRdPeKGkAxQBjHOX2mqWWCrC9F~1",
        "AMCVS_4353388057AC8D357F000101%40AdobeOrg": "1",
        "fullstoryEnabled": "false",
        "ai_session": "8N6Low4Vw9N3j4n63nJUgM|1676906605359|1676906930898",
    }

    # Request headers sent with every POST (passed to httpx.Client in fetch()
    # and pagination()).  Cookies travel separately through the ``cookies``
    # mapping, so no raw "Cookie" header is set here; the stale commented-out
    # copy was dropped because it had wrapped onto uncommented lines, which
    # made this dict literal a syntax error.
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.5",
        # 'Accept-Encoding': 'gzip, deflate, br',
        "Content-Type": "application/json",
        "Request-Id": "|ff65d6a2b5ef40deba161436fc928041.6fdc6ae6448243a2",
        "Request-Context": "appId=cid-v1:4601595d-64c0-46e0-be60-45622438acb3",
        "traceparent": "00-ff65d6a2b5ef40deba161436fc928041-6fdc6ae6448243a2-01",
        "Origin": "https://www.woolworths.com.au",
        "Connection": "keep-alive",
        "Referer": "https://www.woolworths.com.au/shop/browse/pet/dog-puppy?pageNumber=2",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Sec-GPC": "1",
        # Requests doesn't support trailers
        # 'TE': 'trailers',
    }

    # POST body for the "Dog & Puppy" category listing.
    dog_json_data = {
        "categoryId": "1_EF205FA",
        "pageNumber": 1,
        "pageSize": 36,
        "sortType": "TraderRelevance",
        "url": "/shop/browse/pet/dog-puppy?pageNumber=1",
        "location": "/shop/browse/pet/dog-puppy?pageNumber=1",
        "formatObject": '{"name":"Dog & Puppy"}',
        "isSpecial": False,
        "isBundle": False,
        "isMobile": True,
        "filters": [],
        "token": "",
        "enableGp": False,
        "isHideUnavailableProducts": False,
    }

    # POST body for the "Cat & Kitten" category listing.  Unlike the dog
    # payload, this template starts at pageNumber 2; pagination() sets
    # pageNumber itself for every request it sends.
    cat_json_data = {
        "categoryId": "1_1969229",
        "pageNumber": 2,
        "pageSize": 36,
        "sortType": "TraderRelevance",
        "url": "/shop/browse/pet/cat-kitten?pageNumber=2",
        "location": "/shop/browse/pet/cat-kitten?pageNumber=2",
        "formatObject": '{"name":"Cat & Kitten"}',
        "isSpecial": False,
        "isBundle": False,
        "isMobile": True,
        "filters": [],
        "token": "",
        "enableGp": False,
        "isHideUnavailableProducts": False,
    }

    # Category payloads iterated by fetch() and pagination().
    list_json_data = [dog_json_data, cat_json_data]

    # Endpoint that the category payloads are POSTed to.
    base_url = "https://www.woolworths.com.au/apis/ui/browse/category"

    def return_json_data(self):
        for json_data in self.list_json_data:
            return json_data

    def fetch(self, url):
        """POST the first category payload to *url* and return the response.

        Only the first entry of ``list_json_data`` is requested: the result is
        the initial response that ``run`` hands to ``pagination``.

        Args:
            url: Endpoint to post to (``run`` passes ``self.base_url``).

        Returns:
            The ``httpx`` response for the first payload, or ``None`` when
            ``list_json_data`` is empty.
        """
        first_payload = next(iter(self.list_json_data), None)
        if first_payload is None:
            # Nothing to request; callers get the same None as before.
            return None

        print(f"HTTP POST request to URL: {url}")
        with httpx.Client(headers=self.headers) as client:
            # Post to the URL that was asked for (and logged above) rather
            # than silently falling back to ``self.base_url``.
            resp = client.post(
                url,
                cookies=self.cookies,
                json=first_payload,
                timeout=40,
            )
            print(f" | Status Code: {resp.status_code}")
            return resp

    def pagination(self, response):
        """Request every result page of every category and parse each one.

        The number of pages is worked out per category as
        ``ceil(TotalRecordCount / len(Bundles))``.  For the first entry of
        ``list_json_data`` the count is taken from *response* (the response
        ``fetch`` obtained for that same payload); for every later category
        its own page 1 is requested first and used for the count, so a
        category is never crawled with another category's page total.

        Args:
            response: Initial response for the first payload, as returned by
                ``fetch``.  Only its JSON body is read.
        """
        # One client for the whole crawl instead of a new one per page.
        with httpx.Client(headers=self.headers) as client:

            def post_page(json_data, page_no):
                print(f"HTTP POST request page {page_no}")
                return client.post(
                    self.base_url,
                    cookies=self.cookies,
                    # Send a copy so the shared class-level payload keeps its
                    # original pageNumber for later fetch() calls.
                    json={**json_data, "pageNumber": page_no},
                    timeout=40,
                )

            for index, json_data in enumerate(self.list_json_data):
                if index == 0:
                    sizing_response, first_page = response, 1
                else:
                    # Page 1 doubles as the size probe, so parse it right away
                    # and continue the crawl from page 2.
                    sizing_response = post_page(json_data, 1)
                    self.parse(sizing_response)
                    first_page = 2

                json_blob = sizing_response.json()
                products = json_blob["Bundles"]
                total_items = json_blob["TotalRecordCount"]
                # An empty page means there is nothing to crawl (and avoids a
                # division by zero).
                total_pages = math.ceil(total_items / len(products)) if products else 0

                for page_no in range(first_page, total_pages + 1):
                    self.parse(post_page(json_data, page_no))

    def parse(self, response):
        """Flatten every product in *response* into a dict and store it in ``all_info``.

        Args:
            response: Object whose ``json()`` returns the category payload.
                Its ``"Bundles"`` list is read, and the first entry of each
                bundle's ``"Products"`` list is the product that is recorded.

        Raises:
            KeyError: If a field other than the description is missing from
                the payload.
        """
        # Format date and time separately so the time has no leading space
        # (splitting "date, time" on the comma left one behind).
        scraped_date = now.strftime("%m/%d/%Y")
        scraped_time = now.strftime("%H:%M:%S")

        for prod in response.json()["Bundles"]:
            product = prod["Products"][0]
            item = {
                "Scraped_Date": scraped_date,
                "Scraped_Time": scraped_time,
                "Stock_Code": product["Stockcode"],
                "Product_Name": product["Name"],
                # The category fields are read as text such as '["Name"]';
                # peel off the surrounding brackets and quotes.
                "Product_Category": (
                    product["AdditionalAttributes"]["piescategorynamesjson"]
                    .strip("][")
                    .strip('"')
                ),
                "Sub_Category": (
                    product["AdditionalAttributes"]["piessubcategorynamesjson"]
                    .strip("][")
                    .strip('"')
                ),
                "Brand": product["Brand"],
                "Price/100g": product["CupPrice"],
                "Price": product["Price"],
                "Was_Price": product["WasPrice"],
                "Save": product["SavingsAmount"],
                "Size": product["PackageSize"],
            }

            try:
                # Drop carriage returns and line feeds -- not the letters
                # "r" and "n" -- before stripping the markup.
                description = (
                    product["AdditionalAttributes"]["description"]
                    .replace("\r", "")
                    .replace("\n", "")
                    .strip()
                )
                item["Description"] = remove_tags(description)
            except (KeyError, AttributeError, TypeError):
                # Description missing or not a string.
                item["Description"] = "N/A"

            item["Ingredients"] = product["AdditionalAttributes"]["ingredients"]
            item["Availability"] = (
                "InStock" if product["IsAvailable"] else "Out of Stock"
            )
            item["Image"] = product["LargeImageFile"]

            self.all_info.append(item)

    def to_csv(self):
        df = pd.DataFrame(self.all_info).fillna("N/A")

        df.to_csv(f"woolsworth.csv", index=False)

        print('Stored results to "woolsworth.csv"')

    def run(self):
        """Run the whole scrape: initial request, page crawl, then CSV export."""
        # The response returned by fetch() is what pagination() sizes the crawl from.
        self.pagination(self.fetch(self.base_url))
        self.to_csv()


if __name__ == "__main__":
    # Script entry point: build the scraper and run the full crawl.
    WoolsWorthScraper().run()

Every hour I have to copy the request as cURL from the browser's developer tools and paste the fresh cookies into the script to make it work again. Is there any way to avoid this manual cookie copy-pasting?

Asked By: X-somtheing

||

Answers:

You could run a real browser via Playwright or Selenium, take the cookies from it, and then continue scraping with requests (or httpx, as in the question) using those cookies.

Here is an example of how to get the cookies of a given website using Playwright:

from playwright.sync_api import sync_playwright

def get_site_cookies(url: str) -> dict:
    """Open *url* in Chromium (launched with ``headless=False``) and return its cookies.

    The result maps each cookie name to its value, read from the page's
    browser context right after navigation.
    """
    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch(headless=False)
        tab = chromium.new_page()
        tab.goto(url)
        jar = {}
        for cookie in tab.context.cookies():
            jar[cookie['name']] = cookie['value']
        chromium.close()

    return jar

# Example: collect and print the cookies for the dog & puppy category page.
url = 'https://www.woolworths.com.au/shop/browse/pet/dog-puppy'
print(get_site_cookies(url))

This code will print something like this:

{'AKA_A2': 'A', 'bm_sz': 'D9250E4256DA8BEC219C6350D3972AFD~....
Answered By: Alex Kosh
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.