How can I crawl the product items from the Shopee website?

Question:

I am trying to use Python to get product information such as the name and price.
But this time it doesn’t work, even though I checked the HTML code in the web browser's developer mode to get the class name and tried to use that name to get what I want.

But I got the result shown below: I cannot find any items with class_="col-xs-2-4 shopee-search-item-result__item". Should I add more header information?

the print result

import requests
import re
import pandas as pd
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import json

# Shop search page whose product tiles we try to read from the raw HTML.
url = 'https://shopee.tw/shop/1819984/search?shopCollection=9271157'

# Browser-like request headers: Host, User-Agent and a hard-coded Cookie string.
headers = {
    'Host': 'shopee.tw',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
    'Cookie': 'SPC_IA=-1; SPC_EC=-; SPC_F=L07IMDECRHjifEKyg7XuNCJ00GNdJGTA; REC_T_ID=246cfcdc-18fa-11ea-b254-f8f21e2be0b8; SPC_T_ID="Fyr1skVDq7FDiJOuTYHBmMfMr2Cw1eZyPbYJhBYoRmf/gvfvkOf5zgjIVXLrYYlg32aSx1PfmhWq7QsQzwM86mdeXG8VU7ERK4N+gfPFd14="; SPC_U=-; SPC_T_IV="/oJN8EB7iQwg7+n5mXd6cw=="; _gcl_au=1.1.788704691.1575727322; _fbp=fb.1.1575727322914.443117835; _ga=GA1.2.1422761069.1575727324; __BWfp=c1575727332595xf5a099d8b; cto_lwid=7ea874b3-f31f-47d7-aef9-60eed0156d33; cto_bundle=0tgQ7V9rU3JlRTU4aWlTc09JNXRaN014Y3ZXa1BtVVcwT2RhOU1UZ0tweUFvWUo2WHRPQjd0JTJCM1duaG5iWXFFRWxpbHZkTFluWUZLSEFudTFreGJueFoxU0EyanhnMWN6ZEVIUVV6cFlhd050emhFMWQ4bmhVelZwVSUyRmwwQUp5c29lOEhPT2ZobE10S1dvT09HYWNhVXV1YWx5R3dSOGw0MHcwZWpiZ2pXU2VHSzdrJTNE; _med=refer; G_ENABLED_IDPS=google; fbm_382498665271383=base_domain=.shopee.tw; SPC_SI=jq6hwq6ju6hig9hfulumcagdqaiopatc; _gid=GA1.2.143857303.1577796150; csrftoken=3Pya3o5WYEvhLOj9FqCqbV3angfwBlko; AMP_TOKEN=%24NOT_FOUND; _dc_gtm_UA-61915057-6=1',
}

# Fetch the page, following any redirects.
r = requests.get(url, headers=headers, allow_redirects=True)

# Report the status code, the redirect history and the final URL, in that order.
for detail in (r.status_code, r.history, r.url):
    print(detail)

# Parse the returned markup and count the <div> elements that carry the
# product-tile classes.
soup = BeautifulSoup(r.text, 'html.parser')
items = soup.find_all("div", class_="col-xs-2-4 shopee-search-item-result__item")
print(len(items))
```**strong text**
Asked By: Frederic Chang

||

Answers:

This page uses JavaScript to display items, but BeautifulSoup/requests can’t run JavaScript.

Using DevTools in Firefox/Chrome (tab "Network") I found the URL used by JavaScript to get data from the server as JSON, so it doesn’t even need BeautifulSoup.

To work correctly it needs all these headers.

Without User-Agent and X-Requested-With it sends empty data.
Without Referer it doesn’t send prices.

import requests

# JSON endpoint that the shop page's JavaScript calls to load its items
# (found in the browser DevTools "Network" tab), so no HTML parsing is needed.
url = 'https://shopee.tw/api/v2/search_items/?by=pop&limit=30&match_id=1819984&newest=0&order=desc&page_type=shop&shop_categoryids=9271157&version=2'

# All three headers are required:
# - without User-Agent and X-Requested-With the server sends empty data,
# - without Referer it doesn't send prices.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'https://shopee.tw/shop/1819984/search?shopCollection=9271157',
}

# requests has no default timeout; set one so the script cannot hang forever
# if the server never answers.
r = requests.get(url, headers=headers, timeout=10)

# Fail with a clear HTTP error instead of trying to decode an error page as JSON.
r.raise_for_status()

data = r.json()

#print(data['items'][0].keys())

# NOTE: the 'prince:' label is a misspelling of 'price:'; it is left unchanged
# so the output keeps matching the sample result shown for this script.
# NOTE(review): price looks like a scaled integer (e.g. 4900000 with currency
# "TWD") - confirm the unit before presenting it to users.
for item in data['items']:
    print('name:', item['name'])
    print('prince:', item['price'])
    print('sold:', item['historical_sold'])
    print('---')

#print(data['items'][0]) # for test only

Result:

name: 『現貨+預購』 Balea 精華膠囊 7 入
prince: 4900000
sold: 5104
---
name:  現貨供應  德國 Invisibobble 神奇魔髮圈流線魔髮圈
prince: 7500000
sold: 26
---

BTW: for testing, to see all values you can use the json module to format the output with indentation.

import json

# Pretty-print the first item with 4-space indentation to inspect all its fields.
first_item = data['items'][0]
print(json.dumps(first_item, indent=4))

Result:

{
    "itemid": 1212735748,
    "welcome_package_info": null,
    "liked": false,
    "recommendation_info": null,
    "bundle_deal_info": null,
    "price_max_before_discount": -1,
    "image": "338673ff6f2b23d63514e5af85269d46",
    "is_cc_installment_payment_eligible": false,
    "shopid": 1819984,
    "can_use_wholesale": true,
    "group_buy_info": null,
    "reference_item_id": "",
    "currency": "TWD",
    "raw_discount": null,
    "show_free_shipping": false,
    "video_info_list": [],
    "ads_keyword": null,
    "collection_id": null,
    "images": [
        "338673ff6f2b23d63514e5af85269d46"
    ],
    "match_type": null,
    "price_before_discount": 0,
    "is_category_failed": false,
    "show_discount": 0,
    "cmt_count": 306,
    "view_count": 93,
    "display_name": null,
    "catid": 67,
    "json_data": null,
    "upcoming_flash_sale": null,
    "is_official_shop": false,
    "brand": "Dm Ebelin",
    "price_min": 4900000,
    "liked_count": 136,
    "can_use_bundle_deal": false,
    "show_official_shop_label": false,
    "coin_earn_label": null,
    "price_min_before_discount": -1,
    "cb_option": 0,
    "sold": 0,
    "deduction_info": null,
    "stock": 3647,
    "status": 1,
    "price_max": 4900000,
    "add_on_deal_info": null,
    "is_group_buy_item": null,
    "flash_sale": null,
    "price": 4900000,
    "shop_location": "\u53f0\u4e2d\u5e02\u6f6d\u5b50\u5340",
    "item_rating": {
        "rating_star": 4.996732,
        "rating_count": [
            306,
            0,
            0,
            0,
            1,
            305
        ],
        "rcount_with_image": 11,
        "rcount_with_context": 139
    },
    "show_official_shop_label_in_title": false,
    "tier_variations": [],
    "is_adult": null,
    "discount": null,
    "flag": 65536,
    "is_non_cc_installment_payment_eligible": false,
    "has_lowest_price_guarantee": false,
    "has_group_buy_stock": false,
    "preview_info": null,
    "welcome_package_type": 0,
    "name": "\u300e\u73fe\u8ca8+\u9810\u8cfc\u300f Balea \u7cbe\u83ef\u81a0\u56ca 7 \u5165",
    "distance": null,
    "adsid": null,
    "ctime": 1527866201,
    "wholesale_tier_list": [
        {
            "min_count": 150,
            "price": 4700000,
            "max_count": 300
        },
        {
            "min_count": 301,
            "price": 4600000,
            "max_count": 1000
        },
        {
            "min_count": 1001,
            "price": 4500000,
            "max_count": null
        }
    ],
    "show_shopee_verified_label": false,
    "campaignid": null,
    "show_official_shop_label_in_normal_position": null,
    "item_status": "normal",
    "shopee_verified": false,
    "hidden_price_display": null,
    "size_chart": null,
    "item_type": 0,
    "shipping_icon_type": null,
    "campaign_stock": null,
    "label_ids": [],
    "service_by_shopee_flag": 0,
    "badge_icon_type": 0,
    "historical_sold": 5104,
    "transparent_background_image": ""
}
Answered By: furas