Get and print meta content from a website
Question:
How do I get and print the meta content?
Like the profile picture(avator circle)title, username(subTitle), description(des), followers(val), following(val), instagram(profile-link_media), website(profile-link_href) and list of video urls if available.
Code:
# Asker's first attempt: download the profile page and dump the parsed HTML.
from bs4 import BeautifulSoup
import requests
url = "https://likee.video/@HouseofB"
# Plain GET of the profile page.
req = requests.get(url)
# Parse the raw response bytes with Python's built-in "html.parser" backend.
soup = BeautifulSoup(req.content, "html.parser")
# Prints the entire parsed document, not any individual field.
print(soup)
I am trying to do something like:
# Asker's second attempt: locate the description <meta> tag in the page.
from bs4 import BeautifulSoup
import requests
url = "https://likee.video/@HouseofB"
req = requests.get(url)
soup = BeautifulSoup(req.content, "html.parser")
# Look up a <meta> tag whose name attribute is "description".
des = soup.find('meta', attrs={'name':'description'})
# Prints whatever find() returned (the tag itself), not its "content" attribute value.
print(des)
But I can’t quite get it to work and retrieve the information listed above.
I want to print it like this:
Profile picture: url
User: HouseofBrooklyn
Userid: @HouseofB
Followers: 8.6M
Likes: 64.0M
Instagram: url
Website: url
Videos:
list of urls
In advance, thank you for your kind help at teaching me!
Answers:
A meta tag will typically look like
<meta name="description" content="description info"/>
You should try to grab the content attribute.
# Look up the <meta name="description"> tag in the already-parsed page (`soup`).
des = soup.find('meta', attrs={'name': 'description'})
# find() yields None when no tag matches, and a tag may lack a content attribute;
# subscripting blindly would raise, so check before printing the attribute value.
content = des.get('content') if des is not None else None
if content is not None:
    print(content)
else:
    print("No description meta tag with a content attribute was found")
You can get the desired data from the API, because the data is loaded dynamically by JavaScript via an API that uses the POST
method.
Example:
import requests

# Query the JSON API with a POST request instead of scraping the HTML page.
api_url = 'https://api.like-video.com/likee-activity-flow-micro/official_website/WebView/getProfileDetail'
payload = {"likeeId": "HouseofB"}
headers = {'content-type': 'application/json'}

# A timeout prevents the script from hanging on an unresponsive server, and
# raise_for_status() reports HTTP errors instead of failing later on a KeyError.
response = requests.post(api_url, headers=headers, json=payload, timeout=10)
response.raise_for_status()
req = response.json()
print(req)

# Every field used below is read from data -> userinfo in the response.
userinfo = req['data']['userinfo']
out = [{
    'User Name': userinfo['user_name'],
    'Followers': userinfo['fansCount'],
    'Link': userinfo['link'],
    'Likes': userinfo['allLikeCount'],
    'img_link': userinfo['data1'],
}]
print(out)
Building on @F.Hoque’s answer:
this code makes an extra POST
request to fetch the videos.
from pprint import pprint

import requests

likeeID = "HouseofB"
detail_api = "https://api.like-video.com/likee-activity-flow-micro/official_website/WebView/getProfileDetail"
videos_api = "https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo"
headers = {'content-type': 'application/json'}
# Seconds to wait for each request before giving up instead of hanging forever.
timeout = 10

# Details Fetch
# raise_for_status() turns an HTTP error into an exception here rather than a
# confusing JSON/KeyError further down.
detail_response = requests.post(
    detail_api, headers=headers, json={'likeeId': likeeID}, timeout=timeout
)
detail_response.raise_for_status()
userinfo = detail_response.json()['data']['userinfo']

# Videos Fetch
# The videos endpoint is keyed by the uid taken from the profile details.
# NOTE(review): "count" presumably caps how many videos are returned - confirm.
payload = {"uid": userinfo['uid'], "lastPostId": "", "tabType": 0, "count": 99}
videos_response = requests.post(
    videos_api, headers=headers, json=payload, timeout=timeout
)
videos_response.raise_for_status()
videos_res = videos_response.json()
videos = [video['videoUrl'] for video in videos_res['data']['videoList']]

# Merge
# Built from `userinfo` under a separate name so the raw API record is not
# overwritten while it is still being read.
details = {
    "Profile Picture": userinfo['data1'],  # Profile Picture URL
    "User": userinfo['nick_name'],  # Name
    "UserID": f"@{userinfo['user_name']}",  # username/likeeID
    "Followers": userinfo['fansCount'],  # exact count
    "Likes": userinfo['allLikeCount'],  # exact count
    "Website": userinfo['link'],  # website
    "Videos": videos,
}
pprint(details)
How do I get and print the meta content?
Like the profile picture(avator circle)title, username(subTitle), description(des), followers(val), following(val), instagram(profile-link_media), website(profile-link_href) and list of video urls if available.
Code:
# Asker's first attempt: download the profile page and dump the parsed HTML.
from bs4 import BeautifulSoup
import requests
url = "https://likee.video/@HouseofB"
# Plain GET of the profile page.
req = requests.get(url)
# Parse the raw response bytes with Python's built-in "html.parser" backend.
soup = BeautifulSoup(req.content, "html.parser")
# Prints the entire parsed document, not any individual field.
print(soup)
I am trying to do something like:
# Asker's second attempt: locate the description <meta> tag in the page.
from bs4 import BeautifulSoup
import requests
url = "https://likee.video/@HouseofB"
req = requests.get(url)
soup = BeautifulSoup(req.content, "html.parser")
# Look up a <meta> tag whose name attribute is "description".
des = soup.find('meta', attrs={'name':'description'})
# Prints whatever find() returned (the tag itself), not its "content" attribute value.
print(des)
But I can’t quite get it to work and retrieve the information listed above.
I want to print it like this:
Profile picture: url
User: HouseofBrooklyn
Userid: @HouseofB
Followers: 8.6M
Likes: 64.0M
Instagram: url
Website: url
Videos:
list of urls
In advance, thank you for your kind help at teaching me!
A meta tag will typically look like
<meta name="description" content="description info"/>
You should try to grab the content attribute.
# Look up the <meta name="description"> tag in the already-parsed page (`soup`).
des = soup.find('meta', attrs={'name': 'description'})
# find() yields None when no tag matches, and a tag may lack a content attribute;
# subscripting blindly would raise, so check before printing the attribute value.
content = des.get('content') if des is not None else None
if content is not None:
    print(content)
else:
    print("No description meta tag with a content attribute was found")
You can get the desired data from the API, because the data is loaded dynamically by JavaScript via an API that uses the POST
method.
Example:
import requests

# Query the JSON API with a POST request instead of scraping the HTML page.
api_url = 'https://api.like-video.com/likee-activity-flow-micro/official_website/WebView/getProfileDetail'
payload = {"likeeId": "HouseofB"}
headers = {'content-type': 'application/json'}

# A timeout prevents the script from hanging on an unresponsive server, and
# raise_for_status() reports HTTP errors instead of failing later on a KeyError.
response = requests.post(api_url, headers=headers, json=payload, timeout=10)
response.raise_for_status()
req = response.json()
print(req)

# Every field used below is read from data -> userinfo in the response.
userinfo = req['data']['userinfo']
out = [{
    'User Name': userinfo['user_name'],
    'Followers': userinfo['fansCount'],
    'Link': userinfo['link'],
    'Likes': userinfo['allLikeCount'],
    'img_link': userinfo['data1'],
}]
print(out)
Building on @F.Hoque’s answer:
this code makes an extra POST
request to fetch the videos.
from pprint import pprint

import requests

likeeID = "HouseofB"
detail_api = "https://api.like-video.com/likee-activity-flow-micro/official_website/WebView/getProfileDetail"
videos_api = "https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo"
headers = {'content-type': 'application/json'}
# Seconds to wait for each request before giving up instead of hanging forever.
timeout = 10

# Details Fetch
# raise_for_status() turns an HTTP error into an exception here rather than a
# confusing JSON/KeyError further down.
detail_response = requests.post(
    detail_api, headers=headers, json={'likeeId': likeeID}, timeout=timeout
)
detail_response.raise_for_status()
userinfo = detail_response.json()['data']['userinfo']

# Videos Fetch
# The videos endpoint is keyed by the uid taken from the profile details.
# NOTE(review): "count" presumably caps how many videos are returned - confirm.
payload = {"uid": userinfo['uid'], "lastPostId": "", "tabType": 0, "count": 99}
videos_response = requests.post(
    videos_api, headers=headers, json=payload, timeout=timeout
)
videos_response.raise_for_status()
videos_res = videos_response.json()
videos = [video['videoUrl'] for video in videos_res['data']['videoList']]

# Merge
# Built from `userinfo` under a separate name so the raw API record is not
# overwritten while it is still being read.
details = {
    "Profile Picture": userinfo['data1'],  # Profile Picture URL
    "User": userinfo['nick_name'],  # Name
    "UserID": f"@{userinfo['user_name']}",  # username/likeeID
    "Followers": userinfo['fansCount'],  # exact count
    "Likes": userinfo['allLikeCount'],  # exact count
    "Website": userinfo['link'],  # website
    "Videos": videos,
}
pprint(details)