How to fix data storing?
Question:
I'm a beginner and I just created a pagination loop for the YouTube Data API search list that returns 100 YouTube search results, but when the results need to be converted into a pandas DataFrame, only the last part of the returned data is used.
For example, if I set max results to 40 (instead of 50), I get only 30 lines back.
How can I fix the way the data is stored in my variables?
# Imports, Colab authentication and API client setup.
# NOTE(review): indentation was lost when this snippet was extracted, so every
# statement is reproduced at column 0. According to the accompanying answer,
# the search request belongs inside the `for x in range(2):` loop, while the
# `ids = ...` / `videos().list(...)` part runs after the loop has finished.
from google.colab import auth
auth.authenticate_user()
import gspread
from google.auth import default
creds, _ = default()
gc = gspread.authorize(creds)
# Notebook shell escape (`!`): installs the Google API client. This line is
# not valid in a plain .py file.
!pip install google-api-python-client
from googleapiclient.discovery import build
import pandas as pd
import seaborn as sb
import csv
import re
import requests
import numpy as np
from google.colab import data_table
data_table.enable_dataframe_formatter()
from google.colab import drive
# API key (redacted in the post).
api_key = "***"
# Same import as above, repeated.
from googleapiclient.discovery import build
from pprint import PrettyPrinter
from google.colab import files
# Client for the YouTube Data API v3, authenticated with the developer key.
youtube = build('youtube','v3',developerKey = api_key)
#print(type(youtube))
pp = PrettyPrinter()
# Page token sent with the first search request (empty string).
nextPageToken = ''
# Two iterations: one search request per iteration, up to 50 results each.
for x in range(2):
# NOTE(review): `query` is not defined anywhere in this snippet; presumably
# it is set in another notebook cell.
request = youtube.search().list(
q = query,
part='id',
maxResults=50,
order="date",
# publishedAfter='2022-05-09T00:00:00.000Z',
# publishedBefore='2022-07-09T00:00:00.000Z',
pageToken=nextPageToken,
type='video')
print(type(request))
res = request.execute()
pp.pprint(res)
# Keep the token for the following page when the response provides one.
if 'nextPageToken' in res:
nextPageToken = res['nextPageToken']
# Video IDs taken from `res`, i.e. from whichever search response was
# assigned last.
ids = [item['id']['videoId'] for item in res['items']]
# Fetch the snippet metadata for those IDs and print a few fields per video.
results = youtube.videos().list(id=ids, part='snippet').execute()
for result in results.get('items', []):
print(result ['id'])
print(result ['snippet']['channelTitle'])
print(result ['snippet']['title'])
print(result ['snippet']['description'])
Answers:
Your issue doesn't seem to be related to pandas.
After a for loop finishes, Python keeps the variables that were assigned inside it, holding the values from the last iteration. This is the reason why
print(result ['id'])
print(result ['snippet']['channelTitle'])
print(result ['snippet']['title'])
print(result ['snippet']['description'])
is only executed 50 times (that is, the maxResults you pass to Search: list) and not 100 times (even though you called Search: list twice).
If you want to call Videos: list with the ids that you just retrieved from Search: list, then indenting your last code snippet so that it runs inside the for loop should do the trick. That way you would have:
for x in range(2):
    # ... build and execute the search request and update nextPageToken,
    # exactly as in the question ...

    # Indented into the loop body: this now runs once per page of search
    # results instead of once after the loop with only the last `res`.
    ids = [item['id']['videoId'] for item in res['items']]
    results = youtube.videos().list(id=ids, part='snippet').execute()
    for result in results.get('items', []):
        print(result ['id'])
        print(result ['snippet']['channelTitle'])
        print(result ['snippet']['title'])
        print(result ['snippet']['description'])
I'm a beginner and I just created a pagination loop for the YouTube Data API search list that returns 100 YouTube search results, but when the results need to be converted into a pandas DataFrame, only the last part of the returned data is used.
For example, if I set max results to 40 (instead of 50), I get only 30 lines back.
How can I fix the way the data is stored in my variables?
# Imports, Colab authentication and API client setup.
# NOTE(review): indentation was lost when this snippet was extracted, so every
# statement is reproduced at column 0. According to the accompanying answer,
# the search request belongs inside the `for x in range(2):` loop, while the
# `ids = ...` / `videos().list(...)` part runs after the loop has finished.
from google.colab import auth
auth.authenticate_user()
import gspread
from google.auth import default
creds, _ = default()
gc = gspread.authorize(creds)
# Notebook shell escape (`!`): installs the Google API client. This line is
# not valid in a plain .py file.
!pip install google-api-python-client
from googleapiclient.discovery import build
import pandas as pd
import seaborn as sb
import csv
import re
import requests
import numpy as np
from google.colab import data_table
data_table.enable_dataframe_formatter()
from google.colab import drive
# API key (redacted in the post).
api_key = "***"
# Same import as above, repeated.
from googleapiclient.discovery import build
from pprint import PrettyPrinter
from google.colab import files
# Client for the YouTube Data API v3, authenticated with the developer key.
youtube = build('youtube','v3',developerKey = api_key)
#print(type(youtube))
pp = PrettyPrinter()
# Page token sent with the first search request (empty string).
nextPageToken = ''
# Two iterations: one search request per iteration, up to 50 results each.
for x in range(2):
# NOTE(review): `query` is not defined anywhere in this snippet; presumably
# it is set in another notebook cell.
request = youtube.search().list(
q = query,
part='id',
maxResults=50,
order="date",
# publishedAfter='2022-05-09T00:00:00.000Z',
# publishedBefore='2022-07-09T00:00:00.000Z',
pageToken=nextPageToken,
type='video')
print(type(request))
res = request.execute()
pp.pprint(res)
# Keep the token for the following page when the response provides one.
if 'nextPageToken' in res:
nextPageToken = res['nextPageToken']
# Video IDs taken from `res`, i.e. from whichever search response was
# assigned last.
ids = [item['id']['videoId'] for item in res['items']]
# Fetch the snippet metadata for those IDs and print a few fields per video.
results = youtube.videos().list(id=ids, part='snippet').execute()
for result in results.get('items', []):
print(result ['id'])
print(result ['snippet']['channelTitle'])
print(result ['snippet']['title'])
print(result ['snippet']['description'])
Your issue doesn't seem to be related to pandas.
After a for loop finishes, Python keeps the variables that were assigned inside it, holding the values from the last iteration. This is the reason why
print(result ['id'])
print(result ['snippet']['channelTitle'])
print(result ['snippet']['title'])
print(result ['snippet']['description'])
is only executed 50 times (that is, the maxResults you pass to Search: list) and not 100 times (even though you called Search: list twice).
If you want to call Videos: list with the ids that you just retrieved from Search: list, then indenting your last code snippet so that it runs inside the for loop should do the trick. That way you would have:
for x in range(2):
    # ... build and execute the search request and update nextPageToken,
    # exactly as in the question ...

    # Indented into the loop body: this now runs once per page of search
    # results instead of once after the loop with only the last `res`.
    ids = [item['id']['videoId'] for item in res['items']]
    results = youtube.videos().list(id=ids, part='snippet').execute()
    for result in results.get('items', []):
        print(result ['id'])
        print(result ['snippet']['channelTitle'])
        print(result ['snippet']['title'])
        print(result ['snippet']['description'])