How Does the api.get_retweeter_ids() Actually Work (Tweepy Python)?
Question:
I am really new to twitter api, and I’ve been trying to get a list of IDs of everyone that retweeted a specific tweet.
After several attempts I can't get `api.get_retweeter_ids` to fetch every ID — it always stops after only a few. I know there is a limit of 100 IDs per request, but the function just ends after getting around 50-90 IDs on a tweet with roughly 30k retweets.
Here is my code
def get_user_ids_by_retweets(tweetid):
    """Return the user IDs of everyone who retweeted the tweet *tweetid*.

    Iterates the Twitter API v1.1 ``get_retweeter_ids`` endpoint through a
    ``tweepy.Cursor`` so paging is handled automatically.

    NOTE(review): v1.1 caps this endpoint at ~100 IDs per request and a
    limited total — presumably why it stops early on very popular tweets.
    """
    retweeters_ids = []
    for i, _id in enumerate(tweepy.Cursor(api.get_retweeter_ids, id=tweetid).items()):
        retweeters_ids.append(_id)
        print(i, _id)
    # Dropped the unused `df = pd.DataFrame(retweeters_ids)` dead code.
    return retweeters_ids
Answers:
Demo: get the full list of retweeting users (name, id and username)
https://twitter.com/Nike/status/1582388225049780230/retweets
code
import tweepy
import json
def get_user_ids_by_retweets(tweet_id):
    """Collect name, id and username of every user who retweeted *tweet_id*.

    Pages through the Twitter API v2 ``get_retweeters`` endpoint, following
    the ``next_token`` from each response's meta until none is returned.

    Returns a list of dicts with keys ``name``, ``id`` and ``username``.
    """
    # get client with token
    bearer_token = "*************************"
    client = tweepy.Client(bearer_token=bearer_token)

    listUser = []
    next_token = None
    while True:
        # First iteration passes pagination_token=None, i.e. the first page.
        retweeters = client.get_retweeters(id=tweet_id, pagination_token=next_token)
        # data can be None (e.g. a tweet with no retweets) — guard every page.
        if retweeters.data is not None:
            for retweeter in retweeters.data:
                listUser.append({
                    "name": retweeter.name,
                    "id": retweeter.id,
                    "username": retweeter.username
                })
        # Use .get(): the last page's meta has no 'next_token' key, and the
        # original meta['next_token'] lookup raised KeyError there.
        next_token = retweeters.meta.get('next_token')
        if next_token is None:
            break
    return listUser
def obj_dict(obj):
    """JSON ``default`` hook: serialize an object via its attribute dict."""
    return vars(obj)
# Entry point: dump every retweeter of the Nike tweet as pretty-printed JSON.
tweet_id = "1582388225049780230"
listUser = get_user_ids_by_retweets(tweet_id)
print(json.dumps(listUser, indent=4, default=obj_dict))
Result
[
{
"name": "valmig",
"id": 1594136795905593344,
"username": "AngelVa00615402"
},
{
"name": "Wyatt Jones",
"id": 764734669434871808,
"username": "TheGhostZeus"
},
{
"name": "Prime Projects",
"id": 1603887705242435584,
"username": "PrimeProjects4"
},
... removed
{
"name": "Ryan Maldonado",
"id": 1419009007688224768,
"username": "RyanMal87509518"
},
{
"name": "Jimmy Daugherty",
"id": 20888017,
"username": "JimmyDaugherty"
},
{
"name": "Nike Basketball",
"id": 5885732,
"username": "nikebasketball"
}
]
Main Idea
The Twitter API returns a limited number of retweeters per request, together with a `next_token` in the response meta. Passing that token back as `pagination_token` fetches the next page, so you can collect every retweeter by paging until `next_token` is absent. In the demo below, requests #1 and #4 each fetch two retweeters along with a `next_token`, and together they match the four retweeters returned by request #6.
import tweepy
# Demo: page through retweeters two at a time, then fetch four at once —
# pages from requests #1 and #4 together equal the result of request #6.
bearer_token = "*************************"
client = tweepy.Client(bearer_token=bearer_token)
tweet_id = "1582388225049780230"

print("#1 -------- Get first two tweeter -------------------------")
retweeters = client.get_retweeters(id=tweet_id, max_results=2)

print("#2 -------- Show Meta --------------------")
print(retweeters.meta)
print(" ")

print("#3 -------- print two -------------------------")
for retweeter in retweeters.data:
    print(retweeter.name, " -> ", retweeter.id, ",", retweeter.username)
print(" ")

print("#4 ---------Get Next two tweeter ---------------------------")
# Resume from the token returned by the previous response's meta.
retweeters = client.get_retweeters(id=tweet_id, pagination_token=retweeters.meta['next_token'], max_results=2)
print(retweeters.meta)
print(" ")

print("#5 -------- print two -------------------------")
for retweeter in retweeters.data:
    print(retweeter.name, " -> ", retweeter.id, ",", retweeter.username)
print(" ")

print("#6 --- Get First four tweeter == are same #1 + #2 ---------")
retweeters = client.get_retweeters(id=tweet_id, max_results=4)
print(" ")

print("#7 -------- print four -------------------------")
for retweeter in retweeters.data:
    print(retweeter.name, " -> ", retweeter.id, ",", retweeter.username)
$ python retweet.py
#1 -------- Get first two tweeter -------------------------
#2 -------- Show Meta --------------------
{'result_count': 2, 'next_token': '7140dibdnow9c7btw4827c3yb0pfg7mg4qq12dn59ot9s'}
#3 -------- print two -------------------------
valmig -> 1594136795905593344 , AngelVa00615402
Wyatt Jones -> 764734669434871808 , TheGhostZeus
#4 ---------Get Next two tweeter ---------------------------
{'result_count': 2, 'next_token': '7140dibdnow9c7btw4827c3nilr9nqckqkuxdzj3u7pkn', 'previous_token': '77qpymm88g5h9vqkluxdnrmaxhecakrtbzn80cd5hizht'}
#5 -------- print two -------------------------
Prime Projects -> 1603887705242435584 , PrimeProjects4
Joshua Paul Hudson -> 847275330 , JoshswiftJoshua
#6 --- Get First four tweeter == are same #1 + #2 ---------
#7 -------- print four -------------------------
valmig -> 1594136795905593344 , AngelVa00615402
Wyatt Jones -> 764734669434871808 , TheGhostZeus
Prime Projects -> 1603887705242435584 , PrimeProjects4
Joshua Paul Hudson -> 847275330 , JoshswiftJoshua
References
I would avoid managing the tokens manually, if not needed. The Paginator
is the tool for it (it’s the API V2 version of the API V1.1 Cursor
that you’ve tried to use). If you are sure that the amount of retweets is covered by the currently available number of requests (default is 100 retweeters per request) then you could try the following (it’s the equivalent to the other answer):
def get_user_ids_by_retweets(tweet_id):
    """Return all retweeters of *tweet_id* as plain dicts.

    ``tweepy.Paginator`` handles the next_token bookkeeping; ``flatten()``
    yields the individual user entries across all pages.
    """
    client = tweepy.Client(BEARER_TOKEN, return_type=dict)
    return list(tweepy.Paginator(client.get_retweeters, tweet_id).flatten())
If you’re not sure about it but just want to give it a try without losing any already-retrieved retweeters, then you could use this variation, which catches the corresponding tweepy.errors.TooManyRequests
exception:
def get_user_ids_by_retweets(tweet_id):
    """Return as many retweeters of *tweet_id* as the rate limit allows.

    Pages with ``tweepy.Paginator``; if the rate limit is hit, keeps
    whatever was retrieved so far instead of losing it.
    """
    client = tweepy.Client(BEARER_TOKEN, return_type=dict)
    users = []
    try:
        for page in tweepy.Paginator(client.get_retweeters, tweet_id):
            # A page with no retweeters has no "data" key — default to [].
            users.extend(page.get("data", []))
    except tweepy.errors.TooManyRequests:
        print("Too many requests, couldn't retrieve all retweeters.")
    return users
If you want to make sure that you get all retweeters, then you could add a waiting period that is tailored to your access level (if you’re using the free version then you should have 75 requests per 15 minutes, i.e. after reaching the limit you need to wait 60 * 15 seconds). Here you need to use the token to re-enter at the point where you left in case the rate limit was reached:
from time import sleep
DURATION = 60 * 15 + 5  # free tier: 75 requests / 15 min; wait a window plus a buffer


def get_user_ids_by_retweets(tweet_id):
    """Return ALL retweeters of *tweet_id*, sleeping through rate limits.

    Remembers the last seen ``next_token`` so that after a
    ``TooManyRequests`` error the pagination re-enters exactly where it
    left off instead of restarting from the first page.
    """
    client = tweepy.Client(BEARER_TOKEN, return_type=dict)
    users, token = [], None
    while True:
        pages = tweepy.Paginator(
            client.get_retweeters, tweet_id, pagination_token=token
        )
        try:
            for page in pages:
                users.extend(page.get("data", []))
                token = page["meta"].get("next_token", None)
            # Checked AFTER the for: the original break inside the loop only
            # exited the for, so `while True` restarted from page one forever.
            if token is None:
                break
        except tweepy.errors.TooManyRequests:
            print("Request rate limit reached, taking a nap.")
            sleep(DURATION)
    return users
I am really new to twitter api, and I’ve been trying to get a list of IDs of everyone that retweeted a specific tweet.
After several attempts I can't get `api.get_retweeter_ids` to fetch every ID — it always stops after only a few. I know there is a limit of 100 IDs per request, but the function just ends after getting around 50-90 IDs on a tweet with roughly 30k retweets.
Here is my code
def get_user_ids_by_retweets(tweetid):
    """Return the user IDs of everyone who retweeted the tweet *tweetid*.

    Iterates the Twitter API v1.1 ``get_retweeter_ids`` endpoint through a
    ``tweepy.Cursor`` so paging is handled automatically.

    NOTE(review): v1.1 caps this endpoint at ~100 IDs per request and a
    limited total — presumably why it stops early on very popular tweets.
    """
    retweeters_ids = []
    for i, _id in enumerate(tweepy.Cursor(api.get_retweeter_ids, id=tweetid).items()):
        retweeters_ids.append(_id)
        print(i, _id)
    # Dropped the unused `df = pd.DataFrame(retweeters_ids)` dead code.
    return retweeters_ids
Demo: get the full list of retweeting users (name, id and username)
https://twitter.com/Nike/status/1582388225049780230/retweets
code
import tweepy
import json
def get_user_ids_by_retweets(tweet_id):
    """Collect name, id and username of every user who retweeted *tweet_id*.

    Pages through the Twitter API v2 ``get_retweeters`` endpoint, following
    the ``next_token`` from each response's meta until none is returned.

    Returns a list of dicts with keys ``name``, ``id`` and ``username``.
    """
    # get client with token
    bearer_token = "*************************"
    client = tweepy.Client(bearer_token=bearer_token)

    listUser = []
    next_token = None
    while True:
        # First iteration passes pagination_token=None, i.e. the first page.
        retweeters = client.get_retweeters(id=tweet_id, pagination_token=next_token)
        # data can be None (e.g. a tweet with no retweets) — guard every page.
        if retweeters.data is not None:
            for retweeter in retweeters.data:
                listUser.append({
                    "name": retweeter.name,
                    "id": retweeter.id,
                    "username": retweeter.username
                })
        # Use .get(): the last page's meta has no 'next_token' key, and the
        # original meta['next_token'] lookup raised KeyError there.
        next_token = retweeters.meta.get('next_token')
        if next_token is None:
            break
    return listUser
def obj_dict(obj):
    """JSON ``default`` hook: serialize an object via its attribute dict."""
    return vars(obj)
# Entry point: dump every retweeter of the Nike tweet as pretty-printed JSON.
tweet_id = "1582388225049780230"
listUser = get_user_ids_by_retweets(tweet_id)
print(json.dumps(listUser, indent=4, default=obj_dict))
Result
[
{
"name": "valmig",
"id": 1594136795905593344,
"username": "AngelVa00615402"
},
{
"name": "Wyatt Jones",
"id": 764734669434871808,
"username": "TheGhostZeus"
},
{
"name": "Prime Projects",
"id": 1603887705242435584,
"username": "PrimeProjects4"
},
... removed
{
"name": "Ryan Maldonado",
"id": 1419009007688224768,
"username": "RyanMal87509518"
},
{
"name": "Jimmy Daugherty",
"id": 20888017,
"username": "JimmyDaugherty"
},
{
"name": "Nike Basketball",
"id": 5885732,
"username": "nikebasketball"
}
]
Main Idea
The Twitter API returns a limited number of retweeters per request, together with a `next_token` in the response meta. Passing that token back as `pagination_token` fetches the next page, so you can collect every retweeter by paging until `next_token` is absent. In the demo below, requests #1 and #4 each fetch two retweeters along with a `next_token`, and together they match the four retweeters returned by request #6.
import tweepy
# Demo: page through retweeters two at a time, then fetch four at once —
# pages from requests #1 and #4 together equal the result of request #6.
bearer_token = "*************************"
client = tweepy.Client(bearer_token=bearer_token)
tweet_id = "1582388225049780230"

print("#1 -------- Get first two tweeter -------------------------")
retweeters = client.get_retweeters(id=tweet_id, max_results=2)

print("#2 -------- Show Meta --------------------")
print(retweeters.meta)
print(" ")

print("#3 -------- print two -------------------------")
for retweeter in retweeters.data:
    print(retweeter.name, " -> ", retweeter.id, ",", retweeter.username)
print(" ")

print("#4 ---------Get Next two tweeter ---------------------------")
# Resume from the token returned by the previous response's meta.
retweeters = client.get_retweeters(id=tweet_id, pagination_token=retweeters.meta['next_token'], max_results=2)
print(retweeters.meta)
print(" ")

print("#5 -------- print two -------------------------")
for retweeter in retweeters.data:
    print(retweeter.name, " -> ", retweeter.id, ",", retweeter.username)
print(" ")

print("#6 --- Get First four tweeter == are same #1 + #2 ---------")
retweeters = client.get_retweeters(id=tweet_id, max_results=4)
print(" ")

print("#7 -------- print four -------------------------")
for retweeter in retweeters.data:
    print(retweeter.name, " -> ", retweeter.id, ",", retweeter.username)
$ python retweet.py
#1 -------- Get first two tweeter -------------------------
#2 -------- Show Meta --------------------
{'result_count': 2, 'next_token': '7140dibdnow9c7btw4827c3yb0pfg7mg4qq12dn59ot9s'}
#3 -------- print two -------------------------
valmig -> 1594136795905593344 , AngelVa00615402
Wyatt Jones -> 764734669434871808 , TheGhostZeus
#4 ---------Get Next two tweeter ---------------------------
{'result_count': 2, 'next_token': '7140dibdnow9c7btw4827c3nilr9nqckqkuxdzj3u7pkn', 'previous_token': '77qpymm88g5h9vqkluxdnrmaxhecakrtbzn80cd5hizht'}
#5 -------- print two -------------------------
Prime Projects -> 1603887705242435584 , PrimeProjects4
Joshua Paul Hudson -> 847275330 , JoshswiftJoshua
#6 --- Get First four tweeter == are same #1 + #2 ---------
#7 -------- print four -------------------------
valmig -> 1594136795905593344 , AngelVa00615402
Wyatt Jones -> 764734669434871808 , TheGhostZeus
Prime Projects -> 1603887705242435584 , PrimeProjects4
Joshua Paul Hudson -> 847275330 , JoshswiftJoshua
References
I would avoid managing the tokens manually, if not needed. The Paginator
is the tool for it (it’s the API V2 version of the API V1.1 Cursor
that you’ve tried to use). If you are sure that the amount of retweets is covered by the currently available number of requests (default is 100 retweeters per request) then you could try the following (it’s the equivalent to the other answer):
def get_user_ids_by_retweets(tweet_id):
    """Return all retweeters of *tweet_id* as plain dicts.

    ``tweepy.Paginator`` handles the next_token bookkeeping; ``flatten()``
    yields the individual user entries across all pages.
    """
    client = tweepy.Client(BEARER_TOKEN, return_type=dict)
    return list(tweepy.Paginator(client.get_retweeters, tweet_id).flatten())
If you’re not sure about it but just want to give it a try without losing any already-retrieved retweeters, then you could use this variation, which catches the corresponding tweepy.errors.TooManyRequests
exception:
def get_user_ids_by_retweets(tweet_id):
    """Return as many retweeters of *tweet_id* as the rate limit allows.

    Pages with ``tweepy.Paginator``; if the rate limit is hit, keeps
    whatever was retrieved so far instead of losing it.
    """
    client = tweepy.Client(BEARER_TOKEN, return_type=dict)
    users = []
    try:
        for page in tweepy.Paginator(client.get_retweeters, tweet_id):
            # A page with no retweeters has no "data" key — default to [].
            users.extend(page.get("data", []))
    except tweepy.errors.TooManyRequests:
        print("Too many requests, couldn't retrieve all retweeters.")
    return users
If you want to make sure that you get all retweeters, then you could add a waiting period that is tailored to your access level (if you’re using the free version then you should have 75 requests per 15 minutes, i.e. after reaching the limit you need to wait 60 * 15 seconds). Here you need to use the token to re-enter at the point where you left in case the rate limit was reached:
from time import sleep
DURATION = 60 * 15 + 5  # free tier: 75 requests / 15 min; wait a window plus a buffer


def get_user_ids_by_retweets(tweet_id):
    """Return ALL retweeters of *tweet_id*, sleeping through rate limits.

    Remembers the last seen ``next_token`` so that after a
    ``TooManyRequests`` error the pagination re-enters exactly where it
    left off instead of restarting from the first page.
    """
    client = tweepy.Client(BEARER_TOKEN, return_type=dict)
    users, token = [], None
    while True:
        pages = tweepy.Paginator(
            client.get_retweeters, tweet_id, pagination_token=token
        )
        try:
            for page in pages:
                users.extend(page.get("data", []))
                token = page["meta"].get("next_token", None)
            # Checked AFTER the for: the original break inside the loop only
            # exited the for, so `while True` restarted from page one forever.
            if token is None:
                break
        except tweepy.errors.TooManyRequests:
            print("Request rate limit reached, taking a nap.")
            sleep(DURATION)
    return users