How to use boto3 in an async function (Python)
Question:
I am developing a web scraper with Playwright and want to upload images to AWS S3 asynchronously,
but boto3 is not an async library. How can I fix this?
class Boto3:
    """Thin wrapper around a boto3 S3 client."""

    def __init__(self, key, id):
        """Create the underlying S3 client.

        Args:
            key: AWS secret access key.
            id: AWS access key id. (The name shadows the ``id`` builtin; it
                is kept so existing callers keep working.)
        """
        # Use the constructor arguments; the earlier version referenced the
        # undefined names ``aws_key_id`` / ``aws_secret``.
        self.S3 = boto3.client('s3', aws_access_key_id=id, aws_secret_access_key=key)

    def upload_stream(self, stream, bucket_name, key):
        """Upload ``stream`` to ``bucket_name`` under the object key ``key``.

        This is a plain synchronous call to ``put_object``.
        """
        self.S3.put_object(Body=stream, Bucket=bucket_name, Key=key)
...
...
class Scraper:
    """Playwright scraper that uploads each response body it receives to S3."""

    def __init__(self, key, id):
        """Create the S3 client.

        Args:
            key: AWS secret access key.
            id: AWS access key id.
        """
        self.S3 = boto3.client('s3', aws_access_key_id=id, aws_secret_access_key=key)

    async def _save_image(self, res):
        """Read the body of response ``res`` and store it in S3.

        ``bucket_name`` and ``bucket_key`` are not defined in this snippet;
        they are assumed to be provided elsewhere -- TODO confirm.
        """
        buffer = await res.body()
        # S3.put_object is not an async function, so this call blocks the
        # event loop while the upload runs (the problem being asked about).
        self.S3.put_object(
            Body=buffer,
            Bucket=bucket_name,
            Key=bucket_key,
        )

    async def scrape(self):
        """Open the page in headless Chromium and save every response."""
        playwright = await async_playwright().start()
        browser = await playwright.chromium.launch(headless=True, devtools=False)
        # new_page() is a coroutine in the async Playwright API.
        page = await browser.new_page()
        # Register the bound method so the handler receives ``self``.
        page.on('response', self._save_image)
        await page.goto('https://www.example.com')
# Entry point: build the scraper, then run its scrape coroutine to completion.
# ``key`` and ``id`` are assumed to hold the AWS credentials -- not shown here.
scraper = Scraper(key, id)
scrape_job = scraper.scrape()
asyncio.run(scrape_job)
self.S3.put_object: this function is not async, so I want to replace it with an async version.
How to fix it?
Thanks in advance.
Answers:
How to fix it?
You can’t, as boto3 is not async. At best you can try a third-party, non-AWS library, such as aioboto3, in place of boto3.
I would suggest using run_in_executor and partial
from functools import partial
class Scraper:
    """Scraper whose S3 upload is pushed to a thread so the event loop stays free."""

    def __init__(self, key, id):
        """Create the S3 client.

        Args:
            key: AWS secret access key.
            id: AWS access key id.
        """
        self.S3 = boto3.client('s3', aws_access_key_id=id, aws_secret_access_key=key)

    async def _save_image(self, res):
        """Read the body of response ``res`` and upload it without blocking the loop.

        ``bucket_name`` and ``bucket_key`` are not defined in this snippet;
        they are assumed to be provided elsewhere -- TODO confirm.
        """
        buffer = await res.body()
        # Inside a coroutine, get_running_loop() is the reliable way to get
        # the loop that is actually executing this code.
        loop = asyncio.get_running_loop()
        # run_in_executor() only forwards positional arguments, so the
        # keyword arguments are bound up front with partial().
        put_object_partial = partial(
            self.S3.put_object,
            Body=buffer,
            Bucket=bucket_name,
            Key=bucket_key,
        )
        # None selects the loop's default executor.
        await loop.run_in_executor(None, put_object_partial)
...
Python 3.7+
You can use asgiref for such things. It can convert a sync function to an async one and vice versa.
from asgiref.sync import sync_to_async
async def _save_image(res):
    """Read the body of response ``res`` and upload it to S3 via asgiref.

    The function must be ``async def`` because it awaits ``res.body()``;
    only the blocking ``put_object`` call is wrapped with ``sync_to_async``.

    NOTE(review): ``self``, ``bucket_name`` and ``bucket_key`` are not
    defined in this snippet -- presumably this lives inside the Scraper
    class; confirm against the surrounding code.
    """
    buffer = await res.body()
    # S3.put_object is not an async function, so asgiref runs it in a
    # thread and hands back an awaitable.
    await sync_to_async(self.S3.put_object)(
        Body=buffer,
        Bucket=bucket_name,
        Key=bucket_key,
    )
# The upload helper can then be awaited from async code.
async def main():
    """Await the image upload for ``res`` (``res`` is assumed to be defined elsewhere)."""
    upload = _save_image(res)
    await upload
I am developing a web scraper with Playwright and want to upload images to AWS S3 asynchronously,
but boto3 is not an async library. How can I fix this?
class Boto3:
    """Thin wrapper around a boto3 S3 client."""

    def __init__(self, key, id):
        """Create the underlying S3 client.

        Args:
            key: AWS secret access key.
            id: AWS access key id. (The name shadows the ``id`` builtin; it
                is kept so existing callers keep working.)
        """
        # Use the constructor arguments; the earlier version referenced the
        # undefined names ``aws_key_id`` / ``aws_secret``.
        self.S3 = boto3.client('s3', aws_access_key_id=id, aws_secret_access_key=key)

    def upload_stream(self, stream, bucket_name, key):
        """Upload ``stream`` to ``bucket_name`` under the object key ``key``.

        This is a plain synchronous call to ``put_object``.
        """
        self.S3.put_object(Body=stream, Bucket=bucket_name, Key=key)
...
...
class Scraper:
    """Playwright scraper that uploads each response body it receives to S3."""

    def __init__(self, key, id):
        """Create the S3 client.

        Args:
            key: AWS secret access key.
            id: AWS access key id.
        """
        self.S3 = boto3.client('s3', aws_access_key_id=id, aws_secret_access_key=key)

    async def _save_image(self, res):
        """Read the body of response ``res`` and store it in S3.

        ``bucket_name`` and ``bucket_key`` are not defined in this snippet;
        they are assumed to be provided elsewhere -- TODO confirm.
        """
        buffer = await res.body()
        # S3.put_object is not an async function, so this call blocks the
        # event loop while the upload runs (the problem being asked about).
        self.S3.put_object(
            Body=buffer,
            Bucket=bucket_name,
            Key=bucket_key,
        )

    async def scrape(self):
        """Open the page in headless Chromium and save every response."""
        playwright = await async_playwright().start()
        browser = await playwright.chromium.launch(headless=True, devtools=False)
        # new_page() is a coroutine in the async Playwright API.
        page = await browser.new_page()
        # Register the bound method so the handler receives ``self``.
        page.on('response', self._save_image)
        await page.goto('https://www.example.com')
# Entry point: build the scraper, then run its scrape coroutine to completion.
# ``key`` and ``id`` are assumed to hold the AWS credentials -- not shown here.
scraper = Scraper(key, id)
scrape_job = scraper.scrape()
asyncio.run(scrape_job)
self.S3.put_object: this function is not async, so I want to replace it with an async version.
How to fix it?
Thanks in advance.
How to fix it?
You can’t, as boto3 is not async. At best you can try a third-party, non-AWS library, such as aioboto3, in place of boto3.
I would suggest using run_in_executor and partial
from functools import partial
class Scraper:
    """Scraper whose S3 upload is pushed to a thread so the event loop stays free."""

    def __init__(self, key, id):
        """Create the S3 client.

        Args:
            key: AWS secret access key.
            id: AWS access key id.
        """
        self.S3 = boto3.client('s3', aws_access_key_id=id, aws_secret_access_key=key)

    async def _save_image(self, res):
        """Read the body of response ``res`` and upload it without blocking the loop.

        ``bucket_name`` and ``bucket_key`` are not defined in this snippet;
        they are assumed to be provided elsewhere -- TODO confirm.
        """
        buffer = await res.body()
        # Inside a coroutine, get_running_loop() is the reliable way to get
        # the loop that is actually executing this code.
        loop = asyncio.get_running_loop()
        # run_in_executor() only forwards positional arguments, so the
        # keyword arguments are bound up front with partial().
        put_object_partial = partial(
            self.S3.put_object,
            Body=buffer,
            Bucket=bucket_name,
            Key=bucket_key,
        )
        # None selects the loop's default executor.
        await loop.run_in_executor(None, put_object_partial)
...
Python 3.7+
You can use asgiref for such things. It can convert a sync function to an async one and vice versa.
from asgiref.sync import sync_to_async
async def _save_image(res):
    """Read the body of response ``res`` and upload it to S3 via asgiref.

    The function must be ``async def`` because it awaits ``res.body()``;
    only the blocking ``put_object`` call is wrapped with ``sync_to_async``.

    NOTE(review): ``self``, ``bucket_name`` and ``bucket_key`` are not
    defined in this snippet -- presumably this lives inside the Scraper
    class; confirm against the surrounding code.
    """
    buffer = await res.body()
    # S3.put_object is not an async function, so asgiref runs it in a
    # thread and hands back an awaitable.
    await sync_to_async(self.S3.put_object)(
        Body=buffer,
        Bucket=bucket_name,
        Key=bucket_key,
    )
# The upload helper can then be awaited from async code.
async def main():
    """Await the image upload for ``res`` (``res`` is assumed to be defined elsewhere)."""
    upload = _save_image(res)
    await upload