Upload file to Google Drive with progress bar with Python requests
Question:
This is my code for uploading to google drive with python requests using google-drive-api.
import sys
import json
import requests
from tqdm import tqdm
import requests_toolbelt
from requests.exceptions import JSONDecodeError
class ProgressBar(tqdm):
    """tqdm progress bar that accepts an absolute position instead of a delta."""

    def update_to(self, n: int) -> None:
        """Advance the bar by the difference between ``n`` and the current count."""
        advance = n - self.n
        self.update(advance)
def upload_file(access_token:str, filename:str, filedirectory:str):
    """Upload a local file to Google Drive with a multipart POST and a progress bar.

    Args:
        access_token: Token sent as ``Bearer`` in the Authorization header.
        filename: Value stored under the "title" key of the metadata part.
        filedirectory: Path of the local file that is opened and uploaded.

    Prints the decoded JSON response; if the response body cannot be decoded
    as JSON, exits the process with the raw response text.
    """
    metadata = {
        "title": filename,
    }
    files = {}
    session = requests.session()
    with open(filedirectory, "rb") as fp:
        # The file part is inserted before the metadata part.
        files["file"] = fp
        files["data"] = ('metadata', json.dumps(metadata), 'application/json')
        encoder = requests_toolbelt.MultipartEncoder(files)
        with ProgressBar(
            total=encoder.len,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
            file=sys.stdout,
        ) as bar:
            # The monitor callback forwards the number of bytes read so far to the bar.
            monitor = requests_toolbelt.MultipartEncoderMonitor(
                encoder, lambda monitor: bar.update_to(monitor.bytes_read)
            )
            # NOTE(review): only the Authorization header is set explicitly here;
            # no Content-Type is passed alongside the monitor body.
            r = session.post(
                "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart",
                data=monitor,
                allow_redirects=False,
                headers={"Authorization": "Bearer " + access_token},
            )
    try:
        resp = r.json()
        print(resp)
    except JSONDecodeError:
        # Non-JSON body: terminate and show the raw text instead.
        sys.exit(r.text)
# Example invocation with placeholder values.
upload_file(
    access_token="access_token",
    filename="test.txt",
    filedirectory="test.txt",
)
When I try to send the file with the data attribute of the POST request, the file name is not sent, and when I use the files attribute, requests-toolbelt does not work. How can I fix this error?
Answers:
Metadata needs to be sent in the post body as json.
data = Optional. A dictionary, list of tuples, bytes or a file object to send to the specified url
json = Optional. A JSON object to send to the specified url
metadata = {
    "name": filename,
}
r = session.post(
    url,
    # Pass the dict itself: requests serializes the ``json`` argument, so
    # pre-encoding it with json.dumps() would send a doubly-encoded JSON string.
    json=metadata,
    allow_redirects=False,
    headers={"Authorization": "Bearer " + access_token},
)
When I saw your script, I thought that the content type is not included in the request header. In this case, I think that the request body is directly shown in the uploaded file. I thought that this might be the reason for your current issue. In order to remove this issue, how about the following modification?
From:
r = session.post(
url,
data=monitor,
allow_redirects=False,
headers={"Authorization": "Bearer " + access_token},
)
To:
r = session.post(
url,
data=monitor,
allow_redirects=False,
headers={
"Authorization": "Bearer " + access_token,
"Content-Type": monitor.content_type,
},
)
-
In this case, metadata = { "title": filename } implies that url is the Drive API v2 endpoint https://www.googleapis.com/upload/drive/v2/files?uploadType=multipart. Please be careful about this.
-
When you want to use Drive API v3, please change metadata = { "title": filename } to metadata = { "name": filename }, and use the endpoint https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart.
- When the file is uploaded with Drive API v3, the value {'kind': 'drive#file', 'id': '###', 'name': 'test.txt', 'mimeType': 'text/plain'} is returned.
-
By the way, if an error like badContent occurs in your testing, please try the following modification. When the file content is placed before the file metadata in the multipart/form-data request body, it seems that an error occurs. I'm not sure whether this is the current specification, but I didn't know that the order of the parts in the request body is checked.
-
From
files = {}
files["file"] = fp
files["data"] = ('metadata', json.dumps(metadata), 'application/json')
-
To
files = collections.OrderedDict(data=("metadata", json.dumps(metadata), "application/json"), file=fp)
Note:
-
I thought that in your script, an error might occur at file_size = os.path.getsize(filename). Please confirm this again.
-
When I tested your script by modifying the above modifications, I could confirm that a test file could be uploaded to Google Drive with the expected filename. In this case, I also modified it as follows.
files = collections.OrderedDict(data=("metadata", json.dumps(metadata), "application/json"), file=fp)
References:
Future readers can find below a complete script that also contains details on how to get access to the bearer token for HTTP authentication.
Most of the credit goes to the OP and answers to the OPs question.
"""
Goal: For one time upload of a large file (as the GDrive UI hangs up)
Step 1 - Create OAuth 2.0 Client ID + Client Secret
- by following the "Authentication" part of https://pythonhosted.org/PyDrive/quickstart.html
Step 2 - Get Access Token
- from the OAuth playground -> https://developers.google.com/oauthplayground/
--> Select Drive API v3 -> www.googleapis.com/auth/drive --> Click on "Authorize APIs"
--> Click on "Exchange authorization code for tokens" --> "Copy paste the access token"
--> Use it in the script below
Step 3 - Run file as daemon process
- nohup python -u upload_gdrive.py > upload_gdrive.log 2>&1 &
- tail -f upload_gdrive.log
"""
import sys
import json
import requests
from tqdm import tqdm
import requests_toolbelt # pip install requests_toolbelt
from requests.exceptions import JSONDecodeError
import collections
class ProgressBar(tqdm):
    """tqdm progress bar that accepts an absolute position instead of a delta."""

    def update_to(self, n: int) -> None:
        """Advance the bar by the difference between ``n`` and the current count."""
        advance = n - self.n
        self.update(advance)
def upload_file(access_token:str, filename:str, filepath:str):
    """Upload a local file to Google Drive (API v3) while showing a progress bar.

    The body is a multipart payload built by ``requests_toolbelt``: the JSON
    metadata part first, followed by the file content.

    Args:
        access_token: OAuth 2.0 access token, sent as a Bearer token.
        filename: Name stored under the "name" key of the metadata part.
        filepath: Path of the local file to read and upload.

    Prints the decoded JSON response. If the response body cannot be decoded
    as JSON, the process exits with the raw response text.
    """
    metadata = {
        "name": filename,
    }
    # The session is a context manager so its connections are released when
    # done; the file must stay open until post() returns because the encoder
    # reads from it while the request is being sent.
    with requests.session() as session, open(filepath, "rb") as fp:
        # Ordered so that the metadata part precedes the file part.
        files = collections.OrderedDict(
            data=("metadata", json.dumps(metadata), "application/json"),
            file=fp,
        )
        encoder = requests_toolbelt.MultipartEncoder(files)
        with ProgressBar(
            total=encoder.len,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
            file=sys.stdout,
        ) as bar:
            # The monitor callback forwards the number of bytes read so far to the bar.
            monitor = requests_toolbelt.MultipartEncoderMonitor(
                encoder, lambda monitor: bar.update_to(monitor.bytes_read)
            )
            r = session.post(
                "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart",
                data=monitor,
                allow_redirects=False,
                headers={
                    "Authorization": "Bearer " + access_token,
                    # Carries the multipart boundary generated by the encoder.
                    "Content-Type": monitor.content_type,
                },
            )
    try:
        resp = r.json()
    except JSONDecodeError:
        # Non-JSON body: terminate and show the raw text instead.
        sys.exit(r.text)
    print(resp)
# Example invocation; replace the placeholders with real values.
upload_file(
    access_token="<access_token>",
    filename="<upload_filename>",
    filepath="<path_to_file>",
)
This is my code for uploading to google drive with python requests using google-drive-api.
import sys
import json
import requests
from tqdm import tqdm
import requests_toolbelt
from requests.exceptions import JSONDecodeError
class ProgressBar(tqdm):
    """tqdm progress bar that accepts an absolute position instead of a delta."""

    def update_to(self, n: int) -> None:
        """Advance the bar by the difference between ``n`` and the current count."""
        advance = n - self.n
        self.update(advance)
def upload_file(access_token:str, filename:str, filedirectory:str):
    """Upload a local file to Google Drive with a multipart POST and a progress bar.

    Args:
        access_token: Token sent as ``Bearer`` in the Authorization header.
        filename: Value stored under the "title" key of the metadata part.
        filedirectory: Path of the local file that is opened and uploaded.

    Prints the decoded JSON response; if the response body cannot be decoded
    as JSON, exits the process with the raw response text.
    """
    metadata = {
        "title": filename,
    }
    files = {}
    session = requests.session()
    with open(filedirectory, "rb") as fp:
        # The file part is inserted before the metadata part.
        files["file"] = fp
        files["data"] = ('metadata', json.dumps(metadata), 'application/json')
        encoder = requests_toolbelt.MultipartEncoder(files)
        with ProgressBar(
            total=encoder.len,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
            file=sys.stdout,
        ) as bar:
            # The monitor callback forwards the number of bytes read so far to the bar.
            monitor = requests_toolbelt.MultipartEncoderMonitor(
                encoder, lambda monitor: bar.update_to(monitor.bytes_read)
            )
            # NOTE(review): only the Authorization header is set explicitly here;
            # no Content-Type is passed alongside the monitor body.
            r = session.post(
                "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart",
                data=monitor,
                allow_redirects=False,
                headers={"Authorization": "Bearer " + access_token},
            )
    try:
        resp = r.json()
        print(resp)
    except JSONDecodeError:
        # Non-JSON body: terminate and show the raw text instead.
        sys.exit(r.text)
# Example invocation with placeholder values.
upload_file(
    access_token="access_token",
    filename="test.txt",
    filedirectory="test.txt",
)
When I try to send the file with the data attribute of the POST request, the file name is not sent, and when I use the files attribute, requests-toolbelt does not work. How can I fix this error?
Metadata needs to be sent in the post body as json.
data = Optional. A dictionary, list of tuples, bytes or a file object to send to the specified url
json = Optional. A JSON object to send to the specified url
metadata = {
    "name": filename,
}
r = session.post(
    url,
    # Pass the dict itself: requests serializes the ``json`` argument, so
    # pre-encoding it with json.dumps() would send a doubly-encoded JSON string.
    json=metadata,
    allow_redirects=False,
    headers={"Authorization": "Bearer " + access_token},
)
When I saw your script, I thought that the content type is not included in the request header. In this case, I think that the request body is directly shown in the uploaded file. I thought that this might be the reason for your current issue. In order to remove this issue, how about the following modification?
From:
r = session.post(
url,
data=monitor,
allow_redirects=False,
headers={"Authorization": "Bearer " + access_token},
)
To:
r = session.post(
url,
data=monitor,
allow_redirects=False,
headers={
"Authorization": "Bearer " + access_token,
"Content-Type": monitor.content_type,
},
)
-
In this case, metadata = { "title": filename } implies that url is the Drive API v2 endpoint https://www.googleapis.com/upload/drive/v2/files?uploadType=multipart. Please be careful about this.
-
When you want to use Drive API v3, please change metadata = { "title": filename } to metadata = { "name": filename }, and use the endpoint https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart.
- When the file is uploaded with Drive API v3, the value {'kind': 'drive#file', 'id': '###', 'name': 'test.txt', 'mimeType': 'text/plain'} is returned.
-
By the way, if an error like badContent occurs in your testing, please try the following modification. When the file content is placed before the file metadata in the multipart/form-data request body, it seems that an error occurs. I'm not sure whether this is the current specification, but I didn't know that the order of the parts in the request body is checked.
-
From
# Original ordering: the file part is added before the metadata part.
files = {}
files["file"] = fp
files["data"] = ('metadata', json.dumps(metadata), 'application/json')
-
To
files = collections.OrderedDict(data=("metadata", json.dumps(metadata), "application/json"), file=fp)
-
Note:
-
I thought that in your script, an error might occur at file_size = os.path.getsize(filename). Please confirm this again.
-
When I tested your script by modifying the above modifications, I could confirm that a test file could be uploaded to Google Drive with the expected filename. In this case, I also modified it as follows.
files = collections.OrderedDict(data=("metadata", json.dumps(metadata), "application/json"), file=fp)
References:
Future readers can find below a complete script that also contains details on how to get access to the bearer token for HTTP authentication.
Most of the credit goes to the OP and answers to the OPs question.
"""
Goal: For one time upload of a large file (as the GDrive UI hangs up)
Step 1 - Create OAuth 2.0 Client ID + Client Secret
- by following the "Authentication" part of https://pythonhosted.org/PyDrive/quickstart.html
Step 2 - Get Access Token
- from the OAuth playground -> https://developers.google.com/oauthplayground/
--> Select Drive API v3 -> www.googleapis.com/auth/drive --> Click on "Authorize APIs"
--> Click on "Exchange authorization code for tokens" --> "Copy paste the access token"
--> Use it in the script below
Step 3 - Run file as daemon process
- nohup python -u upload_gdrive.py > upload_gdrive.log 2>&1 &
- tail -f upload_gdrive.log
"""
import sys
import json
import requests
from tqdm import tqdm
import requests_toolbelt # pip install requests_toolbelt
from requests.exceptions import JSONDecodeError
import collections
class ProgressBar(tqdm):
    """tqdm progress bar that accepts an absolute position instead of a delta."""

    def update_to(self, n: int) -> None:
        """Advance the bar by the difference between ``n`` and the current count."""
        advance = n - self.n
        self.update(advance)
def upload_file(access_token:str, filename:str, filepath:str):
    """Upload a local file to Google Drive (API v3) while showing a progress bar.

    The body is a multipart payload built by ``requests_toolbelt``: the JSON
    metadata part first, followed by the file content.

    Args:
        access_token: OAuth 2.0 access token, sent as a Bearer token.
        filename: Name stored under the "name" key of the metadata part.
        filepath: Path of the local file to read and upload.

    Prints the decoded JSON response. If the response body cannot be decoded
    as JSON, the process exits with the raw response text.
    """
    metadata = {
        "name": filename,
    }
    # The session is a context manager so its connections are released when
    # done; the file must stay open until post() returns because the encoder
    # reads from it while the request is being sent.
    with requests.session() as session, open(filepath, "rb") as fp:
        # Ordered so that the metadata part precedes the file part.
        files = collections.OrderedDict(
            data=("metadata", json.dumps(metadata), "application/json"),
            file=fp,
        )
        encoder = requests_toolbelt.MultipartEncoder(files)
        with ProgressBar(
            total=encoder.len,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
            file=sys.stdout,
        ) as bar:
            # The monitor callback forwards the number of bytes read so far to the bar.
            monitor = requests_toolbelt.MultipartEncoderMonitor(
                encoder, lambda monitor: bar.update_to(monitor.bytes_read)
            )
            r = session.post(
                "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart",
                data=monitor,
                allow_redirects=False,
                headers={
                    "Authorization": "Bearer " + access_token,
                    # Carries the multipart boundary generated by the encoder.
                    "Content-Type": monitor.content_type,
                },
            )
    try:
        resp = r.json()
    except JSONDecodeError:
        # Non-JSON body: terminate and show the raw text instead.
        sys.exit(r.text)
    print(resp)
# Example invocation; replace the placeholders with real values.
upload_file(
    access_token="<access_token>",
    filename="<upload_filename>",
    filepath="<path_to_file>",
)