dropbox API v2 upload large files using python
Question:
I’m trying to upload big file (~900MB) via Dropbox API v2 but I’m getting this error:
requests.exceptions.ConnectionError: ('Connection aborted.',
ConnectionResetError(104, 'Connection reset by peer'))
It works ok with smaller files.
I found in documentation that I need to open upload session using files_upload_session_start
method but I have an error on this command and I can’t go further with ._append
methods.
How can I solve this problem? There's no info about this in the docs.
I’m using Python 3.5.1 and latest dropbox module installed using pip.
Here's the code I'm running:
c = Dropbox(access_token)
f = open("D:\Programs\ubuntu-13.10-desktop-amd64.iso", "rb")
result = c.files_upload_session_start(f)
f.seek(0, os.SEEK_END)
size = f.tell()
c.files_upload_session_finish(f, files.UploadSessionCursor(result.session_id, size), files.CommitInfo("/test900.iso"))
Answers:
For large files like this, you’ll need to use upload sessions. Otherwise, you’ll run into issues like the error you posted.
This uses the Dropbox Python SDK to upload a file to the Dropbox API from the local file as specified by file_path
to the remote path as specified by target_path
. It also chooses whether or not to use an upload session based on the size of the file:
import os
from tqdm import tqdm
import dropbox
def upload(
access_token,
file_path,
target_path,
timeout=900,
chunk_size=4 * 1024 * 1024,
):
dbx = dropbox.Dropbox(access_token, timeout=timeout)
with open(file_path, "rb") as f:
file_size = os.path.getsize(file_path)
if file_size <= chunk_size:
print(dbx.files_upload(f.read(), target_path))
else:
with tqdm(total=file_size, desc="Uploaded") as pbar:
upload_session_start_result = dbx.files_upload_session_start(
f.read(chunk_size)
)
pbar.update(chunk_size)
cursor = dropbox.files.UploadSessionCursor(
session_id=upload_session_start_result.session_id,
offset=f.tell(),
)
commit = dropbox.files.CommitInfo(path=target_path)
while f.tell() < file_size:
if (file_size - f.tell()) <= chunk_size:
print(
dbx.files_upload_session_finish(
f.read(chunk_size), cursor, commit
)
)
else:
dbx.files_upload_session_append(
f.read(chunk_size),
cursor.session_id,
cursor.offset,
)
cursor.offset = f.tell()
pbar.update(chunk_size)
@Greg answer can be updated with Dropbox Api v2 call:
self.client.files_upload_session_append_v2(
f.read(self.CHUNK_SIZE), cursor)
cursor.offset = f.tell()
Even though @Greg’s answer is very complete and the nicest solution (and most efficient), I would like to share this minimal implementation for those wanting to quickly learn:
def file_upload(dbx: dropbox.Dropbox, local_path: pathlib.Path, remote_path: str):
CHUNKSIZE = 100 * 1024 * 1024
upload_session_start_result = dbx.files_upload_session_start(b'')
cursor = dropbox.files.UploadSessionCursor(
session_id=upload_session_start_result.session_id,
offset=0
)
with local_path.open("rb") as f:
while True:
data = f.read(CHUNKSIZE)
if data == b"":
break
logger.debug("Pushing %d bytes", len(data))
dbx.files_upload_session_append_v2(data, cursor)
cursor.offset += len(data)
commit = dropbox.files.CommitInfo(path=remote_path)
dbx.files_upload_session_finish(b'', cursor, commit)
It will open a session without sending any data, then in the loop add data, and when there is no more data left, finish. It will do more calls than Greg’s answer (in exchange for more readable code).
In python 3.8+ you can use assignment expressions to make this code even nicer (I think):
def file_upload(dbx: dropbox.Dropbox, local_path: pathlib.Path, remote_path: str):
CHUNKSIZE = 1 * 1024
upload_session_start_result = dbx.files_upload_session_start(b'')
cursor = dropbox.files.UploadSessionCursor(
session_id=upload_session_start_result.session_id,
offset=0
)
with local_path.open("rb") as f:
while (data := f.read(CHUNKSIZE)) != b"":
logger.debug("Pushing %d bytes", len(data))
dbx.files_upload_session_append_v2(data, cursor)
cursor.offset += len(data)
commit = dropbox.files.CommitInfo(path=remote_path)
dbx.files_upload_session_finish(b'', cursor, commit)
I’m trying to upload big file (~900MB) via Dropbox API v2 but I’m getting this error:
requests.exceptions.ConnectionError: ('Connection aborted.',
ConnectionResetError(104, 'Connection reset by peer'))
It works ok with smaller files.
I found in documentation that I need to open upload session using files_upload_session_start
method but I have an error on this command and I can’t go further with ._append
methods.
How can I solve this problem? There's no info about this in the docs.
I’m using Python 3.5.1 and latest dropbox module installed using pip.
Here's the code I'm running:
c = Dropbox(access_token)
f = open("D:\Programs\ubuntu-13.10-desktop-amd64.iso", "rb")
result = c.files_upload_session_start(f)
f.seek(0, os.SEEK_END)
size = f.tell()
c.files_upload_session_finish(f, files.UploadSessionCursor(result.session_id, size), files.CommitInfo("/test900.iso"))
For large files like this, you’ll need to use upload sessions. Otherwise, you’ll run into issues like the error you posted.
This uses the Dropbox Python SDK to upload a file to the Dropbox API from the local file as specified by file_path
to the remote path as specified by target_path
. It also chooses whether or not to use an upload session based on the size of the file:
import os
from tqdm import tqdm
import dropbox
def upload(
access_token,
file_path,
target_path,
timeout=900,
chunk_size=4 * 1024 * 1024,
):
dbx = dropbox.Dropbox(access_token, timeout=timeout)
with open(file_path, "rb") as f:
file_size = os.path.getsize(file_path)
if file_size <= chunk_size:
print(dbx.files_upload(f.read(), target_path))
else:
with tqdm(total=file_size, desc="Uploaded") as pbar:
upload_session_start_result = dbx.files_upload_session_start(
f.read(chunk_size)
)
pbar.update(chunk_size)
cursor = dropbox.files.UploadSessionCursor(
session_id=upload_session_start_result.session_id,
offset=f.tell(),
)
commit = dropbox.files.CommitInfo(path=target_path)
while f.tell() < file_size:
if (file_size - f.tell()) <= chunk_size:
print(
dbx.files_upload_session_finish(
f.read(chunk_size), cursor, commit
)
)
else:
dbx.files_upload_session_append(
f.read(chunk_size),
cursor.session_id,
cursor.offset,
)
cursor.offset = f.tell()
pbar.update(chunk_size)
@Greg answer can be updated with Dropbox Api v2 call:
self.client.files_upload_session_append_v2(
f.read(self.CHUNK_SIZE), cursor)
cursor.offset = f.tell()
Even though @Greg’s answer is very complete and the nicest solution (and most efficient), I would like to share this minimal implementation for those wanting to quickly learn:
def file_upload(dbx: dropbox.Dropbox, local_path: pathlib.Path, remote_path: str):
CHUNKSIZE = 100 * 1024 * 1024
upload_session_start_result = dbx.files_upload_session_start(b'')
cursor = dropbox.files.UploadSessionCursor(
session_id=upload_session_start_result.session_id,
offset=0
)
with local_path.open("rb") as f:
while True:
data = f.read(CHUNKSIZE)
if data == b"":
break
logger.debug("Pushing %d bytes", len(data))
dbx.files_upload_session_append_v2(data, cursor)
cursor.offset += len(data)
commit = dropbox.files.CommitInfo(path=remote_path)
dbx.files_upload_session_finish(b'', cursor, commit)
It will open a session without sending any data, then in the loop add data, and when there is no more data left, finish. It will do more calls than Greg’s answer (in exchange for more readable code).
In python 3.8+ you can use assignment expressions to make this code even nicer (I think):
def file_upload(dbx: dropbox.Dropbox, local_path: pathlib.Path, remote_path: str):
CHUNKSIZE = 1 * 1024
upload_session_start_result = dbx.files_upload_session_start(b'')
cursor = dropbox.files.UploadSessionCursor(
session_id=upload_session_start_result.session_id,
offset=0
)
with local_path.open("rb") as f:
while (data := f.read(CHUNKSIZE)) != b"":
logger.debug("Pushing %d bytes", len(data))
dbx.files_upload_session_append_v2(data, cursor)
cursor.offset += len(data)
commit = dropbox.files.CommitInfo(path=remote_path)
dbx.files_upload_session_finish(b'', cursor, commit)