Extract ZipFile using Python, display Progress Percentage?
Question:
I know how to extract a zip archive using Python, but how exactly do I display the progress of that extraction in a percentage?
Answers:
The extract method doesn’t provide a callback for this, so one would have to use getinfo
to get the uncompressed size, then open the entry, read from it in blocks, write each block to the place you want the file to go, and update the percentage after each block. One would also have to restore the mtime if that is wanted. An example:
import zipfile
# Extract a single archive entry in fixed-size blocks, reporting progress
# after every block. `some_source`, `entry_name`, `target_name`, `block_size`,
# `set_percentage` and `set_attributes_from` are placeholders that the
# surrounding program has to provide.
with zipfile.ZipFile(some_source) as z:
    entry_info = z.getinfo(entry_name)
    # Archive entries are read as bytes, so the target must be opened in
    # binary mode ('wb'); text mode would reject the data on Python 3.
    with z.open(entry_name) as i, open(target_name, 'wb') as o:
        offset = 0
        while True:
            b = i.read(block_size)
            if not b:  # an empty read (b'' on Python 3) marks the end of the entry
                break
            o.write(b)
            offset += len(b)
            set_percentage(float(offset) / float(entry_info.file_size) * 100.)
    if not entry_info.file_size:
        # Empty entry: the loop never ran and offset / file_size would
        # divide by zero, so report completion directly.
        set_percentage(100.)
# Restore mtime and other attributes only after the target has been closed.
set_attributes_from(entry_info)
this extracts entry_name
to target_name
most of this is also done by shutil.copyfileobj
but it doesn’t have a call back for progress either
the source of the ZipFile.extract
method calls _extract_member
uses:
# Excerpt quoted from the standard library's ZipFile._extract_member
# (`file()` is the Python 2 builtin for opening files).
# The whole entry is copied in a single copyfileobj call, which offers no
# progress callback.
source = self.open(member, pwd=pwd)
target = file(targetpath, "wb")
shutil.copyfileobj(source, target)
source.close()
target.close()
where member has been converted from a name to a ZipInfo object by getinfo(member)
if it wasn’t a ZipInfo object
I suggest using tqdm
, you can install it using pip
like so:
pip install tqdm
Then, you can use it directly like so:
>>> from tqdm import tqdm
>>>
>>> with zipfile.ZipFile(some_source) as zf:
... for member in tqdm(zf.infolist(), desc='Extracting '):
... try:
... zf.extract(member, target_path)
... except zipfile.error as e:
... pass
This will produce something like so:
Extracting : 100%|██████████| 60.0k/60.0k [14:56<00:00, 66.9File/s]
Sorry, a bit late seeing this. Had a similar problem, needing an equivalent to zipfile.ZipFile.extractall
. If you have tqdm>=4.40.0
(which I released over a year ago), then:
from os import fspath
from pathlib import Path
from shutil import copyfileobj
from zipfile import ZipFile
from tqdm.auto import tqdm # could use from tqdm.gui import tqdm
from tqdm.utils import CallbackIOWrapper
def extractall(fzip, dest, desc="Extracting"):
    """Extract every member of *fzip* into *dest* with a byte-level progress bar.

    Equivalent to ``zipfile.ZipFile(fzip).extractall(dest)`` with progress.

    Args:
        fzip: The archive to read; passed straight to ``ZipFile``.
        dest: Destination directory; a leading ``~`` is expanded.
        desc: Label shown in front of the progress bar.
    """
    dest = Path(dest).expanduser()
    with ZipFile(fzip) as zipf:
        # Read the member list once; it feeds both the total and the loop.
        members = zipf.infolist()
        with tqdm(
            desc=desc, unit="B", unit_scale=True, unit_divisor=1024,
            total=sum(getattr(i, "file_size", 0) for i in members),
        ) as pbar:
            for i in members:
                if not getattr(i, "file_size", 0):  # directory (or empty file)
                    zipf.extract(i, fspath(dest))
                    continue
                target = dest / i.filename
                # Archives are not required to contain explicit directory
                # entries, so make sure the parent exists before opening.
                target.parent.mkdir(parents=True, exist_ok=True)
                # NOTE(review): unlike ZipFile.extract, this path is not
                # sanitised; a member name that is absolute or contains ".."
                # can land outside `dest`. Only use on trusted archives.
                with zipf.open(i) as fi, open(fspath(target), "wb") as fo:
                    # The wrapper calls pbar.update with the size of each read.
                    copyfileobj(CallbackIOWrapper(pbar.update, fi), fo)
For the lazy, below is a self-contained working example based on Dan D’s answer. Tested on Python 3.10.6. Not optimized, but works.
In this example, the assumption is that the target "test" directory exists, but you can of course create it in the extract function.
The advantage of Dan’s answer over most of the answers I’ve seen for this topic is that showing progress each time a file from the archive is processed does not achieve the goal if the archive consists of very large files.
import zipfile
import os
from pathlib import Path
def extract(zip_path, target_path):
    """Extract every entry of *zip_path* into *target_path*, printing progress.

    For each entry its name is printed, followed by the percentage of that
    entry written so far after every block.

    Args:
        zip_path: Path of the zip archive to read.
        target_path: Directory to extract into; created if it is missing.
    """
    block_size = 8192
    with zipfile.ZipFile(zip_path) as z:
        for entry_info in z.infolist():
            entry_name = entry_info.filename
            print(entry_name)
            # NOTE(review): entry names are used as-is; a name containing
            # ".." can land outside target_path. Only use on trusted archives.
            target = Path(f"{target_path}/{entry_name}")
            if entry_name.endswith('/'):
                # Explicit directory entry: create it so empty directories
                # are preserved, then move on (there is no data to copy).
                target.mkdir(parents=True, exist_ok=True)
                continue
            target.parent.mkdir(parents=True, exist_ok=True)
            total = entry_info.file_size
            offset = 0
            with z.open(entry_info) as i, open(target, 'wb') as o:
                while True:
                    b = i.read(block_size)
                    if not b:  # empty read marks the end of the entry
                        break
                    o.write(b)
                    offset += len(b)
                    print(offset / total * 100.)
            if not total:
                # Zero-byte file: the loop never ran, and offset / total
                # would divide by zero, so report completion directly.
                print(100.)
# Example invocation: extract test.zip into the "test" directory.
extract("test.zip", "test")
import zipfile
# Extract every member of the archive, printing the percentage of members
# extracted so far (progress is per member, not per byte).
srcZipFile = 'srcZipFile.zip'
distZipFile = 'distZipFile'
with zipfile.ZipFile(srcZipFile) as zf:
    filesList = zf.namelist()
    total = len(filesList)
    for idx, file in enumerate(filesList, start=1):
        zf.extract(file, distZipFile)
        # Report after the member is extracted, counting from 1, so the
        # last value printed is 100 rather than stopping short of it.
        percent = round((idx / total) * 100)
        print(percent)
# No explicit zf.close(): the with-statement has already closed the archive.
I know how to extract a zip archive using Python, but how exactly do I display the progress of that extraction in a percentage?
The extract method doesn’t provide a callback for this, so one would have to use getinfo
to get the uncompressed size, then open the entry, read from it in blocks, write each block to the place you want the file to go, and update the percentage after each block. One would also have to restore the mtime if that is wanted. An example:
import zipfile
# Extract a single archive entry in fixed-size blocks, reporting progress
# after every block. `some_source`, `entry_name`, `target_name`, `block_size`,
# `set_percentage` and `set_attributes_from` are placeholders that the
# surrounding program has to provide.
with zipfile.ZipFile(some_source) as z:
    entry_info = z.getinfo(entry_name)
    # Archive entries are read as bytes, so the target must be opened in
    # binary mode ('wb'); text mode would reject the data on Python 3.
    with z.open(entry_name) as i, open(target_name, 'wb') as o:
        offset = 0
        while True:
            b = i.read(block_size)
            if not b:  # an empty read (b'' on Python 3) marks the end of the entry
                break
            o.write(b)
            offset += len(b)
            set_percentage(float(offset) / float(entry_info.file_size) * 100.)
    if not entry_info.file_size:
        # Empty entry: the loop never ran and offset / file_size would
        # divide by zero, so report completion directly.
        set_percentage(100.)
# Restore mtime and other attributes only after the target has been closed.
set_attributes_from(entry_info)
this extracts entry_name
to target_name
most of this is also done by shutil.copyfileobj
but it doesn’t have a call back for progress either
the source of the ZipFile.extract
method calls _extract_member
uses:
# Excerpt quoted from the standard library's ZipFile._extract_member
# (`file()` is the Python 2 builtin for opening files).
# The whole entry is copied in a single copyfileobj call, which offers no
# progress callback.
source = self.open(member, pwd=pwd)
target = file(targetpath, "wb")
shutil.copyfileobj(source, target)
source.close()
target.close()
where member has been converted from a name to a ZipInfo object by getinfo(member)
if it wasn’t a ZipInfo object
I suggest using tqdm
, you can install it using pip
like so:
pip install tqdm
Then, you can use it directly like so:
>>> from tqdm import tqdm
>>>
>>> with zipfile.ZipFile(some_source) as zf:
... for member in tqdm(zf.infolist(), desc='Extracting '):
... try:
... zf.extract(member, target_path)
... except zipfile.error as e:
... pass
This will produce something like so:
Extracting : 100%|██████████| 60.0k/60.0k [14:56<00:00, 66.9File/s]
Sorry, a bit late seeing this. Had a similar problem, needing an equivalent to zipfile.ZipFile.extractall
. If you have tqdm>=4.40.0
(which I released over a year ago), then:
from os import fspath
from pathlib import Path
from shutil import copyfileobj
from zipfile import ZipFile
from tqdm.auto import tqdm # could use from tqdm.gui import tqdm
from tqdm.utils import CallbackIOWrapper
def extractall(fzip, dest, desc="Extracting"):
    """Extract every member of *fzip* into *dest* with a byte-level progress bar.

    Equivalent to ``zipfile.ZipFile(fzip).extractall(dest)`` with progress.

    Args:
        fzip: The archive to read; passed straight to ``ZipFile``.
        dest: Destination directory; a leading ``~`` is expanded.
        desc: Label shown in front of the progress bar.
    """
    dest = Path(dest).expanduser()
    with ZipFile(fzip) as zipf:
        # Read the member list once; it feeds both the total and the loop.
        members = zipf.infolist()
        with tqdm(
            desc=desc, unit="B", unit_scale=True, unit_divisor=1024,
            total=sum(getattr(i, "file_size", 0) for i in members),
        ) as pbar:
            for i in members:
                if not getattr(i, "file_size", 0):  # directory (or empty file)
                    zipf.extract(i, fspath(dest))
                    continue
                target = dest / i.filename
                # Archives are not required to contain explicit directory
                # entries, so make sure the parent exists before opening.
                target.parent.mkdir(parents=True, exist_ok=True)
                # NOTE(review): unlike ZipFile.extract, this path is not
                # sanitised; a member name that is absolute or contains ".."
                # can land outside `dest`. Only use on trusted archives.
                with zipf.open(i) as fi, open(fspath(target), "wb") as fo:
                    # The wrapper calls pbar.update with the size of each read.
                    copyfileobj(CallbackIOWrapper(pbar.update, fi), fo)
For the lazy, below is a self-contained working example based on Dan D’s answer. Tested on Python 3.10.6. Not optimized, but works.
In this example, the assumption is that the target "test" directory exists, but you can of course create it in the extract function.
The advantage of Dan’s answer over most of the answers I’ve seen for this topic is that showing progress each time a file from the archive is processed does not achieve the goal if the archive consists of very large files.
import zipfile
import os
from pathlib import Path
def extract(zip_path, target_path):
    """Extract every entry of *zip_path* into *target_path*, printing progress.

    For each entry its name is printed, followed by the percentage of that
    entry written so far after every block.

    Args:
        zip_path: Path of the zip archive to read.
        target_path: Directory to extract into; created if it is missing.
    """
    block_size = 8192
    with zipfile.ZipFile(zip_path) as z:
        for entry_info in z.infolist():
            entry_name = entry_info.filename
            print(entry_name)
            # NOTE(review): entry names are used as-is; a name containing
            # ".." can land outside target_path. Only use on trusted archives.
            target = Path(f"{target_path}/{entry_name}")
            if entry_name.endswith('/'):
                # Explicit directory entry: create it so empty directories
                # are preserved, then move on (there is no data to copy).
                target.mkdir(parents=True, exist_ok=True)
                continue
            target.parent.mkdir(parents=True, exist_ok=True)
            total = entry_info.file_size
            offset = 0
            with z.open(entry_info) as i, open(target, 'wb') as o:
                while True:
                    b = i.read(block_size)
                    if not b:  # empty read marks the end of the entry
                        break
                    o.write(b)
                    offset += len(b)
                    print(offset / total * 100.)
            if not total:
                # Zero-byte file: the loop never ran, and offset / total
                # would divide by zero, so report completion directly.
                print(100.)
# Example invocation: extract test.zip into the "test" directory.
extract("test.zip", "test")
import zipfile
# Extract every member of the archive, printing the percentage of members
# extracted so far (progress is per member, not per byte).
srcZipFile = 'srcZipFile.zip'
distZipFile = 'distZipFile'
with zipfile.ZipFile(srcZipFile) as zf:
    filesList = zf.namelist()
    total = len(filesList)
    for idx, file in enumerate(filesList, start=1):
        zf.extract(file, distZipFile)
        # Report after the member is extracted, counting from 1, so the
        # last value printed is 100 rather than stopping short of it.
        percent = round((idx / total) * 100)
        print(percent)
# No explicit zf.close(): the with-statement has already closed the archive.