Extract ZipFile using Python, display Progress Percentage?

Question:

I know how to extract a zip archive using Python, but how exactly do I display the progress of that extraction in a percentage?

Asked By: Zac Brown

||

Answers:

The extract method doesn't provide a callback for this, so you have to use getinfo to get the entry's uncompressed size, then open the entry, read from it in blocks, write each block to the place you want the file to go, and update the percentage as you go. You also have to restore the mtime yourself if that is wanted. An example:

import zipfile

# Sketch: extract a single archive member while reporting progress.
# some_source, entry_name, target_name, block_size, set_percentage and
# set_attributes_from are placeholders the caller is expected to provide.
with zipfile.ZipFile(some_source) as z:
    entry_info = z.getinfo(entry_name)
    # Archive members are bytes, so the target must be opened in binary mode.
    with z.open(entry_info) as i, open(target_name, 'wb') as o:
        offset = 0
        while True:
            b = i.read(block_size)
            # An empty read means end of data; `not b` is true for both
            # '' and b'', so the loop terminates on Python 2 and Python 3.
            if not b:
                break
            o.write(b)
            offset += len(b)
            # file_size is non-zero here because at least one byte was read.
            set_percentage(float(offset) / float(entry_info.file_size) * 100.)
# An empty member never enters the reporting branch above; report completion
# explicitly instead of dividing by a zero file_size.
if entry_info.file_size == 0:
    set_percentage(100.)
set_attributes_from(entry_info)

this extracts entry_name to target_name


most of this is also done by shutil.copyfileobj but it doesn’t have a call back for progress either

The ZipFile.extract method calls _extract_member, whose source uses:

# Excerpt from the body of _extract_member (self is the ZipFile instance).
# Open the archive member for reading, passing the password through.
source = self.open(member, pwd=pwd)
# NOTE(review): `file` is presumably the Python 2 built-in equivalent of
# open(); the target is opened for binary writing.
target = file(targetpath, "wb")
# The whole member is copied in one call, which is why no progress can be
# reported from here.
shutil.copyfileobj(source, target)
source.close()
target.close()

where member has been converted from a name to a ZipInfo object by getinfo(member) if it wasn't already a ZipInfo object

Answered By: Dan D.

I suggest using tqdm, you can install it using pip like so:

pip install tqdm

Then, after importing zipfile (import zipfile), you can use it directly like so:

>>> from tqdm import tqdm
>>>
>>> with zipfile.ZipFile(some_source) as zf:
...     for member in tqdm(zf.infolist(), desc='Extracting '):
...         try:
...             zf.extract(member, target_path)
...         except zipfile.error as e:
...             pass

This will produce something like so:

Extracting : 100%|██████████| 60.0k/60.0k [14:56<00:00, 66.9File/s]
Answered By: Anwarvic

Sorry, a bit late seeing this. I had a similar problem, needing an equivalent to zipfile.ZipFile.extractall. If you have tqdm>=4.40.0 (which I released over a year ago), then:

from os import fspath
from pathlib import Path
from shutil import copyfileobj
from zipfile import ZipFile
from tqdm.auto import tqdm  # could use from tqdm.gui import tqdm
from tqdm.utils import CallbackIOWrapper

def extractall(fzip, dest, desc="Extracting"):
    """zipfile.ZipFile(fzip).extractall(dest) with a byte-level progress bar.

    Args:
        fzip: Path (or file-like object) of the zip archive to read.
        dest: Directory to extract into; ``~`` is expanded.
        desc: Label shown in front of the progress bar.
    """
    dest = Path(dest).expanduser()
    with ZipFile(fzip) as zipf, tqdm(
        desc=desc, unit="B", unit_scale=True, unit_divisor=1024,
        total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
    ) as pbar:
        for i in zipf.infolist():
            if not getattr(i, "file_size", 0):
                # Directories and empty files contribute no bytes to the bar,
                # so let ZipFile.extract create them.
                zipf.extract(i, fspath(dest))
                continue
            target = dest / i.filename
            # Archives do not always contain explicit directory entries, so
            # make sure the parent exists before opening the target file.
            target.parent.mkdir(parents=True, exist_ok=True)
            # NOTE(review): unlike ZipFile.extract, this path is built from the
            # raw member name; only use on archives you trust.
            with zipf.open(i) as fi, open(fspath(target), "wb") as fo:
                # Every read on the wrapped stream advances the bar by the
                # number of bytes read.
                copyfileobj(CallbackIOWrapper(pbar.update, fi), fo)
Answered By: casper.dcl

For the lazy, below is a self-contained working example based on Dan D’s answer. Tested on Python 3.10.6. Not optimized, but works.

In this example, the assumption is that the target "test" directory exists, but you can of course create it in the extract function.

The advantage of Dan's answer over most of the answers I've seen for this topic is that it reports progress within each file: showing progress only each time a file from the archive is processed does not achieve the goal if the archive consists of very large files.

import zipfile
import os
from pathlib import Path

def extract(zip_path, target_path):
    """Extract every entry of a zip archive, printing per-file progress.

    For each entry the entry name is printed; for file entries the
    percentage written so far is printed after every block.

    Args:
        zip_path: Path of the zip archive to read.
        target_path: Directory to extract into; created if missing.
    """
    block_size = 8192
    with zipfile.ZipFile(zip_path) as z:
        for entry_info in z.infolist():
            entry_name = entry_info.filename
            print(entry_name)
            # NOTE(review): entry names are used as-is, so an archive with
            # ".." components can write outside target_path; only use on
            # archives you trust.
            if entry_name.endswith('/'):
                # Directory entry: create it so empty directories survive.
                Path(f"{target_path}/{entry_name}").mkdir(
                    parents=True, exist_ok=True)
                continue
            dir_name = os.path.dirname(entry_name)
            Path(f"{target_path}/{dir_name}").mkdir(parents=True, exist_ok=True)
            total = entry_info.file_size
            offset = 0
            with z.open(entry_info) as i, \
                    open(f"{target_path}/{entry_name}", 'wb') as o:
                while True:
                    b = i.read(block_size)
                    if not b:
                        break
                    o.write(b)
                    offset += len(b)
                    # total is non-zero here because data was just read.
                    print(float(offset) / float(total) * 100.)
            if total == 0:
                # Empty file: nothing to copy, and dividing by its size
                # would raise ZeroDivisionError, so report completion directly.
                print(100.)

if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    extract("test.zip", "test")
Answered By: Amnon
import zipfile

# Extract every member of srcZipFile into distZipFile, printing the percentage
# of members extracted so far (progress is per member, not per byte).
srcZipFile = 'srcZipFile.zip'
distZipFile = 'distZipFile'
with zipfile.ZipFile(srcZipFile) as zf:
    filesList = zf.namelist()
    total = len(filesList)
    # Count from 1 and report after each extraction so the last member
    # prints 100 rather than stopping short of it.
    for idx, name in enumerate(filesList, start=1):
        zf.extract(name, distZipFile)
        percent = round((idx / total) * 100)
        print(percent)
    # No explicit zf.close(): the with statement closes the archive.
Answered By: Mohamed Omar