zip file and avoid directory structure
Question:
I have a Python script that zips a file (new.txt
):
tofile = "/root/files/result/"+file
targetzipfile = new.zip # This is how I want my zip to look like
zf = zipfile.ZipFile(targetzipfile, mode='w')
try:
#adding to archive
zf.write(tofile)
finally:
zf.close()
When I do this I get the zip file. But when I try to unzip the file I get the text file inside of a series of directories corresponding to the path of the file i.e I see a folder called root
in the result
directory and more directories within it, i.e. I have
/root/files/result/new.zip
and when I unzip new.zip
I have a directory structure that looks like
/root/files/result/root/files/result/new.txt
Is there a way I can zip such that when I unzip I only get new.txt
?
In other words I have /root/files/result/new.zip
and when I unzip new.zip
, it should look like
/root/files/result/new.txt
Answers:
The zipfile.write()
method takes an optional arcname
argument that specifies what the name of the file should be inside the zipfile
I think you need to change the name used inside the archive; otherwise the directory structure is duplicated. Use arcname
to avoid it. try like this:
import os
import zipfile
def zip(src, dst):
    """Zip the contents of directory ``src`` into ``<dst>.zip``.

    Every file is stored under its path relative to ``src``, so the archive
    does not reproduce the directory's location on disk.

    NOTE: the name shadows the builtin ``zip``; it is kept for callers.

    Args:
        src: Directory whose files are added (walked recursively).
        dst: Output path without the ``.zip`` extension.
    """
    abs_src = os.path.abspath(src)
    # The with-block closes the archive (which writes its central directory)
    # even if adding a file raises.
    with zipfile.ZipFile("%s.zip" % (dst), "w", zipfile.ZIP_DEFLATED) as zf:
        for dirname, subdirs, files in os.walk(src):
            for filename in files:
                absname = os.path.abspath(os.path.join(dirname, filename))
                # relpath stays correct when src is a filesystem root, where
                # slicing off len(abs_src) + 1 characters would also eat the
                # first character of the entry name.
                arcname = os.path.relpath(absname, abs_src)
                print('zipping %s as %s' % (os.path.join(dirname, filename),
                                            arcname))
                zf.write(absname, arcname)
zip("src", "dst")
Check out the documentation for Zipfile.write.
ZipFile.write(filename[, arcname[, compress_type]]) Write the file
named filename to the archive, giving it the archive name arcname (by
default, this will be the same as filename, but without a drive letter
and with leading path separators removed)
https://docs.python.org/2/library/zipfile.html#zipfile.ZipFile.write
Try the following:
import zipfile
import os
filename = 'foo.txt'
# Using os.path.join is better than using '/' it is OS agnostic
path = os.path.join(os.path.sep, 'tmp', 'bar', 'baz', filename)
zip_filename = os.path.splitext(filename)[0] + '.zip'
zip_path = os.path.join(os.path.dirname(path), zip_filename)
# If you need exception handling wrap this in a try/except block
with zipfile.ZipFile(zip_path, 'w') as zf:
    # The second argument is the name stored inside the archive. Pass the
    # bare file name so the entry is "foo.txt" rather than the full path
    # (or the archive's own name).
    zf.write(path, filename)
The bottom line is that if you do not supply an archive name then the filename is used as the archive name and it will contain the full path to the file.
To illustrate most clearly,
directory structure:
/Users
└── /user
. ├── /pixmaps
. │ ├── pixmap_00.raw
. │ ├── pixmap_01.raw
│ ├── /jpeg
│ │ ├── pixmap_00.jpg
│ │ └── pixmap_01.jpg
│ └── /png
│ ├── pixmap_00.png
│ └── pixmap_01.png
├── /docs
├── /programs
├── /misc
.
.
.
Directory of interest: /Users/user/pixmaps
First attempt
import os
import zipfile
TARGET_DIRECTORY = "/Users/user/pixmaps"
ZIPFILE_NAME = "CompressedDir.zip"
def zip_dir(directory, zipname):
    """
    Compress a directory (ZIP file).

    No archive name is passed to ``write``, so every entry keeps the full
    on-disk path of its file (minus the leading separator). This is the
    behaviour the "first attempt" is meant to show.

    Args:
        directory: Directory to walk recursively.
        zipname: Path of the ZIP file to create.

    Does nothing when ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        return
    # The with-block closes the archive even if a write fails.
    with zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED) as outZipFile:
        for dirpath, dirnames, filenames in os.walk(directory):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                outZipFile.write(filepath)


if __name__ == '__main__':
    zip_dir(TARGET_DIRECTORY, ZIPFILE_NAME)
ZIP file structure:
CompressedDir.zip
.
└── /Users
└── /user
└── /pixmaps
├── pixmap_00.raw
├── pixmap_01.raw
├── /jpeg
│ ├── pixmap_00.jpg
│ └── pixmap_01.jpg
└── /png
├── pixmap_00.png
└── pixmap_01.png
Avoiding the full directory path
def zip_dir(directory, zipname):
    """
    Compress a directory (ZIP file).

    Entries are stored as ``<directory name>/<path relative to directory>``,
    so the archive contains only the directory itself and not the path that
    leads to it.

    Args:
        directory: Directory to walk recursively.
        zipname: Path of the ZIP file to create.

    Does nothing when ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        return
    # The root directory within the ZIP file. normpath drops a trailing
    # separator, which would otherwise make basename() return ''.
    rootdir = os.path.basename(os.path.normpath(directory))
    # The with-block closes the archive even if a write fails.
    with zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED) as outZipFile:
        for dirpath, dirnames, filenames in os.walk(directory):
            for filename in filenames:
                # Write the file named filename to the archive,
                # giving it the archive name 'arcname'.
                filepath = os.path.join(dirpath, filename)
                parentpath = os.path.relpath(filepath, directory)
                arcname = os.path.join(rootdir, parentpath)
                outZipFile.write(filepath, arcname)


if __name__ == '__main__':
    zip_dir(TARGET_DIRECTORY, ZIPFILE_NAME)
ZIP file structure:
CompressedDir.zip
.
└── /pixmaps
├── pixmap_00.raw
├── pixmap_01.raw
├── /jpeg
│ ├── pixmap_00.jpg
│ └── pixmap_01.jpg
└── /png
├── pixmap_00.png
└── pixmap_01.png
zf.write(tofile)
to change
zf.write(tofile, zipfile_dir)
for example
zf.write("/root/files/result/root/files/result/new.txt", "/root/files/results/new.txt")
You can isolate just the file name of your sources files using:
name_file_only= name_full_path.split(os.sep)[-1]
For example, if name_full_path
is /root/files/results/myfile.txt
, then name_file_only
will be myfile.txt
. To zip myfile.txt to the root of the archive zf
, you can then use:
zf.write(name_full_path, name_file_only)
I faced the same problem and solved it with writestr
. You can use it like this:
zipObject.writestr(<filename> , <file data, bytes or string>)
The arcname
parameter in the write method specifies what will be the name of the file inside the zipfile:
import os
import zipfile
# 1. Create a zip file which we will write files to
zip_file = "/home/username/test.zip"
# 2. Write files found in "/home/username/files/" to the test.zip
files_to_zip = "/home/username/files/"
# The with-block closes the archive even if one of the writes fails.
with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for file_to_zip in os.listdir(files_to_zip):
        file_to_zip_full_path = os.path.join(files_to_zip, file_to_zip)
        # arcname argument specifies what will be the name of the file inside the zipfile
        zipf.write(filename=file_to_zip_full_path, arcname=file_to_zip)
We can use this
import os
# NOTE(review): destinationFolder is not defined in this snippet, and it is
# interpolated into the shell command without quoting. A value containing
# spaces or shell metacharacters will break or alter the command, so only
# use this with trusted paths.
# single File
os.system(f"cd {destinationFolder} && zip fname.zip fname")
# directory
os.system(f"cd {destinationFolder} && zip -r folder.zip folder")
For me, This is working.
If you want an elegant way to do it with pathlib
you can use it this way:
from pathlib import Path
import zipfile
def zip_dir(path_to_zip: Path):
    """Zip a directory into a sibling ``<name>.zip`` using relative entry names.

    Args:
        path_to_zip: Directory to compress (a ``Path`` or a string).

    Returns:
        Path of the archive that was written.
    """
    source = Path(path_to_zip)
    zip_file = source.with_suffix('.zip')
    # Closing the archive is what writes its central directory; the
    # with-block guarantees that happens.
    with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED) as z:
        # '*' instead of '*.*' so names without a dot (README, Makefile)
        # are archived too.
        for f in sorted(source.rglob('*')):
            z.write(f, arcname=f.relative_to(source).as_posix())
    return zip_file
It is much simpler than expected, I configured the module using the parameter "arcname" as "file_to_be_zipped.txt", so the folders do not appear in my final zipped file:
# Raw strings keep the backslashes of the Windows paths literal.
# NOTE(review): the separator between the folder name and the file name
# appears to have been lost in the original paths and is restored here;
# confirm against the real locations.
with zipfile.ZipFile(r"c:\Destination_folder_name\newzippedfilename.zip", mode='w', compression=zipfile.ZIP_DEFLATED) as mmpk_zip_file:
    # The second argument is the name stored inside the archive, so no
    # folders appear in the zipped file.
    mmpk_zip_file.write(r"c:\Source_folder_name\file_to_be_zipped.txt", "file_to_be_zipped.txt")
To get rid of the absolute path, I came up with this:
def create_zip(root_path, file_name, ignored=(), storage_path=None):
    """Create a ZIP

    This function creates a ZIP file of the provided root path. Entries are
    named relative to ``root_path``; directories get their own entries.

    Args:
        root_path (str): Root path to start from when picking files and directories.
        file_name (str): File name to save the created ZIP file as.
        ignored (list): A list of files and/or directories that you want to ignore. This
            selection is applied in root directory only.
        storage_path: If provided, ZIP file will be placed in this location. If None, the
            ZIP will be created in root_path
    """
    # Local imports keep the function usable on its own.
    import zipfile
    from pathlib import Path

    root = Path(root_path)
    target_dir = root if storage_path is None else Path(storage_path)
    zip_root = target_dir / file_name
    # Resolved once so the archive can be recognised while walking the tree.
    zip_resolved = zip_root.resolve()

    def iter_subtree(zipf, path, layer=0):
        # iter the directory
        for p in sorted(path.iterdir()):
            if layer == 0 and p.name in ignored:
                continue
            # The archive may live inside root_path; never add it to itself.
            if p.resolve() == zip_resolved:
                continue
            # relative_to strips only the leading root, unlike str.replace,
            # which would also remove later occurrences of the same text.
            zipf.write(p, p.relative_to(root).as_posix())
            if p.is_dir():
                iter_subtree(zipf, p, layer=layer + 1)

    # The with-block closes the archive even if a write fails.
    with zipfile.ZipFile(zip_root, 'w', zipfile.ZIP_DEFLATED) as zipf:
        iter_subtree(zipf, root)
Maybe it isn’t the most elegant solution, but this works. If we just use p.name
when providing the file name to write()
method, then it doesn’t create the proper directory structure.
Moreover, if it’s needed to ignore the selected directories or files from the root path, this ignores those selections too.
Specify the arcname input of the write method as following:
tofile = "/root/files/result/"+file
NewRoot = "files/result/"
zf.write(tofile, arcname=tofile.split(NewRoot)[1])
More info:
ZipFile.write(filename, arcname=None, compress_type=None,
compresslevel=None)
https://docs.python.org/3/library/zipfile.html
This is an example I used. I have one excel file, Treport where I am using python + pandas in my dowork function to create pivot tables, etc. for each of the companies in CompanyNames. I create a zip file of the csv and a non-zip file so I can check as well.
The writer specifies the path where I want my .xlsx to go and for my zip files, I specify that in the zip.write(). I just specify the name of the xlsx file that was recently created, and that is what gets zipped up, not the whole directory. Beforehand I was just specifying ‘writer’ and would zip up the whole directory. This allows me to zip up just the recently created excel file.
Treport = 'TestReportData.csv'
CompanyNames = ['Company1', 'Company2', 'Company3']
for CompName in CompanyNames:
    strcomp = str(CompName)
    # Raw f-strings: in a normal literal, "\U" starts a unicode escape and
    # the file fails to compile.
    xlsx_name = f"{strcomp}addReview.xlsx"
    xlsx_path = rf"C:\Users\MyUser\Documents\{xlsx_name}"
    # Writer creates the pathway to output the report to. Each company gets
    # a unique file. Leaving the with-block saves and closes the workbook,
    # so the file is complete before it is zipped.
    with pd.ExcelWriter(xlsx_path, engine='xlsxwriter') as writer:
        DoWorkFunction(CompName, Treport, writer)
    with ZipFile(rf"C:\Users\MyUser\Documents\{strcomp}addR.zip", 'w') as archive:
        # Store only the workbook's file name, not its directory path.
        archive.write(xlsx_path, xlsx_name)
I have a Python script that zips a file (new.txt
):
tofile = "/root/files/result/"+file
targetzipfile = new.zip # This is how I want my zip to look like
zf = zipfile.ZipFile(targetzipfile, mode='w')
try:
#adding to archive
zf.write(tofile)
finally:
zf.close()
When I do this I get the zip file. But when I try to unzip the file I get the text file inside of a series of directories corresponding to the path of the file i.e I see a folder called root
in the result
directory and more directories within it, i.e. I have
/root/files/result/new.zip
and when I unzip new.zip
I have a directory structure that looks like
/root/files/result/root/files/result/new.txt
Is there a way I can zip such that when I unzip I only get new.txt
?
In other words I have /root/files/result/new.zip
and when I unzip new.zip
, it should look like
/root/files/result/new.txt
The zipfile.write()
method takes an optional arcname
argument that specifies what the name of the file should be inside the zipfile
I think you need to change the name used inside the archive; otherwise the directory structure is duplicated. Use arcname
to avoid it. try like this:
import os
import zipfile
def zip(src, dst):
    """Zip the contents of directory ``src`` into ``<dst>.zip``.

    Every file is stored under its path relative to ``src``, so the archive
    does not reproduce the directory's location on disk.

    NOTE: the name shadows the builtin ``zip``; it is kept for callers.

    Args:
        src: Directory whose files are added (walked recursively).
        dst: Output path without the ``.zip`` extension.
    """
    abs_src = os.path.abspath(src)
    # The with-block closes the archive (which writes its central directory)
    # even if adding a file raises.
    with zipfile.ZipFile("%s.zip" % (dst), "w", zipfile.ZIP_DEFLATED) as zf:
        for dirname, subdirs, files in os.walk(src):
            for filename in files:
                absname = os.path.abspath(os.path.join(dirname, filename))
                # relpath stays correct when src is a filesystem root, where
                # slicing off len(abs_src) + 1 characters would also eat the
                # first character of the entry name.
                arcname = os.path.relpath(absname, abs_src)
                print('zipping %s as %s' % (os.path.join(dirname, filename),
                                            arcname))
                zf.write(absname, arcname)
zip("src", "dst")
Check out the documentation for Zipfile.write.
ZipFile.write(filename[, arcname[, compress_type]]) Write the file
named filename to the archive, giving it the archive name arcname (by
default, this will be the same as filename, but without a drive letter
and with leading path separators removed)
https://docs.python.org/2/library/zipfile.html#zipfile.ZipFile.write
Try the following:
import zipfile
import os
filename = 'foo.txt'
# Using os.path.join is better than using '/' it is OS agnostic
path = os.path.join(os.path.sep, 'tmp', 'bar', 'baz', filename)
zip_filename = os.path.splitext(filename)[0] + '.zip'
zip_path = os.path.join(os.path.dirname(path), zip_filename)
# If you need exception handling wrap this in a try/except block
with zipfile.ZipFile(zip_path, 'w') as zf:
    # The second argument is the name stored inside the archive. Pass the
    # bare file name so the entry is "foo.txt" rather than the full path
    # (or the archive's own name).
    zf.write(path, filename)
The bottom line is that if you do not supply an archive name then the filename is used as the archive name and it will contain the full path to the file.
To illustrate most clearly,
directory structure:
/Users
└── /user
. ├── /pixmaps
. │ ├── pixmap_00.raw
. │ ├── pixmap_01.raw
│ ├── /jpeg
│ │ ├── pixmap_00.jpg
│ │ └── pixmap_01.jpg
│ └── /png
│ ├── pixmap_00.png
│ └── pixmap_01.png
├── /docs
├── /programs
├── /misc
.
.
.
Directory of interest: /Users/user/pixmaps
First attempt
import os
import zipfile
TARGET_DIRECTORY = "/Users/user/pixmaps"
ZIPFILE_NAME = "CompressedDir.zip"
def zip_dir(directory, zipname):
    """
    Compress a directory (ZIP file).

    No archive name is passed to ``write``, so every entry keeps the full
    on-disk path of its file (minus the leading separator). This is the
    behaviour the "first attempt" is meant to show.

    Args:
        directory: Directory to walk recursively.
        zipname: Path of the ZIP file to create.

    Does nothing when ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        return
    # The with-block closes the archive even if a write fails.
    with zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED) as outZipFile:
        for dirpath, dirnames, filenames in os.walk(directory):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                outZipFile.write(filepath)


if __name__ == '__main__':
    zip_dir(TARGET_DIRECTORY, ZIPFILE_NAME)
ZIP file structure:
CompressedDir.zip
.
└── /Users
└── /user
└── /pixmaps
├── pixmap_00.raw
├── pixmap_01.raw
├── /jpeg
│ ├── pixmap_00.jpg
│ └── pixmap_01.jpg
└── /png
├── pixmap_00.png
└── pixmap_01.png
Avoiding the full directory path
def zip_dir(directory, zipname):
    """
    Compress a directory (ZIP file).

    Entries are stored as ``<directory name>/<path relative to directory>``,
    so the archive contains only the directory itself and not the path that
    leads to it.

    Args:
        directory: Directory to walk recursively.
        zipname: Path of the ZIP file to create.

    Does nothing when ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        return
    # The root directory within the ZIP file. normpath drops a trailing
    # separator, which would otherwise make basename() return ''.
    rootdir = os.path.basename(os.path.normpath(directory))
    # The with-block closes the archive even if a write fails.
    with zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED) as outZipFile:
        for dirpath, dirnames, filenames in os.walk(directory):
            for filename in filenames:
                # Write the file named filename to the archive,
                # giving it the archive name 'arcname'.
                filepath = os.path.join(dirpath, filename)
                parentpath = os.path.relpath(filepath, directory)
                arcname = os.path.join(rootdir, parentpath)
                outZipFile.write(filepath, arcname)


if __name__ == '__main__':
    zip_dir(TARGET_DIRECTORY, ZIPFILE_NAME)
ZIP file structure:
CompressedDir.zip
.
└── /pixmaps
├── pixmap_00.raw
├── pixmap_01.raw
├── /jpeg
│ ├── pixmap_00.jpg
│ └── pixmap_01.jpg
└── /png
├── pixmap_00.png
└── pixmap_01.png
zf.write(tofile)
to change
zf.write(tofile, zipfile_dir)
for example
zf.write("/root/files/result/root/files/result/new.txt", "/root/files/results/new.txt")
You can isolate just the file name of your sources files using:
name_file_only= name_full_path.split(os.sep)[-1]
For example, if name_full_path
is /root/files/results/myfile.txt
, then name_file_only
will be myfile.txt
. To zip myfile.txt to the root of the archive zf
, you can then use:
zf.write(name_full_path, name_file_only)
I faced the same problem and solved it with writestr
. You can use it like this:
zipObject.writestr(<filename> , <file data, bytes or string>)
The arcname
parameter in the write method specifies what will be the name of the file inside the zipfile:
import os
import zipfile
# 1. Create a zip file which we will write files to
zip_file = "/home/username/test.zip"
# 2. Write files found in "/home/username/files/" to the test.zip
files_to_zip = "/home/username/files/"
# The with-block closes the archive even if one of the writes fails.
with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for file_to_zip in os.listdir(files_to_zip):
        file_to_zip_full_path = os.path.join(files_to_zip, file_to_zip)
        # arcname argument specifies what will be the name of the file inside the zipfile
        zipf.write(filename=file_to_zip_full_path, arcname=file_to_zip)
We can use this
import os
# NOTE(review): destinationFolder is not defined in this snippet, and it is
# interpolated into the shell command without quoting. A value containing
# spaces or shell metacharacters will break or alter the command, so only
# use this with trusted paths.
# single File
os.system(f"cd {destinationFolder} && zip fname.zip fname")
# directory
os.system(f"cd {destinationFolder} && zip -r folder.zip folder")
For me, This is working.
If you want an elegant way to do it with pathlib
you can use it this way:
from pathlib import Path
import zipfile
def zip_dir(path_to_zip: Path):
    """Zip a directory into a sibling ``<name>.zip`` using relative entry names.

    Args:
        path_to_zip: Directory to compress (a ``Path`` or a string).

    Returns:
        Path of the archive that was written.
    """
    source = Path(path_to_zip)
    zip_file = source.with_suffix('.zip')
    # Closing the archive is what writes its central directory; the
    # with-block guarantees that happens.
    with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED) as z:
        # '*' instead of '*.*' so names without a dot (README, Makefile)
        # are archived too.
        for f in sorted(source.rglob('*')):
            z.write(f, arcname=f.relative_to(source).as_posix())
    return zip_file
It is much simpler than expected, I configured the module using the parameter "arcname" as "file_to_be_zipped.txt", so the folders do not appear in my final zipped file:
# Raw strings keep the backslashes of the Windows paths literal.
# NOTE(review): the separator between the folder name and the file name
# appears to have been lost in the original paths and is restored here;
# confirm against the real locations.
with zipfile.ZipFile(r"c:\Destination_folder_name\newzippedfilename.zip", mode='w', compression=zipfile.ZIP_DEFLATED) as mmpk_zip_file:
    # The second argument is the name stored inside the archive, so no
    # folders appear in the zipped file.
    mmpk_zip_file.write(r"c:\Source_folder_name\file_to_be_zipped.txt", "file_to_be_zipped.txt")
To get rid of the absolute path, I came up with this:
def create_zip(root_path, file_name, ignored=(), storage_path=None):
    """Create a ZIP

    This function creates a ZIP file of the provided root path. Entries are
    named relative to ``root_path``; directories get their own entries.

    Args:
        root_path (str): Root path to start from when picking files and directories.
        file_name (str): File name to save the created ZIP file as.
        ignored (list): A list of files and/or directories that you want to ignore. This
            selection is applied in root directory only.
        storage_path: If provided, ZIP file will be placed in this location. If None, the
            ZIP will be created in root_path
    """
    # Local imports keep the function usable on its own.
    import zipfile
    from pathlib import Path

    root = Path(root_path)
    target_dir = root if storage_path is None else Path(storage_path)
    zip_root = target_dir / file_name
    # Resolved once so the archive can be recognised while walking the tree.
    zip_resolved = zip_root.resolve()

    def iter_subtree(zipf, path, layer=0):
        # iter the directory
        for p in sorted(path.iterdir()):
            if layer == 0 and p.name in ignored:
                continue
            # The archive may live inside root_path; never add it to itself.
            if p.resolve() == zip_resolved:
                continue
            # relative_to strips only the leading root, unlike str.replace,
            # which would also remove later occurrences of the same text.
            zipf.write(p, p.relative_to(root).as_posix())
            if p.is_dir():
                iter_subtree(zipf, p, layer=layer + 1)

    # The with-block closes the archive even if a write fails.
    with zipfile.ZipFile(zip_root, 'w', zipfile.ZIP_DEFLATED) as zipf:
        iter_subtree(zipf, root)
Maybe it isn’t the most elegant solution, but this works. If we just use p.name
when providing the file name to write()
method, then it doesn’t create the proper directory structure.
Moreover, if it’s needed to ignore the selected directories or files from the root path, this ignores those selections too.
Specify the arcname input of the write method as following:
tofile = "/root/files/result/"+file
NewRoot = "files/result/"
zf.write(tofile, arcname=tofile.split(NewRoot)[1])
More info:
ZipFile.write(filename, arcname=None, compress_type=None,
compresslevel=None)
https://docs.python.org/3/library/zipfile.html
This is an example I used. I have one excel file, Treport where I am using python + pandas in my dowork function to create pivot tables, etc. for each of the companies in CompanyNames. I create a zip file of the csv and a non-zip file so I can check as well.
The writer specifies the path where I want my .xlsx to go and for my zip files, I specify that in the zip.write(). I just specify the name of the xlsx file that was recently created, and that is what gets zipped up, not the whole directory. Beforehand I was just specifying ‘writer’ and would zip up the whole directory. This allows me to zip up just the recently created excel file.
Treport = 'TestReportData.csv'
CompanyNames = ['Company1', 'Company2', 'Company3']
for CompName in CompanyNames:
    strcomp = str(CompName)
    # Raw f-strings: in a normal literal, "\U" starts a unicode escape and
    # the file fails to compile.
    xlsx_name = f"{strcomp}addReview.xlsx"
    xlsx_path = rf"C:\Users\MyUser\Documents\{xlsx_name}"
    # Writer creates the pathway to output the report to. Each company gets
    # a unique file. Leaving the with-block saves and closes the workbook,
    # so the file is complete before it is zipped.
    with pd.ExcelWriter(xlsx_path, engine='xlsxwriter') as writer:
        DoWorkFunction(CompName, Treport, writer)
    with ZipFile(rf"C:\Users\MyUser\Documents\{strcomp}addR.zip", 'w') as archive:
        # Store only the workbook's file name, not its directory path.
        archive.write(xlsx_path, xlsx_name)