How to Copy Files Fast
Question:
What is the fastest way to copy files in a python program?
It takes at least 3 times longer to copy files with shutil.copyfile()
versus a regular right-click-copy > right-click-paste using Windows File Explorer or Mac’s Finder.
Is there any faster alternative to shutil.copyfile()
in Python? What could be done to speed up a file copying process? (The files destination is on the network drive… if it makes any difference…).
EDITED LATER:
Here is what I have ended up with:
def copyWithSubprocess(cmd):
    """Run the copy command *cmd* and wait for it to finish.

    Args:
        cmd: Argument list handed to ``subprocess.Popen``, for example
            ``['cp', source, dest]``.

    Returns:
        The exit status of the child process.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Drain both pipes and reap the child. Without this the function returns
    # before the copy is done, and a command that prints a lot can block
    # forever once the unread pipe buffer fills up.
    proc.communicate()
    return proc.returncode
# Detect the host OS once; at most one of the two flags ends up True.
mac = sys.platform.startswith("darwin")
win = not mac and sys.platform.startswith("win")

# Choose the native copy tool; any other platform leaves cmd as None.
if mac:
    cmd = ['cp', source, dest]
elif win:
    cmd = ['xcopy', source, dest, '/K/O/X']
else:
    cmd = None

# Nothing is launched when no command was selected.
if cmd:
    copyWithSubprocess(cmd)
Answers:
This is just a guess, but … you're timing it wrong … that is, when you copy the file, it opens the file and reads it all into memory, so that when you paste you only create a file and dump your memory contents.
in python
# Read raw bytes so the data can be written back unchanged to a file opened
# in "wb" mode; the with-block also closes the source file instead of
# leaking the handle.
with open("some_file", "rb") as src_file:
    copied_file = src_file.read()
is the equivalent of the ctrl + c copy
then
# "Paste" step: create the destination file and dump the bytes held in memory.
with open("new_file", "wb") as dst_file:
    dst_file.write(copied_file)
is the equivalent of the ctrl + v paste (so time that for equivalency ….)
If you want it to be more scalable to larger data (but it's not going to be as fast as ctrl+v / ctrl+c):
# Stream the file in fixed-size chunks so memory use stays flat for large
# files. The end-of-file sentinel must be b"" because a file opened in "rb"
# mode returns bytes; comparing against '' never matches on Python 3 and the
# loop would never end. Fixed-size reads are used instead of readline()
# because binary data has no meaningful line structure.
with open(infile, "rb") as fin, open(outfile, "wb") as fout:
    for chunk in iter(lambda: fin.read(64 * 1024), b""):
        fout.write(chunk)
You could simply just use the OS you are doing the copy on, for Windows:
from subprocess import call

# Backslashes in Windows paths must not be parsed as escape sequences:
# "\f" is a form feed, and a trailing "\" would escape the closing quote
# (a raw string cannot end in a backslash either, hence the doubled form).
call(["xcopy", r"c:\file.txt", "n:\\folder\\", "/K/O/X"])
/K – Copies attributes. Typically, Xcopy resets read-only attributes
/O – Copies file ownership and ACL information.
/X – Copies file audit settings (implies /O).
import sys
import subprocess
def copyWithSubprocess(cmd):
    """Run the copy command *cmd* and wait for it to finish.

    Args:
        cmd: Argument list handed to ``subprocess.Popen``, for example
            ``['cp', source, dest]``.

    Returns:
        The exit status of the child process.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Drain both pipes and reap the child. Without this the function returns
    # before the copy is done, and a command that prints a lot can block
    # forever once the unread pipe buffer fills up.
    proc.communicate()
    return proc.returncode
# Pick the platform's native copy tool; unsupported platforms get None.
if sys.platform.startswith("darwin"):
    cmd = ['cp', source, dest]
elif sys.platform.startswith("win"):
    cmd = ['xcopy', source, dest, '/K/O/X']
else:
    cmd = None

# Nothing is launched when no command was selected.
if cmd:
    copyWithSubprocess(cmd)
The fastest version I’ve got, without over-optimizing the code, is the following:
import os


class CTError(Exception):
    """Raised by copytree() carrying every failure collected during the walk."""

    def __init__(self, errors):
        super(CTError, self).__init__(errors)
        # List of (source path, destination path, error message) tuples.
        self.errors = errors


# os.O_BINARY only exists on some platforms (e.g. Windows); fall back to a
# no-op flag value where it is missing.
O_BINARY = getattr(os, "O_BINARY", 0)
READ_FLAGS = os.O_RDONLY | O_BINARY
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | O_BINARY
BUFFER_SIZE = 128 * 1024


def copyfile(src, dst):
    """Copy the contents of *src* to *dst* using raw file descriptors.

    *dst* is created (or truncated) with the mode reported by ``fstat`` on
    the source.

    Args:
        src: Path of the file to read.
        dst: Path of the file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened, read, written or closed.
    """
    fin = os.open(src, READ_FLAGS)
    try:
        mode = os.fstat(fin).st_mode
        fout = os.open(dst, WRITE_FLAGS, mode)
        try:
            # os.read returns bytes, so end-of-file must be detected with
            # b""; a "" sentinel never matches on Python 3 and loops forever.
            for chunk in iter(lambda: os.read(fin, BUFFER_SIZE), b""):
                # os.write may write fewer bytes than requested; keep going
                # until the whole chunk is on its way to disk.
                while chunk:
                    written = os.write(fout, chunk)
                    chunk = chunk[written:]
        finally:
            os.close(fout)
    finally:
        # Each descriptor is closed only if it was actually opened.
        os.close(fin)


def copytree(src, dst, symlinks=False, ignore=()):
    """Recursively copy the directory *src* into *dst*.

    Copying continues after a failure; all failures are reported together
    once the walk is finished.

    Args:
        src: Directory to copy from.
        dst: Directory to copy into; created if it does not exist.
        symlinks: When true, symbolic links are recreated as links instead
            of copying what they point to.
        ignore: Collection of entry names to skip at every level.

    Raises:
        CTError: If one or more entries could not be copied; ``errors``
            holds a (source, destination, message) tuple for each.
        OSError: If *src* cannot be listed or *dst* cannot be created.
    """
    names = os.listdir(src)
    if not os.path.exists(dst):
        os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignore:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                os.symlink(os.readlink(srcname), dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                copyfile(srcname, dstname)
            # XXX What about devices, sockets etc.?
        except CTError as err:
            # Failures from a nested directory are merged into this level's list.
            errors.extend(err.errors)
        except (IOError, os.error) as why:
            errors.append((srcname, dstname, str(why)))
    if errors:
        raise CTError(errors)
This code runs a little bit slower than native linux “cp -rf”.
Compared to shutil, the gain is around 2x-3x for local storage to tmpfs and around 6x for NFS to local storage.
After profiling I’ve noticed that shutil.copy does lots of fstat syscalls, which are pretty heavyweight.
If one wants to optimize further, I would suggest doing a single fstat for src and reusing the values. Honestly, I didn’t go further as I got almost the same figures as the native Linux copy tool, and optimizing for several hundreds of milliseconds wasn’t my goal.
What is the fastest way to copy files in a python program?
It takes at least 3 times longer to copy files with shutil.copyfile()
versus a regular right-click-copy > right-click-paste using Windows File Explorer or Mac’s Finder.
Is there any faster alternative to shutil.copyfile()
in Python? What could be done to speed up a file copying process? (The files destination is on the network drive… if it makes any difference…).
EDITED LATER:
Here is what I have ended up with:
def copyWithSubprocess(cmd):
    """Run the copy command *cmd* and wait for it to finish.

    Args:
        cmd: Argument list handed to ``subprocess.Popen``, for example
            ``['cp', source, dest]``.

    Returns:
        The exit status of the child process.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Drain both pipes and reap the child. Without this the function returns
    # before the copy is done, and a command that prints a lot can block
    # forever once the unread pipe buffer fills up.
    proc.communicate()
    return proc.returncode
# Detect the host OS once; at most one of the two flags ends up True.
mac = sys.platform.startswith("darwin")
win = not mac and sys.platform.startswith("win")

# Choose the native copy tool; any other platform leaves cmd as None.
if mac:
    cmd = ['cp', source, dest]
elif win:
    cmd = ['xcopy', source, dest, '/K/O/X']
else:
    cmd = None

# Nothing is launched when no command was selected.
if cmd:
    copyWithSubprocess(cmd)
This is just a guess, but … you're timing it wrong … that is, when you copy the file, it opens the file and reads it all into memory, so that when you paste you only create a file and dump your memory contents.
in python
# Read raw bytes so the data can be written back unchanged to a file opened
# in "wb" mode; the with-block also closes the source file instead of
# leaking the handle.
with open("some_file", "rb") as src_file:
    copied_file = src_file.read()
is the equivalent of the ctrl + c copy
then
# "Paste" step: create the destination file and dump the bytes held in memory.
with open("new_file", "wb") as dst_file:
    dst_file.write(copied_file)
is the equivalent of the ctrl + v paste (so time that for equivalency ….)
If you want it to be more scalable to larger data (but it's not going to be as fast as ctrl+v / ctrl+c):
# Stream the file in fixed-size chunks so memory use stays flat for large
# files. The end-of-file sentinel must be b"" because a file opened in "rb"
# mode returns bytes; comparing against '' never matches on Python 3 and the
# loop would never end. Fixed-size reads are used instead of readline()
# because binary data has no meaningful line structure.
with open(infile, "rb") as fin, open(outfile, "wb") as fout:
    for chunk in iter(lambda: fin.read(64 * 1024), b""):
        fout.write(chunk)
You could simply just use the OS you are doing the copy on, for Windows:
from subprocess import call

# Backslashes in Windows paths must not be parsed as escape sequences:
# "\f" is a form feed, and a trailing "\" would escape the closing quote
# (a raw string cannot end in a backslash either, hence the doubled form).
call(["xcopy", r"c:\file.txt", "n:\\folder\\", "/K/O/X"])
/K – Copies attributes. Typically, Xcopy resets read-only attributes
/O – Copies file ownership and ACL information.
/X – Copies file audit settings (implies /O).
import sys
import subprocess
def copyWithSubprocess(cmd):
    """Run the copy command *cmd* and wait for it to finish.

    Args:
        cmd: Argument list handed to ``subprocess.Popen``, for example
            ``['cp', source, dest]``.

    Returns:
        The exit status of the child process.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Drain both pipes and reap the child. Without this the function returns
    # before the copy is done, and a command that prints a lot can block
    # forever once the unread pipe buffer fills up.
    proc.communicate()
    return proc.returncode
# Pick the platform's native copy tool; unsupported platforms get None.
if sys.platform.startswith("darwin"):
    cmd = ['cp', source, dest]
elif sys.platform.startswith("win"):
    cmd = ['xcopy', source, dest, '/K/O/X']
else:
    cmd = None

# Nothing is launched when no command was selected.
if cmd:
    copyWithSubprocess(cmd)
The fastest version I’ve got, without over-optimizing the code, is the following:
import os


class CTError(Exception):
    """Raised by copytree() carrying every failure collected during the walk."""

    def __init__(self, errors):
        super(CTError, self).__init__(errors)
        # List of (source path, destination path, error message) tuples.
        self.errors = errors


# os.O_BINARY only exists on some platforms (e.g. Windows); fall back to a
# no-op flag value where it is missing.
O_BINARY = getattr(os, "O_BINARY", 0)
READ_FLAGS = os.O_RDONLY | O_BINARY
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | O_BINARY
BUFFER_SIZE = 128 * 1024


def copyfile(src, dst):
    """Copy the contents of *src* to *dst* using raw file descriptors.

    *dst* is created (or truncated) with the mode reported by ``fstat`` on
    the source.

    Args:
        src: Path of the file to read.
        dst: Path of the file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened, read, written or closed.
    """
    fin = os.open(src, READ_FLAGS)
    try:
        mode = os.fstat(fin).st_mode
        fout = os.open(dst, WRITE_FLAGS, mode)
        try:
            # os.read returns bytes, so end-of-file must be detected with
            # b""; a "" sentinel never matches on Python 3 and loops forever.
            for chunk in iter(lambda: os.read(fin, BUFFER_SIZE), b""):
                # os.write may write fewer bytes than requested; keep going
                # until the whole chunk is on its way to disk.
                while chunk:
                    written = os.write(fout, chunk)
                    chunk = chunk[written:]
        finally:
            os.close(fout)
    finally:
        # Each descriptor is closed only if it was actually opened.
        os.close(fin)


def copytree(src, dst, symlinks=False, ignore=()):
    """Recursively copy the directory *src* into *dst*.

    Copying continues after a failure; all failures are reported together
    once the walk is finished.

    Args:
        src: Directory to copy from.
        dst: Directory to copy into; created if it does not exist.
        symlinks: When true, symbolic links are recreated as links instead
            of copying what they point to.
        ignore: Collection of entry names to skip at every level.

    Raises:
        CTError: If one or more entries could not be copied; ``errors``
            holds a (source, destination, message) tuple for each.
        OSError: If *src* cannot be listed or *dst* cannot be created.
    """
    names = os.listdir(src)
    if not os.path.exists(dst):
        os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignore:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                os.symlink(os.readlink(srcname), dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                copyfile(srcname, dstname)
            # XXX What about devices, sockets etc.?
        except CTError as err:
            # Failures from a nested directory are merged into this level's list.
            errors.extend(err.errors)
        except (IOError, os.error) as why:
            errors.append((srcname, dstname, str(why)))
    if errors:
        raise CTError(errors)
This code runs a little bit slower than native linux “cp -rf”.
Compared to shutil, the gain is around 2x-3x for local storage to tmpfs and around 6x for NFS to local storage.
After profiling I’ve noticed that shutil.copy does lots of fstat syscalls, which are pretty heavyweight.
If one wants to optimize further, I would suggest doing a single fstat for src and reusing the values. Honestly, I didn’t go further as I got almost the same figures as the native Linux copy tool, and optimizing for several hundreds of milliseconds wasn’t my goal.