Python dictionary deepcopy
Question:
I was wondering how exactly deepcopy works in the following context:
from copy import deepcopy
def copyExample(self):
    """Rebuild ``self.myDict`` and return a deep copy of it.

    The returned dictionary, and every list stored in it, is a new object
    produced by ``copy.deepcopy``; mutating the result does not affect
    ``self.myDict``.
    """
    firstPosition = "First"
    firstPositionContent = ["first", "primero"]
    secondPosition = "Second"
    secondPositionContent = ["second"]
    # The instance dictionary is recreated from scratch on every call.
    self.myDict = {}
    self.myDict[firstPosition] = firstPositionContent
    self.myDict[secondPosition] = secondPositionContent
    return deepcopy(self.myDict)
def addExample(self):
    """Append ``"segundo"`` to the ``"Second"`` list of a copied dictionary.

    Only the dictionary returned by ``self.copyExample()`` is modified.
    """
    # Named ``copied`` rather than ``copy`` to avoid shadowing the module.
    copied = self.copyExample()
    # Lists have no ``add`` method (that belongs to ``set``); ``append``
    # is the list operation that adds a single element at the end.
    copied["Second"].append("segundo")
Does it return the reference to the lists I have in the dictionary? Or does it work as I expect and copy every list in a new list with a different reference?
I know what a deep copy is (so there is no need to explain the difference between deep and shallow), but I am wondering whether it works as I expect and therefore does not change the instance variable when I use addExample().
Answers:
The documentation makes it pretty clear that you’re getting new copies, not references. deepcopy creates deep copies of built-in types, with various exceptions, and you can add custom copy operations to your user-defined objects to get deep-copy support for them as well. If you’re not sure, well, that’s what unit testing is for.
I know it isn’t answering your question but I think it’s noteworthy for people looking at this question.
If the data you’re copying is simple in nature, deepcopy might be overkill. By simple in nature I mean that your data is representable as JSON. Let me illustrate with code:
I’ve used http://www.json-generator.com/ to get some sample json data.
def deepCopyList(inp):
    """Generate the elements of *inp*, copying nested containers.

    Nested lists are rebuilt as new lists and nested dicts are copied with
    ``deepCopyDict``; any other element is yielded as the same object.
    This is a generator: wrap the call in ``list(...)`` to obtain a list.
    """
    for element in inp:
        if isinstance(element, list):
            copied = list(deepCopyList(element))
        elif isinstance(element, dict):
            copied = deepCopyDict(element)
        else:
            # Non-container values are passed through without copying.
            copied = element
        yield copied
def deepCopyDict(inp):
    """Return a copy of dict *inp* whose nested dicts and lists are copied too.

    The top level is duplicated with ``inp.copy()``. Values that are dicts
    are copied recursively, values that are lists are rebuilt through
    ``deepCopyList``, and all other values are shared with *inp*.
    """
    outp = inp.copy()
    # ``items()`` is available on both Python 2 and Python 3, whereas
    # ``iteritems()`` exists only on Python 2. Iterating the source while
    # writing to the copy avoids assigning into the dict being iterated.
    for ky, vl in inp.items():
        if isinstance(vl, dict):
            outp[ky] = deepCopyDict(vl)
        elif isinstance(vl, list):
            outp[ky] = list(deepCopyList(vl))
    return outp
def simpleDeepCopy(inp):
    """Return a deep copy of JSON-like data made of dicts, lists and scalars.

    Dicts are copied with ``deepCopyDict`` and lists are rebuilt from
    ``deepCopyList``; any other value is returned unchanged.
    """
    if isinstance(inp, dict):
        return deepCopyDict(inp)
    elif isinstance(inp, list):
        # ``deepCopyList`` is a generator; materialise it so the caller
        # receives an actual list rather than a lazy, single-use iterator
        # on which no copying has happened yet.
        return list(deepCopyList(inp))
    else:
        return inp
if __name__ == '__main__':
    # Benchmark: time 10000 copies of the data in 'sample.json' with
    # simpleDeepCopy and with copy.deepcopy, printing the elapsed seconds.

    # Prefer simplejson as before; fall back to the standard-library json
    # module, which provides the same ``load`` function.
    try:
        import simplejson as json
    except ImportError:
        import json
    import time
    from copy import deepcopy

    # The context manager closes the file even if parsing fails.
    with open('sample.json', 'r') as fl:
        sample = json.load(fl)

    # ``range`` and single-argument ``print(...)`` behave the same on
    # Python 2 and Python 3.
    start = time.time()
    for _ in range(10000):
        tmp = simpleDeepCopy(sample)
    end = time.time()
    print('simpleDeepCopy: ' + str(end - start))

    start = time.time()
    for _ in range(10000):
        tmp = deepcopy(sample)
    end = time.time()
    print('copy.deepcopy: ' + str(end - start))
output:
simpleDeepCopy: 0.0132050514221
copy.deepcopy: 2.66142916679
simpleDeepCopy: 0.0128579139709
copy.deepcopy: 2.60736298561
I was wondering how exactly deepcopy works in the following context:
from copy import deepcopy
def copyExample(self):
    """Rebuild ``self.myDict`` and return a deep copy of it.

    The returned dictionary, and every list stored in it, is a new object
    produced by ``copy.deepcopy``; mutating the result does not affect
    ``self.myDict``.
    """
    firstPosition = "First"
    firstPositionContent = ["first", "primero"]
    secondPosition = "Second"
    secondPositionContent = ["second"]
    # The instance dictionary is recreated from scratch on every call.
    self.myDict = {}
    self.myDict[firstPosition] = firstPositionContent
    self.myDict[secondPosition] = secondPositionContent
    return deepcopy(self.myDict)
def addExample(self):
    """Append ``"segundo"`` to the ``"Second"`` list of a copied dictionary.

    Only the dictionary returned by ``self.copyExample()`` is modified.
    """
    # Named ``copied`` rather than ``copy`` to avoid shadowing the module.
    copied = self.copyExample()
    # Lists have no ``add`` method (that belongs to ``set``); ``append``
    # is the list operation that adds a single element at the end.
    copied["Second"].append("segundo")
Does it return the reference to the lists I have in the dictionary? Or does it work as I expect and copy every list in a new list with a different reference?
I know what a deep copy is (so there is no need to explain the difference between deep and shallow), but I am wondering whether it works as I expect and therefore does not change the instance variable when I use addExample().
The documentation makes it pretty clear that you’re getting new copies, not references. deepcopy creates deep copies of built-in types, with various exceptions, and you can add custom copy operations to your user-defined objects to get deep-copy support for them as well. If you’re not sure, well, that’s what unit testing is for.
I know it isn’t answering your question but I think it’s noteworthy for people looking at this question.
If the data you’re copying is simple in nature, deepcopy might be overkill. By simple in nature I mean that your data is representable as JSON. Let me illustrate with code:
I’ve used http://www.json-generator.com/ to get some sample json data.
def deepCopyList(inp):
    """Generate the elements of *inp*, copying nested containers.

    Nested lists are rebuilt as new lists and nested dicts are copied with
    ``deepCopyDict``; any other element is yielded as the same object.
    This is a generator: wrap the call in ``list(...)`` to obtain a list.
    """
    for element in inp:
        if isinstance(element, list):
            copied = list(deepCopyList(element))
        elif isinstance(element, dict):
            copied = deepCopyDict(element)
        else:
            # Non-container values are passed through without copying.
            copied = element
        yield copied
def deepCopyDict(inp):
    """Return a copy of dict *inp* whose nested dicts and lists are copied too.

    The top level is duplicated with ``inp.copy()``. Values that are dicts
    are copied recursively, values that are lists are rebuilt through
    ``deepCopyList``, and all other values are shared with *inp*.
    """
    outp = inp.copy()
    # ``items()`` is available on both Python 2 and Python 3, whereas
    # ``iteritems()`` exists only on Python 2. Iterating the source while
    # writing to the copy avoids assigning into the dict being iterated.
    for ky, vl in inp.items():
        if isinstance(vl, dict):
            outp[ky] = deepCopyDict(vl)
        elif isinstance(vl, list):
            outp[ky] = list(deepCopyList(vl))
    return outp
def simpleDeepCopy(inp):
    """Return a deep copy of JSON-like data made of dicts, lists and scalars.

    Dicts are copied with ``deepCopyDict`` and lists are rebuilt from
    ``deepCopyList``; any other value is returned unchanged.
    """
    if isinstance(inp, dict):
        return deepCopyDict(inp)
    elif isinstance(inp, list):
        # ``deepCopyList`` is a generator; materialise it so the caller
        # receives an actual list rather than a lazy, single-use iterator
        # on which no copying has happened yet.
        return list(deepCopyList(inp))
    else:
        return inp
if __name__ == '__main__':
    # Benchmark: time 10000 copies of the data in 'sample.json' with
    # simpleDeepCopy and with copy.deepcopy, printing the elapsed seconds.

    # Prefer simplejson as before; fall back to the standard-library json
    # module, which provides the same ``load`` function.
    try:
        import simplejson as json
    except ImportError:
        import json
    import time
    from copy import deepcopy

    # The context manager closes the file even if parsing fails.
    with open('sample.json', 'r') as fl:
        sample = json.load(fl)

    # ``range`` and single-argument ``print(...)`` behave the same on
    # Python 2 and Python 3.
    start = time.time()
    for _ in range(10000):
        tmp = simpleDeepCopy(sample)
    end = time.time()
    print('simpleDeepCopy: ' + str(end - start))

    start = time.time()
    for _ in range(10000):
        tmp = deepcopy(sample)
    end = time.time()
    print('copy.deepcopy: ' + str(end - start))
output:
simpleDeepCopy: 0.0132050514221
copy.deepcopy: 2.66142916679
simpleDeepCopy: 0.0128579139709
copy.deepcopy: 2.60736298561