The fastest way to generate string of zeros
Question:
I need to generate some string of zeros for example:
# Question's original approach, quoted verbatim.
# NOTE(review): indentation was lost in extraction — the final
# `cache = cache + "0"` line belongs inside the while body.
import sys
MB = 1024 * 1024
cache = ''
# Grows the string one character at a time until sys.getsizeof reports
# more than 10 MB.  Repeated concatenation like this is potentially
# quadratic, which is exactly why the question calls it slow.
while sys.getsizeof(cache) <= 10 * MB:
cache = cache + "0"
and save it to a file, but I have the impression that this method is too slow and wastes a lot of system resources.
What is the best way to do it as fast as possible?
Answers:
You can “multiply” a string:
cache = '0' * (1024**2)
This gives you a string of 1024² = 1,048,576 zeros — just over a million. Any other string, or any other integer factor, works the same way.
There’s many ways you could do it. To figure out which is fastest, let’s just try a lot of ways and measure with timeit
.
Note that this code is a bit sloppy, and might not generate the correct length if the desired length is not a power of two. So you’ll need to neaten it up.
# Benchmark harness: compare several ways of building a large string of
# a single repeated character, timed with timeit.
import timeit
from io import StringIO
target_size = 2**24  # length each approach must produce (16 Mi characters)
starting_char = '0'  # the character being repeated
num_iters = 1000  # timeit repetitions per approach
def single_char_multiplied():
    """Build the string with a single repetition: the obvious one-liner."""
    # int * str performs the same operation as str * int.
    return target_size * starting_char
def single_char_join():
    """Join target_size individual copies of the character."""
    pieces = [starting_char for _ in range(target_size)]
    return ''.join(pieces)
def single_char_power():
    """Repeatedly double the string, then trim to the exact length."""
    buf = starting_char
    while len(buf) < target_size:
        buf = buf + buf  # same effect as buf *= 2
    return buf[:target_size]
def chunk_join(chunk_size):
    """Join pre-built chunks of ``chunk_size`` characters.

    Fixed to be exact for any ``chunk_size``: when ``target_size`` is not
    a multiple of ``chunk_size``, the remainder is appended, so the result
    is always exactly ``target_size`` characters (the original silently
    dropped the remainder, as its own comment admitted).
    """
    chunk = starting_char * chunk_size
    num_chunks, remainder = divmod(target_size, chunk_size)
    parts = [chunk] * num_chunks
    if remainder:
        parts.append(starting_char * remainder)
    return ''.join(parts)
def stringio_single_append():
    """Write the character into a StringIO one call at a time."""
    with StringIO() as buf:
        write = buf.write  # hoist the bound method out of the loop
        for _ in range(target_size):
            write(starting_char)
        return buf.getvalue()
def stringio_chunk_append(chunk_size):
    """Append pre-built chunks to a StringIO until it reaches target_size."""
    piece = starting_char * chunk_size
    with StringIO() as buf:
        while buf.tell() < target_size:
            buf.write(piece)
        return buf.getvalue()
def stringio_doubling():
    """Double the buffer by appending a copy of its current contents.

    NOTE: the result is not trimmed, so it can overshoot target_size when
    target_size is not a power of two — the sloppiness the answer admits.
    """
    with StringIO(starting_char) as buf:
        while buf.tell() < target_size:
            buf.write(buf.getvalue())
        return buf.getvalue()
def dev_zero_single_read():
    # NOTE(review): /dev/zero yields NUL ('\x00') characters, not
    # starting_char, so this approach does not produce the same string as
    # the others — and it is POSIX-only.  Useful only as a timing baseline.
    with open('/dev/zero', 'r') as f:
        return f.read(target_size)
# Time every approach, then print them sorted fastest-first.
approaches = [
    [single_char_multiplied, 'single_char_multiplied'],
    [single_char_join, 'single_char_join'],
    [single_char_power, 'single_char_power'],
    [stringio_single_append, 'stringio_single_append'],
    [stringio_doubling, 'stringio_doubling'],
    [dev_zero_single_read, 'dev_zero_single_read'],
]
for chunk_size in [10, 100, 1000, 10000, 100000]:
    # Bind chunk_size as a default argument: a plain
    # `lambda: chunk_join(chunk_size)` captures the loop variable by
    # reference, so every lambda would benchmark the *last* chunk size
    # (100000) instead of its own.
    approaches.append(
        [lambda cs=chunk_size: chunk_join(cs),
         f"chunk_join({chunk_size})"])
    approaches.append(
        [lambda cs=chunk_size: stringio_chunk_append(cs),
         f"stringio_chunk_append({chunk_size})"])
for (i, approach) in enumerate(approaches):
    result = timeit.timeit(approach[0], number=num_iters)
    approach.append(result)
    print(f"{i}/{len(approaches)}: {approach[1]}: {result}")
approaches.sort(key=lambda a: a[-1])
print("Sorted results:")
for func, func_name, result in approaches:
    print(f"{func_name}: {result}")
(In hindsight, those chunks should probably be powers of 2.)
This gives:
single_char_multiplied: 1.4196025500000076
chunk_join(1000): 1.976723690999279
chunk_join(10000): 1.978875980000339
chunk_join(100000): 2.0014372969999386
chunk_join(10): 2.003043951000109
stringio_chunk_append(1000): 2.0336110369999005
stringio_chunk_append(100000): 2.038408315000197
stringio_chunk_append(10): 2.0456108839998706
chunk_join(100): 2.0504061949995958
stringio_chunk_append(100): 2.177647779999461
stringio_chunk_append(10000): 2.2308024960002513
single_char_power: 30.150350827999773
dev_zero_single_read: 32.01321319700037
stringio_doubling: 118.23563569500038
single_char_join: 2267.945749295
stringio_single_append: 3360.535176466
Surprisingly, the fastest approach turns out to be the simplest one: just '0' * n, as in @klaus-d's answer.
I need to generate some string of zeros for example:
# Question's original approach, quoted verbatim.
# NOTE(review): indentation was lost in extraction — the final
# `cache = cache + "0"` line belongs inside the while body.
import sys
MB = 1024 * 1024
cache = ''
# Grows the string one character at a time until sys.getsizeof reports
# more than 10 MB.  Repeated concatenation like this is potentially
# quadratic, which is exactly why the question calls it slow.
while sys.getsizeof(cache) <= 10 * MB:
cache = cache + "0"
and save it to a file, but I have the impression that this method is too slow and wastes a lot of system resources.
What is the best way to do it as fast as possible?
You can “multiply” a string:
cache = '0' * (1024**2)
This gives you a string of 1024² = 1,048,576 zeros — just over a million. Any other string, or any other integer factor, works the same way.
There’s many ways you could do it. To figure out which is fastest, let’s just try a lot of ways and measure with timeit
.
Note that this code is a bit sloppy, and might not generate the correct length if the desired length is not a power of two. So you’ll need to neaten it up.
# Benchmark harness: compare several ways of building a large string of
# a single repeated character, timed with timeit.
import timeit
from io import StringIO
target_size = 2**24  # length each approach must produce (16 Mi characters)
starting_char = '0'  # the character being repeated
num_iters = 1000  # timeit repetitions per approach
def single_char_multiplied():
    """Build the string with a single repetition: the obvious one-liner."""
    # int * str performs the same operation as str * int.
    return target_size * starting_char
def single_char_join():
    """Join target_size individual copies of the character."""
    pieces = [starting_char for _ in range(target_size)]
    return ''.join(pieces)
def single_char_power():
    """Repeatedly double the string, then trim to the exact length."""
    buf = starting_char
    while len(buf) < target_size:
        buf = buf + buf  # same effect as buf *= 2
    return buf[:target_size]
def chunk_join(chunk_size):
    """Join pre-built chunks of ``chunk_size`` characters.

    Fixed to be exact for any ``chunk_size``: when ``target_size`` is not
    a multiple of ``chunk_size``, the remainder is appended, so the result
    is always exactly ``target_size`` characters (the original silently
    dropped the remainder, as its own comment admitted).
    """
    chunk = starting_char * chunk_size
    num_chunks, remainder = divmod(target_size, chunk_size)
    parts = [chunk] * num_chunks
    if remainder:
        parts.append(starting_char * remainder)
    return ''.join(parts)
def stringio_single_append():
    """Write the character into a StringIO one call at a time."""
    with StringIO() as buf:
        write = buf.write  # hoist the bound method out of the loop
        for _ in range(target_size):
            write(starting_char)
        return buf.getvalue()
def stringio_chunk_append(chunk_size):
    """Append pre-built chunks to a StringIO until it reaches target_size."""
    piece = starting_char * chunk_size
    with StringIO() as buf:
        while buf.tell() < target_size:
            buf.write(piece)
        return buf.getvalue()
def stringio_doubling():
    """Double the buffer by appending a copy of its current contents.

    NOTE: the result is not trimmed, so it can overshoot target_size when
    target_size is not a power of two — the sloppiness the answer admits.
    """
    with StringIO(starting_char) as buf:
        while buf.tell() < target_size:
            buf.write(buf.getvalue())
        return buf.getvalue()
def dev_zero_single_read():
    # NOTE(review): /dev/zero yields NUL ('\x00') characters, not
    # starting_char, so this approach does not produce the same string as
    # the others — and it is POSIX-only.  Useful only as a timing baseline.
    with open('/dev/zero', 'r') as f:
        return f.read(target_size)
# Time every approach, then print them sorted fastest-first.
approaches = [
    [single_char_multiplied, 'single_char_multiplied'],
    [single_char_join, 'single_char_join'],
    [single_char_power, 'single_char_power'],
    [stringio_single_append, 'stringio_single_append'],
    [stringio_doubling, 'stringio_doubling'],
    [dev_zero_single_read, 'dev_zero_single_read'],
]
for chunk_size in [10, 100, 1000, 10000, 100000]:
    # Bind chunk_size as a default argument: a plain
    # `lambda: chunk_join(chunk_size)` captures the loop variable by
    # reference, so every lambda would benchmark the *last* chunk size
    # (100000) instead of its own.
    approaches.append(
        [lambda cs=chunk_size: chunk_join(cs),
         f"chunk_join({chunk_size})"])
    approaches.append(
        [lambda cs=chunk_size: stringio_chunk_append(cs),
         f"stringio_chunk_append({chunk_size})"])
for (i, approach) in enumerate(approaches):
    result = timeit.timeit(approach[0], number=num_iters)
    approach.append(result)
    print(f"{i}/{len(approaches)}: {approach[1]}: {result}")
approaches.sort(key=lambda a: a[-1])
print("Sorted results:")
for func, func_name, result in approaches:
    print(f"{func_name}: {result}")
(In hindsight, those chunks should probably be powers of 2.)
This gives:
single_char_multiplied: 1.4196025500000076
chunk_join(1000): 1.976723690999279
chunk_join(10000): 1.978875980000339
chunk_join(100000): 2.0014372969999386
chunk_join(10): 2.003043951000109
stringio_chunk_append(1000): 2.0336110369999005
stringio_chunk_append(100000): 2.038408315000197
stringio_chunk_append(10): 2.0456108839998706
chunk_join(100): 2.0504061949995958
stringio_chunk_append(100): 2.177647779999461
stringio_chunk_append(10000): 2.2308024960002513
single_char_power: 30.150350827999773
dev_zero_single_read: 32.01321319700037
stringio_doubling: 118.23563569500038
single_char_join: 2267.945749295
stringio_single_append: 3360.535176466
Surprisingly, the fastest approach turns out to be the simplest one: just '0' * n, as in @klaus-d's answer.