Is there a fast way to shuffle numpy image in segments?
Question:
I want to write a function that can take small images and return a permutation of them, block-wise.
Basically I want to turn this:
Into this:
There was an excellent answer in “Is there a function in Python that shuffle data by data blocks?” that helped me write a solution. However, for ~50,000 28×28 images it takes a long time to run.
# blocks of 7x7 shuffling
# NOTE(review): `x1`, `time` and `begin` are not defined in this snippet; they
# presumably come from earlier in the asker's session (x1 being the stack of
# 28x28 images mentioned in the question) - confirm.
range1 = np.arange(4)
range2 = np.arange(4)
block_size = int(28 / 4)
# NOTE(review): `x` is only bound by the loop below, so this debug print relies
# on an `x` left over from earlier code - confirm.
print([[x[i*block_size:(i+1)*block_size].shape] for i in range1])
for x in x1:
    # Reorder the four strips of block_size rows of this image, in place.
    np.random.shuffle(range1)
    x[:] = np.block([[x[i*block_size:(i+1)*block_size]] for i in range1])
    # Every row then gets its own freshly shuffled order of column segments;
    # this per-row inner loop is the part the question reports as slow.
    for a in x:
        np.random.shuffle(range2)
        a[:] = np.block([a[i*block_size:(i+1)*block_size] for i in range2])
print("x1", time.time() - begin)
begin = time.time()
Answers:
I already found a solution that runs much faster. I feel silly because I didn’t really need a double for loop, just two separate shuffle indexes. Leaving this solution here in case anyone wants to shuffle an image block-wise in numpy.
If anyone comes up with another good solution, let me know.
# blocks of 7x7 shuffling
# NOTE(review): `x1` is not defined in this snippet; presumably the stack of
# 28x28 images from the question - confirm.
range1 = np.arange(4)
range2 = np.arange(4)
block_size = int(28 / 4)
for x in x1:
    # One permutation for the row strips and one for the column strips,
    # drawn once per image instead of once per row.
    np.random.shuffle(range1)
    np.random.shuffle(range2)
    # Reorder the strips of block_size rows, in place.
    x[:] = np.block([[x[i*block_size:(i+1)*block_size]] for i in range1])
    # Reorder the strips of block_size columns, in place.
    x[:] = np.block([x[:,i*block_size:(i+1)*block_size] for i in range2])
Here’s one approach, based on this post:
def randomize_tiles_3D(x1, H, W):
    """Shuffle the H-by-W tiles of every image in a stack, independently.

    Parameters
    ----------
    x1 : ndarray of shape (m, n, p)
        Stack of m images, each n rows by p columns.
    H, W : int
        Tile height and width. H must divide n and W must divide p.

    Returns
    -------
    ndarray of shape (m, n, p)
        New array in which each image's tiles are rearranged by a random
        permutation drawn separately for that image. `x1` is not modified.

    Raises
    ------
    ValueError
        If n is not a multiple of H or p is not a multiple of W.
    """
    m, n, p = x1.shape
    # Without this check the reshape below can regroup data across image
    # boundaries and return a wrongly shaped result instead of failing.
    if n % H or p % W:
        raise ValueError(
            "tile size (%d, %d) does not evenly divide image size (%d, %d)"
            % (H, W, n, p)
        )
    l1, l2 = n // H, p // W  # tiles per column / per row of one image

    # Argsorting random numbers gives one independent permutation of the
    # l1*l2 tile indices per image.
    combs = np.random.rand(m, l1 * l2).argsort(axis=1)
    # Flat tile index -> (tile row, tile column).
    r, c = np.unravel_index(combs, (l1, l2))

    # Axes: (image, tile row, row in tile, tile column, column in tile).
    x1cr = x1.reshape(m, l1, H, l2, W)
    # The three index arrays broadcast to (m, l1*l2); the picked tiles come
    # out as (m, l1*l2, H, W).
    out = x1cr[np.arange(m)[:, None], r, :, c]
    # Lay the tiles back on the l1 x l2 grid and interleave to image layout.
    return out.reshape(m, l1, l2, H, W).swapaxes(2, 3).reshape(m, n, p)
Sample run –
In [46]: x1
Out[46]:
array([[[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]],
[[36, 37, 38, 39, 40, 41],
[42, 43, 44, 45, 46, 47],
[48, 49, 50, 51, 52, 53],
[54, 55, 56, 57, 58, 59],
[60, 61, 62, 63, 64, 65],
[66, 67, 68, 69, 70, 71]]])
In [47]: np.random.seed(0)
In [48]: randomize_tiles_3D(x1, H=3, W=3)
Out[48]:
array([[[21, 22, 23, 0, 1, 2],
[27, 28, 29, 6, 7, 8],
[33, 34, 35, 12, 13, 14],
[18, 19, 20, 3, 4, 5],
[24, 25, 26, 9, 10, 11],
[30, 31, 32, 15, 16, 17]],
[[36, 37, 38, 54, 55, 56],
[42, 43, 44, 60, 61, 62],
[48, 49, 50, 66, 67, 68],
[39, 40, 41, 57, 58, 59],
[45, 46, 47, 63, 64, 65],
[51, 52, 53, 69, 70, 71]]])
It will be more efficient to use numpy.lib.stride_tricks.as_strided
to break 2D matrices into blocks.
import numpy as np
# Demo: shuffle the block_width x block_width tiles of a square test image.
img_width, block_width = 12, 3
# Number of tiles along each side of the image.
n = img_width // block_width
# Test image holding 0 .. img_width**2 - 1, so each tile is recognisable in the printed output.
a = np.arange(img_width * img_width).reshape(img_width, img_width)
print(a)
# View `a` as an (n, n, block_width, block_width) grid of tiles.
# Strides are written in elements and scaled to bytes by a.itemsize:
#   n * block_width ** 2 -> down one row of tiles (block_width image rows)
#   block_width          -> across to the next tile in the same tile row
#   n * block_width      -> down one image row inside a tile
#   1                    -> across one pixel inside a tile
blocks = np.lib.stride_tricks.as_strided(a,
shape=(n, n, block_width, block_width),
strides=(a.itemsize * np.array([n * block_width ** 2, block_width, n * block_width, 1])))
print(blocks)
# NOTE(review): the second as_strided call below addresses `blocks` as if the
# tiles were stored one after another in memory; that presumably relies on this
# reshape producing a contiguous copy rather than a view - confirm.
blocks = blocks.reshape((n * n, block_width, block_width)) # flatten for better shuffle
# Shuffles along the first axis, i.e. reorders whole tiles.
np.random.shuffle(blocks)
print(blocks)
# Re-view the shuffled tiles as (tile row, row in tile, tile column, column in tile).
# Strides in elements:
#   n * block_width ** 2 -> skip the n tiles of one tile row
#   block_width          -> next row inside a tile
#   block_width ** 2     -> next tile
#   1                    -> next pixel
blocks = np.lib.stride_tricks.as_strided(blocks,
shape=(n, block_width, n, block_width),
strides=(a.itemsize * np.array([n * block_width ** 2, block_width, block_width ** 2, 1])))
# Collapse the four axes back into an img_width x img_width image.
shuffled = np.reshape(blocks, (img_width, img_width))
print(shuffled)
Output can be found here: blocks_shuffle_example.ipynb
Document: numpy.lib.stride_tricks.as_strided
Here’s one approach:
Assume that the original image has shape (m, n), and each block has shape (w, h).
import numpy as np
# split image into tiles of w*h blocks with shape = ((m * n) / (w * h), w, h)
# NOTE(review): `img_pad`, `m`, `n`, `w` and `h` are not defined in this snippet;
# img_pad is presumably the (m, n) image, padded so that w divides m and h divides n - confirm.
tiles = np.array([img_pad[x : x+w, y : y+h] for x in range(0, m, w) for y in range(0, n, h)])
np.random.shuffle(tiles)
# merge back to shape = (m, n)
mb, nb = m // w, n // h
# Build a list rather than a generator: handing np.vstack a non-sequence
# iterable has been deprecated since NumPy 1.16.
res = np.vstack([np.hstack(tiles[i*nb : (i+1)*nb]) for i in range(mb)])
Update:
# The bare generator expression handed to np.vstack is what triggers the warning quoted below.
res = np.vstack(np.hstack(tiles[i*nb : (i+1)*nb]) for i in range(mb))
may cause "FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future." while running.
Use
# Each inner one-element list is one row of tiles; np.block stacks these rows vertically.
res = np.block([[np.hstack(tiles[i*nb : (i+1)*nb])] for i in range(mb)])
instead, and the warning no longer appears.
I want to write a function that can take small images and return a permutation of them, block-wise.
Basically I want to turn this:
Into this:
There was an excellent answer in “Is there a function in Python that shuffle data by data blocks?” that helped me write a solution. However, for ~50,000 28×28 images it takes a long time to run.
# blocks of 7x7 shuffling
# NOTE(review): `x1`, `time` and `begin` are not defined in this snippet; they
# presumably come from earlier in the asker's session (x1 being the stack of
# 28x28 images mentioned in the question) - confirm.
range1 = np.arange(4)
range2 = np.arange(4)
block_size = int(28 / 4)
# NOTE(review): `x` is only bound by the loop below, so this debug print relies
# on an `x` left over from earlier code - confirm.
print([[x[i*block_size:(i+1)*block_size].shape] for i in range1])
for x in x1:
    # Reorder the four strips of block_size rows of this image, in place.
    np.random.shuffle(range1)
    x[:] = np.block([[x[i*block_size:(i+1)*block_size]] for i in range1])
    # Every row then gets its own freshly shuffled order of column segments;
    # this per-row inner loop is the part the question reports as slow.
    for a in x:
        np.random.shuffle(range2)
        a[:] = np.block([a[i*block_size:(i+1)*block_size] for i in range2])
print("x1", time.time() - begin)
begin = time.time()
I already found a solution that runs much faster. I feel silly because I didn’t really need a double for loop, just two separate shuffle indexes. Leaving this solution here in case anyone wants to shuffle an image block-wise in numpy.
If anyone comes up with another good solution, let me know.
# blocks of 7x7 shuffling
# NOTE(review): `x1` is not defined in this snippet; presumably the stack of
# 28x28 images from the question - confirm.
range1 = np.arange(4)
range2 = np.arange(4)
block_size = int(28 / 4)
for x in x1:
    # One permutation for the row strips and one for the column strips,
    # drawn once per image instead of once per row.
    np.random.shuffle(range1)
    np.random.shuffle(range2)
    # Reorder the strips of block_size rows, in place.
    x[:] = np.block([[x[i*block_size:(i+1)*block_size]] for i in range1])
    # Reorder the strips of block_size columns, in place.
    x[:] = np.block([x[:,i*block_size:(i+1)*block_size] for i in range2])
Here’s one approach, based on this post:
def randomize_tiles_3D(x1, H, W):
    """Shuffle the H-by-W tiles of every image in a stack, independently.

    Parameters
    ----------
    x1 : ndarray of shape (m, n, p)
        Stack of m images, each n rows by p columns.
    H, W : int
        Tile height and width. H must divide n and W must divide p.

    Returns
    -------
    ndarray of shape (m, n, p)
        New array in which each image's tiles are rearranged by a random
        permutation drawn separately for that image. `x1` is not modified.

    Raises
    ------
    ValueError
        If n is not a multiple of H or p is not a multiple of W.
    """
    m, n, p = x1.shape
    # Without this check the reshape below can regroup data across image
    # boundaries and return a wrongly shaped result instead of failing.
    if n % H or p % W:
        raise ValueError(
            "tile size (%d, %d) does not evenly divide image size (%d, %d)"
            % (H, W, n, p)
        )
    l1, l2 = n // H, p // W  # tiles per column / per row of one image

    # Argsorting random numbers gives one independent permutation of the
    # l1*l2 tile indices per image.
    combs = np.random.rand(m, l1 * l2).argsort(axis=1)
    # Flat tile index -> (tile row, tile column).
    r, c = np.unravel_index(combs, (l1, l2))

    # Axes: (image, tile row, row in tile, tile column, column in tile).
    x1cr = x1.reshape(m, l1, H, l2, W)
    # The three index arrays broadcast to (m, l1*l2); the picked tiles come
    # out as (m, l1*l2, H, W).
    out = x1cr[np.arange(m)[:, None], r, :, c]
    # Lay the tiles back on the l1 x l2 grid and interleave to image layout.
    return out.reshape(m, l1, l2, H, W).swapaxes(2, 3).reshape(m, n, p)
Sample run –
In [46]: x1
Out[46]:
array([[[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]],
[[36, 37, 38, 39, 40, 41],
[42, 43, 44, 45, 46, 47],
[48, 49, 50, 51, 52, 53],
[54, 55, 56, 57, 58, 59],
[60, 61, 62, 63, 64, 65],
[66, 67, 68, 69, 70, 71]]])
In [47]: np.random.seed(0)
In [48]: randomize_tiles_3D(x1, H=3, W=3)
Out[48]:
array([[[21, 22, 23, 0, 1, 2],
[27, 28, 29, 6, 7, 8],
[33, 34, 35, 12, 13, 14],
[18, 19, 20, 3, 4, 5],
[24, 25, 26, 9, 10, 11],
[30, 31, 32, 15, 16, 17]],
[[36, 37, 38, 54, 55, 56],
[42, 43, 44, 60, 61, 62],
[48, 49, 50, 66, 67, 68],
[39, 40, 41, 57, 58, 59],
[45, 46, 47, 63, 64, 65],
[51, 52, 53, 69, 70, 71]]])
It will be more efficient to use numpy.lib.stride_tricks.as_strided
to break 2D matrices into blocks.
import numpy as np
# Demo: shuffle the block_width x block_width tiles of a square test image.
img_width, block_width = 12, 3
# Number of tiles along each side of the image.
n = img_width // block_width
# Test image holding 0 .. img_width**2 - 1, so each tile is recognisable in the printed output.
a = np.arange(img_width * img_width).reshape(img_width, img_width)
print(a)
# View `a` as an (n, n, block_width, block_width) grid of tiles.
# Strides are written in elements and scaled to bytes by a.itemsize:
#   n * block_width ** 2 -> down one row of tiles (block_width image rows)
#   block_width          -> across to the next tile in the same tile row
#   n * block_width      -> down one image row inside a tile
#   1                    -> across one pixel inside a tile
blocks = np.lib.stride_tricks.as_strided(a,
shape=(n, n, block_width, block_width),
strides=(a.itemsize * np.array([n * block_width ** 2, block_width, n * block_width, 1])))
print(blocks)
# NOTE(review): the second as_strided call below addresses `blocks` as if the
# tiles were stored one after another in memory; that presumably relies on this
# reshape producing a contiguous copy rather than a view - confirm.
blocks = blocks.reshape((n * n, block_width, block_width)) # flatten for better shuffle
# Shuffles along the first axis, i.e. reorders whole tiles.
np.random.shuffle(blocks)
print(blocks)
# Re-view the shuffled tiles as (tile row, row in tile, tile column, column in tile).
# Strides in elements:
#   n * block_width ** 2 -> skip the n tiles of one tile row
#   block_width          -> next row inside a tile
#   block_width ** 2     -> next tile
#   1                    -> next pixel
blocks = np.lib.stride_tricks.as_strided(blocks,
shape=(n, block_width, n, block_width),
strides=(a.itemsize * np.array([n * block_width ** 2, block_width, block_width ** 2, 1])))
# Collapse the four axes back into an img_width x img_width image.
shuffled = np.reshape(blocks, (img_width, img_width))
print(shuffled)
Output can be found here: blocks_shuffle_example.ipynb
Document: numpy.lib.stride_tricks.as_strided
Here’s one approach:
Assume that the original image has shape (m, n), and each block has shape (w, h).
import numpy as np
# split image into tiles of w*h blocks with shape = ((m * n) / (w * h), w, h)
# NOTE(review): `img_pad`, `m`, `n`, `w` and `h` are not defined in this snippet;
# img_pad is presumably the (m, n) image, padded so that w divides m and h divides n - confirm.
tiles = np.array([img_pad[x : x+w, y : y+h] for x in range(0, m, w) for y in range(0, n, h)])
np.random.shuffle(tiles)
# merge back to shape = (m, n)
mb, nb = m // w, n // h
# Build a list rather than a generator: handing np.vstack a non-sequence
# iterable has been deprecated since NumPy 1.16.
res = np.vstack([np.hstack(tiles[i*nb : (i+1)*nb]) for i in range(mb)])
Update:
# The bare generator expression handed to np.vstack is what triggers the warning quoted below.
res = np.vstack(np.hstack(tiles[i*nb : (i+1)*nb]) for i in range(mb))
may cause "FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future." while running.
Use
# Each inner one-element list is one row of tiles; np.block stacks these rows vertically.
res = np.block([[np.hstack(tiles[i*nb : (i+1)*nb])] for i in range(mb)])
instead, and the warning no longer appears.