how to perform max/mean pooling on a 2d array using numpy
Question:
Given a 2D (M x N) matrix and a 2D kernel (K x L), how do I return a matrix that is the result of max or mean pooling using the given kernel over the image?
I’d like to use numpy if possible.
Note: M, N, K and L can each be even or odd, and they need not be perfectly divisible by each other, e.g. a 7×5 matrix and a 2×2 kernel.
eg of max pooling:
matrix:
array([[ 20, 200, -5, 23],
[ -13, 134, 119, 100],
[ 120, 32, 49, 25],
[-120, 12, 9, 23]])
kernel: 2 x 2
soln:
array([[ 200, 119],
[ 120, 49]])
Answers:
You could use scikit-image block_reduce:
import numpy as np
import skimage.measure
# Image to pool: a 4 x 4 integer matrix.
a = np.array([
    [20, 200, -5, 23],
    [-13, 134, 119, 100],
    [120, 32, 49, 25],
    [-120, 12, 9, 23],
])
# Every non-overlapping 2 x 2 block collapses to its maximum.
block_shape = (2, 2)
skimage.measure.block_reduce(a, block_shape, np.max)
Gives:
array([[200, 119],
[120, 49]])
If the image size is evenly divisible by the kernel size, you can reshape the array and use max or mean as you see fit:
import numpy as np
# Input image; both dimensions are multiples of the kernel size.
mat = np.array([[20, 200, -5, 23],
                [-13, 134, 119, 100],
                [120, 32, 49, 25],
                [-120, 12, 9, 23]])
K, L = 2, 2                  # kernel height and width
M, N = mat.shape
MK, NL = M // K, N // L      # number of whole kernels along each axis

# Drop any remainder, then expose each K x L window on its own pair of axes
# (axes 1 and 3) so a single reduction pools every window at once.
trimmed = mat[:MK * K, :NL * L]
windows = trimmed.reshape(MK, K, NL, L)
print(windows.max(axis=(1, 3)))
# [[200, 119], [120, 49]]
If the image size is not evenly divisible by the kernel size, you’ll have to handle the boundaries separately. (As pointed out in the comments, this results in the matrix being copied, which will affect performance.)
mat = np.array([[20, 200, -5, 23, 7],
                [-13, 134, 119, 100, 8],
                [120, 32, 49, 25, 12],
                [-120, 12, 9, 23, 15],
                [-57, 84, 19, 17, 82],
                ])
# soln
# [200, 119, 8]
# [120, 49, 15]
# [84, 19, 82]
M, N = mat.shape
K = 2
L = 2
MK = M // K
NL = N // L
row_edge = MK * K   # first row that is not covered by a whole kernel
col_edge = NL * L   # first column that is not covered by a whole kernel

# split the matrix into 'quadrants'
# Q1: region tiled by whole K x L windows.
Q1 = mat[:row_edge, :col_edge].reshape(MK, K, NL, L).max(axis=(1, 3))
# Q2 / Q3 / Q4: leftover bottom strip, right strip and corner (may be empty).
Q2 = mat[row_edge:, :col_edge]
Q3 = mat[:row_edge, col_edge:]
Q4 = mat[row_edge:, col_edge:]

# compose the individual quadrants into one new matrix
top = Q1
if Q3.size:
    # Each partial window spans K rows and ALL leftover columns, so reduce
    # over both of those axes to get one value per window row.
    Q3 = Q3.reshape(MK, K, -1).max(axis=(1, 2))[:, np.newaxis]
    top = np.c_[Q1, Q3]
soln = top
if Q2.size:
    # Likewise: all leftover rows and L columns form one partial window.
    Q2 = Q2.reshape(-1, NL, L).max(axis=(0, 2))[np.newaxis, :]
    bottom = Q2
    if Q4.size:
        Q4 = Q4.max()
        bottom = np.c_[Q2, Q4]
    soln = np.vstack([top, bottom])
print(soln)
# [[200 119 8]
# [120 49 15]
# [ 84 19 82]]
Instead of making “quadrants” as shown by Elliot’s answer, we could pad it to make it evenly divisible, then perform either max or mean pooling.
As pooling is often used in CNN, the input array is usually 3D. So I made a function that works on either 2D or 3D arrays.
def pooling(mat, ksize, method='max', pad=False):
    '''Non-overlapping pooling on 2D or 3D data.

    Pooling runs over the first two axes of <mat>; any trailing axes are
    carried through unchanged.

    <mat>: ndarray, input array to pool.
    <ksize>: tuple of 2, kernel size in (ky, kx).
    <method>: str, 'max' for max-pooling; any other value selects
              mean-pooling.
    <pad>: bool, pad <mat> or not. If no pad, output has size
           n//f, n being <mat> size, f being kernel size.
           if pad, output has size ceil(n/f).

    Return <result>: pooled matrix.
    '''
    m, n = mat.shape[:2]
    ky, kx = ksize

    if pad:
        # Integer ceiling division: number of kernels needed to cover each axis.
        ny = -(-m // ky)
        nx = -(-n // kx)
        size = (ny * ky, nx * kx) + mat.shape[2:]
        # NaN filler is ignored by nanmax/nanmean; note that it makes the
        # padded array (and therefore the result) floating point.
        mat_pad = np.full(size, np.nan)
        mat_pad[:m, :n, ...] = mat
    else:
        # Drop the trailing rows/columns that do not fill a whole kernel.
        ny = m // ky
        nx = n // kx
        mat_pad = mat[:ny * ky, :nx * kx, ...]

    # Put each ky x kx window on its own pair of axes (1 and 3).
    new_shape = (ny, ky, nx, kx) + mat.shape[2:]
    windows = mat_pad.reshape(new_shape)

    reducer = np.nanmax if method == 'max' else np.nanmean
    return reducer(windows, axis=(1, 3))
Sometimes you may want to perform overlapping pooling, at a stride not equal to the kernel size. Here is a function that does that, with or without padding:
def asStride(arr, sub_shape, stride):
    '''Get a strided sub-matrices view of an ndarray.

    Windows are taken over the first two axes of <arr>; trailing axes are
    kept as they are.

    <arr>: ndarray, array to take windows from.
    <sub_shape>: tuple of 2, window size in (rows, cols).
    <stride>: tuple of 2, step between consecutive windows in (rows, cols).

    Return <subs>: view of shape (n_rows, n_cols, rows, cols) + arr.shape[2:].
                   It is built with as_strided, so it shares memory with
                   <arr>; treat it as read-only.

    See also skimage.util.shape.view_as_windows()
    '''
    s0, s1 = arr.strides[:2]
    m1, n1 = arr.shape[:2]
    m2, n2 = sub_shape
    # Number of window positions that fit completely inside the array.
    n_rows = 1 + (m1 - m2) // stride[0]
    n_cols = 1 + (n1 - n2) // stride[1]
    view_shape = (n_rows, n_cols, m2, n2) + arr.shape[2:]
    # Outer axes jump a whole stride; inner axes walk single elements.
    strides = (stride[0] * s0, stride[1] * s1, s0, s1) + arr.strides[2:]
    subs = np.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)
    return subs
def poolingOverlap(mat, ksize, stride=None, method='max', pad=False):
    '''Overlapping pooling on 2D or 3D data.

    Pooling runs over the first two axes of <mat>.

    <mat>: ndarray, input array to pool.
    <ksize>: tuple of 2, kernel size in (ky, kx).
    <stride>: tuple of 2 or None, stride of pooling window.
              If None, same as <ksize> (non-overlapping pooling).
    <method>: str, 'max' for max-pooling; any other value selects
              mean-pooling.
    <pad>: bool, pad <mat> or not. If no pad, output has size
           (n-f)//s+1, n being <mat> size, f being kernel size, s stride.
           if pad, output has size ceil(n/s).

    Return <result>: pooled matrix.
    '''
    m, n = mat.shape[:2]
    ky, kx = ksize
    if stride is None:
        stride = (ky, kx)
    sy, sx = stride

    if pad:
        # Integer ceiling division: one window per stride step.
        ny = -(-m // sy)
        nx = -(-n // sx)
        size = ((ny - 1) * sy + ky, (nx - 1) * sx + kx) + mat.shape[2:]
        # NaN filler is ignored by nanmax/nanmean (result becomes float).
        mat_pad = np.full(size, np.nan)
        # When the stride exceeds the kernel, the last window can end before
        # the edge of <mat>; copy only the part that the windows can reach.
        rows = min(m, size[0])
        cols = min(n, size[1])
        mat_pad[:rows, :cols, ...] = mat[:rows, :cols, ...]
    else:
        # Keep exactly the region covered by whole windows.
        mat_pad = mat[:(m - ky) // sy * sy + ky, :(n - kx) // sx * sx + kx, ...]

    view = asStride(mat_pad, ksize, stride)

    reducer = np.nanmax if method == 'max' else np.nanmean
    # Axes 2 and 3 of the view hold the contents of each window.
    return reducer(view, axis=(2, 3))
Since the numpy documentation says to use “numpy.lib.stride_tricks.as_strided” with “extreme care”, here is another solution for a 2D/3D pooling without it.
If strides=1, it results in using same padding. For strides>1, I am not 100% sure about how same padding is defined…
def pool3D(arr,
           kernel=(2, 2, 2),
           stride=(1, 1, 1),
           func=np.nanmax,
           ):
    """
    Pool a 3D array with NaN padding around the borders.

    Parameters
    ----------
    arr : np.ndarray
        3D input array.
    kernel : tuple
        Kernel size along each of the three axes.
    stride : tuple
        Step between consecutive windows along each axis.
    func : callable
        NaN-aware reduction called as ``func(values, axis=-1)``.

    Returns
    -------
    np.ndarray
        Pooled array with ``1 + (n - 1) // stride`` entries per axis. The
        values are stored in the NaN-capable dtype of the padded array, so
        integer input yields a floating-point result.
    """
    # check inputs
    assert arr.ndim == 3
    assert len(kernel) == 3

    # create array with lots of padding around it, from which we grab stuff
    # (a full kernel of NaN on every side; could be more efficient, yes)
    arr_padded_shape = arr.shape + 2 * np.array(kernel)
    arr_padded = np.zeros(arr_padded_shape, dtype=arr.dtype) * np.nan
    arr_padded[
        kernel[0]:kernel[0] + arr.shape[0],
        kernel[1]:kernel[1] + arr.shape[1],
        kernel[2]:kernel[2] + arr.shape[2],
    ] = arr

    # create temporary array, which aggregates kernel elements in last axis.
    # It must share the padded dtype: storing NaN into an integer buffer
    # would turn the padding into arbitrary integers.
    out_shape = tuple(1 + (n - 1) // s for n, s in zip(arr.shape, stride))
    size_kernel = int(np.prod(kernel))
    arr_tmp = np.empty(out_shape + (size_kernel,), dtype=arr_padded.dtype)

    # fill temporary array: one shifted, strided copy per kernel element
    centers = [(k - 1) // 2 for k in kernel]
    for idx_kernel, offsets in enumerate(np.ndindex(*kernel)):
        window = tuple(
            slice(k + off - c, k + off - c + n, s)
            for k, off, c, n, s in zip(kernel, offsets, centers, arr.shape, stride)
        )
        arr_tmp[..., idx_kernel] = arr_padded[window]

    # perform pool function
    arr_final = func(arr_tmp, axis=-1)
    return arr_final
def pool2D(arr,
           kernel=(2, 2),
           stride=(1, 1),
           func=np.nanmax,
           ):
    """
    Pool a 2D array by delegating to pool3D with a dummy third axis.

    Parameters
    ----------
    arr : np.ndarray
        2D input array.
    kernel : sequence of 2 ints
        Kernel size along each axis.
    stride : sequence of 2 ints
        Step between consecutive windows along each axis.
    func : callable
        Reduction forwarded unchanged to pool3D.

    Returns
    -------
    np.ndarray
        2D pooled array.
    """
    # check inputs
    assert arr.ndim == 2
    assert len(kernel) == 2

    # transform into 3D array with an extra axis of length 1; tuple() lets
    # callers pass lists as well as tuples for kernel and stride
    arr3D = arr[..., np.newaxis]
    kernel3D = tuple(kernel) + (1,)
    stride3D = tuple(stride) + (1,)
    arr3D_final = pool3D(arr3D, kernel3D, stride3D, func)
    # drop the dummy axis again
    arr2D_final = arr3D_final[:, :, 0]
    return arr2D_final
Another solution uses the little-known magic of np.maximum.at
(or you can adapt this to mean-pooling using np.add.at and dividing)
def max_pool(img, factor: int):
    """Perform max pooling with a (factor x factor) kernel.

    The output has shape (rows // factor, cols // factor); trailing rows and
    columns that do not fill a whole kernel are ignored. The result keeps the
    dtype of ``img``.
    """
    out_rows = img.shape[0] // factor
    out_cols = img.shape[1] // factor

    # Start every output cell at the smallest value the dtype can hold.
    # (-inf cannot be represented in integer or boolean arrays.)
    kind = img.dtype.kind
    if kind in "iu":
        lowest = np.iinfo(img.dtype).min
    elif kind == "b":
        lowest = False
    else:
        lowest = -np.inf
    ds_img = np.full((out_rows, out_cols), lowest, dtype=img.dtype)

    # Crop to the region covered by whole kernels so that every source pixel
    # maps to a valid output index.
    cropped = img[:out_rows * factor, :out_cols * factor]
    row_idx = np.arange(out_rows * factor)[:, None] // factor
    col_idx = np.arange(out_cols * factor) // factor
    # Unbuffered in-place maximum: each pixel is folded into its output cell.
    np.maximum.at(ds_img, (row_idx, col_idx), cropped)
    return ds_img
example usage:
img = np.array([[20, 200, -5, 23],
                [-13, 134, 119, 100],
                [120, 32, 49, 25],
                [-120, 12, 9, 23]])
# The newline escape puts each matrix on the lines below its label.
print(f'Input: \n{img}')
print(f"Output: \n{max_pool(img, factor=2)}")
prints
Input:
[[ 20 200 -5 23]
[ -13 134 119 100]
[ 120 32 49 25]
[-120 12 9 23]]
Output:
[[200 119]
[120 49]]
Unfortunately it appears to be a little slow though so I’d still go with the solution provided by mdh
Max pooling with a 3 x 3 kernel (stride 1, no padding) for a square matrix a:
def maxpool_3x3(a):
    """Max-pool a 2D matrix with a 3 x 3 window at stride 1, without padding.

    NOTE(review): the original snippet had lost its ``def`` line; the function
    name used here is a placeholder.

    ``a`` may be any 2D array-like. The result is a nested list with one
    entry per interior element, i.e. (rows - 2) x (cols - 2) values.
    """
    a = np.array(a)
    n_rows, n_cols = a.shape
    # (i, j) is the centre of each window; the border cannot be a centre.
    return [[a[i-1:i+2, j-1:j+2].max() for j in range(1, n_cols - 1)]
            for i in range(1, n_rows - 1)]
This function can apply max pooling on any size kernel, using only numpy functions.
def max_pooling(feature_map : np.ndarray, kernel : tuple) -> np.ndarray:
    """
    Applies non-overlapping max pooling to a feature map.

    Pooling runs over the first two axes. If a dimension is not a multiple
    of the kernel size, the map is padded at the bottom/right with the
    lowest value of its dtype so that padding never wins the maximum.

    Parameters
    ----------
    feature_map : np.ndarray
        A 2D (H, W) or 3D (H, W, C) feature map to apply max pooling to.
    kernel : tuple
        The size of the kernel to use for max pooling, as (height, width).

    Returns
    -------
    np.ndarray
        The feature map after max pooling was applied, with shape
        (ceil(H / kernel[0]), ceil(W / kernel[1])) plus any trailing axes.
    """
    k_h, k_w = kernel[0], kernel[1]

    # Rows/columns missing to reach the next multiple of the kernel size.
    pad_h = -feature_map.shape[0] % k_h
    pad_w = -feature_map.shape[1] % k_w

    if pad_h or pad_w:
        kind = feature_map.dtype.kind
        if kind in "iu":
            fill = np.iinfo(feature_map.dtype).min
        elif kind == "b":
            fill = False
        else:
            fill = -np.inf
        # Pad only the two spatial axes; leave any trailing axes untouched.
        pad_width = [(0, pad_h), (0, pad_w)] + [(0, 0)] * (feature_map.ndim - 2)
        feature_map = np.pad(feature_map, pad_width, 'constant', constant_values=fill)

    # Apply max pooling to the padded feature map: each window gets its own
    # pair of axes (1 and 3), which are then reduced together.
    out_h = feature_map.shape[0] // k_h
    out_w = feature_map.shape[1] // k_w
    windows = feature_map.reshape((out_h, k_h, out_w, k_w) + feature_map.shape[2:])
    pooled = windows.max(axis=(1, 3))
    return pooled
You can also do the same using NumPy's as_strided() function. The idea is to create sub-matrices of the input using the given kernel size and stride, and then simply take the maximum along the height and width axes.
Note: The main benefit of using this method is that it can be extended for input with channels (depth) and batches as well!
import numpy as np
np.random.seed(10)
# input
X = np.array([[20, 200, -5, 23],
              [-13, 134, 119, 100],
              [120, 32, 49, 25],
              [-120, 12, 9, 23]])
Nh, Nw = X.shape   # input size
Kh, Kw = (2, 2)    # Kernel size (along height and width)
sh, sw = (2, 2)    # strides along height and width
X
# >>> array([[  20,  200,   -5,   23],
#            [ -13,  134,  119,  100],
#            [ 120,   32,   49,   25],
#            [-120,   12,    9,   23]])

Oh = (Nh - Kh) // sh + 1   # output height
Ow = (Nw - Kw) // sw + 1   # output width

# creating appropriate strides: read the byte steps from the array itself
# instead of assuming a C-contiguous layout
row_step, col_step = X.strides
strides = (sh * row_step, sw * col_step, row_step, col_step)
subM = np.lib.stride_tricks.as_strided(X, shape=(Oh, Ow, Kh, Kw),
                                       strides=strides)
subM
# >>> array([[[[  20,  200],
#              [ -13,  134]],
#             [[  -5,   23],
#              [ 119,  100]]],
#            [[[ 120,   32],
#              [-120,   12]],
#             [[  49,   25],
#              [   9,   23]]]])

# taking maximum along the height and width axes.
np.max(subM, axis=(2, 3))
# >>> array([[200, 119],
#            [120,  49]])
We have our required output!
Only using Numpy, python loop
Python Numpy MaxPooling
This is a generic solution, so you can adjust the stride, padding and kernel.
For this example: stride=2, kernel=2, padding=0
import numpy as np
np.random.seed(10)
# create a dummy input array
data = np.array([[20, 200, -5, 23],
                 [-13, 134, 119, 100],
                 [120, 32, 49, 25],
                 [-120, 12, 9, 23]])

padding = 0
kernel = 2
stride = 2

data = data.astype(np.int32)
# Zero-pad `padding` cells before and after every axis.
data = np.pad(data, (padding, padding))
height, width = data.shape

# Output size per axis: (input_dim - filter_dim) // stride + 1.
# Height and width are computed separately so non-square inputs work too.
out_h = (height - kernel) // stride + 1
out_w = (width - kernel) // stride + 1

pooled = np.empty((out_h, out_w), np.int32)
for row in range(out_h):
    for col in range(out_w):
        h = row * stride
        w = col * stride
        slice_array = data[h:h + kernel, w:w + kernel]
        # Swap np.max for np.mean here to get mean pooling instead.
        pooled[row, col] = int(np.max(slice_array))

print(pooled)
# array([[ 200, 119],
# [ 120, 49]])
Given a 2D (M x N) matrix and a 2D kernel (K x L), how do I return a matrix that is the result of max or mean pooling using the given kernel over the image?
I’d like to use numpy if possible.
Note: M, N, K and L can each be even or odd, and they need not be perfectly divisible by each other, e.g. a 7×5 matrix and a 2×2 kernel.
eg of max pooling:
matrix:
array([[ 20, 200, -5, 23],
[ -13, 134, 119, 100],
[ 120, 32, 49, 25],
[-120, 12, 9, 23]])
kernel: 2 x 2
soln:
array([[ 200, 119],
[ 120, 49]])
You could use scikit-image block_reduce:
import numpy as np
import skimage.measure
# Image to pool: a 4 x 4 integer matrix.
a = np.array([
    [20, 200, -5, 23],
    [-13, 134, 119, 100],
    [120, 32, 49, 25],
    [-120, 12, 9, 23],
])
# Every non-overlapping 2 x 2 block collapses to its maximum.
block_shape = (2, 2)
skimage.measure.block_reduce(a, block_shape, np.max)
Gives:
array([[200, 119],
[120, 49]])
If the image size is evenly divisible by the kernel size, you can reshape the array and use max or mean as you see fit:
import numpy as np
# Input image; both dimensions are multiples of the kernel size.
mat = np.array([[20, 200, -5, 23],
                [-13, 134, 119, 100],
                [120, 32, 49, 25],
                [-120, 12, 9, 23]])
K, L = 2, 2                  # kernel height and width
M, N = mat.shape
MK, NL = M // K, N // L      # number of whole kernels along each axis

# Drop any remainder, then expose each K x L window on its own pair of axes
# (axes 1 and 3) so a single reduction pools every window at once.
trimmed = mat[:MK * K, :NL * L]
windows = trimmed.reshape(MK, K, NL, L)
print(windows.max(axis=(1, 3)))
# [[200, 119], [120, 49]]
If the image size is not evenly divisible by the kernel size, you’ll have to handle the boundaries separately. (As pointed out in the comments, this results in the matrix being copied, which will affect performance.)
mat = np.array([[20, 200, -5, 23, 7],
                [-13, 134, 119, 100, 8],
                [120, 32, 49, 25, 12],
                [-120, 12, 9, 23, 15],
                [-57, 84, 19, 17, 82],
                ])
# soln
# [200, 119, 8]
# [120, 49, 15]
# [84, 19, 82]
M, N = mat.shape
K = 2
L = 2
MK = M // K
NL = N // L
row_edge = MK * K   # first row that is not covered by a whole kernel
col_edge = NL * L   # first column that is not covered by a whole kernel

# split the matrix into 'quadrants'
# Q1: region tiled by whole K x L windows.
Q1 = mat[:row_edge, :col_edge].reshape(MK, K, NL, L).max(axis=(1, 3))
# Q2 / Q3 / Q4: leftover bottom strip, right strip and corner (may be empty).
Q2 = mat[row_edge:, :col_edge]
Q3 = mat[:row_edge, col_edge:]
Q4 = mat[row_edge:, col_edge:]

# compose the individual quadrants into one new matrix
top = Q1
if Q3.size:
    # Each partial window spans K rows and ALL leftover columns, so reduce
    # over both of those axes to get one value per window row.
    Q3 = Q3.reshape(MK, K, -1).max(axis=(1, 2))[:, np.newaxis]
    top = np.c_[Q1, Q3]
soln = top
if Q2.size:
    # Likewise: all leftover rows and L columns form one partial window.
    Q2 = Q2.reshape(-1, NL, L).max(axis=(0, 2))[np.newaxis, :]
    bottom = Q2
    if Q4.size:
        Q4 = Q4.max()
        bottom = np.c_[Q2, Q4]
    soln = np.vstack([top, bottom])
print(soln)
# [[200 119 8]
# [120 49 15]
# [ 84 19 82]]
Instead of making “quadrants” as shown by Elliot’s answer, we could pad it to make it evenly divisible, then perform either max or mean pooling.
As pooling is often used in CNN, the input array is usually 3D. So I made a function that works on either 2D or 3D arrays.
def pooling(mat, ksize, method='max', pad=False):
    '''Non-overlapping pooling on 2D or 3D data.

    Pooling runs over the first two axes of <mat>; any trailing axes are
    carried through unchanged.

    <mat>: ndarray, input array to pool.
    <ksize>: tuple of 2, kernel size in (ky, kx).
    <method>: str, 'max' for max-pooling; any other value selects
              mean-pooling.
    <pad>: bool, pad <mat> or not. If no pad, output has size
           n//f, n being <mat> size, f being kernel size.
           if pad, output has size ceil(n/f).

    Return <result>: pooled matrix.
    '''
    m, n = mat.shape[:2]
    ky, kx = ksize

    if pad:
        # Integer ceiling division: number of kernels needed to cover each axis.
        ny = -(-m // ky)
        nx = -(-n // kx)
        size = (ny * ky, nx * kx) + mat.shape[2:]
        # NaN filler is ignored by nanmax/nanmean; note that it makes the
        # padded array (and therefore the result) floating point.
        mat_pad = np.full(size, np.nan)
        mat_pad[:m, :n, ...] = mat
    else:
        # Drop the trailing rows/columns that do not fill a whole kernel.
        ny = m // ky
        nx = n // kx
        mat_pad = mat[:ny * ky, :nx * kx, ...]

    # Put each ky x kx window on its own pair of axes (1 and 3).
    new_shape = (ny, ky, nx, kx) + mat.shape[2:]
    windows = mat_pad.reshape(new_shape)

    reducer = np.nanmax if method == 'max' else np.nanmean
    return reducer(windows, axis=(1, 3))
Sometimes you may want to perform overlapping pooling, at a stride not equal to the kernel size. Here is a function that does that, with or without padding:
def asStride(arr, sub_shape, stride):
    '''Get a strided sub-matrices view of an ndarray.

    Windows are taken over the first two axes of <arr>; trailing axes are
    kept as they are.

    <arr>: ndarray, array to take windows from.
    <sub_shape>: tuple of 2, window size in (rows, cols).
    <stride>: tuple of 2, step between consecutive windows in (rows, cols).

    Return <subs>: view of shape (n_rows, n_cols, rows, cols) + arr.shape[2:].
                   It is built with as_strided, so it shares memory with
                   <arr>; treat it as read-only.

    See also skimage.util.shape.view_as_windows()
    '''
    s0, s1 = arr.strides[:2]
    m1, n1 = arr.shape[:2]
    m2, n2 = sub_shape
    # Number of window positions that fit completely inside the array.
    n_rows = 1 + (m1 - m2) // stride[0]
    n_cols = 1 + (n1 - n2) // stride[1]
    view_shape = (n_rows, n_cols, m2, n2) + arr.shape[2:]
    # Outer axes jump a whole stride; inner axes walk single elements.
    strides = (stride[0] * s0, stride[1] * s1, s0, s1) + arr.strides[2:]
    subs = np.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)
    return subs
def poolingOverlap(mat, ksize, stride=None, method='max', pad=False):
    '''Overlapping pooling on 2D or 3D data.

    Pooling runs over the first two axes of <mat>.

    <mat>: ndarray, input array to pool.
    <ksize>: tuple of 2, kernel size in (ky, kx).
    <stride>: tuple of 2 or None, stride of pooling window.
              If None, same as <ksize> (non-overlapping pooling).
    <method>: str, 'max' for max-pooling; any other value selects
              mean-pooling.
    <pad>: bool, pad <mat> or not. If no pad, output has size
           (n-f)//s+1, n being <mat> size, f being kernel size, s stride.
           if pad, output has size ceil(n/s).

    Return <result>: pooled matrix.
    '''
    m, n = mat.shape[:2]
    ky, kx = ksize
    if stride is None:
        stride = (ky, kx)
    sy, sx = stride

    if pad:
        # Integer ceiling division: one window per stride step.
        ny = -(-m // sy)
        nx = -(-n // sx)
        size = ((ny - 1) * sy + ky, (nx - 1) * sx + kx) + mat.shape[2:]
        # NaN filler is ignored by nanmax/nanmean (result becomes float).
        mat_pad = np.full(size, np.nan)
        # When the stride exceeds the kernel, the last window can end before
        # the edge of <mat>; copy only the part that the windows can reach.
        rows = min(m, size[0])
        cols = min(n, size[1])
        mat_pad[:rows, :cols, ...] = mat[:rows, :cols, ...]
    else:
        # Keep exactly the region covered by whole windows.
        mat_pad = mat[:(m - ky) // sy * sy + ky, :(n - kx) // sx * sx + kx, ...]

    view = asStride(mat_pad, ksize, stride)

    reducer = np.nanmax if method == 'max' else np.nanmean
    # Axes 2 and 3 of the view hold the contents of each window.
    return reducer(view, axis=(2, 3))
Since the numpy documentation says to use “numpy.lib.stride_tricks.as_strided” with “extreme care”, here is another solution for a 2D/3D pooling without it.
If strides=1, it results in using same padding. For strides>1, I am not 100% sure about how same padding is defined…
def pool3D(arr,
           kernel=(2, 2, 2),
           stride=(1, 1, 1),
           func=np.nanmax,
           ):
    """
    Pool a 3D array with NaN padding around the borders.

    Parameters
    ----------
    arr : np.ndarray
        3D input array.
    kernel : tuple
        Kernel size along each of the three axes.
    stride : tuple
        Step between consecutive windows along each axis.
    func : callable
        NaN-aware reduction called as ``func(values, axis=-1)``.

    Returns
    -------
    np.ndarray
        Pooled array with ``1 + (n - 1) // stride`` entries per axis. The
        values are stored in the NaN-capable dtype of the padded array, so
        integer input yields a floating-point result.
    """
    # check inputs
    assert arr.ndim == 3
    assert len(kernel) == 3

    # create array with lots of padding around it, from which we grab stuff
    # (a full kernel of NaN on every side; could be more efficient, yes)
    arr_padded_shape = arr.shape + 2 * np.array(kernel)
    arr_padded = np.zeros(arr_padded_shape, dtype=arr.dtype) * np.nan
    arr_padded[
        kernel[0]:kernel[0] + arr.shape[0],
        kernel[1]:kernel[1] + arr.shape[1],
        kernel[2]:kernel[2] + arr.shape[2],
    ] = arr

    # create temporary array, which aggregates kernel elements in last axis.
    # It must share the padded dtype: storing NaN into an integer buffer
    # would turn the padding into arbitrary integers.
    out_shape = tuple(1 + (n - 1) // s for n, s in zip(arr.shape, stride))
    size_kernel = int(np.prod(kernel))
    arr_tmp = np.empty(out_shape + (size_kernel,), dtype=arr_padded.dtype)

    # fill temporary array: one shifted, strided copy per kernel element
    centers = [(k - 1) // 2 for k in kernel]
    for idx_kernel, offsets in enumerate(np.ndindex(*kernel)):
        window = tuple(
            slice(k + off - c, k + off - c + n, s)
            for k, off, c, n, s in zip(kernel, offsets, centers, arr.shape, stride)
        )
        arr_tmp[..., idx_kernel] = arr_padded[window]

    # perform pool function
    arr_final = func(arr_tmp, axis=-1)
    return arr_final
def pool2D(arr,
           kernel=(2, 2),
           stride=(1, 1),
           func=np.nanmax,
           ):
    """
    Pool a 2D array by delegating to pool3D with a dummy third axis.

    Parameters
    ----------
    arr : np.ndarray
        2D input array.
    kernel : sequence of 2 ints
        Kernel size along each axis.
    stride : sequence of 2 ints
        Step between consecutive windows along each axis.
    func : callable
        Reduction forwarded unchanged to pool3D.

    Returns
    -------
    np.ndarray
        2D pooled array.
    """
    # check inputs
    assert arr.ndim == 2
    assert len(kernel) == 2

    # transform into 3D array with an extra axis of length 1; tuple() lets
    # callers pass lists as well as tuples for kernel and stride
    arr3D = arr[..., np.newaxis]
    kernel3D = tuple(kernel) + (1,)
    stride3D = tuple(stride) + (1,)
    arr3D_final = pool3D(arr3D, kernel3D, stride3D, func)
    # drop the dummy axis again
    arr2D_final = arr3D_final[:, :, 0]
    return arr2D_final
Another solution uses the little-known magic of np.maximum.at
(or you can adapt this to mean-pooling using np.add.at and dividing)
def max_pool(img, factor: int):
    """Perform max pooling with a (factor x factor) kernel.

    The output has shape (rows // factor, cols // factor); trailing rows and
    columns that do not fill a whole kernel are ignored. The result keeps the
    dtype of ``img``.
    """
    out_rows = img.shape[0] // factor
    out_cols = img.shape[1] // factor

    # Start every output cell at the smallest value the dtype can hold.
    # (-inf cannot be represented in integer or boolean arrays.)
    kind = img.dtype.kind
    if kind in "iu":
        lowest = np.iinfo(img.dtype).min
    elif kind == "b":
        lowest = False
    else:
        lowest = -np.inf
    ds_img = np.full((out_rows, out_cols), lowest, dtype=img.dtype)

    # Crop to the region covered by whole kernels so that every source pixel
    # maps to a valid output index.
    cropped = img[:out_rows * factor, :out_cols * factor]
    row_idx = np.arange(out_rows * factor)[:, None] // factor
    col_idx = np.arange(out_cols * factor) // factor
    # Unbuffered in-place maximum: each pixel is folded into its output cell.
    np.maximum.at(ds_img, (row_idx, col_idx), cropped)
    return ds_img
example usage:
img = np.array([[20, 200, -5, 23],
                [-13, 134, 119, 100],
                [120, 32, 49, 25],
                [-120, 12, 9, 23]])
# The newline escape puts each matrix on the lines below its label.
print(f'Input: \n{img}')
print(f"Output: \n{max_pool(img, factor=2)}")
prints
Input:
[[ 20 200 -5 23]
[ -13 134 119 100]
[ 120 32 49 25]
[-120 12 9 23]]
Output:
[[200 119]
[120 49]]
Unfortunately it appears to be a little slow though so I’d still go with the solution provided by mdh
Max pooling with a 3 x 3 kernel (stride 1, no padding) for a square matrix a:
def maxpool_3x3(a):
    """Max-pool a 2D matrix with a 3 x 3 window at stride 1, without padding.

    NOTE(review): the original snippet had lost its ``def`` line; the function
    name used here is a placeholder.

    ``a`` may be any 2D array-like. The result is a nested list with one
    entry per interior element, i.e. (rows - 2) x (cols - 2) values.
    """
    a = np.array(a)
    n_rows, n_cols = a.shape
    # (i, j) is the centre of each window; the border cannot be a centre.
    return [[a[i-1:i+2, j-1:j+2].max() for j in range(1, n_cols - 1)]
            for i in range(1, n_rows - 1)]
This function can apply max pooling on any size kernel, using only numpy functions.
def max_pooling(feature_map : np.ndarray, kernel : tuple) -> np.ndarray:
    """
    Applies non-overlapping max pooling to a feature map.

    Pooling runs over the first two axes. If a dimension is not a multiple
    of the kernel size, the map is padded at the bottom/right with the
    lowest value of its dtype so that padding never wins the maximum.

    Parameters
    ----------
    feature_map : np.ndarray
        A 2D (H, W) or 3D (H, W, C) feature map to apply max pooling to.
    kernel : tuple
        The size of the kernel to use for max pooling, as (height, width).

    Returns
    -------
    np.ndarray
        The feature map after max pooling was applied, with shape
        (ceil(H / kernel[0]), ceil(W / kernel[1])) plus any trailing axes.
    """
    k_h, k_w = kernel[0], kernel[1]

    # Rows/columns missing to reach the next multiple of the kernel size.
    pad_h = -feature_map.shape[0] % k_h
    pad_w = -feature_map.shape[1] % k_w

    if pad_h or pad_w:
        kind = feature_map.dtype.kind
        if kind in "iu":
            fill = np.iinfo(feature_map.dtype).min
        elif kind == "b":
            fill = False
        else:
            fill = -np.inf
        # Pad only the two spatial axes; leave any trailing axes untouched.
        pad_width = [(0, pad_h), (0, pad_w)] + [(0, 0)] * (feature_map.ndim - 2)
        feature_map = np.pad(feature_map, pad_width, 'constant', constant_values=fill)

    # Apply max pooling to the padded feature map: each window gets its own
    # pair of axes (1 and 3), which are then reduced together.
    out_h = feature_map.shape[0] // k_h
    out_w = feature_map.shape[1] // k_w
    windows = feature_map.reshape((out_h, k_h, out_w, k_w) + feature_map.shape[2:])
    pooled = windows.max(axis=(1, 3))
    return pooled
You can also do the same using NumPy's as_strided() function. The idea is to create sub-matrices of the input using the given kernel size and stride, and then simply take the maximum along the height and width axes.
Note: The main benefit of using this method is that it can be extended for input with channels (depth) and batches as well!
import numpy as np
np.random.seed(10)
# input
X = np.array([[20, 200, -5, 23],
              [-13, 134, 119, 100],
              [120, 32, 49, 25],
              [-120, 12, 9, 23]])
Nh, Nw = X.shape   # input size
Kh, Kw = (2, 2)    # Kernel size (along height and width)
sh, sw = (2, 2)    # strides along height and width
X
# >>> array([[  20,  200,   -5,   23],
#            [ -13,  134,  119,  100],
#            [ 120,   32,   49,   25],
#            [-120,   12,    9,   23]])

Oh = (Nh - Kh) // sh + 1   # output height
Ow = (Nw - Kw) // sw + 1   # output width

# creating appropriate strides: read the byte steps from the array itself
# instead of assuming a C-contiguous layout
row_step, col_step = X.strides
strides = (sh * row_step, sw * col_step, row_step, col_step)
subM = np.lib.stride_tricks.as_strided(X, shape=(Oh, Ow, Kh, Kw),
                                       strides=strides)
subM
# >>> array([[[[  20,  200],
#              [ -13,  134]],
#             [[  -5,   23],
#              [ 119,  100]]],
#            [[[ 120,   32],
#              [-120,   12]],
#             [[  49,   25],
#              [   9,   23]]]])

# taking maximum along the height and width axes.
np.max(subM, axis=(2, 3))
# >>> array([[200, 119],
#            [120,  49]])
We have our required output!
Only using Numpy, python loop
Python Numpy MaxPooling
This is a generic solution, so you can adjust the stride, padding and kernel.
For this example: stride=2, kernel=2, padding=0
import numpy as np
np.random.seed(10)
# create a dummy input array
data = np.array([[20, 200, -5, 23],
                 [-13, 134, 119, 100],
                 [120, 32, 49, 25],
                 [-120, 12, 9, 23]])

padding = 0
kernel = 2
stride = 2

data = data.astype(np.int32)
# Zero-pad `padding` cells before and after every axis.
data = np.pad(data, (padding, padding))
height, width = data.shape

# Output size per axis: (input_dim - filter_dim) // stride + 1.
# Height and width are computed separately so non-square inputs work too.
out_h = (height - kernel) // stride + 1
out_w = (width - kernel) // stride + 1

pooled = np.empty((out_h, out_w), np.int32)
for row in range(out_h):
    for col in range(out_w):
        h = row * stride
        w = col * stride
        slice_array = data[h:h + kernel, w:w + kernel]
        # Swap np.max for np.mean here to get mean pooling instead.
        pooled[row, col] = int(np.max(slice_array))

print(pooled)
# array([[ 200, 119],
# [ 120, 49]])