How to turn a video into numpy array?
Question:
I have a python script in the same folder as a video I want to convert to a numpy array. My video is called ‘test.mp4’.
Within my script, I want to call someFunction('test.mp4')
and get back a numpy array. The resulting numpy array should be a numpy array of images, where each image is a 3-d numpy array.
Does that make sense?
Thanks!
Answers:
The script below does what you want. You may separate part of it into the function.
The code below doesn’t check for errors; in particular, production code should check that every frame*
variable (frameCount, frameWidth, frameHeight) is greater than zero.
import cv2
import numpy as np
# Load every frame of 'test.mp4' into one uint8 array `buf`, then show frame 10.
cap = cv2.VideoCapture('test.mp4')
if not cap.isOpened():
    raise IOError("Could not open video file 'test.mp4'")

# The reported frame count is only used as an upper bound: the number of
# frames that can actually be decoded may be smaller.
frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
if min(frameCount, frameWidth, frameHeight) <= 0:
    cap.release()
    raise ValueError("Video reports a non-positive frame count or frame size")

# Pre-allocate (frames, height, width, 3) so each frame is stored in place.
buf = np.empty((frameCount, frameHeight, frameWidth, 3), np.dtype('uint8'))

fc = 0
ret = True
while fc < frameCount and ret:
    ret, frame = cap.read()
    if ret:
        # Store and count only frames that were really decoded; on failure
        # `frame` is None and assigning it into the uint8 buffer would raise.
        buf[fc] = frame
        fc += 1

cap.release()

# Drop the unused tail when fewer frames were decoded than were reported.
buf = buf[:fc]

# Only show the tenth frame if the video actually has one.
if fc >= 10:
    cv2.namedWindow('frame 10')
    cv2.imshow('frame 10', buf[9])
    cv2.waitKey(0)
skvideo is a Python package that can be used to read a video and store it in a multi-dimensional array.
import skvideo.io
# Read the whole video file in a single call; the result is one
# multi-dimensional numpy array holding all frames.
videodata = skvideo.io.vread("video_file_name")
# Presumably (frames, height, width, channels) — confirm for your file.
print(videodata.shape)
For more details:
http://www.scikit-video.org/stable/index.html
and
http://mllearners.blogspot.in/2018/01/scikit-video-skvideo-tutorial-for.html
When we look at a video from an image-processing perspective, we can treat it as a sequence of images.
From this point, you can loop over the frames of your video and convert them into a numpy
array.
Here is an example with Pillow and OpenCV libraries, where I’m converting the screenshots from my webcam into numpy
arrays:
import cv2
import numpy as np
from PIL import Image, ImageOps
def screenshot():
    """Read one frame from the module-level ``cam`` and save it as 'screenshot.png'."""
    # `cam` is only read here, so no `global` declaration is required.
    _ok, frame = cam.read()
    cv2.imwrite('screenshot.png', frame)
if __name__ == '__main__':
    # Show the camera feed and, for every displayed frame, save a screenshot,
    # fit it to `size` and print it as a numpy array. Press Esc to quit.
    np.set_printoptions(suppress=True)
    cam = cv2.VideoCapture(0)  # You can replace it with your video path
    size = (224, 224)  # Put your suitable size
    try:
        while True:
            ret, img = cam.read()
            if not ret:
                # No frame available (camera unplugged / end of the video):
                # stop instead of handing None to imshow.
                break
            cv2.imshow('My Camera', img)
            ch = cv2.waitKey(5)
            if ch == 27:  # Esc key
                break
            screenshot()
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            with Image.open('screenshot.png') as image:
                image = ImageOps.fit(image, size, Image.LANCZOS)
            image_array = np.asarray(image)  # Here, an image -> numpy array
            print(image_array)
    finally:
        # Always free the capture device and close the preview window.
        cam.release()
        cv2.destroyAllWindows()
Some of the comments here are interested in a speed comparison. OpenCV and skvideo have already been mentioned by others here, so I will compare the two.
We will read 1000 frames and compare the speeds.
OpenCV Code
def read_video_cv2(n_frames=1000, path="rec_q26b_10min.mp4"):
    """Read up to *n_frames* frames from a video file with OpenCV.

    Args:
        n_frames: Maximum number of frames to read.
        path: Video file to open; defaults to the benchmark recording.

    Returns:
        A numpy array stacking the decoded frames along a new first axis.
        It holds fewer than ``n_frames`` entries when the video ends early
        and is empty when nothing could be read.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < n_frames:
            ret, frame = cap.read()
            if not ret:
                # End of stream or decode failure: `frame` is None here, so
                # stop rather than appending it.
                break
            # The frame is appended as returned, without an extra copy.
            frames.append(frame)
    finally:
        # Release the capture even if reading raised.
        cap.release()
    return np.array(frames)
scikit-video code
def read_video_sk(n_frames=1000):
    """Load the first *n_frames* frames of the benchmark video via scikit-video."""
    return skvideo.io.vread("rec_q26b_10min.mp4", num_frames=n_frames)
main function
if __name__ == "__main__":
    # Run each reader in turn and report the shape of the array it returns.
    for reader in (read_video_cv2, read_video_sk):
        print(reader().shape)
execution
❯ kernprof -l -v test.py
(1000, 480, 1280)
(1000, 480, 1280, 3)
Wrote profile results to test.py.lprof
Timer unit: 1e-06 s
Total time: 3.72707 s
File: test.py
Function: read_video_cv2 at line 24
Line # Hits Time Per Hit % Time Line Contents
==============================================================
24 @profile
25 def read_video_cv2(n_frames=100):
26 1 23785.0 23785.0 0.6 cap = cv2.VideoCapture("rec_q26b_10min.mp4")
27 1 5.0 5.0 0.0 all = []
28 1 1.0 1.0 0.0 i = 0
29 1001 5261.0 5.3 0.1 while cap.isOpened() and i < n_frames:
30 1000 2366040.0 2366.0 63.5 ret, frame = cap.read()
31 1000 279732.0 279.7 7.5 arr = np.array(frame)
32 1000 4769.0 4.8 0.1 all.append(arr)
33 1000 1984.0 2.0 0.1 i += 1
34 1 1045494.0 1045494.0 28.1 return np.array(all)
Total time: 3.32195 s
File: test.py
Function: read_video_sk at line 36
Line # Hits Time Per Hit % Time Line Contents
==============================================================
36 @profile
37 def read_video_sk(n_frames=100):
38 1 3321951.0 3321951.0 100.0 videodata = skvideo.io.vread("rec_q26b_10min.mp4", num_frames=n_frames)
39 1 2.0 2.0 0.0 return videodata
We can see that both methods will return numpy arrays of shape (1000, 480, 1280)
and
(1000, 480, 1280, 3)
respectively (our video dimensions are 1280×480).
OpenCV with a simple while loop took a total execution time of 3.72s
whereas skvideo took 3.32s
.
So we can see that the times are very similar, even though my code very inefficiently copies a list of arrays into a numpy array. I suspect we could save some more time here if we pre-allocated the memory for the whole array and wrote into it directly, shaving off about half a second.
I have a python script in the same folder as a video I want to convert to a numpy array. My video is called ‘test.mp4’.
Within my script, I want to call someFunction('test.mp4')
and get back a numpy array. The resulting numpy array should be a numpy array of images, where each image is a 3-d numpy array.
Does that make sense?
Thanks!
The script below does what you want. You may separate part of it into the function.
The code below doesn’t check for errors; in particular, production code should check that every frame*
variable (frameCount, frameWidth, frameHeight) is greater than zero.
import cv2
import numpy as np
# Load every frame of 'test.mp4' into one uint8 array `buf`, then show frame 10.
cap = cv2.VideoCapture('test.mp4')
if not cap.isOpened():
    raise IOError("Could not open video file 'test.mp4'")

# The reported frame count is only used as an upper bound: the number of
# frames that can actually be decoded may be smaller.
frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
if min(frameCount, frameWidth, frameHeight) <= 0:
    cap.release()
    raise ValueError("Video reports a non-positive frame count or frame size")

# Pre-allocate (frames, height, width, 3) so each frame is stored in place.
buf = np.empty((frameCount, frameHeight, frameWidth, 3), np.dtype('uint8'))

fc = 0
ret = True
while fc < frameCount and ret:
    ret, frame = cap.read()
    if ret:
        # Store and count only frames that were really decoded; on failure
        # `frame` is None and assigning it into the uint8 buffer would raise.
        buf[fc] = frame
        fc += 1

cap.release()

# Drop the unused tail when fewer frames were decoded than were reported.
buf = buf[:fc]

# Only show the tenth frame if the video actually has one.
if fc >= 10:
    cv2.namedWindow('frame 10')
    cv2.imshow('frame 10', buf[9])
    cv2.waitKey(0)
skvideo is a Python package that can be used to read a video and store it in a multi-dimensional array.
import skvideo.io
# Read the whole video file in a single call; the result is one
# multi-dimensional numpy array holding all frames.
videodata = skvideo.io.vread("video_file_name")
# Presumably (frames, height, width, channels) — confirm for your file.
print(videodata.shape)
For more details:
http://www.scikit-video.org/stable/index.html
and
http://mllearners.blogspot.in/2018/01/scikit-video-skvideo-tutorial-for.html
When we look at a video from an image-processing perspective, we can treat it as a sequence of images.
From this point, you can loop over the frames of your video and convert them into a numpy
array.
Here is an example with Pillow and OpenCV libraries, where I’m converting the screenshots from my webcam into numpy
arrays:
import cv2
import numpy as np
from PIL import Image, ImageOps
def screenshot():
    """Read one frame from the module-level ``cam`` and save it as 'screenshot.png'."""
    # `cam` is only read here, so no `global` declaration is required.
    _ok, frame = cam.read()
    cv2.imwrite('screenshot.png', frame)
if __name__ == '__main__':
    # Show the camera feed and, for every displayed frame, save a screenshot,
    # fit it to `size` and print it as a numpy array. Press Esc to quit.
    np.set_printoptions(suppress=True)
    cam = cv2.VideoCapture(0)  # You can replace it with your video path
    size = (224, 224)  # Put your suitable size
    try:
        while True:
            ret, img = cam.read()
            if not ret:
                # No frame available (camera unplugged / end of the video):
                # stop instead of handing None to imshow.
                break
            cv2.imshow('My Camera', img)
            ch = cv2.waitKey(5)
            if ch == 27:  # Esc key
                break
            screenshot()
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            with Image.open('screenshot.png') as image:
                image = ImageOps.fit(image, size, Image.LANCZOS)
            image_array = np.asarray(image)  # Here, an image -> numpy array
            print(image_array)
    finally:
        # Always free the capture device and close the preview window.
        cam.release()
        cv2.destroyAllWindows()
Some of the comments here are interested in a speed comparison. OpenCV and skvideo have already been mentioned by others here, so I will compare the two.
We will read 1000 frames and compare the speeds.
OpenCV Code
def read_video_cv2(n_frames=1000, path="rec_q26b_10min.mp4"):
    """Read up to *n_frames* frames from a video file with OpenCV.

    Args:
        n_frames: Maximum number of frames to read.
        path: Video file to open; defaults to the benchmark recording.

    Returns:
        A numpy array stacking the decoded frames along a new first axis.
        It holds fewer than ``n_frames`` entries when the video ends early
        and is empty when nothing could be read.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < n_frames:
            ret, frame = cap.read()
            if not ret:
                # End of stream or decode failure: `frame` is None here, so
                # stop rather than appending it.
                break
            # The frame is appended as returned, without an extra copy.
            frames.append(frame)
    finally:
        # Release the capture even if reading raised.
        cap.release()
    return np.array(frames)
scikit-video code
def read_video_sk(n_frames=1000):
    """Load the first *n_frames* frames of the benchmark video via scikit-video."""
    return skvideo.io.vread("rec_q26b_10min.mp4", num_frames=n_frames)
main function
if __name__ == "__main__":
    # Run each reader in turn and report the shape of the array it returns.
    for reader in (read_video_cv2, read_video_sk):
        print(reader().shape)
execution
❯ kernprof -l -v test.py
(1000, 480, 1280)
(1000, 480, 1280, 3)
Wrote profile results to test.py.lprof
Timer unit: 1e-06 s
Total time: 3.72707 s
File: test.py
Function: read_video_cv2 at line 24
Line # Hits Time Per Hit % Time Line Contents
==============================================================
24 @profile
25 def read_video_cv2(n_frames=100):
26 1 23785.0 23785.0 0.6 cap = cv2.VideoCapture("rec_q26b_10min.mp4")
27 1 5.0 5.0 0.0 all = []
28 1 1.0 1.0 0.0 i = 0
29 1001 5261.0 5.3 0.1 while cap.isOpened() and i < n_frames:
30 1000 2366040.0 2366.0 63.5 ret, frame = cap.read()
31 1000 279732.0 279.7 7.5 arr = np.array(frame)
32 1000 4769.0 4.8 0.1 all.append(arr)
33 1000 1984.0 2.0 0.1 i += 1
34 1 1045494.0 1045494.0 28.1 return np.array(all)
Total time: 3.32195 s
File: test.py
Function: read_video_sk at line 36
Line # Hits Time Per Hit % Time Line Contents
==============================================================
36 @profile
37 def read_video_sk(n_frames=100):
38 1 3321951.0 3321951.0 100.0 videodata = skvideo.io.vread("rec_q26b_10min.mp4", num_frames=n_frames)
39 1 2.0 2.0 0.0 return videodata
We can see that both methods will return numpy arrays of shape (1000, 480, 1280)
and
(1000, 480, 1280, 3)
respectively (our video dimensions are 1280×480).
OpenCV with a simple while loop took a total execution time of 3.72s
whereas skvideo took 3.32s
.
So we can see that the times are very similar, even though my code very inefficiently copies a list of arrays into a numpy array. I suspect we could save some more time here if we pre-allocated the memory for the whole array and wrote into it directly, shaving off about half a second.