Extract bounding box and save it as an image
Question:
Suppose you have the following image:
Now I want to extract each of the independent letters into individual images. Currently, I’ve recovered the contours and then drawn a bounding box, in this case for the character a:
After this, I want to extract each of the boxes (in this case for the letter a) and save it to an image file.
Expected result:
Here’s my code so far:
import numpy as np
import cv2
# Question's work-in-progress script: finds contours in 'abcd.png', draws a
# green box around every other contour and writes the annotated image to disk.
# NOTE(review): the original indentation was lost in this copy; the loop/if
# nesting below is shown exactly as found.
im = cv2.imread('abcd.png')
# Swap the values 255 and 0 in place, using 1 as a temporary marker
# (any value that was already 1 therefore also ends up as 0).
im[im == 255] = 1
im[im == 0] = 255
im[im == 1] = 0
# Grayscale conversion, then a fixed threshold at 127 with max value 255
# (type flag 0 -- presumably cv2.THRESH_BINARY; confirm against the OpenCV docs).
im2 = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(im2,127,255,0)
# Two-value unpacking: assumes an OpenCV version whose findContours returns
# (contours, hierarchy) -- TODO confirm for the installed version.
contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
for i in range(0, len(contours)):
# Only even-indexed contours are processed.
# NOTE(review): presumably meant to skip nested contours reported by RETR_TREE -- verify.
if (i % 2 == 0):
cnt = contours[i]
#mask = np.zeros(im2.shape,np.uint8)
#cv2.drawContours(mask,[cnt],0,255,-1)
# Bounding rectangle of the contour, drawn in green (thickness 2) directly on `im`.
x,y,w,h = cv2.boundingRect(cnt)
cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2)
cv2.imshow('Features', im)
# Writes the whole annotated image `im` (not a crop of the box) to '<i>.png'.
cv2.imwrite(str(i)+'.png', im)
cv2.destroyAllWindows()
Thanks in advance.
Answers:
The following will give you a single letter
letter = im[y:y+h,x:x+w]
Here’s an approach:
- Convert image to grayscale
- Otsu’s threshold to obtain a binary image
- Find contours
- Iterate through contours and extract ROI using Numpy slicing
After finding contours, we use cv2.boundingRect() to obtain the bounding rectangle coordinates for each letter.
x,y,w,h = cv2.boundingRect(c)
To extract the ROI, we use Numpy slicing
ROI = image[y:y+h, x:x+w]
Since we have the bounding rectangle coordinates, we can draw the green bounding boxes
cv2.rectangle(copy,(x,y),(x+w,y+h),(36,255,12),2)
Here are the detected letters
Here’s each saved letter ROI
import cv2
# Save every detected letter in '1.png' as its own image file (ROI_<n>.png)
# and display the threshold mask and the detections.
image = cv2.imread('1.png')
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image '1.png'")

# Boxes are drawn on a copy so the saved crops stay free of box outlines.
copy = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# With THRESH_OTSU the threshold is chosen automatically; the 0 is a placeholder.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)[1]

# findContours returns 2 or 3 values depending on the OpenCV version;
# pick the contour list in either case.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

ROI_number = 0
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    # NumPy images are indexed [row, column], i.e. [y, x].
    ROI = image[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
    cv2.rectangle(copy, (x, y), (x+w, y+h), (36, 255, 12), 2)
    ROI_number += 1

cv2.imshow('thresh', thresh)
cv2.imshow('copy', copy)
# Block until a key is pressed so the windows are actually rendered.
cv2.waitKey()
def bounding_box_img(img,bbox):
    """Return the part of *img* enclosed by *bbox*.

    Args:
        img: Image indexed as ``img[row, column]`` (e.g. a NumPy array).
        bbox: ``(x_min, y_min, x_max, y_max)`` pixel coordinates.

    Returns:
        The slice ``img[y_min:y_max, x_min:x_max]``. Coordinates below zero
        are clamped to 0; coordinates past the image edge are truncated by
        the slicing itself.
    """
    x_min, y_min, x_max, y_max = bbox
    # A negative index would wrap around to the opposite edge and yield a
    # wrong (often empty) crop, so clamp every coordinate to the image origin.
    x_min, y_min, x_max, y_max = (max(0, v) for v in (x_min, y_min, x_max, y_max))
    return img[y_min:y_max, x_min:x_max]
img = cv2.imread("image.jpg")
# NOTE(review): `bbox` is not defined in this snippet; the caller must supply
# it as (x_min, y_min, x_max, y_max).
cropped_img = bounding_box_img(img,bbox)
# cv2.imshow takes the window name first, then the image.
cv2.imshow("cropped_img", cropped_img)
# Without waitKey the window is never rendered.
cv2.waitKey()
This returns the cropped image (the region inside the bounding box).
In this approach, the bounding box coordinates follow the Pascal VOC annotation format: (x_min, y_min, x_max, y_max).
Suppose you have the following image:
Now I want to extract each of the independent letters into individual images. Currently, I’ve recovered the contours and then drawn a bounding box, in this case for the character a:
After this, I want to extract each of the boxes (in this case for the letter a) and save it to an image file.
Expected result:
Here’s my code so far:
import numpy as np
import cv2
# Question's work-in-progress script: finds contours in 'abcd.png', draws a
# green box around every other contour and writes the annotated image to disk.
# NOTE(review): the original indentation was lost in this copy; the loop/if
# nesting below is shown exactly as found.
im = cv2.imread('abcd.png')
# Swap the values 255 and 0 in place, using 1 as a temporary marker
# (any value that was already 1 therefore also ends up as 0).
im[im == 255] = 1
im[im == 0] = 255
im[im == 1] = 0
# Grayscale conversion, then a fixed threshold at 127 with max value 255
# (type flag 0 -- presumably cv2.THRESH_BINARY; confirm against the OpenCV docs).
im2 = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(im2,127,255,0)
# Two-value unpacking: assumes an OpenCV version whose findContours returns
# (contours, hierarchy) -- TODO confirm for the installed version.
contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
for i in range(0, len(contours)):
# Only even-indexed contours are processed.
# NOTE(review): presumably meant to skip nested contours reported by RETR_TREE -- verify.
if (i % 2 == 0):
cnt = contours[i]
#mask = np.zeros(im2.shape,np.uint8)
#cv2.drawContours(mask,[cnt],0,255,-1)
# Bounding rectangle of the contour, drawn in green (thickness 2) directly on `im`.
x,y,w,h = cv2.boundingRect(cnt)
cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2)
cv2.imshow('Features', im)
# Writes the whole annotated image `im` (not a crop of the box) to '<i>.png'.
cv2.imwrite(str(i)+'.png', im)
cv2.destroyAllWindows()
Thanks in advance.
The following will give you a single letter
letter = im[y:y+h,x:x+w]
Here’s an approach:
- Convert image to grayscale
- Otsu’s threshold to obtain a binary image
- Find contours
- Iterate through contours and extract ROI using Numpy slicing
After finding contours, we use cv2.boundingRect() to obtain the bounding rectangle coordinates for each letter.
x,y,w,h = cv2.boundingRect(c)
To extract the ROI, we use Numpy slicing
ROI = image[y:y+h, x:x+w]
Since we have the bounding rectangle coordinates, we can draw the green bounding boxes
cv2.rectangle(copy,(x,y),(x+w,y+h),(36,255,12),2)
Here are the detected letters
Here’s each saved letter ROI
import cv2
# Save every detected letter in '1.png' as its own image file (ROI_<n>.png)
# and display the threshold mask and the detections.
image = cv2.imread('1.png')
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image '1.png'")

# Boxes are drawn on a copy so the saved crops stay free of box outlines.
copy = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# With THRESH_OTSU the threshold is chosen automatically; the 0 is a placeholder.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)[1]

# findContours returns 2 or 3 values depending on the OpenCV version;
# pick the contour list in either case.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

ROI_number = 0
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    # NumPy images are indexed [row, column], i.e. [y, x].
    ROI = image[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
    cv2.rectangle(copy, (x, y), (x+w, y+h), (36, 255, 12), 2)
    ROI_number += 1

cv2.imshow('thresh', thresh)
cv2.imshow('copy', copy)
# Block until a key is pressed so the windows are actually rendered.
cv2.waitKey()
def bounding_box_img(img,bbox):
    """Return the part of *img* enclosed by *bbox*.

    Args:
        img: Image indexed as ``img[row, column]`` (e.g. a NumPy array).
        bbox: ``(x_min, y_min, x_max, y_max)`` pixel coordinates.

    Returns:
        The slice ``img[y_min:y_max, x_min:x_max]``. Coordinates below zero
        are clamped to 0; coordinates past the image edge are truncated by
        the slicing itself.
    """
    x_min, y_min, x_max, y_max = bbox
    # A negative index would wrap around to the opposite edge and yield a
    # wrong (often empty) crop, so clamp every coordinate to the image origin.
    x_min, y_min, x_max, y_max = (max(0, v) for v in (x_min, y_min, x_max, y_max))
    return img[y_min:y_max, x_min:x_max]
img = cv2.imread("image.jpg")
# NOTE(review): `bbox` is not defined in this snippet; the caller must supply
# it as (x_min, y_min, x_max, y_max).
cropped_img = bounding_box_img(img,bbox)
# cv2.imshow takes the window name first, then the image.
cv2.imshow("cropped_img", cropped_img)
# Without waitKey the window is never rendered.
cv2.waitKey()
This returns the cropped image (the region inside the bounding box).
In this approach, the bounding box coordinates follow the Pascal VOC annotation format: (x_min, y_min, x_max, y_max).