Crop detected boxes in TensorFlow Object Detection and save or display them as JPG images

Question:

I’m using the TensorFlow Object Detection API to detect specific fields on passports, such as the full name. I’ve already trained the model and detection works fine: it correctly surrounds each field with a bounding box. Now I just want to crop the detected boxes out of the image.

Code:

import os
import cv2
import numpy as np
import tensorflow as tf
import sys

sys.path.append("..")

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# --- Script configuration -------------------------------------------------
MODEL_NAME = 'inference_graph'
CWD_PATH = os.getcwd()

# Locations of the exported graph, the label map and the image to analyse.
PATH_TO_CKPT = 'C:/Users/UI UX/Desktop/Captcha 3/CAPTCHA_frozen_inference_graph.pb'
PATH_TO_LABELS = 'C:/Users/UI UX/Desktop/Captcha 3/CAPTCHA_labelmap.pbtxt'
PATH_TO_IMAGE = 'C:/Users/UI UX/Desktop/(47).jpg'

# Upper bound on the number of classes taken from the label map.
NUM_CLASSES = 11

# --- Label map ------------------------------------------------------------
# Load the label map, turn it into a category list, then build the index
# that the visualization step uses to label each detected class.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

# Build a dedicated graph, import the serialized model into it and open a
# session bound to that graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    # Read the serialized graph bytes first, then parse and import them.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

# Look up the input placeholder and the four output tensors by name.
(image_tensor,
 detection_boxes,
 detection_scores,
 detection_classes,
 num_detections) = (
    detection_graph.get_tensor_by_name(tensor_name)
    for tensor_name in (
        'image_tensor:0',
        'detection_boxes:0',
        'detection_scores:0',
        'detection_classes:0',
        'num_detections:0',
    )
)

# Load the image from disk and upscale it by a factor of two on both axes.
image = cv2.imread(PATH_TO_IMAGE)
image_np = cv2.resize(image, (0, 0), fx=2.0, fy=2.0)

# Prepend a leading axis so the single image is fed as a batch of one.
image_expanded = image_np[np.newaxis, ...]

# Run the model once and collect all four outputs.
fetches = [detection_boxes, detection_scores, detection_classes, num_detections]
boxes, scores, classes, num = sess.run(
    fetches,
    feed_dict={image_tensor: image_expanded},
)

# Draw boxes and labels for detections scoring at least 0.60 directly onto
# image_np (the array is modified in place).
vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=2,
    min_score_thresh=0.60,
)

# Attempt to crop every detection scoring above 0.80 and show it, then show
# the annotated full image. This is the code that raises the error reported
# below; it is kept exactly as asked.

# NOTE(review): NumPy image arrays are indexed (rows, cols, channels), so
# shape[:2] is (height, width); the two names here appear swapped - confirm.
width, height = image_np.shape[:2]
for i, box in enumerate(np.squeeze(boxes)):
      # Only consider detections whose score exceeds 0.80.
      if(np.squeeze(scores)[i] > 0.80):
        # Scale the four box values by the image dimensions. The products are
        # floating-point values, not integers.
        (ymin, xmin, ymax, xmax) = (box[0]*height, box[1]*width, box[2]*height, box[3]*width)
        # The reported traceback points at this call: the float offsets are
        # rejected (TypeError on parameter 'begin', which allows int32/int64).
        cropped_image = tf.image.crop_to_bounding_box(image_np, ymin, xmin, ymax - ymin, xmax - xmin)
        # NOTE(review): this displays image_np (the full annotated image),
        # not cropped_image.
        cv2.imshow('cropped_image', image_np)
        cv2.waitKey(0)

# Show the full image with the drawn detections.
cv2.imshow('Object detector', image_np)

cv2.waitKey(0)

cv2.destroyAllWindows()

But I get this error:

Traceback (most recent call last):
  File "C:/Users/UI UX/PycharmProjects/pythonProject1/vedio_object_detection.py", line 71, in <module>
    cropped_image = tf.image.crop_to_bounding_box(image_np, ymin, xmin, ymax - ymin, xmax - xmin)
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\image_ops_impl.py", line 875, in crop_to_bounding_box
    array_ops.stack([-1, target_height, target_width, -1]))
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 855, in slice
    return gen_array_ops.slice(input, begin, size, name=name)
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 9222, in _slice
    "Slice", input=input, begin=begin, size=size, name=name)
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 632, in _apply_op_helper
    param_name=input_name)
  File "C:\ProgramData\Anaconda2\envs\tf_cpu\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 61, in _SatisfiesTypeConstraint
    ", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'begin' has DataType float32 not in list of allowed values: int32, int64

Any help would be appreciated.

Asked By: Abdullah Md

||

Answers:

I found a solution: add a piece of code right after this line:

(boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections],feed_dict={image_tensor: image_expanded})

I added this:

# Crop every detected box out of the original image and save each crop.
#
# Each detection box holds four normalized values in the order
# [ymin, xmin, ymax, xmax]; multiplying by the image size gives pixel
# coordinates. image.shape[:2] is (rows, cols), i.e. (height, width).
frame_height, frame_width = image.shape[:2]

# Flatten the model output to one row of four values per detection, once,
# instead of re-squeezing the whole array on every access.
for i, box in enumerate(np.reshape(boxes, (-1, 4))):
    ymin = int(box[0] * frame_height)
    xmin = int(box[1] * frame_width)
    ymax = int(box[2] * frame_height)
    xmax = int(box[3] * frame_width)

    # NumPy slices run start:stop with rows first, so the crop is
    # [ymin:ymax, xmin:xmax]; the reversed order produces an empty array.
    cropped_img = image[ymin:ymax, xmin:xmax]

    # Zero-area boxes give an empty crop, which cv2.imwrite cannot encode.
    if cropped_img.size == 0:
        continue

    # NOTE: to keep only confident detections, also compare the matching
    # entry of `scores` against a threshold before writing.
    cv2.imwrite(f'/your/path/img_{i}.png', cropped_img)
Answered By: Abdullah Md

For people looking for a way to handle the bounding boxes returned by human detection, here’s a quick way.
When you run human detection, each bounding box is returned as four values: the first two (x1, y1) are the top-left corner in OpenCV pixel coordinates, and the last two (x2, y2) are the box’s width and height rather than a second corner. If you want to draw rectangles or crop the image after human detection, you can use the following code:

# `box` is expected as (x, y, width, height): the top-left corner in pixel
# coordinates followed by the box size. Name each edge after the axis it
# lies on so the values can also be reused safely for slicing.
left = box[0]
top = box[1]

right = left + box[2]
bottom = top + box[3]

# cv2.rectangle takes its two opposite corners as (x, y) points.
# To crop instead (with integer coordinates): image[top:bottom, left:right]
cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
Answered By: Rex