Draw a bounding box of second class on main image which was cropped to get detection of second class

Question:

I have a problem.
I have an object detection model that detects two classes, what I want to do is:

  • Detect class 1 (say c1) on source image (640×640) Draw bounding box and crop bounding box -> (c1 image) and then resize it to (640×640) (DONE)
  • Detect class 2 (say c2) on c1 image (640×640) (DONE)
  • Now I want to draw bounding box of c2 on source image

I have tried to explain it here by visualizing it
1
how can I do it? please help.

Code:

frame = self.REC.ImgResize(frame)
frame, score1, self.FLAG1, x, y, w, h = self.Detect(frame, "c1")
if self.FLAG1 and x > 0 and y > 0:
   x1, y1 = w,h
   cv2.rectangle(frame, (x, y), (w, h), self.COLOR1, 1)
   c1Img = frame[y:h, x:w]
   c1Img = self.REC.ImgResize(c1Img)
   ratio = c2Img.shape[1] / float(frame.shape[1])
   if ratio > 0.35:
      c2Img, score2, self.FLAG2, xN, yN, wN, hN = self.Detect(c1Img, "c2")
      if self.FLAG2 and xN > 0 and yN > 0:
         # What should be the values for these => (__, __),(__,__)
         cv2.rectangle(frame, (__, __), (__, __), self.COLOR2, 1)

I had tried a way which could only solve (x,y) coordinates but width and height was a mess
what I tried was
first found the rate of width and height at which the cropped c1 image increased after resize.

for example

x1 = 329
y1 = 102 
h1 = 637 
w1 = 630
r_w = 630 / 640 # 0.9843
r_h = 637 / 640 # 0.9953
x2 = 158
y2 = 393
h2 = 499
w2 = 588
new_x2 = 158 * 0.9843 # 156
new_y2 = 389 * 0.9953 # 389
new_x2 = x1 + new_x2
new_y2 = y1 + new_y2

this work to find (x,y)
but I am still trying to find a way to get (w,h) of the bounding box.

EDIT

The complete code is:

import cv2
import random
import numpy as np
import onnxruntime as ort

cuda = False
w = "models/model.onnx"
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = ort.InferenceSession(w, providers=providers)
names = ['face', 'glasses']
colors = {name:[random.randint(0, 255) for _ in range(3)] for name in names}
img = cv2.imread("test.jpg")


def ImgResize(image, width = 640, height = 640, inter = cv2.INTER_CUBIC):
    if image is not None:
        resized = cv2.resize(image, (width,height), interpolation = inter)
    return resized


def Detect(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    flag = False
    w, h = 0, 0
    x, y = 0, 0
    score = 0
    try:
        if im is None:
            raise Exception(IOError())
        shape = im.shape[:2]  
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)
        ratio = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:
            ratio = min(ratio, 1.0)
        new_unpad = int(round(shape[1] * ratio)), int(round(shape[0] * ratio))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
        if auto:
            dw, dh = np.mod(dw, stride), np.mod(dh, stride)
        dw /= 2
        dh /= 2
        if shape[::-1] != new_unpad:
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        image_ = im.transpose((2, 0, 1))
        image_ = np.expand_dims(image_, 0)
        image_ = np.ascontiguousarray(image_)
        im = image_.astype(np.float32)
        im /= 255
        outname = [i.name for i in session.get_outputs()]
        inname = [i.name for i in session.get_inputs()]
        inp = {inname[0]:im}
        outputs = session.run(outname, inp)[0]
        return im, outputs, ratio, (dw, dh)
    except IOError:
        print("Invalid Image File")



def Detection(img, c_name):
    score = 0
    name = ""
    a, b, c, d = 0, 0, 0, 0
    image_, outputs, ratio, dwdh = Detect(img)
    ori_images = [img.copy()]
    for batch_id, x0, y0, x1, y1, cls_id, score in outputs:
        img = ori_images[int(batch_id)]
        box = np.array([x0, y0, x1, y1])
        box -= np.array(dwdh * 2)
        box /= ratio
        box = box.round().astype(np.int32).tolist()
        cls_id = int(cls_id)
        score = round(float(score), 3)
        if score > 0.55:
            name = names[cls_id]
    if name != c_name:
            return img, 0, False, 0, 0, 0, 0, "Could Not Detect"
    flag = True
    a, b, c, d = tuple(box)
    score = round(score * 100, 0)
    return img, score, flag, a, b, c, d, name 



COLORF = (212, 15, 24)
COLORG = (25, 240, 255)
nameW = "Det"
flagF, flagN = False, False
img = ImgResize(img)
c1_img, score, flagF, x1,y1,w1,h1,name = Detection(img,"face")

print(score, flagF, x1,y1,w1,h1,name)
if flagF:
    cv2.rectangle(img, (x1,y1), (w1,h1), COLORF, 1)
    cv2.putText(img, name, (x1,y1),cv2.FONT_HERSHEY_PLAIN, 2,COLORF,2)
    cv2.imshow("face", img)
    c1_img = c1_img[y1:h1,x1:w1]
    c1_img_orig = c1_img.copy()
    c1_img = ImgResize(c1_img)
    c2_img, score, flagG, x2,y2,w2,h2,name = Detection(c1_img,"glasses")
    if flagG:
        c2_img = c2_img[y2:h2,x2:w2]
        cv2.rectangle(c1_img_orig, (x2,y2), (w2,h2), COLORG, 1)
        cv2.putText(c1_img_orig, name, (x1,y1),cv2.FONT_HERSHEY_PLAIN, 2,COLORG,2)
        cv2.imshow("glasses", c2_img)
        x3 = x1 + int(x2 * w1 / 640)
        y3 = y1 + int(y2 * h1 / 640)
        w3 = int(w2 * w1 / 640)
        h3 = int(h2 * h1 / 640)
        cv2.rectangle(img, (x3,y3), (w3,h3), COLORG, 1)
cv2.imshow(nameW, img)
cv2.waitKey(0)
cv2.destroyAllWindows()

what this code does is for some images it draws the bounding box as required:
2

but for other images and in video stream this is what happens:
3

Asked By: M Zaid

||

Answers:

I suggest that you treat your bounding box coordinates relatively.

If I understand correctly, your problem is that you have different referential. One way to bypass that is to normalize at each step your bbox coordinates.

c1_box is relative to your image, so :

c1_x = x/640
c1_y = y/640

When you crop, you can record the ratio values between main image and your cropped object.

image_vs_c1_x = c1_x / img_x
image_vs_c1_y = c1_y / img_y

Then you need to multiply your c2 bounding box coordinates by those ratios.

Answered By: ThOpaque

Here is a complete programming example. Please keep in mind that for cv2.rectangle you need to pass top-left corner and bottom-right corner of the rectangle. As you didn’t share ImgResize and Detect I made some assumptions:

import cv2
import numpy as np


COLOR1 = (0, 255, 0)
COLOR2 = (0, 0, 255)
DETECT_c1 = (40, 20, 120, 160)
DETECT_c2 = (20, 120, 160, 40)
RESIZE_x, RESIZE_y = 200, 200

frame = np.zeros((RESIZE_y, RESIZE_x, 3), np.uint8)

x1, y1, w1, h1 = DETECT_c1

c1Img = frame[y1:h1, x1:w1]
cv2.rectangle(frame, (x1, y1), (x1 + w1, y1 + h1), COLOR1, 1)

c1Img = cv2.resize(c1Img, (RESIZE_x, RESIZE_y))

x2, y2, w2, h2 = DETECT_c2

x3 = x1 + int(x2 * w1 / RESIZE_x)
y3 = y1 + int(y2 * h1 / RESIZE_y)
w3 = int(w2 * w1 / RESIZE_x)
h3 = int(h2 * h1 / RESIZE_y)

cv2.rectangle(frame, (x3, y3), (x3 + w3, y3 + h3), COLOR2, 1)

cv2.imwrite('out.png', frame)

Output:

enter image description here

Answered By: Markus

this is how I was able to solve it.

rwf = round((w1-x1)/640, 2)
rhf = round((h1-y1)/640, 2)
x3 = int(x2*rwf )
y3 = int(y2*rhf)
w3 = int(w2*rwf)
h3 = int(h2*rhf)
# these are the top right and bottom left cooridinates
x4 = x1 + x3
y4 = y1 + y3
w4 = x1 + w3
h4 = y1 + h3
Answered By: M Zaid