Tensorflow Object Detection and Image Cropping for Data Extraction
Question:
Below is my code that detects the regions (tables, paragraphs) in an invoice and crops each detected region from the invoice. I am facing issues while performing data extraction on the cropped images because the images are very small. I am passing each cropped image to tesseract for text extraction; code below.
Are there any other options where we can perform extraction on the entire image, but only for the detected regions, instead of cropping the image? Cropping leads to loss of image quality, and the tesseract text extraction quality is also poor.
# Iterate over the detections; for every box whose confidence clears the
# threshold, record its pixel coordinates and save the cropped region as
# a JPEG file.
for idx in range(len(bboxes)):
    if bscores[idx] >= Threshold:
        # Region of interest: detection boxes are normalized
        # [y_min, x_min, y_max, x_max], so scale by the image dimensions
        # to obtain pixel coordinates.
        y_min = int(bboxes[idx][0] * im_height)
        x_min = int(bboxes[idx][1] * im_width)
        y_max = int(bboxes[idx][2] * im_height)
        x_max = int(bboxes[idx][3] * im_width)
        class_label = category_index[int(bclasses[idx])]['name']
        class_labels.append(class_label)
        bbox.append([x_min, y_min, x_max, y_max, class_label, float(bscores[idx])])

        # Crop the detected region.
        # BUGFIX: tf.image.encode_jpeg requires a uint8 tensor; the original
        # cast to np.int32 makes encode_jpeg raise. Pixel values are already
        # in 0-255, so cast to uint8 instead.
        cropped_image = tf.image.crop_to_bounding_box(
            image, y_min, x_min, y_max - y_min, x_max - x_min
        ).numpy().astype(np.uint8)
        # encode_jpeg encodes a uint8 tensor to a JPEG-encoded string tensor.
        output_image = tf.image.encode_jpeg(cropped_image)

        score = bscores[idx] * 100
        # BUGFIX: image_name[:-4] silently assumed a 3-character extension
        # (e.g. ".png"); use os.path.splitext for the stem as well, matching
        # how the extension is already obtained below.
        stem, ext = os.path.splitext(image_name)
        # Output path: <stem>_<idx>_<label>_<score>%_<ext>
        file_name = tf.constant(
            OUTPUT_PATH + stem + '_' + str(idx) + '_' + class_label
            + '_' + str(round(score)) + '%' + '_' + ext
        )
        writefile = tf.io.write_file(file_name, output_image)
Code for text extraction
import cv2
import pytesseract

# Path to the tesseract binary on Windows.
# BUGFIX: the original string had lost its backslash path separators
# ("C:Program FilesTesseract-OCRtesseract.exe"), so pytesseract could not
# locate the executable. Restore them using a raw string literal.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Preprocess: grayscale, Gaussian blur, then Otsu's automatic threshold
# (inverted so text becomes white on black for the morphology step).
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morphological opening removes small noise specks; invert back so the
# text is dark on a light background, which tesseract handles best.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
invert = 255 - opening

# Perform text extraction. --psm 6: assume a single uniform block of text.
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

# Show the intermediate images for debugging.
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('invert', invert)
cv2.waitKey()
Answers:
It sounds like you want to perform data extraction on the detected regions without saving cropped image files. One way to do this is to use the bounding-box coordinates to slice the corresponding regions directly out of the original image array in memory and run text extraction on those slices. Because this operates on the full-resolution pixels of the original image — with no JPEG re-encoding or writing to disk — you avoid the quality loss you saw with the saved crops.
Here’s an example of how you could do this:
# Run OCR on each high-confidence detection directly from the
# full-resolution original image held in memory.
image = cv2.imread('original_image.jpg')

# Walk the detections; guard clause skips anything below the threshold.
for idx in range(len(bboxes)):
    if bscores[idx] < Threshold:
        continue
    # Boxes are normalized [y_min, x_min, y_max, x_max] — scale to pixels.
    y_min = int(bboxes[idx][0] * im_height)
    x_min = int(bboxes[idx][1] * im_width)
    y_max = int(bboxes[idx][2] * im_height)
    x_max = int(bboxes[idx][3] * im_width)
    # NumPy slicing pulls the region straight out of the original array.
    region = image[y_min:y_max, x_min:x_max]
    # --psm 6 treats the region as one uniform block of text.
    data = pytesseract.image_to_string(region, lang='eng', config='--psm 6')
    print(data)
This way, you can take the regions of interest straight from the original image and run data extraction on them without the quality loss introduced by re-encoding and saving the crops. You can then use the extracted data as needed. I hope this helps!
Below is my code that detects the regions (tables, paragraphs) in an invoice and crops each detected region from the invoice. I am facing issues while performing data extraction on the cropped images because the images are very small. I am passing each cropped image to tesseract for text extraction; code below.
Are there any other options where we can perform extraction on the entire image, but only for the detected regions, instead of cropping the image? Cropping leads to loss of image quality, and the tesseract text extraction quality is also poor.
# Iterate over the detections; for every box whose confidence clears the
# threshold, record its pixel coordinates and save the cropped region as
# a JPEG file.
for idx in range(len(bboxes)):
    if bscores[idx] >= Threshold:
        # Region of interest: detection boxes are normalized
        # [y_min, x_min, y_max, x_max], so scale by the image dimensions
        # to obtain pixel coordinates.
        y_min = int(bboxes[idx][0] * im_height)
        x_min = int(bboxes[idx][1] * im_width)
        y_max = int(bboxes[idx][2] * im_height)
        x_max = int(bboxes[idx][3] * im_width)
        class_label = category_index[int(bclasses[idx])]['name']
        class_labels.append(class_label)
        bbox.append([x_min, y_min, x_max, y_max, class_label, float(bscores[idx])])

        # Crop the detected region.
        # BUGFIX: tf.image.encode_jpeg requires a uint8 tensor; the original
        # cast to np.int32 makes encode_jpeg raise. Pixel values are already
        # in 0-255, so cast to uint8 instead.
        cropped_image = tf.image.crop_to_bounding_box(
            image, y_min, x_min, y_max - y_min, x_max - x_min
        ).numpy().astype(np.uint8)
        # encode_jpeg encodes a uint8 tensor to a JPEG-encoded string tensor.
        output_image = tf.image.encode_jpeg(cropped_image)

        score = bscores[idx] * 100
        # BUGFIX: image_name[:-4] silently assumed a 3-character extension
        # (e.g. ".png"); use os.path.splitext for the stem as well, matching
        # how the extension is already obtained below.
        stem, ext = os.path.splitext(image_name)
        # Output path: <stem>_<idx>_<label>_<score>%_<ext>
        file_name = tf.constant(
            OUTPUT_PATH + stem + '_' + str(idx) + '_' + class_label
            + '_' + str(round(score)) + '%' + '_' + ext
        )
        writefile = tf.io.write_file(file_name, output_image)
Code for text extraction
import cv2
import pytesseract

# Path to the tesseract binary on Windows.
# BUGFIX: the original string had lost its backslash path separators
# ("C:Program FilesTesseract-OCRtesseract.exe"), so pytesseract could not
# locate the executable. Restore them using a raw string literal.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Preprocess: grayscale, Gaussian blur, then Otsu's automatic threshold
# (inverted so text becomes white on black for the morphology step).
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morphological opening removes small noise specks; invert back so the
# text is dark on a light background, which tesseract handles best.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
invert = 255 - opening

# Perform text extraction. --psm 6: assume a single uniform block of text.
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

# Show the intermediate images for debugging.
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('invert', invert)
cv2.waitKey()
It sounds like you want to perform data extraction on the detected regions without saving cropped image files. One way to do this is to use the bounding-box coordinates to slice the corresponding regions directly out of the original image array in memory and run text extraction on those slices. Because this operates on the full-resolution pixels of the original image — with no JPEG re-encoding or writing to disk — you avoid the quality loss you saw with the saved crops.
Here’s an example of how you could do this:
# Run OCR on each high-confidence detection directly from the
# full-resolution original image held in memory.
image = cv2.imread('original_image.jpg')

# Walk the detections; guard clause skips anything below the threshold.
for idx in range(len(bboxes)):
    if bscores[idx] < Threshold:
        continue
    # Boxes are normalized [y_min, x_min, y_max, x_max] — scale to pixels.
    y_min = int(bboxes[idx][0] * im_height)
    x_min = int(bboxes[idx][1] * im_width)
    y_max = int(bboxes[idx][2] * im_height)
    x_max = int(bboxes[idx][3] * im_width)
    # NumPy slicing pulls the region straight out of the original array.
    region = image[y_min:y_max, x_min:x_max]
    # --psm 6 treats the region as one uniform block of text.
    data = pytesseract.image_to_string(region, lang='eng', config='--psm 6')
    print(data)
This way, you can take the regions of interest straight from the original image and run data extraction on them without the quality loss introduced by re-encoding and saving the crops. You can then use the extracted data as needed. I hope this helps!