(Python) Same process, but one picture is missing: "output_y:389_x:150.png"

Question:

I’m processing an image: I capture a picture from the production line, extract the part I want from it, cut that part into small pieces, and then run image-to-text detection (OCR) on each piece.

Question: one of the images disappears. I know it is "output_y:389_x:150.png" because I name the pieces by their (x, y) position in the original image.

  • the script
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
from PIL import Image
import pytesseract
import cv2 
import numpy as np
from os import listdir
from os.path import isfile, join

# Load the captured frame and build a binary mask from which contours are extracted.
image = cv2.imread("/home/student_DC/desktop/test_11_8/original.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# All-white canvas with the same shape as the input; it is only written to disk later.
white_bg = 255*np.ones_like(image)

# Inverse binary threshold: pixels with gray value <= 60 become 255, all others become 0.
ret, thresh = cv2.threshold(gray, 60, 255, cv2.THRESH_BINARY_INV)
# NOTE(review): a median blur with aperture size 1 presumably leaves the mask unchanged — confirm.
blur = cv2.medianBlur(thresh, 1)
# Dilate with a 10-row by 20-column kernel of ones before looking for contours.
kernel = np.ones((10, 20), np.uint8)
img_dilation = cv2.dilate(blur, kernel, iterations=1)
# NOTE(review): unpacking three values matches the OpenCV 3.x return signature;
# OpenCV 4.x returns only (contours, hierarchy) — confirm the installed version.
im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Order the contours left to right by the x coordinate of their bounding box.
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

# For every contour whose bounding box passes the size filter, snap its
# top-left corner to a fixed grid value, save the region under a name built
# from the snapped coordinates, and record [x, y] in xy_list.
# NOTE(review): j is incremented below but never read in the visible code.
j = 1
xy_list = []
for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)
    # The region is sliced out before the rectangle below is drawn on `image`.
    roi = image[y:y + h, x:x + w]
    # Size filter: height strictly between 50 and 200, width above 50.
    if (h > 50 and w > 50) and h < 200:

        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 1)        
#        cv2.imshow('{}.png'.format(i), roi)
        
        print(x , y )
        
        # Snap x to one of six fixed column values; each band is an open
        # interval, so a value exactly on a band edge falls through to 0.
        if 25 < x < 65:
            x = 45
        elif 130 < x < 170:
            x = 150
        elif 235 < x < 275:
            x = 255
        elif 340 < x < 380:
            x = 360
        elif 445 < x < 485:
            x = 465
        elif 550 < x < 590:
            x = 570
        else:
            # 0 marks "not in any column band"; such regions are not saved.
            x = 0
                    
            

        # Snap y to one of five fixed row values in the same way.
        if 27 < y < 67:
            y = 47
        elif 139 < y < 179:
            y = 159
        elif 253 < y < 293:
            y = 273
        elif 369 < y < 409:
            y = 389
        elif 484 < y < 524:
            y = 504
        else:
            # 0 marks "not in any row band".
            y = 0           
        print("new number" , x , y )
        
        tem_list_x_and_y = [ ] 

        # Only regions that landed in both a column band and a row band are kept.
        # Two regions that snap to the same (x, y) write to the same file name.
        if (x != 0) and (y != 0):
            cv2.imwrite(f"/home/student_DC/desktop/test_11_8/output01_test11_9/output_y:{y}_x:{x}.png", roi)
            tem_list_x_and_y.append(x)
            tem_list_x_and_y.append(y)
            
            xy_list.append(tem_list_x_and_y)

        #--- paste ROIs on image with white background 
        # white_bg[y:y+h, x:x+w] = roi
        j +=1


# Report how many regions were saved.
print("len is : " ,len(xy_list))


# Each entry is [x, y]; sort by y first, then by x (row by row, left to right).
aaa_list = (sorted(xy_list , key=lambda k: [k[1], k[0]]))
print(aaa_list)

print("+ + + +")

# x and y here are whatever values the contour loop last assigned.
# NOTE(review): this file is written into output01_test11_9, the same directory
# that is listed afterwards, so it is counted among the files found there.
cv2.imwrite(f"/home/student_DC/desktop/test_11_8/output01_test11_9/final_output_{x}_{y}.png", white_bg)
cv2.waitKey(0)
cv2.destroyAllWindows() 

# Crop a fixed window out of the files saved in output01_test11_9 and write
# each crop under the same file name into output02_test11_9.
mypath = "/home/student_DC/desktop/test_11_8/output01_test11_9"
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

          # print("output01_test11_9 file data")
          # print(onlyfiles)
          # print(type(onlyfiles))
# NOTE(review): the index starts at 1, so onlyfiles[0] is never cropped into
# output02_test11_9. Presumably this is the file that goes "missing" — which
# file is first depends on the order listdir returns; confirm.
i = 1
number_of_onlyfiles = len(onlyfiles)

while i < number_of_onlyfiles:
    each_file_path = '/home/student_DC/desktop/test_11_8/output01_test11_9/'+ onlyfiles[i]
    image = cv2.imread(each_file_path)
    # Fixed crop window: rows 51..66 and columns 25..85 of each saved region.
    y=51
    x=25
    h=16
    w=61
    crop = image[y:y+h, x:x+w]
    cv2.imwrite(f"/home/student_DC/desktop/test_11_8/output02_test11_9/"+ onlyfiles[i] , crop)
    i +=1

# Rebuild the saved file names in sorted (y, then x) order; each entry of
# aaa_list is [x, y], so index 1 is y and index 0 is x.
ordered_png_name_list = []
    
for i in aaa_list:
    ordered_png_name_list.append('output_y:' + str(i[1]) + '_x:' + str(i[0]) + '.png')
    
print(ordered_png_name_list)



# Run OCR on each cropped image in sorted order and separate first-seen texts
# from repeated ones.
i = 0
listOfElems = []       # distinct OCR texts, in the order first seen
listOfDuplicate = []   # paths of crops whose text was already in listOfElems
# NOTE(review): the loop bound is len(onlyfiles), but the loop indexes
# ordered_png_name_list; the two lists are built separately and need not have
# the same length — confirm.
Number_of_onlyfiles = len(onlyfiles)
while i < Number_of_onlyfiles :
    each_file_path = '/home/student_DC/desktop/test_11_8/output02_test11_9/'+ ordered_png_name_list[i]
    # Raises FileNotFoundError when the crop was never written to output02_test11_9.
    image = Image.open(each_file_path)
    text = pytesseract.image_to_string(image, lang='eng')
    print("=  =  =  =  =  =  =  =  =")
    print(ordered_png_name_list[i])
    print(text)
       
    
    if text not in listOfElems:
        listOfElems.append(text) 
    else:
        print("Duplicate")
        listOfDuplicate.append(each_file_path)   
     
    i +=1
    
print(listOfElems)  
print(listOfDuplicate) 

# Print the path of every crop whose OCR text duplicated an earlier one.
print("= = = = ")
print ("each_Duplicate_file_path")

j =0
Number_of_listOfDuplicate = len(listOfDuplicate)
while j < Number_of_listOfDuplicate :
    each_Duplicate_file_path =  listOfDuplicate[j]
    # The opened image is assigned but not used afterwards in the visible code.
    image = Image.open(each_Duplicate_file_path)
    print (each_Duplicate_file_path)
    j +=1
  • the error:
Traceback (most recent call last):
  File "/home/student_DC/desktop/test_11_8/locate_sticker_xy_order加1by1.py", line 154, in <module>
    image = Image.open(each_file_path)
  File "/home/student_DC/miniconda3/lib/python3.9/site-packages/PIL/Image.py", line 3131, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/student_DC/desktop/test_11_8/output02_test11_9/output_y:389_x:150.png'

If I process the image "output_y:389_x:150.png" individually, it works fine:

import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
from PIL import Image
import pytesseract
import cv2 
import numpy as np
from os import listdir
from os.path import isfile, join


# Stand-alone check: crop the same fixed window out of the single file that
# goes missing in the batch run and write it to output02_test11_9.
image = cv2.imread('/home/student_DC/desktop/test_11_8/output01_test11_9/output_y:389_x:150.png')
# Fixed crop window: rows 51..66 and columns 25..85.
y=51
x=25
h=16
w=61
crop = image[y:y+h, x:x+w]
cv2.imwrite(f"/home/student_DC/desktop/test_11_8/output02_test11_9/output_y:389_x:150.png" , crop)

the image link: https://imgur.com/a/U8bn9DX
In the link, (pic 2) shows that processing "output_y:389_x:150.png" on its own works fine.

But as (pic 1) shows, when the loop processes all 30 images from the "output 1" folder into the "output 2" folder, "output_y:389_x:150.png" goes missing. How come?

Asked By: DC con

||

Answers:

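Here’s a version of your code that makes the loops more consistent. In your script the cropping loop starts at `i = 1`, so the first file returned by `listdir` is never cropped into the output02 folder, while the OCR loop counts up to `len(onlyfiles)` but indexes `ordered_png_name_list`; iterating directly over each list avoids both problems. In general, it’s a very bad idea to include hard-coded paths in code like this (that is, "/home/student_DC/desktop/test_11_8", etc.). If you are going to run this code from the "test_11_8" directory, then you can remove that prefix from all of your paths.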

import os
import cv2
import numpy as np
from PIL import Image
import pytesseract

# Load the captured frame and build a binary mask from which contours are extracted.
image = cv2.imread("/home/student_DC/desktop/test_11_8/original.png")
# cv2.imread signals a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of inside cvtColor.
if image is None:
    raise FileNotFoundError("could not read /home/student_DC/desktop/test_11_8/original.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# All-white canvas with the same shape as the input; written to disk later.
white_bg = 255*np.ones_like(image)

# Inverse binary threshold: pixels with gray value <= 60 become 255, all others 0.
ret, thresh = cv2.threshold(gray, 60, 255, cv2.THRESH_BINARY_INV)
blur = cv2.medianBlur(thresh, 1)
# Dilate with a 10-row by 20-column kernel of ones before looking for contours.
kernel = np.ones((10, 20), np.uint8)
img_dilation = cv2.dilate(blur, kernel, iterations=1)
# findContours returns (image, contours, hierarchy) on OpenCV 3.x but only
# (contours, hierarchy) on 2.x and 4.x; taking the last two items works on all.
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Order the contours left to right by the x coordinate of their bounding box.
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

# Nominal top-left coordinates of the grid cells. The y value 273 matches the
# 253 < y < 293 band used in the question's script (the previous 272 shifted
# both the band and the resulting file names by one).
X_CENTERS = (45, 150, 255, 360, 465, 570)
Y_CENTERS = (47, 159, 273, 389, 504)
SNAP_TOLERANCE = 20


def _snap(value, centers, tolerance=SNAP_TOLERANCE):
    """Return the first centre strictly within +/- tolerance of value, or 0 if none."""
    for centre in centers:
        if centre - tolerance < value < centre + tolerance:
            return centre
    return 0


# For every contour whose bounding box passes the size filter, snap its
# top-left corner to the grid, save the region under a name built from the
# snapped coordinates, and record [x, y] in xy_list.
xy_list = []
for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)
    # NOTE(review): roi is a slice of `image`, so the rectangle drawn below
    # presumably also shows up along its top/left edge in the saved file — confirm.
    roi = image[y:y + h, x:x + w]
    # Size filter: height strictly between 50 and 200, width above 50.
    if not (50 < h < 200 and w > 50):
        continue

    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 1)

    print(x , y )

    # 0 means "outside every band"; such regions are not saved.
    x = _snap(x, X_CENTERS)
    y = _snap(y, Y_CENTERS)

    print("new number" , x , y )

    if x != 0 and y != 0:
        cv2.imwrite(f"/home/student_DC/desktop/test_11_8/output01_test11_9/output_y:{y}_x:{x}.png", roi)
        xy_list.append([x, y])


# Report how many regions were saved.
print("len is : ", len(xy_list))

# Each entry is [x, y]; order them row by row (y first), then left to right (x).
aaa_list = sorted(xy_list, key=lambda pair: (pair[1], pair[0]))
print(aaa_list)

print("+ + + +")

# x and y here are whatever values the contour loop last assigned.
final_output_path = f"/home/student_DC/desktop/test_11_8/output01_test11_9/final_output_{x}_{y}.png"
cv2.imwrite(final_output_path, white_bg)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Crop the same fixed window out of every file saved in output01_test11_9 and
# write each crop under the same file name into output02_test11_9.
mypath = "/home/student_DC/desktop/test_11_8/output01_test11_9"
crop_dir = "/home/student_DC/desktop/test_11_8/output02_test11_9"
onlyfiles = [f for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath, f))]

# cv2.imwrite reports failure only through its return value, so make sure the
# target directory exists before writing into it.
os.makedirs(crop_dir, exist_ok=True)

# Fixed crop window: rows 51..66 and columns 25..85 of each saved region.
y = 51
x = 25
h = 16
w = 61

for name in onlyfiles:
    each_file_path = os.path.join(mypath, name)
    image = cv2.imread(each_file_path)
    # cv2.imread returns None for files it cannot decode; report and move on
    # instead of failing on the slice below.
    if image is None:
        print("skipping unreadable file:", each_file_path)
        continue
    crop = image[y:y+h, x:x+w]
    if not cv2.imwrite(os.path.join(crop_dir, name), crop):
        print("failed to write crop for:", name)

# Rebuild the saved file names in sorted (y, then x) order; each entry of
# aaa_list is [x, y], so index 1 is y and index 0 is x.
ordered_png_name_list = [
    'output_y:' + str(pair[1]) + '_x:' + str(pair[0]) + '.png'
    for pair in aaa_list
]

print(ordered_png_name_list)



# Run OCR on each cropped image in sorted order, separating first-seen texts
# from repeated ones, then list the files whose text was a repeat.
listOfElems = []       # distinct OCR texts, in the order first seen
listOfDuplicate = []   # paths of crops whose text was already in listOfElems
for name in ordered_png_name_list:
    each_file_path = '/home/student_DC/desktop/test_11_8/output02_test11_9/'+ name
    # The context manager closes the underlying file as soon as OCR is done,
    # so file handles do not accumulate across iterations.
    with Image.open(each_file_path) as image:
        text = pytesseract.image_to_string(image, lang='eng')
    print("=  =  =  =  =  =  =  =  =")
    print(name)
    print(text)

    if text not in listOfElems:
        listOfElems.append(text)
    else:
        print("Duplicate")
        listOfDuplicate.append(each_file_path)

print(listOfElems)
print(listOfDuplicate)

print("= = = = ")
print ("each_Duplicate_file_path")

for path in listOfDuplicate:
    print (path)
Answered By: Tim Roberts
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.