How do I recognise an image on a webpage using Python?
Question:
I’ve created a simple Python application that uses the CV2 computer vision library to recognise a template image on a webpage.
I give the application a template image that it needs to recognise on the source image. In this case, the source image is a screenshot of the website www.google.com and the template image is the Google search button.
I thought the application worked at first, but it’s drawing the rectangle completely in the wrong place on the input (source) image. I’ve added a picture below of where the application located the template image.
Here’s the source code.
Main Application Source
import cv2
import numpy
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
class Automate:
    """Open a page in Chrome, screenshot it and locate a template image in it.

    Typical use: ``open_webpage`` -> ``snap_screen`` -> ``close_webpage`` ->
    ``match``. The screenshot path is kept in ``self.screenShot``.
    """

    def __init__(self):
        """Start a Chrome session with the ``kiosk`` argument set."""
        chrome_options = Options()
        chrome_options.add_argument("kiosk")
        self.driver = webdriver.Chrome(ChromeDriverManager("93.0.4577.63").install(), options=chrome_options)
        # Path of the most recent screenshot; set by snap_screen().
        self.screenShot = None
        self.finalImage = None

    def open_webpage(self, url):
        """Navigate the browser to *url*."""
        print(f"Open webpage {url}")
        self.driver.get(url)

    def close_webpage(self):
        """Wait five seconds, then shut the browser session down."""
        # Event is presumably threading.Event; an event that is never set
        # makes wait(5) a plain five-second pause.
        Event().wait(5)
        # quit() ends the whole WebDriver session (all windows and the driver
        # process); close() would only close the current window.
        self.driver.quit()
        print("Closing webpage")

    def snap_screen(self):
        """Save a screenshot of the current page to ``screenshot.png``."""
        print("Capturing screen")
        self.screenShot = "screenshot.png"
        self.driver.save_screenshot(self.screenShot)
        print("done.")

    def match(self, image, template):
        """Find *template* inside *image* and display the best match.

        Both arguments are file paths. The template is shown first, then the
        source image with a red rectangle drawn around the best match; each
        window blocks until a key is pressed.

        Template matching compares pixels at the template's own size, so the
        template must have the same scale as the region it should match in
        the source image.

        Raises:
            FileNotFoundError: if either image cannot be read.
            ValueError: if the template is larger than the source image.
        """
        # cv2.imread returns None instead of raising when a file is missing
        # or unreadable, so check explicitly.
        source_bgr = cv2.imread(image)
        if source_bgr is None:
            raise FileNotFoundError(f"Could not read source image: {image}")
        template_bgr = cv2.imread(template)
        if template_bgr is None:
            raise FileNotFoundError(f"Could not read template image: {template}")

        # Matching is done on greyscale versions of both images.
        src = cv2.cvtColor(source_bgr, cv2.COLOR_BGR2GRAY)
        temp = cv2.cvtColor(template_bgr, cv2.COLOR_BGR2GRAY)

        cv2.imshow("out", temp)
        cv2.waitKey(0)

        H, W = temp.shape
        if H > src.shape[0] or W > src.shape[1]:
            raise ValueError(
                f"Template ({W}x{H}) is larger than the source image "
                f"({src.shape[1]}x{src.shape[0]})"
            )

        # Use the public constant; the cv2.cv2 alias is not available in
        # current opencv-python builds.
        result = cv2.matchTemplate(src, temp, cv2.TM_CCOEFF_NORMED)

        # For TM_CCOEFF_NORMED the best match is the maximum of the result map.
        _, _, _, max_loc = cv2.minMaxLoc(result)
        bottom_right = (max_loc[0] + W, max_loc[1] + H)

        # Draw on the colour image that was already loaded.
        cv2.rectangle(source_bgr, max_loc, bottom_right, (0, 0, 255), 5)
        cv2.imshow("output", source_bgr)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def main():
    """Screenshot the Google home page and locate the search button in it."""
    url = "http://www.google.com"
    auto = Automate()
    auto.open_webpage(url)
    auto.snap_screen()
    auto.close_webpage()
    match_image = "images/templates/google-button.png"
    # Match screenshot with template image.
    # Automate defines match(), not check_match(); the old call raised
    # AttributeError.
    auto.match(
        image=auto.screenShot,
        template=match_image,
    )
I’d appreciate any help or advice on how to solve this issue.
Update
Following the advice given by user zteffi, I resized my template image to the correct image dimensions. After doing this, the match template function works as expected.
You want to make sure that your template image is as close as possible to the actual size of the region you want to locate in the base image. In my case, this was around 150 x 150 or 200 x 200, which made the button easier to find.
Answers:
I resized my template image to the correct image dimensions. After doing this, the match template function works as expected.
You want to make sure that your template image is as close as possible to the actual size of the region you want to locate in the base image. In my case, this was around 150 x 150 or 200 x 200, which made the button easier to find.
I’ve created a simple Python application that uses the CV2 computer vision library to recognise a template image on a webpage.
I give the application a template image that it needs to recognise on the source image. In this case, the source image is a screenshot of the website www.google.com and the template image is the Google search button.
I thought the application worked at first, but it’s drawing the rectangle completely in the wrong place on the input (source) image. I’ve added a picture below of where the application located the template image.
Here’s the source code.
Main Application Source
import cv2
import numpy
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
class Automate:
    """Open a page in Chrome, screenshot it and locate a template image in it.

    Typical use: ``open_webpage`` -> ``snap_screen`` -> ``close_webpage`` ->
    ``match``. The screenshot path is kept in ``self.screenShot``.
    """

    def __init__(self):
        """Start a Chrome session with the ``kiosk`` argument set."""
        chrome_options = Options()
        chrome_options.add_argument("kiosk")
        self.driver = webdriver.Chrome(ChromeDriverManager("93.0.4577.63").install(), options=chrome_options)
        # Path of the most recent screenshot; set by snap_screen().
        self.screenShot = None
        self.finalImage = None

    def open_webpage(self, url):
        """Navigate the browser to *url*."""
        print(f"Open webpage {url}")
        self.driver.get(url)

    def close_webpage(self):
        """Wait five seconds, then shut the browser session down."""
        # Event is presumably threading.Event; an event that is never set
        # makes wait(5) a plain five-second pause.
        Event().wait(5)
        # quit() ends the whole WebDriver session (all windows and the driver
        # process); close() would only close the current window.
        self.driver.quit()
        print("Closing webpage")

    def snap_screen(self):
        """Save a screenshot of the current page to ``screenshot.png``."""
        print("Capturing screen")
        self.screenShot = "screenshot.png"
        self.driver.save_screenshot(self.screenShot)
        print("done.")

    def match(self, image, template):
        """Find *template* inside *image* and display the best match.

        Both arguments are file paths. The template is shown first, then the
        source image with a red rectangle drawn around the best match; each
        window blocks until a key is pressed.

        Template matching compares pixels at the template's own size, so the
        template must have the same scale as the region it should match in
        the source image.

        Raises:
            FileNotFoundError: if either image cannot be read.
            ValueError: if the template is larger than the source image.
        """
        # cv2.imread returns None instead of raising when a file is missing
        # or unreadable, so check explicitly.
        source_bgr = cv2.imread(image)
        if source_bgr is None:
            raise FileNotFoundError(f"Could not read source image: {image}")
        template_bgr = cv2.imread(template)
        if template_bgr is None:
            raise FileNotFoundError(f"Could not read template image: {template}")

        # Matching is done on greyscale versions of both images.
        src = cv2.cvtColor(source_bgr, cv2.COLOR_BGR2GRAY)
        temp = cv2.cvtColor(template_bgr, cv2.COLOR_BGR2GRAY)

        cv2.imshow("out", temp)
        cv2.waitKey(0)

        H, W = temp.shape
        if H > src.shape[0] or W > src.shape[1]:
            raise ValueError(
                f"Template ({W}x{H}) is larger than the source image "
                f"({src.shape[1]}x{src.shape[0]})"
            )

        # Use the public constant; the cv2.cv2 alias is not available in
        # current opencv-python builds.
        result = cv2.matchTemplate(src, temp, cv2.TM_CCOEFF_NORMED)

        # For TM_CCOEFF_NORMED the best match is the maximum of the result map.
        _, _, _, max_loc = cv2.minMaxLoc(result)
        bottom_right = (max_loc[0] + W, max_loc[1] + H)

        # Draw on the colour image that was already loaded.
        cv2.rectangle(source_bgr, max_loc, bottom_right, (0, 0, 255), 5)
        cv2.imshow("output", source_bgr)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def main():
    """Screenshot the Google home page and locate the search button in it."""
    url = "http://www.google.com"
    auto = Automate()
    auto.open_webpage(url)
    auto.snap_screen()
    auto.close_webpage()
    match_image = "images/templates/google-button.png"
    # Match screenshot with template image.
    # Automate defines match(), not check_match(); the old call raised
    # AttributeError.
    auto.match(
        image=auto.screenShot,
        template=match_image,
    )
I’d appreciate any help or advice on how to solve this issue.
Update
Following the advice given by user zteffi, I resized my template image to the correct image dimensions. After doing this, the match template function works as expected.
You want to make sure that your template image is as close as possible to the actual size of the region you want to locate in the base image. In my case, this was around 150 x 150 or 200 x 200, which made the button easier to find.
I resized my template image to the correct image dimensions. After doing this, the match template function works as expected.
You want to make sure that your template image is as close as possible to the actual size of the region you want to locate in the base image. In my case, this was around 150 x 150 or 200 x 200, which made the button easier to find.