I'm trying to use OpenCV to detect objects in a video game. I grabbed the image as a PNG and trimmed it so that the background is transparent, and yet it only detects the object at threshold levels around 0.6.
Needle image:
Haystack image:
This was done using a threshold of 0.8. Notice the number of false positives, as well as a false negative: the real instance goes undetected.
Here is the Python code.
import cv2 as cv
import numpy as np


class Vision:
    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0
    method = None

    # constructor
    def __init__(self, needle_img_path, method=cv.TM_CCOEFF_NORMED):
        # load the image we're trying to match
        self.needle_img = cv.imread(needle_img_path, cv.IMREAD_UNCHANGED)
        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]
        # There are 6 methods to choose from:
        # TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF, TM_SQDIFF_NORMED
        self.method = method

    def find(self, haystack_img, threshold=0.9, debug_mode=None):
        # run the OpenCV algorithm
        base = self.needle_img[:, :, 0:3]
        alpha = self.needle_img[:, :, 3]
        alpha = cv.merge([alpha, alpha, alpha])
        result = cv.matchTemplate(haystack_img, base, self.method, mask=alpha)

        # Get all the positions from the match result that exceed our threshold
        locations = np.where(result >= threshold)
        locations = list(zip(*locations[::-1]))
        #print(locations)

        # First we need to create the list of [x, y, w, h] rectangles
        rectangles = []
        for loc in locations:
            rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
            # Add every box to the list twice in order to retain single (non-overlapping) boxes
            rectangles.append(rect)
            rectangles.append(rect)

        # Apply group rectangles
        # "Relative difference between sides of the rectangles to merge them into a group."
        rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)
        #print(rectangles)

        points = []
        if len(rectangles):
            #print('Found needle.')
            line_color = (0, 255, 0)
            line_type = cv.LINE_4
            marker_color = (255, 0, 255)
            marker_type = cv.MARKER_CROSS

            # Loop over all the rectangles
            for (x, y, w, h) in rectangles:
                # Determine the center position
                center_x = x + int(w/2)
                center_y = y + int(h/2)
                # Save the points
                points.append((center_x, center_y))

                if debug_mode == 'rectangles':
                    # Determine the box position
                    top_left = (x, y)
                    bottom_right = (x + w, y + h)
                    # Draw the box
                    cv.rectangle(haystack_img, top_left, bottom_right, color=line_color,
                                 lineType=line_type, thickness=2)
                elif debug_mode == 'points':
                    # Draw the center point
                    cv.drawMarker(haystack_img, (center_x, center_y),
                                  color=marker_color, markerType=marker_type,
                                  markerSize=40, thickness=2)

        if debug_mode:
            cv.imshow('Matches', haystack_img)
            #cv.waitKey()
            #cv.imwrite('result_click_point.jpg', haystack_img)

        return points
I'm also using wincap to capture my screen in real time, but I think the underlying issue is in the image detection. If I'm feeding in the needle image as the exact, pixel-perfect image that I want it to detect, why can't it properly detect it at high thresholds?
asked Nov 17, 2024 at 5:45 by h0tdawgz132

Comments:
- Template matching does not work properly most of the time. Why did you not do object detection via YOLO, or find objects via SURF or SIFT techniques? – BarzanHayati
- @BarzanHayati That's wrong. It works fine if you know how to use it, and when to use it. For this problem here, it's fine to use. OP's code is just using it wrong. – Christoph Rackwitz
1 Answer
Your template does not perfectly match the instance in the haystack. Left: your template. Right: a piece of the haystack, where I erased the surroundings. Ignore the edge pixels; look at the pixels inside the objects.
Now that we've established that there cannot be a perfect match on this data, I hope you'll understand that you need to give the program some tolerance.
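Before hard-coding a threshold, it helps to measure how good the best match actually is. A minimal sketch, reusing the question's setup; the file names are placeholders:

import cv2 as cv

# Placeholder file names; substitute your own captures.
needle = cv.imread('needle.png', cv.IMREAD_UNCHANGED)
haystack = cv.imread('haystack.png')

base = needle[:, :, 0:3]  # color channels
alpha = needle[:, :, 3]   # transparency, used as the match mask

result = cv.matchTemplate(haystack, base, cv.TM_CCOEFF_NORMED, mask=alpha)

# For TM_CCOEFF_NORMED the *maximum* is the best match; for the
# TM_SQDIFF variants it would be the minimum instead.
# Caveat: with this matching mode, degenerate scores from flat regions
# can already pollute the maximum; see the false-positives discussion below.
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result)
print('best score:', max_val, 'at', max_loc)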
Now to the false positives: those happen because you chose a terrible matching mode for this data, TM_CCOEFF_NORMED. On (nearly) perfectly flat areas it goes completely wild, because the normalization divides by the window's variance, which is near zero there.
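If you do stay on TM_CCOEFF_NORMED, you can at least discard degenerate scores before thresholding. Whether the flat regions come out as NaN/inf or as pinned extreme values depends on the OpenCV build, so treat this hedged sketch as a band-aid, not the fix:

import numpy as np

def finite_scores(result):
    # Replace non-finite match scores (NaN/inf from near-zero denominators
    # on flat regions) with 0.0 so they can never pass a ">= threshold"
    # test. This only papers over the symptom; switching matching modes,
    # as described below, is the real fix.
    return np.nan_to_num(result, nan=0.0, posinf=0.0, neginf=0.0)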
When the instances in the haystack are pixel-perfect copies of the needle, you should use TM_SQDIFF or TM_SQDIFF_NORMED. That also goes for when the instances differ a little but generally have the same brightness and color.
This is the result of using TM_SQDIFF_NORMED, with a mask argument derived from the needle, and accepting a difference of 0.2. The instance has a difference of 0.192.
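For reference, here is roughly what the question's find() boils down to with that change. A sketch under the same assumptions as above (placeholder file names), not the exact code behind the screenshot:

import cv2 as cv
import numpy as np

# Placeholder file names; substitute your own captures.
needle = cv.imread('needle.png', cv.IMREAD_UNCHANGED)
haystack = cv.imread('haystack.png')

base = needle[:, :, 0:3]
mask = needle[:, :, 3]
h, w = base.shape[:2]

# TM_SQDIFF_NORMED measures *difference*: 0.0 is a perfect match,
# so keep locations at or below the tolerance instead of above it.
result = cv.matchTemplate(haystack, base, cv.TM_SQDIFF_NORMED, mask=mask)
tolerance = 0.2
locations = np.where(result <= tolerance)

for x, y in zip(*locations[::-1]):
    cv.rectangle(haystack, (int(x), int(y)), (int(x) + w, int(y) + h),
                 (0, 255, 0), 2)

cv.imwrite('matches.png', haystack)

Note the flipped comparison relative to the question's code: with the TM_SQDIFF variants, candidate locations are the ones below the tolerance, not above it.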