I have two models. One is pretrained with YOLOv11 and detects faces and second one trained from scratch with Tensorflow converted to onnx using tf2onnx and then to pt using onnx2pytorch. Second model recognizes face. If you wondered program is supposed to take image, detect face, and pass resized, detected face to second model that recognizes face. This workaround with changing model was caused by conflict of PyTorch and Tensorflow. When writing program that uses both models. Second model has problems and when I run the code I encounter this runtime error
RuntimeError: Given groups=1, weight of size [32, 3, 2, 2], expected input[1, 224, 3, 224] to have 3 channels, but got 224 channels instead
among with
0: 640x480 1 Face, 79.0ms
Speed: 31.3ms preprocess, 79.0ms inference, 917.1ms postprocess per image at shape (1, 3, 640, 480)
Face tensor shape: torch.Size([1, 3, 224, 224])
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "REDACTED/face_recognition/src/interface/recognize_face.py", line 207, in <module>
main()
File "REDACTED/face_recognition/src/interface/recognize_face.py", line 185, in main
result_image, boxes, names, confidences = recognizer.process_image(image_path, save_output=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "REDACTED/src/interface/recognize_face.py", line 149, in process_image
name, confidence = self.recognize_face(image, box)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "REDACTED/face_recognition/src/interface/recognize_face.py", line 104, in recognize_face
prediction = self.model(face_tensor)
^^^^^^^^^^^^^^^^^^^^^^^
File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/onnx2pytorch/convert/model.py", line 224, in forward
activations[out_op_id] = op(*in_activations)
^^^^^^^^^^^^^^^^^^^
File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/conv.py", line 554, in forward
return self._conv_forward(input, self.weight, self.bias)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/conv.py", line 549, in _conv_forward
return F.conv2d(
^^^^^^^^^
This is code that is being executed:
# src/interface/recognize_face.py
import cv2
import numpy as np
import os
import torch
from typing import List, Tuple, Optional, Union
from ultralytics import YOLO
from src.interface.detect_image import FaceDetector
class FaceRecognizer:
"""
A class for detecting and recognizing faces in images.
Combines face detection using a YOLO model with recognition using a PyTorch classifier.
"""
def __init__(self, detector_path=None, recognizer_path=None):
# Get the directory where the script is located
script_dir = os.path.dirname(os.path.abspath(__file__))
# Go up to project root (from src/interface/ to project root) - only TWO levels
project_root = os.path.dirname(os.path.dirname(script_dir))
# Set default paths relative to project root
if detector_path is None:
detector_path = os.path.join(project_root, 'models', 'detection',
'face_detector', 'weights', 'best.pt')
if recognizer_path is None:
recognizer_path = os.path.join(project_root, 'models', 'recognition',
'model.pt')
self.detector = FaceDetector(detector_path)
# Load PyTorch model
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.model = torch.load(recognizer_path, map_location=self.device)
self.model.eval() # Set model to evaluation mode
self.class_names = self._load_class_names()
def _load_class_names(self) -> List[str]:
"""Load class names from file or return default if not found."""
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(os.path.dirname(script_dir))
class_file = os.path.join(project_root, 'models', 'recognition', 'class_names.txt')
if os.path.exists(class_file):
with open(class_file, 'r') as f:
return [line.strip() for line in f.readlines()]
return ["Unknown"]
def preprocess_face(self, image: np.ndarray, face_box: List[int]) -> torch.Tensor:
"""
Preprocess a detected face for the recognition model.
Args:
image: Original image containing the face
face_box: Bounding box coordinates [x1, y1, x2, y2]
Returns:
Preprocessed face tensor ready for the model
"""
x1, y1, x2, y2 = face_box
face_img = image[y1:y2, x1:x2]
face_img = cv2.resize(face_img, (224, 224)) # Resize to 224x224
# Check if image has 4 channels (RGBA) and convert to 3 channels (RGB)
if face_img.shape[2] == 4:
face_img = face_img[:, :, :3] # Remove alpha channel
# OpenCV loads images in BGR, convert to RGB for most models
face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
# Normalize pixel values
face_img = face_img / 255.0
# Convert to tensor and correctly rearrange dimensions
# Input shape: (224, 224, 3) [H, W, C]
# PyTorch expects: (1, 3, 224, 224) [B, C, H, W]
face_tensor = torch.from_numpy(face_img).float()
face_tensor = face_tensor.permute(2, 0, 1) # Change to (3, 224, 224)
face_tensor = face_tensor.unsqueeze(0) # Add batch dimension: (1, 3, 224, 224)
face_tensor = face_tensor.to(self.device)
# Debug information to verify tensor shape
print(f"Face tensor shape: {face_tensor.shape}")
return face_tensor
def recognize_face(self, image: np.ndarray, face_box: List[int]) -> Tuple[str, float]:
"""
Recognize a face in the given bounding box.
Args:
image: Original image containing the face
face_box: Bounding box coordinates [x1, y1, x2, y2]
Returns:
Tuple of (predicted_name, confidence_score)
"""
# Preprocess face image
face_tensor = self.preprocess_face(image, face_box)
# Run recognition model with PyTorch
with torch.no_grad():
prediction = self.model(face_tensor)
# Get predicted class
probabilities = self._softmax(prediction[0].cpu().numpy())
class_idx = np.argmax(probabilities)
confidence = probabilities[class_idx]
if class_idx < len(self.class_names):
name = self.class_names[class_idx]
else:
name = "Unknown"
return name, confidence
def _softmax(self, x: np.ndarray) -> np.ndarray:
"""Compute softmax values for array x."""
e_x = np.exp(x - np.max(x))
return e_x / e_x.sum()
def process_image(self, image_path: str, save_output: bool = False) -> Tuple[
Optional[np.ndarray], List[List[int]], List[str], List[float]]:
"""
Process an image to detect and recognize faces.
Args:
image_path: Path to the input image
save_output: Whether to save the annotated output image
Returns:
Tuple of (annotated_image, face_boxes, recognized_names, confidence_scores)
Returns None for the image if loading fails
"""
# Detect faces
boxes, confidences, image = self.detector.detect_faces(image_path)
# Check if image was loaded successfully
if image is None:
print(f"Error: Failed to load image from {image_path}")
return None, [], [], []
# Recognize each face
names = []
recognition_confidences = []
for box in boxes:
name, confidence = self.recognize_face(image, box)
names.append(name)
recognition_confidences.append(confidence)
# Draw results
result_image = image.copy()
# Rest of the method remains unchanged
for i, box in enumerate(boxes):
x1, y1, x2, y2 = box
name = names[i]
detect_conf = confidences[i]
recog_conf = recognition_confidences[i]
# Draw rectangle
cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
# Add label
label = f"{name}: {recog_conf:.2f}"
cv2.putText(result_image, label, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Save if requested
if save_output:
output_path = os.path.splitext(image_path)[0] + "_recognized.jpg"
cv2.imwrite(output_path, result_image)
return result_image, boxes, names, recognition_confidences
def main():
recognizer = FaceRecognizer()
# Process image
image_path = './data/detection/test2/test_image.jpg'
result_image, boxes, names, confidences = recognizer.process_image(image_path, save_output=True)
# Check if image processing was successful
if result_image is None:
print("Failed to process image. Check if the file exists.")
return
# Display results
if len(boxes) == 0:
print("No faces detected")
else:
print(f"Detected {len(boxes)} faces")
for i, box in enumerate(boxes):
print(f"Face {i + 1}: {box}, Identity: {names[i]}, Confidence: {confidences[i]:.2f}")
# Display the image
cv2.imshow("Face Recognition", result_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
if __name__ == "__main__":
main()
and if needed this is src.interface.detect_image
# src/inference/detect_image.py
import os
import cv2
import numpy as np
from ultralytics import YOLO
class FaceDetector:
def __init__(self, model_path='./models/detection/face_detector/weights/best.pt'):
self.model = YOLO(model_path)
self.conf_threshold = 0.5
def detect_faces(self, image_path):
"""
Detect faces in an image.
Args:
image_path: Path to the image file
Returns:
boxes: List of bounding boxes in [x1, y1, x2, y2] format
confidences: List of confidence scores
image: Original image with detections
"""
# Read the image
image = cv2.imread(image_path)
# Run inference
results = self.model(image)
# Extract detections
boxes = []
confidences = []
for result in results:
for box in result.boxes:
if box.conf >= self.conf_threshold:
x1, y1, x2, y2 = box.xyxy[0].tolist()
boxes.append([int(x1), int(y1), int(x2), int(y2)])
confidences.append(float(box.conf))
return boxes, confidences, image
def visualize_detections(self, image, boxes, confidences, save_path=None):
"""
Draw bounding boxes on the image and optionally save it.
"""
image_copy = image.copy()
for i, box in enumerate(boxes):
x1, y1, x2, y2 = box
conf = confidences[i]
# Draw rectangle
cv2.rectangle(image_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)
# Add label
label = f"Face: {conf:.2f}"
cv2.putText(image_copy, label, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Save result if path provided
if save_path is not None:
os.makedirs(os.path.dirname(save_path), exist_ok=True)
cv2.imwrite(save_path, image_copy)
return image_copy
def main():
detector = FaceDetector()
# Example usage
image_path = './data/detection/test_image.jpg'
output_path = './data/detection/output_image.jpg'
boxes, confidences, image = detector.detect_faces(image_path)
if len(boxes) == 0:
print("No faces detected")
else:
print(f"Detected {len(boxes)} faces")
for i, box in enumerate(boxes):
print(f"Face {i + 1}: {box}, Confidence: {confidences[i]:.2f}")
# Visualize and save results
result_image = detector.visualize_detections(image, boxes, confidences, output_path)
# Display the image
cv2.imshow("Detected Faces", result_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
if __name__ == "__main__":
main()
I tried modifying image that is inserted into model. Searching internet and working with LLM's couldn't help me solve my problem. Any kind of guidance on how to resolve this problem will be helpful.