最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

python - PyTorch runtime error with input when using two models - Stack Overflow

programmeradmin1浏览0评论

I have two models. One is pretrained with YOLOv11 and detects faces and second one trained from scratch with Tensorflow converted to onnx using tf2onnx and then to pt using onnx2pytorch. Second model recognizes face. If you wondered program is supposed to take image, detect face, and pass resized, detected face to second model that recognizes face. This workaround with changing model was caused by conflict of PyTorch and Tensorflow. When writing program that uses both models. Second model has problems and when I run the code I encounter this runtime error RuntimeError: Given groups=1, weight of size [32, 3, 2, 2], expected input[1, 224, 3, 224] to have 3 channels, but got 224 channels instead among with

0: 640x480 1 Face, 79.0ms
Speed: 31.3ms preprocess, 79.0ms inference, 917.1ms postprocess per image at shape (1, 3, 640, 480)
Face tensor shape: torch.Size([1, 3, 224, 224])
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "REDACTED/face_recognition/src/interface/recognize_face.py", line 207, in <module>
    main()
  File "REDACTED/face_recognition/src/interface/recognize_face.py", line 185, in main
    result_image, boxes, names, confidences = recognizer.process_image(image_path, save_output=True)
                                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "REDACTED/src/interface/recognize_face.py", line 149, in process_image
    name, confidence = self.recognize_face(image, box)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "REDACTED/face_recognition/src/interface/recognize_face.py", line 104, in recognize_face
    prediction = self.model(face_tensor)
                 ^^^^^^^^^^^^^^^^^^^^^^^
  File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/onnx2pytorch/convert/model.py", line 224, in forward
    activations[out_op_id] = op(*in_activations)
                             ^^^^^^^^^^^^^^^^^^^
  File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/conv.py", line 554, in forward
    return self._conv_forward(input, self.weight, self.bias)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "REDACTED/face_recognition/.venv/lib/python3.12/site-packages/torch/nn/modules/conv.py", line 549, in _conv_forward
    return F.conv2d(
           ^^^^^^^^^

This is code that is being executed:

# src/interface/recognize_face.py
import cv2
import numpy as np
import os
import torch
from typing import List, Tuple, Optional, Union
from ultralytics import YOLO
from src.interface.detect_image import FaceDetector


class FaceRecognizer:
    """
    A class for detecting and recognizing faces in images.

    Combines face detection using a YOLO model with recognition using a PyTorch classifier.
    """

    def __init__(self, detector_path=None, recognizer_path=None):
        # Get the directory where the script is located
        script_dir = os.path.dirname(os.path.abspath(__file__))
        # Go up to project root (from src/interface/ to project root) - only TWO levels
        project_root = os.path.dirname(os.path.dirname(script_dir))

        # Set default paths relative to project root
        if detector_path is None:
            detector_path = os.path.join(project_root, 'models', 'detection',
                                         'face_detector', 'weights', 'best.pt')
        if recognizer_path is None:
            recognizer_path = os.path.join(project_root, 'models', 'recognition',
                                           'model.pt')

        self.detector = FaceDetector(detector_path)
        # Load PyTorch model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = torch.load(recognizer_path, map_location=self.device)
        self.model.eval()  # Set model to evaluation mode
        self.class_names = self._load_class_names()

    def _load_class_names(self) -> List[str]:
        """Load class names from file or return default if not found."""
        script_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(os.path.dirname(script_dir))
        class_file = os.path.join(project_root, 'models', 'recognition', 'class_names.txt')

        if os.path.exists(class_file):
            with open(class_file, 'r') as f:
                return [line.strip() for line in f.readlines()]
        return ["Unknown"]

    def preprocess_face(self, image: np.ndarray, face_box: List[int]) -> torch.Tensor:
        """
        Preprocess a detected face for the recognition model.

        Args:
            image: Original image containing the face
            face_box: Bounding box coordinates [x1, y1, x2, y2]

        Returns:
            Preprocessed face tensor ready for the model
        """
        x1, y1, x2, y2 = face_box
        face_img = image[y1:y2, x1:x2]
        face_img = cv2.resize(face_img, (224, 224))  # Resize to 224x224

        # Check if image has 4 channels (RGBA) and convert to 3 channels (RGB)
        if face_img.shape[2] == 4:
            face_img = face_img[:, :, :3]  # Remove alpha channel

        # OpenCV loads images in BGR, convert to RGB for most models
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

        # Normalize pixel values
        face_img = face_img / 255.0

        # Convert to tensor and correctly rearrange dimensions
        # Input shape: (224, 224, 3) [H, W, C]
        # PyTorch expects: (1, 3, 224, 224) [B, C, H, W]
        face_tensor = torch.from_numpy(face_img).float()
        face_tensor = face_tensor.permute(2, 0, 1)  # Change to (3, 224, 224)
        face_tensor = face_tensor.unsqueeze(0)  # Add batch dimension: (1, 3, 224, 224)
        face_tensor = face_tensor.to(self.device)

        # Debug information to verify tensor shape
        print(f"Face tensor shape: {face_tensor.shape}")

        return face_tensor

    def recognize_face(self, image: np.ndarray, face_box: List[int]) -> Tuple[str, float]:
        """
        Recognize a face in the given bounding box.

        Args:
            image: Original image containing the face
            face_box: Bounding box coordinates [x1, y1, x2, y2]

        Returns:
            Tuple of (predicted_name, confidence_score)
        """
        # Preprocess face image
        face_tensor = self.preprocess_face(image, face_box)

        # Run recognition model with PyTorch
        with torch.no_grad():
            prediction = self.model(face_tensor)

        # Get predicted class
        probabilities = self._softmax(prediction[0].cpu().numpy())
        class_idx = np.argmax(probabilities)
        confidence = probabilities[class_idx]

        if class_idx < len(self.class_names):
            name = self.class_names[class_idx]
        else:
            name = "Unknown"

        return name, confidence

    def _softmax(self, x: np.ndarray) -> np.ndarray:
        """Compute softmax values for array x."""
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

    def process_image(self, image_path: str, save_output: bool = False) -> Tuple[
        Optional[np.ndarray], List[List[int]], List[str], List[float]]:
        """
        Process an image to detect and recognize faces.

        Args:
            image_path: Path to the input image
            save_output: Whether to save the annotated output image

        Returns:
            Tuple of (annotated_image, face_boxes, recognized_names, confidence_scores)
            Returns None for the image if loading fails
        """
        # Detect faces
        boxes, confidences, image = self.detector.detect_faces(image_path)

        # Check if image was loaded successfully
        if image is None:
            print(f"Error: Failed to load image from {image_path}")
            return None, [], [], []

        # Recognize each face
        names = []
        recognition_confidences = []

        for box in boxes:
            name, confidence = self.recognize_face(image, box)
            names.append(name)
            recognition_confidences.append(confidence)

        # Draw results
        result_image = image.copy()

        # Rest of the method remains unchanged

        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = box
            name = names[i]
            detect_conf = confidences[i]
            recog_conf = recognition_confidences[i]

            # Draw rectangle
            cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Add label
            label = f"{name}: {recog_conf:.2f}"
            cv2.putText(result_image, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Save if requested
        if save_output:
            output_path = os.path.splitext(image_path)[0] + "_recognized.jpg"
            cv2.imwrite(output_path, result_image)

        return result_image, boxes, names, recognition_confidences


def main():
    recognizer = FaceRecognizer()

    # Process image
    image_path = './data/detection/test2/test_image.jpg'
    result_image, boxes, names, confidences = recognizer.process_image(image_path, save_output=True)

    # Check if image processing was successful
    if result_image is None:
        print("Failed to process image. Check if the file exists.")
        return

    # Display results
    if len(boxes) == 0:
        print("No faces detected")
    else:
        print(f"Detected {len(boxes)} faces")
        for i, box in enumerate(boxes):
            print(f"Face {i + 1}: {box}, Identity: {names[i]}, Confidence: {confidences[i]:.2f}")

    # Display the image
    cv2.imshow("Face Recognition", result_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()


and if needed this is src.interface.detect_image

# src/inference/detect_image.py
import os
import cv2
import numpy as np
from ultralytics import YOLO


class FaceDetector:
    def __init__(self, model_path='./models/detection/face_detector/weights/best.pt'):
        self.model = YOLO(model_path)
        self.conf_threshold = 0.5

    def detect_faces(self, image_path):
        """
        Detect faces in an image.

        Args:
            image_path: Path to the image file

        Returns:
            boxes: List of bounding boxes in [x1, y1, x2, y2] format
            confidences: List of confidence scores
            image: Original image with detections
        """
        # Read the image
        image = cv2.imread(image_path)

        # Run inference
        results = self.model(image)

        # Extract detections
        boxes = []
        confidences = []

        for result in results:
            for box in result.boxes:
                if box.conf >= self.conf_threshold:
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    boxes.append([int(x1), int(y1), int(x2), int(y2)])
                    confidences.append(float(box.conf))

        return boxes, confidences, image

    def visualize_detections(self, image, boxes, confidences, save_path=None):
        """
        Draw bounding boxes on the image and optionally save it.
        """
        image_copy = image.copy()

        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = box
            conf = confidences[i]

            # Draw rectangle
            cv2.rectangle(image_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Add label
            label = f"Face: {conf:.2f}"
            cv2.putText(image_copy, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Save result if path provided
        if save_path is not None:
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            cv2.imwrite(save_path, image_copy)

        return image_copy


def main():
    detector = FaceDetector()

    # Example usage
    image_path = './data/detection/test_image.jpg'
    output_path = './data/detection/output_image.jpg'

    boxes, confidences, image = detector.detect_faces(image_path)

    if len(boxes) == 0:
        print("No faces detected")
    else:
        print(f"Detected {len(boxes)} faces")
        for i, box in enumerate(boxes):
            print(f"Face {i + 1}: {box}, Confidence: {confidences[i]:.2f}")

    # Visualize and save results
    result_image = detector.visualize_detections(image, boxes, confidences, output_path)

    # Display the image
    cv2.imshow("Detected Faces", result_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

I tried modifying image that is inserted into model. Searching internet and working with LLM's couldn't help me solve my problem. Any kind of guidance on how to resolve this problem will be helpful.

发布评论

评论列表(0)

  1. 暂无评论