python - Error in calculating bbox coordinate Y when uploading a background image

I am writing a script that loads an image and we can draw bboxes on it by pressing the key B and then dragging the mouse. However for some reason the calculation of the bboxes coordinates (specially the Y coordinate) gets wrong.

I have managed to get a minimal reproducible code. Here it is

import sys
import os
from PyQt6.QtWidgets import (
    QApplication,
    QGraphicsView,
    QGraphicsScene,
    QGraphicsRectItem,
    QPushButton,
    QVBoxLayout,
    QWidget,
    QHBoxLayout,
    QCheckBox,
    QFileDialog,
)
from PyQt6.QtGui import QPen, QPixmap, QPainter, QColor
from PyQt6.QtCore import Qt, QEvent, QRectF, QPointF


class RectangleItem(QGraphicsRectItem):
    def __init__(self, rect, parent=None):
        super().__init__(rect, parent)
        self.setPen(QPen(Qt.GlobalColor.green, 1, Qt.PenStyle.DashLine))
        self.setFlags(
            QGraphicsRectItem.GraphicsItemFlag.ItemIsMovable
            | QGraphicsRectItem.GraphicsItemFlag.ItemIsSelectable
            | QGraphicsRectItem.GraphicsItemFlag.ItemIsFocusable
        )


class StickerApp(QWidget):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("Bounding Box Debugger")
        self.resize(800, 600)

        self.view = QGraphicsView()
        self.scene = QGraphicsScene()
        self.view.setScene(self.scene)
        self.view.viewport().installEventFilter(self)

        self.save_bboxes_checkbox = QCheckBox("Save Bounding Boxes")
        self.save_bboxes_checkbox.setChecked(False)

        self.save_button = QPushButton("Save Image")
        self.save_button.clicked.connect(self.save_image)

        button_layout = QVBoxLayout()
        button_layout.addWidget(self.save_bboxes_checkbox)
        button_layout.addWidget(self.save_button)

        main_layout = QHBoxLayout()
        main_layout.addWidget(self.view)
        main_layout.addLayout(button_layout)
        self.setLayout(main_layout)

        self.current_mode = "select"  # 'select' or 'draw'
        self.drawing = False
        self.start_pos = None
        self.current_rect = None

        self.bg_pixmap_item = None

        self.load_background_image()
        # self.draw_triangle()   #<--- THIS SEEMS TO WORK!

    def load_background_image(self):
        file_path = "./HarpAkademileriTunnel.jpg"
        if file_path:
            self.input_folder = os.path.dirname(file_path)
            self.loaded_image_path = file_path
            pixmap = QPixmap(file_path)
            if self.bg_pixmap_item:
                self.scene.removeItem(self.bg_pixmap_item)
            self.bg_pixmap_item = self.scene.addPixmap(pixmap)
            self.bg_pixmap_item.setZValue(-1) 



    def draw_triangle(self):
        pixmap = QPixmap(800, 600)
        pixmap.fill(Qt.GlobalColor.white)

        painter = QPainter(pixmap)
        painter.setBrush(QColor(0, 0, 255))
        painter.drawEllipse(QPointF(300, 400), 50, 50)

        painter.setBrush(QColor(255, 0, 0))  # Red triangle
        painter.drawPolygon(
            [
                QPointF(200, 100),  # Top vertex
                QPointF(100, 300),  # Bottom left
                QPointF(300, 300),  # Bottom right
            ]
        )
        painter.end()

        self.bg_pixmap_item = self.scene.addPixmap(pixmap)
        self.bg_pixmap_item.setZValue(-1)  # Keep background at bottom

    def eventFilter(self, obj, event):
        if obj is self.view.viewport() and self.current_mode == "draw":
            if (
                event.type() == QEvent.Type.MouseButtonPress
                and event.button() == Qt.MouseButton.LeftButton
            ):
                self.start_pos = self.view.mapToScene(event.pos())
                self.drawing = True
                self.current_rect = RectangleItem(
                    QRectF(self.start_pos, self.start_pos)
                )
                self.scene.addItem(self.current_rect)
                return True
            elif event.type() == QEvent.Type.MouseMove and self.drawing:
                end_pos = self.view.mapToScene(event.pos())
                self.current_rect.setRect(QRectF(self.start_pos, end_pos).normalized())
                return True
            elif event.type() == QEvent.Type.MouseButtonRelease and self.drawing:
                self.drawing = False
                return True
        return super().eventFilter(obj, event)

    def keyPressEvent(self, event):
        if event.key() == Qt.Key.Key_B:
            self.current_mode = "draw" if self.current_mode == "select" else "select"
            self.view.viewport().setCursor(
                Qt.CursorShape.CrossCursor
                if self.current_mode == "draw"
                else Qt.CursorShape.ArrowCursor
            )
        elif event.key() == Qt.Key.Key_Delete:
            for item in self.scene.selectedItems():
                self.scene.removeItem(item)
        else:
            super().keyPressEvent(event)

    def save_image(self):
        file_path, _ = QFileDialog.getSaveFileName(
            self, "Save Image", "output.png", "PNG Files (*.png)"
        )
        if file_path:
            image = self.view.grab()
            image_width, image_height = image.width(), image.height()
            print(f"Image w h {image_width} {image_height}")
            image.save(file_path)

            if self.save_bboxes_checkbox.isChecked():
                bbox_data = []
                scene_rect = self.scene.sceneRect()
                scene_width, scene_height = scene_rect.width(), scene_rect.height()
                print(f"Scene w h {scene_width} {scene_height}")
                for item in self.scene.items():
                    if isinstance(item, RectangleItem):
                        rect = item.rect()
                        # norm_x = rect.x() / scene_width
                        # norm_y = rect.y() / scene_height
                        norm_x = (rect.x() + rect.width() / 2) / scene_width
                        norm_y = (rect.y() + rect.height() / 2) / scene_height
                        norm_w = rect.width() / scene_width
                        norm_h = rect.height() / scene_height
                        bbox_data.append(
                            f"0 {norm_x:.6f} {norm_y:.6f} {norm_w:.6f} {norm_h:.6f}"
                        )

                        x_center = (rect.x() + rect.width() / 2) / image_width
                        y_center = (rect.y() + rect.height() / 2) / image_height
                        norm_width = rect.width() / image_width
                        norm_height = rect.height() / image_height

                        bbox_data.append(
                            f"1 {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}"
                        )

                with open(file_path.replace(".png", ".txt"), "w") as f:
                    f.write("\n".join(bbox_data))


if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = StickerApp()
    window.show()
    sys.exit(app.exec())

To run it, just run python on it (I use a image, so you must replace this with a jpg you have) , then press B and draw the image. check the box and save the image.

If you do this, you will get an image and a text file. Then with this simple script

import cv2
image_path = "./debu2.png"
txt_path = "./debu2.txt"

colors = [ (0, 255, 0),(255, 0, 0)]

image = cv2.imread(image_path)
if image is None:
    print("Error: Could not load image")
    exit()

img_height, img_width = image.shape[:2]
print(f"({img_width},{img_height})")

# Read and process bounding box file
with open(txt_path, 'r') as f:
    for line in f:
        parts = line.strip().split()
        if len(parts) != 5:
            continue
            
        # Parse values
        class_id = parts[0]
        c = int (class_id)
        x_center = float(parts[1]) * img_width
        y_center = float(parts[2]) * img_height
        width = float(parts[3]) * img_width
        height = float(parts[4]) * img_height

        # Convert from center coordinates to corner coordinates
        x1 = int(x_center - width/2)
        y1 = int(y_center - height/2)
        x2 = int(x_center + width/2)
        y2 = int(y_center + height/2)

        print(f"{x1},{y1},{x2},{y2}")

        # Draw rectangle and label
        cv2.rectangle(image, (x1, y1), (x2, y2), colors[c], 2)
        cv2.putText(image, class_id, (x1, y1-5), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[c], 2)
        

# Display result
cv2.imshow("Bounding Box Visualization", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

I get

The results are the blue ones. You obviously see it is wrong.

However if in the first script you comment the self.load_background_image() and uncomment the self.draw_triangle() and run the same, you will see that this time it works.

As you can see the blue bboxes work here.

How can I correct the part where an image is loaded?

The original image is

I can see clearly that the script is modifying the size of the images which is undesirable. How can I correct this script

科技改变生活-雨落星辰 - 所有的伟大,都源于一个勇敢的开始

python - Error in calculating bbox coordinate Y when uploading a background image - Stack Overflow

与本文相关的文章

评论列表(0)