I am writing a script that loads an image and we can draw bboxes on it by pressing the key B and then dragging the mouse. However for some reason the calculation of the bboxes coordinates (specially the Y coordinate) gets wrong.
I have managed to get a minimal reproducible code. Here it is
import sys
import os
from PyQt6.QtWidgets import (
QApplication,
QGraphicsView,
QGraphicsScene,
QGraphicsRectItem,
QPushButton,
QVBoxLayout,
QWidget,
QHBoxLayout,
QCheckBox,
QFileDialog,
)
from PyQt6.QtGui import QPen, QPixmap, QPainter, QColor
from PyQt6.QtCore import Qt, QEvent, QRectF, QPointF
class RectangleItem(QGraphicsRectItem):
def __init__(self, rect, parent=None):
super().__init__(rect, parent)
self.setPen(QPen(Qt.GlobalColor.green, 1, Qt.PenStyle.DashLine))
self.setFlags(
QGraphicsRectItem.GraphicsItemFlag.ItemIsMovable
| QGraphicsRectItem.GraphicsItemFlag.ItemIsSelectable
| QGraphicsRectItem.GraphicsItemFlag.ItemIsFocusable
)
class StickerApp(QWidget):
def __init__(self):
super().__init__()
self.setWindowTitle("Bounding Box Debugger")
self.resize(800, 600)
self.view = QGraphicsView()
self.scene = QGraphicsScene()
self.view.setScene(self.scene)
self.view.viewport().installEventFilter(self)
self.save_bboxes_checkbox = QCheckBox("Save Bounding Boxes")
self.save_bboxes_checkbox.setChecked(False)
self.save_button = QPushButton("Save Image")
self.save_button.clicked.connect(self.save_image)
button_layout = QVBoxLayout()
button_layout.addWidget(self.save_bboxes_checkbox)
button_layout.addWidget(self.save_button)
main_layout = QHBoxLayout()
main_layout.addWidget(self.view)
main_layout.addLayout(button_layout)
self.setLayout(main_layout)
self.current_mode = "select" # 'select' or 'draw'
self.drawing = False
self.start_pos = None
self.current_rect = None
self.bg_pixmap_item = None
self.load_background_image()
# self.draw_triangle() #<--- THIS SEEMS TO WORK!
def load_background_image(self):
file_path = "./HarpAkademileriTunnel.jpg"
if file_path:
self.input_folder = os.path.dirname(file_path)
self.loaded_image_path = file_path
pixmap = QPixmap(file_path)
if self.bg_pixmap_item:
self.scene.removeItem(self.bg_pixmap_item)
self.bg_pixmap_item = self.scene.addPixmap(pixmap)
self.bg_pixmap_item.setZValue(-1)
def draw_triangle(self):
pixmap = QPixmap(800, 600)
pixmap.fill(Qt.GlobalColor.white)
painter = QPainter(pixmap)
painter.setBrush(QColor(0, 0, 255))
painter.drawEllipse(QPointF(300, 400), 50, 50)
painter.setBrush(QColor(255, 0, 0)) # Red triangle
painter.drawPolygon(
[
QPointF(200, 100), # Top vertex
QPointF(100, 300), # Bottom left
QPointF(300, 300), # Bottom right
]
)
painter.end()
self.bg_pixmap_item = self.scene.addPixmap(pixmap)
self.bg_pixmap_item.setZValue(-1) # Keep background at bottom
def eventFilter(self, obj, event):
if obj is self.view.viewport() and self.current_mode == "draw":
if (
event.type() == QEvent.Type.MouseButtonPress
and event.button() == Qt.MouseButton.LeftButton
):
self.start_pos = self.view.mapToScene(event.pos())
self.drawing = True
self.current_rect = RectangleItem(
QRectF(self.start_pos, self.start_pos)
)
self.scene.addItem(self.current_rect)
return True
elif event.type() == QEvent.Type.MouseMove and self.drawing:
end_pos = self.view.mapToScene(event.pos())
self.current_rect.setRect(QRectF(self.start_pos, end_pos).normalized())
return True
elif event.type() == QEvent.Type.MouseButtonRelease and self.drawing:
self.drawing = False
return True
return super().eventFilter(obj, event)
def keyPressEvent(self, event):
if event.key() == Qt.Key.Key_B:
self.current_mode = "draw" if self.current_mode == "select" else "select"
self.view.viewport().setCursor(
Qt.CursorShape.CrossCursor
if self.current_mode == "draw"
else Qt.CursorShape.ArrowCursor
)
elif event.key() == Qt.Key.Key_Delete:
for item in self.scene.selectedItems():
self.scene.removeItem(item)
else:
super().keyPressEvent(event)
def save_image(self):
file_path, _ = QFileDialog.getSaveFileName(
self, "Save Image", "output.png", "PNG Files (*.png)"
)
if file_path:
image = self.view.grab()
image_width, image_height = image.width(), image.height()
print(f"Image w h {image_width} {image_height}")
image.save(file_path)
if self.save_bboxes_checkbox.isChecked():
bbox_data = []
scene_rect = self.scene.sceneRect()
scene_width, scene_height = scene_rect.width(), scene_rect.height()
print(f"Scene w h {scene_width} {scene_height}")
for item in self.scene.items():
if isinstance(item, RectangleItem):
rect = item.rect()
# norm_x = rect.x() / scene_width
# norm_y = rect.y() / scene_height
norm_x = (rect.x() + rect.width() / 2) / scene_width
norm_y = (rect.y() + rect.height() / 2) / scene_height
norm_w = rect.width() / scene_width
norm_h = rect.height() / scene_height
bbox_data.append(
f"0 {norm_x:.6f} {norm_y:.6f} {norm_w:.6f} {norm_h:.6f}"
)
x_center = (rect.x() + rect.width() / 2) / image_width
y_center = (rect.y() + rect.height() / 2) / image_height
norm_width = rect.width() / image_width
norm_height = rect.height() / image_height
bbox_data.append(
f"1 {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}"
)
with open(file_path.replace(".png", ".txt"), "w") as f:
f.write("\n".join(bbox_data))
if __name__ == "__main__":
app = QApplication(sys.argv)
window = StickerApp()
window.show()
sys.exit(app.exec())
To run it, just run python on it (I use a image, so you must replace this with a jpg you have) , then press B and draw the image. check the box and save the image.
If you do this, you will get an image and a text file. Then with this simple script
import cv2
image_path = "./debu2.png"
txt_path = "./debu2.txt"
colors = [ (0, 255, 0),(255, 0, 0)]
image = cv2.imread(image_path)
if image is None:
print("Error: Could not load image")
exit()
img_height, img_width = image.shape[:2]
print(f"({img_width},{img_height})")
# Read and process bounding box file
with open(txt_path, 'r') as f:
for line in f:
parts = line.strip().split()
if len(parts) != 5:
continue
# Parse values
class_id = parts[0]
c = int (class_id)
x_center = float(parts[1]) * img_width
y_center = float(parts[2]) * img_height
width = float(parts[3]) * img_width
height = float(parts[4]) * img_height
# Convert from center coordinates to corner coordinates
x1 = int(x_center - width/2)
y1 = int(y_center - height/2)
x2 = int(x_center + width/2)
y2 = int(y_center + height/2)
print(f"{x1},{y1},{x2},{y2}")
# Draw rectangle and label
cv2.rectangle(image, (x1, y1), (x2, y2), colors[c], 2)
cv2.putText(image, class_id, (x1, y1-5),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[c], 2)
# Display result
cv2.imshow("Bounding Box Visualization", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
I get
The results are the blue ones. You obviously see it is wrong.
However if in the first script you comment the self.load_background_image()
and uncomment the self.draw_triangle()
and run the same, you will see that this time it works.
As you can see the blue bboxes work here.
How can I correct the part where an image is loaded?
The original image is
I can see clearly that the script is modifying the size of the images which is undesirable. How can I correct this script