python - How to crop overhangs from merged images

I want to use Python and OpenCV to align two rectangular images then merge them into one. The rectangles will mostly overlap, but the resulting image will have overhangs. I need to obtain the largest rectangle where each pixel contains data from both originals then crop the outside. ChatGPT has produced code which aligns and merges them. However despite much "discussion" it doesn't crop the overhangs properly. The problem seems to be in "find_corners." The first drawing shows what I want that def to do, find the top-most, bottom-most, left-most and right-most corners of one image within the frame opencv stores it in. The second drawing shows that using the x and y coords of the corners, I use the 2nd smallest y value for the top limit of the final rectangle, the 2nd largest y value for the bottom etc. The dotted lines of those values outline the final rectangle. It is the largest upright rectangle that can be made from the image. The same has to be done for the other image and the smaller values of the two used so that the result only gets the overlapping area.

def find_corners(image) is supposed to find the coords of these corners

The dotted lines outline the largest usable area

#! /usr/bin/python3

import cv2
import numpy as np

def find_corners(image):

    # <<<<< This bit from ChatGPT doesn't seem to find the corners>>>>>

    """
    Finds the extreme corners of the valid image region (non-black pixels).
    """
    coords = np.column_stack(np.where(np.any(image > 0, axis=2)))
    if coords.size == 0:
        return None  # No valid pixels found
    
    bottom_y, bottom_x = coords[np.argmax(coords[:, 0])]
    right_y, right_x = coords[np.argmax(coords[:, 1])]
    top_y, top_x = coords[np.argmin(coords[:, 0])]
    left_y, left_x = coords[np.argmin(coords[:, 1])]
    print('Bottom', bottom_x, bottom_y, 'right', right_x, right_y, 'top', top_x, top_y, 'left',left_x, left_y)
    return bottom_x, bottom_y, right_x, right_y, top_x, top_y, left_x, left_y

def find_alignment(imageL, imageR):
    """
    Finds the rotation and vertical shift needed to align imageR to imageL using ECC.
    Only vertical shifting and rotation are allowed (no horizontal shift).
    """
    grayL = cv2.cvtColor(imageL, cv2.COLOR_BGR2GRAY)
    grayR = cv2.cvtColor(imageR, cv2.COLOR_BGR2GRAY)
    
    warp_matrix = np.eye(2, 3, dtype=np.float32)
    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 50, 1e-6)
    cc, warp_matrix = cv2.findTransformECC(grayL, grayR, warp_matrix, cv2.MOTION_AFFINE, criteria)
    
    angle = np.arctan2(warp_matrix[1, 0], warp_matrix[0, 0]) * (180.0 / np.pi)
    vertical_shift = int(warp_matrix[1, 2])
    warp_matrix[0, 2] = 0
    
    alignedR = cv2.warpAffine(imageR, warp_matrix, (imageR.shape[1], imageR.shape[0]))
    
    print(f"ECC Alignment → Rotation: {angle:.2f}° | Vertical Shift: {vertical_shift} pixels")
    
    return imageL, alignedR

def get_overlap_region(imageL, imageR):
    # Compute the largest overlapping area after alignment.

    left_corners = find_corners(imageL)
    right_corners = find_corners(imageR)

    left_B_x, left_B_y, left_R_x, left_R_y, left_T_x, left_T_y, left_L_x, left_L_y = left_corners
    right_B_x, right_B_y, right_R_x, right_R_y, right_T_x, right_T_y, right_L_x, right_L_y = right_corners
        # [edited] to simplify this bit and make it more robust
        # for left-image make a list of the y values, sort them
        # choose the 2 inner ones for minimum y (top boundary line) and maximum y (bottom boundary line)
    left_y = [left_L_y, left_T_y, left_R_y, left_B_y]; left_y.sort(); lefttop = left_y[1]; leftbot = left_y[2]
    left_x = [left_L_x, left_T_x, left_R_x, left_B_x]; left_x.sort(); leftleft = left_x[1]; leftright = left_x[2]

        # for right image
    right_y = [right_L_y, right_T_y, right_R_y, right_B_y]; right_y.sort(); righttop = right_y[1]; rightbot = right_y[2]
    right_x = [right_L_x, right_T_x, right_R_x, right_B_x]; right_x.sort(); rightleft = right_x[1]; rightright = right_x[2]
    
    # Find the innermost values from the 2 images
    top_limit = max(lefttop, righttop)
    bottom_limit = min(leftbot, rightbot)
    left_limit = max(leftleft, rightleft)
    right_limit = min(leftright, rightright)
   
    return imageL[top_limit:bottom_limit, left_limit:right_limit], imageR[top_limit:bottom_limit, left_limit:right_limit]


def create_anaglyph(imageL, imageR):
    if imageL is None or imageR is None:
        print("Error: Cropped images are invalid.")
        return None
    
    red_channel = imageL[:, :, 2]
    green_channel = imageR[:, :, 1]
    blue_channel = imageR[:, :, 0]
    return cv2.merge((blue_channel, green_channel, red_channel))

if __name__ == "__main__":
    file = input("Enter: ")
    fileL = file + 'L.jpg'
    fileR = file + 'R.jpg'
    
    imageL = cv2.imread(fileL, cv2.IMREAD_COLOR)
    imageR = cv2.imread(fileR, cv2.IMREAD_COLOR)
    
    if imageL is None or imageR is None:
        print("Error: Could not load one or both images.")
        exit(1)
    
    imageL_aligned, imageR_aligned = find_alignment(imageL, imageR)
    imageL_cropped, imageR_cropped = get_overlap_region(imageL_aligned, imageR_aligned)
    
    if imageL_cropped is None or imageR_cropped is None:
        print("Error: Unable to generate anaglyph due to invalid cropping region.")
        exit(1)
    
    final_height = min(imageL_cropped.shape[0], imageR_cropped.shape[0])
    final_width = min(imageL_cropped.shape[1], imageR_cropped.shape[1])
    
    imageL_cropped = imageL_cropped[:final_height, :final_width]
    imageR_cropped = imageR_cropped[:final_height, :final_width]
    
    anaglyph_image = create_anaglyph(imageL_cropped, imageR_cropped)
    if anaglyph_image is not None:
        cv2.imwrite(file+"-anaglyph.jpg", anaglyph_image)

def find_corners(image) is supposed to find the coords of these corners

The dotted lines outline the largest usable area

#! /usr/bin/python3

import cv2
import numpy as np

def find_corners(image):

    # <<<<< This bit from ChatGPT doesn't seem to find the corners>>>>>

    """
    Finds the extreme corners of the valid image region (non-black pixels).
    """
    coords = np.column_stack(np.where(np.any(image > 0, axis=2)))
    if coords.size == 0:
        return None  # No valid pixels found
    
    bottom_y, bottom_x = coords[np.argmax(coords[:, 0])]
    right_y, right_x = coords[np.argmax(coords[:, 1])]
    top_y, top_x = coords[np.argmin(coords[:, 0])]
    left_y, left_x = coords[np.argmin(coords[:, 1])]
    print('Bottom', bottom_x, bottom_y, 'right', right_x, right_y, 'top', top_x, top_y, 'left',left_x, left_y)
    return bottom_x, bottom_y, right_x, right_y, top_x, top_y, left_x, left_y

def find_alignment(imageL, imageR):
    """
    Finds the rotation and vertical shift needed to align imageR to imageL using ECC.
    Only vertical shifting and rotation are allowed (no horizontal shift).
    """
    grayL = cv2.cvtColor(imageL, cv2.COLOR_BGR2GRAY)
    grayR = cv2.cvtColor(imageR, cv2.COLOR_BGR2GRAY)
    
    warp_matrix = np.eye(2, 3, dtype=np.float32)
    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 50, 1e-6)
    cc, warp_matrix = cv2.findTransformECC(grayL, grayR, warp_matrix, cv2.MOTION_AFFINE, criteria)
    
    angle = np.arctan2(warp_matrix[1, 0], warp_matrix[0, 0]) * (180.0 / np.pi)
    vertical_shift = int(warp_matrix[1, 2])
    warp_matrix[0, 2] = 0
    
    alignedR = cv2.warpAffine(imageR, warp_matrix, (imageR.shape[1], imageR.shape[0]))
    
    print(f"ECC Alignment → Rotation: {angle:.2f}° | Vertical Shift: {vertical_shift} pixels")
    
    return imageL, alignedR

def get_overlap_region(imageL, imageR):
    # Compute the largest overlapping area after alignment.

    left_corners = find_corners(imageL)
    right_corners = find_corners(imageR)

    left_B_x, left_B_y, left_R_x, left_R_y, left_T_x, left_T_y, left_L_x, left_L_y = left_corners
    right_B_x, right_B_y, right_R_x, right_R_y, right_T_x, right_T_y, right_L_x, right_L_y = right_corners
        # [edited] to simplify this bit and make it more robust
        # for left-image make a list of the y values, sort them
        # choose the 2 inner ones for minimum y (top boundary line) and maximum y (bottom boundary line)
    left_y = [left_L_y, left_T_y, left_R_y, left_B_y]; left_y.sort(); lefttop = left_y[1]; leftbot = left_y[2]
    left_x = [left_L_x, left_T_x, left_R_x, left_B_x]; left_x.sort(); leftleft = left_x[1]; leftright = left_x[2]

        # for right image
    right_y = [right_L_y, right_T_y, right_R_y, right_B_y]; right_y.sort(); righttop = right_y[1]; rightbot = right_y[2]
    right_x = [right_L_x, right_T_x, right_R_x, right_B_x]; right_x.sort(); rightleft = right_x[1]; rightright = right_x[2]
    
    # Find the innermost values from the 2 images
    top_limit = max(lefttop, righttop)
    bottom_limit = min(leftbot, rightbot)
    left_limit = max(leftleft, rightleft)
    right_limit = min(leftright, rightright)
   
    return imageL[top_limit:bottom_limit, left_limit:right_limit], imageR[top_limit:bottom_limit, left_limit:right_limit]


def create_anaglyph(imageL, imageR):
    if imageL is None or imageR is None:
        print("Error: Cropped images are invalid.")
        return None
    
    red_channel = imageL[:, :, 2]
    green_channel = imageR[:, :, 1]
    blue_channel = imageR[:, :, 0]
    return cv2.merge((blue_channel, green_channel, red_channel))

if __name__ == "__main__":
    file = input("Enter: ")
    fileL = file + 'L.jpg'
    fileR = file + 'R.jpg'
    
    imageL = cv2.imread(fileL, cv2.IMREAD_COLOR)
    imageR = cv2.imread(fileR, cv2.IMREAD_COLOR)
    
    if imageL is None or imageR is None:
        print("Error: Could not load one or both images.")
        exit(1)
    
    imageL_aligned, imageR_aligned = find_alignment(imageL, imageR)
    imageL_cropped, imageR_cropped = get_overlap_region(imageL_aligned, imageR_aligned)
    
    if imageL_cropped is None or imageR_cropped is None:
        print("Error: Unable to generate anaglyph due to invalid cropping region.")
        exit(1)
    
    final_height = min(imageL_cropped.shape[0], imageR_cropped.shape[0])
    final_width = min(imageL_cropped.shape[1], imageR_cropped.shape[1])
    
    imageL_cropped = imageL_cropped[:final_height, :final_width]
    imageR_cropped = imageR_cropped[:final_height, :final_width]
    
    anaglyph_image = create_anaglyph(imageL_cropped, imageR_cropped)
    if anaglyph_image is not None:
        cv2.imwrite(file+"-anaglyph.jpg", anaglyph_image)

Share Improve this question edited Mar 23 at 17:05 Christoph Rackwitz 15.9k5 gold badges39 silver badges51 bronze badges asked Mar 21 at 20:35 ChrisOfBristol 14 bronze badges

1 some pics, if you would. this is important. -- sounds like you want the largest axis-aligned inscribed rectangle to some polygon. -- we've got a bunch of questions with answers on that family of problems. – Christoph Rackwitz Commented Mar 21 at 21:09
1 This question is similar to: largest inscribed rectangle in arbitrary polygon. If you believe it’s different, please edit the question, make it clear how it’s different and/or how the answers on that question are not helpful for your problem. – Christoph Rackwitz Commented Mar 21 at 21:10
yes, the family of problems spans from "trivial" to "impossible". I think your task might be trivial. most likely it's not trivial but solvable with reasonable effort. I'm trying to scare up what I have in mind for the "mask" I'm asking for... i.sstatic/fRYRvW6t.png and i.sstatic/rUB8g3lk.png (source) does that look like something you might have? could you create pictures like that, but for your data? take the computed alignment, apply to fully white images instead. could be done with BGRA. – Christoph Rackwitz Commented Mar 21 at 21:35
Having studied the link, it looks like one is solving a much more complicated problem - a random shape, and in the other the final rectangle is not necessarily axis-aligned (I assume this means upright). I've simplified the code and done some testing, and my bit works, it's ChatGPT's "def find_corners(image)" that doesn't. I'm not familiar enough with OpenCV work out what it's trying to do. I only need to solve this for one image, I've put rather a lot of irrelevant code in as I wasn't quite sure which bits were. – ChrisOfBristol Commented Mar 22 at 17:50
if you only need this for one image, like literally one and done, then I'd really recommend pulling it into a photo editor (mspaint, "Paint.NET", gimp, whatever) and doing it manually, with your best judgment and repeated pulling on the corners (and maybe eyeing the "pixel area" calculation that some can show you). any decent photo editor lets you zoom in to adjust a selection to single pixel increments with ease. -- your drawings look like you're asking for a bounding box, a circumscribed axis-aligned rectangle. that's trivial. I previously thought you were looking for an inscribed one. – Christoph Rackwitz Commented Mar 22 at 22:36

| Show 7 more comments

1 Answer 1

Sorted by: Reset to default 0

As christoph-rackwitz suggests in his link, I want an inscribed rectangle . Thanks Chris for reading my question thoroughly then discussing it in a polite and unpatronising manner.
I eventually thought of some better instructions for ChatGPT and it came up with a good answer. I've included it in the app and tested it on several image pairs and it crops them effectively. Here is the relevant code:

def find_corners(image):
  # Finds the four extreme corners of the valid image region (non-black pixels).
  coords = np.column_stack(np.where(np.any(image > 0, axis=2)))
    
  top_left = coords[np.argmin(np.sum(coords, axis=1))]
  bot_left = coords[np.argmax(coords[:, 0] - coords[:, 1])]
  bot_right = coords[np.argmax(np.sum(coords, axis=1))]
  top_right = coords[np.argmax(coords[:, 1] - coords[:, 0])]

  return top_left, bot_left, bot_right, top_right

def get_overlap_region(imageL, imageR):
    #Compute the largest overlapping area after alignment.

    left_corners = find_corners(imageL)
    right_corners = find_corners(imageR)
       
    left_TL, left_BL, left_BR, left_TR = left_corners
    right_TL, right_BL, right_BR, right_TR = right_corners
    
    top_limit = max(left_TL[0], left_TR[0], right_TL[0], right_TR[0])
    bot_limit = min(left_BL[0], left_BR[0], right_BL[0], right_BR[0])
    left_limit = max(left_TL[1], left_BL[1], right_TL[1], right_BL[1])
    right_limit = min(left_TR[1], left_BR[1], right_TR[1], right_BR[1])
          
    return imageL[top_limit:bot_limit, left_limit:right_limit], imageR[top_limit:bot_limit, left_limit:right_limit]

-----

    imageLaligned, imageRaligned = find_alignment(imageL, imageR)
    imageLcropped, imageRcropped = get_overlap_region(imageLaligned, imageRaligned)
      
    cropH = min(imageLcropped.shape[0], imageRcropped.shape[0])
    cropW = min(imageLcropped.shape[1], imageRcropped.shape[1])
    
    imageLcropped = imageLcropped[:cropH, :cropW]
    imageRcropped = imageRcropped[:cropH, :cropW]

科技改变生活-雨落星辰 - 所有的伟大,都源于一个勇敢的开始

python - How to crop overhangs from merged images - Stack Overflow

1 Answer 1

与本文相关的文章

评论列表(0)