Aayush Bajaj
z5362216
27/06/2025

I had to redo this notebook because the original version was 35MB 😔

In [110]:
import cv2
import matplotlib.pyplot as plt

img1 = cv2.imread("img1.png")
img2 = cv2.imread("img2.png")

img1 = cv2.resize(img1, (800, 800))
img2 = cv2.resize(img2, (800, 800))

fig, axes = plt.subplots(1,2, figsize=(5,8))
axes = axes.ravel()

axes[0].imshow(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB))
axes[0].set_title("image 1 original")
axes[1].imshow(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB))    
axes[1].set_title("image 2 original")
axes[0].axis('off')
axes[1].axis('off')

plt.tight_layout()
plt.show()
print(img1.shape, img2.shape)
[figure: image 1 original | image 2 original]
(800, 800, 3) (800, 800, 3)

a. keypoints

In [111]:
sift = cv2.SIFT_create()
keypoints1, descriptors1 = sift.detectAndCompute(img1, None)
keypoints2, descriptors2 = sift.detectAndCompute(img2, None)

print(f"{len(keypoints1)} keypoints in image 1")
print(f"{len(keypoints2)} keypoints in image 2")
img1_kp = cv2.drawKeypoints(img1, keypoints1, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
img2_kp = cv2.drawKeypoints(img2, keypoints2, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
fig, axes = plt.subplots(1,2, figsize=(12,6))
axes = axes.ravel()
axes[0].imshow(cv2.cvtColor(img1_kp, cv2.COLOR_BGR2RGB))
axes[0].set_title("image 1 keypoints")
axes[1].imshow(cv2.cvtColor(img2_kp, cv2.COLOR_BGR2RGB))    
axes[1].set_title("image 2 keypoints")
axes[0].axis('off')
axes[1].axis('off')

plt.show()
2906 keypoints in image 1
3007 keypoints in image 2
[figure: image 1 keypoints | image 2 keypoints]

b. top 20

In [136]:
# reduce number of keypoints to best 20:
keypoints1_top20 = sorted(keypoints1, key=lambda x: -(x.response))[:20]
keypoints2_top20 = sorted(keypoints2, key=lambda x: -(x.response))[:20]

# inflate the keypoint size so the rich-keypoint circles are clearly visible
for kp in keypoints1_top20:
    kp.size = 200
for kp in keypoints2_top20:
    kp.size = 200

img1_kp = cv2.drawKeypoints(img1, keypoints1_top20, None, (0,0,255), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
img2_kp = cv2.drawKeypoints(img2, keypoints2_top20, None, (0,0,255), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

fig, axes = plt.subplots(1, 2, figsize=(10, 5))
axes[0].imshow(cv2.cvtColor(img1_kp, cv2.COLOR_BGR2RGB))
axes[0].set_title("image 1 – top 20 keypoints")
axes[0].axis('off')

axes[1].imshow(cv2.cvtColor(img2_kp, cv2.COLOR_BGR2RGB))
axes[1].set_title("image 2 – top 20 keypoints")
axes[1].axis('off')

plt.tight_layout()
plt.show()
[figure: image 1 – top 20 keypoints | image 2 – top 20 keypoints]

b. varying contrast threshold

In [ ]:
# varying parameter contrastThreshold
sift = cv2.SIFT_create(contrastThreshold=0.11)
keypoints1, descriptors1 = sift.detectAndCompute(img1, None)
keypoints2, descriptors2 = sift.detectAndCompute(img2, None)
keypoints1_top20 = sorted(keypoints1, key=lambda x: -(x.response))[:20]
keypoints2_top20 = sorted(keypoints2, key=lambda x: -(x.response))[:20]

for kp in keypoints1_top20:
    kp.size = 200
for kp in keypoints2_top20:
    kp.size = 200
img1_kp = cv2.drawKeypoints(img1, keypoints1_top20, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
img2_kp = cv2.drawKeypoints(img2, keypoints2_top20, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
fig, axes = plt.subplots(1,2, figsize=(10,5))
axes = axes.ravel()
axes[0].imshow(cv2.cvtColor(img1_kp, cv2.COLOR_BGR2RGB))
axes[0].set_title("image 1 keypoints with contrastThreshold=0.01")
axes[1].imshow(cv2.cvtColor(img2_kp, cv2.COLOR_BGR2RGB))    
axes[1].set_title("image 2 keypoints with contrastThreshold=0.01")
axes[0].axis('off')
axes[1].axis('off')
Out[ ]:
(np.float64(-0.5), np.float64(799.5), np.float64(799.5), np.float64(-0.5))
No description has been provided for this image

b. discussion

  • raising the contrast threshold keeps only the strongest keypoints, which seems like a better basis for eventually stitching the panorama (a quick sweep is sketched below)
  • though the professor's code doesn't seem to use the same keypoint style as mine... have I done something wrong?
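
To back up the first bullet, a minimal sketch (assuming img1 from the first cell) that sweeps contrastThreshold and watches the keypoint count fall:

In [ ]:
# hypothetical sweep: higher contrastThreshold -> fewer, stronger keypoints
for ct in [0.04, 0.08, 0.11, 0.15]:
    sift_ct = cv2.SIFT_create(contrastThreshold=ct)
    kps, _ = sift_ct.detectAndCompute(img1, None)
    print(f"contrastThreshold={ct}: {len(kps)} keypoints in image 1")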

task 2

In [137]:
# major refactor to mimic profs keypoint aesthetic

import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from skimage.util import random_noise

# transforms
def scale_105(img):
    # uniform 5% upscale in each dimension
    return cv2.resize(img, None, fx=1.05, fy=1.05)

def rotate60(img):
    h, w = img.shape[:2]
    # rotate 60 degrees clockwise about the image centre
    M = cv2.getRotationMatrix2D((w // 2, h // 2), -60, 1.0)
    return cv2.warpAffine(img, M, (w, h))

def add_salt_pepper(img):
    # 2% salt-and-pepper noise via skimage, rescaled back to uint8
    noisy = random_noise(img / 255.0, mode='s&p', amount=0.02)
    return (noisy * 255).astype(np.uint8)

# draw top 20 keypoints

def draw_keypoints_manual(ax, image, keypoints, top_k=20):
    # rank by response * size so large, strong features win (earlier cells sorted by response alone)
    indices = np.argsort([-kp.response * kp.size for kp in keypoints])[:top_k]
    top_kps = [keypoints[i] for i in indices]

    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    for kp in top_kps:
        circ = Circle(kp.pt, kp.size / 2, color='red', linewidth=3, fill=False)
        ax.add_patch(circ)
    ax.axis('off')

    return top_kps

def process_and_plot(img1, img2, transform_fn, title1, title2, match_title):
    img1_t, img2_t = transform_fn(img1), transform_fn(img2)
    sift = cv2.SIFT_create(contrastThreshold=0.12)
    kp1, desc1 = sift.detectAndCompute(img1_t, None)
    kp2, desc2 = sift.detectAndCompute(img2_t, None)

    print(f"{match_title}")
    print(f"Image 1: {len(kp1)} keypoints detected")
    print(f"Image 2: {len(kp2)} keypoints detected")

    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    top_kp1 = draw_keypoints_manual(axs[0], img1_t, kp1)
    axs[0].set_title(title1)

    top_kp2 = draw_keypoints_manual(axs[1], img2_t, kp2)
    axs[1].set_title(title2)

    print(f"Image 1: {len(top_kp1)} keypoints drawn")
    print(f"Image 2: {len(top_kp2)} keypoints drawn\n")

    plt.tight_layout()
    plt.show()

img1_crop = cv2.imread("img1.png")
img2_crop = cv2.imread("img2.png")
img1_crop = cv2.resize(img1_crop, (800, 800))
img2_crop = cv2.resize(img2_crop, (800, 800))

process_and_plot(img1_crop, img2_crop, lambda x: x, "Original Image 1", "Original Image 2", "Matches: Original")
process_and_plot(img1_crop, img2_crop, scale_105, "Scaled Image 1", "Scaled Image 2", "Matches: Scaled")
process_and_plot(img1_crop, img2_crop, rotate60, "Rotated Image 1", "Rotated Image 2", "Matches: Rotated")
process_and_plot(img1_crop, img2_crop, add_salt_pepper, "Noisy Image 1", "Noisy Image 2", "Matches: Noisy")
Matches: Original
Image 1: 1079 keypoints detected
Image 2: 1012 keypoints detected
Image 1: 20 keypoints drawn
Image 2: 20 keypoints drawn

[figure: top-20 keypoints – original images]
Matches: Scaled
Image 1: 1088 keypoints detected
Image 2: 979 keypoints detected
Image 1: 20 keypoints drawn
Image 2: 20 keypoints drawn

[figure: top-20 keypoints – scaled images]
Matches: Rotated
Image 1: 1077 keypoints detected
Image 2: 926 keypoints detected
Image 1: 20 keypoints drawn
Image 2: 20 keypoints drawn

[figure: top-20 keypoints – rotated images]
Matches: Noisy
Image 1: 1339 keypoints detected
Image 2: 1233 keypoints detected
Image 1: 20 keypoints drawn
Image 2: 20 keypoints drawn

[figure: top-20 keypoints – noisy images]

task 2 discussion

  • the custom circles are much nicer! :D
  • anyway, the top keypoints remain (mostly) identical across the transforms, so SIFT stands true to its name: Scale-Invariant Feature Transform (a rough repeatability check is sketched below).
    • I notice, however, that the rotation transform causes some of the keypoints to change.
    • the car stays the same throughout.
    • scaling also seems to have affected some of the keypoint choices.
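
A rough, hypothetical repeatability check for the rotation case (assuming img1_crop and rotate60 from the cell above): map image 1's top-20 keypoints through the same rotation matrix and count how many land within a few pixels of a keypoint detected in the rotated image.

In [ ]:
# hypothetical repeatability check for the 60-degree rotation
h, w = img1_crop.shape[:2]
M = cv2.getRotationMatrix2D((w // 2, h // 2), -60, 1.0)

sift = cv2.SIFT_create(contrastThreshold=0.12)
kp_orig, _ = sift.detectAndCompute(img1_crop, None)
kp_rot, _ = sift.detectAndCompute(rotate60(img1_crop), None)

top = sorted(kp_orig, key=lambda k: -k.response)[:20]
pts = np.array([k.pt for k in top])          # (20, 2) original locations
mapped = pts @ M[:, :2].T + M[:, 2]          # expected locations after rotation
det = np.array([k.pt for k in kp_rot])       # detections in the rotated image

# a keypoint "survives" if a rotated-image detection lies within 3 px
d = np.linalg.norm(mapped[:, None, :] - det[None, :, :], axis=2)
print(f"{int((d.min(axis=1) < 3).sum())} / 20 top keypoints re-detected after rotation")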

conclusion

SIFT is pretty good, but not entirely robust to affine transformations: the top keypoints shuffle a little under rotation and scaling!

task 3

a. matching with BFMatcher and the kNN method

In [ ]:
def get_best_matches(desc1, desc2, k=2, ratio=0.75, max_matches=20):
    bf = cv2.BFMatcher(cv2.NORM_L2)
    matches = bf.knnMatch(desc1, desc2, k=k)
    good = []
    for pair in matches:
        if len(pair) < 2:  # knnMatch can return fewer than k neighbours
            continue
        m, n = pair
        if m.distance < ratio * n.distance:  # Lowe's ratio test
            good.append(m)
    good = sorted(good, key=lambda x: x.distance)[:max_matches]
    return good

def draw_matches_custom(img1, img2, kp1, kp2, desc1, desc2, title):
    matches = get_best_matches(desc1, desc2)
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    vis = np.zeros((max(h1, h2), w1 + w2, 3), dtype=np.uint8)
    vis[:h1, :w1] = img1
    vis[:h2, w1:] = img2

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
    for m in matches:
        pt1 = tuple(np.round(kp1[m.queryIdx].pt).astype(int))
        pt2 = tuple(np.round(kp2[m.trainIdx].pt).astype(int) + np.array([w1, 0]))
        ax.plot([pt1[0], pt2[0]], [pt1[1], pt2[1]], color='blue', linewidth=2)
    ax.set_title(title)
    ax.axis('off')
    plt.show()

def match_and_plot(img1, img2, transform_fn, match_title):
    img1_t, img2_t = transform_fn(img1), transform_fn(img2)
    sift = cv2.SIFT_create(contrastThreshold=0.12)
    kp1, desc1 = sift.detectAndCompute(img1_t, None)
    kp2, desc2 = sift.detectAndCompute(img2_t, None)

    indices1 = sorted(range(len(kp1)), key=lambda i: -kp1[i].response)[:20]
    indices2 = sorted(range(len(kp2)), key=lambda i: -kp2[i].response)[:20]
    top_kp1 = [kp1[i] for i in indices1]
    top_desc1 = desc1[indices1]
    top_kp2 = [kp2[i] for i in indices2]
    top_desc2 = desc2[indices2]

    draw_matches_custom(img1_t, img2_t, top_kp1, top_kp2, top_desc1, top_desc2, match_title)

match_and_plot(img1_crop, img2_crop, lambda x: x, "Matches: Original")
match_and_plot(img1_crop, img2_crop, scale_105, "Matches: Scaled")
match_and_plot(img1_crop, img2_crop, rotate60, "Matches: Rotated")
match_and_plot(img1_crop, img2_crop, add_salt_pepper, "Matches: Noisy")
[figures: matched keypoint pairs for the original, scaled, rotated, and noisy image pairs]
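
As a cross-check on the ratio test, a minimal sketch (assuming descriptors1 and descriptors2 from the contrast-threshold cell above) of BFMatcher's mutual-best-match mode, which keeps a match only if each descriptor is the other's nearest neighbour:

In [ ]:
# hypothetical alternative to the ratio test: cross-checked brute force
bf_cc = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
cc = sorted(bf_cc.match(descriptors1, descriptors2), key=lambda m: m.distance)
print(f"{len(cc)} cross-checked matches; best distance {cc[0].distance:.1f}")
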
In [135]:
def ransac_stitch_new(img1, img2, kp1, kp2, desc1, desc2, threshold=5.0):
    matches = get_best_matches(desc1, desc2)
    if len(matches) < 4:
        print("Not enough matches for RANSAC")
        return None, None, None
    print("Matches found:", len(matches))

    src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, threshold)
    if M is None:
        return None, None, None

    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    
    # get corners of img1 and transform them to img2's coordinate system
    corners_img1 = np.float32([[0, 0], [w1, 0], [w1, h1], [0, h1]]).reshape(-1, 1, 2)
    transformed_corners = cv2.perspectiveTransform(corners_img1, M)
    
    # get bounding box containing both images
    all_corners = np.concatenate([
        transformed_corners.reshape(-1, 2),
        np.float32([[0, 0], [w2, 0], [w2, h2], [0, h2]])
    ])
    
    x_min, y_min = np.int32(np.floor(all_corners.min(axis=0)))
    x_max, y_max = np.int32(np.ceil(all_corners.max(axis=0)))
    
    # translate so the whole mosaic lands in positive coordinates
    translation_dist = [-x_min, -y_min]
    H_translation = np.array([[1, 0, translation_dist[0]],
                              [0, 1, translation_dist[1]],
                              [0, 0, 1]])
    
    output_width = x_max - x_min
    output_height = y_max - y_min
    
    # warp img1 with adjusted homography
    result = cv2.warpPerspective(img1, H_translation.dot(M), (output_width, output_height))
    
    # insert img2
    img2_offset_x = -x_min
    img2_offset_y = -y_min
    
    # overlay img2 directly (no blending)
    result[img2_offset_y:img2_offset_y+h2, img2_offset_x:img2_offset_x+w2] = img2
    
    # transform corners for boundary drawing
    final_corners_img1 = cv2.perspectiveTransform(corners_img1, H_translation.dot(M))
    corners_img2 = np.float32([[img2_offset_x, img2_offset_y], 
                              [img2_offset_x+w2, img2_offset_y], 
                              [img2_offset_x+w2, img2_offset_y+h2], 
                              [img2_offset_x, img2_offset_y+h2]]).reshape(-1, 1, 2)
    
    return result, final_corners_img1, corners_img2

def draw_stitching_boundaries(result, corners_img1, corners_img2):
    """surely this is bonus marks; draws the forbidden boundaries around stitched images"""
    result_with_boundaries = result.copy()
    
    # img1; red
    if corners_img1 is not None: # so much undefined behaviour in this black box
        pts1 = np.int32(corners_img1).reshape((-1, 1, 2))
        cv2.polylines(result_with_boundaries, [pts1], True, (0, 0, 255), 3)
    
    # img2; green
    if corners_img2 is not None:
        pts2 = np.int32(corners_img2).reshape((-1, 1, 2))
        cv2.polylines(result_with_boundaries, [pts2], True, (0, 255, 0), 3)
    
    return result_with_boundaries

def ransac_and_plot_2(img1, img2, kp1, kp2, desc1, desc2):
    stitched, corners1, corners2 = ransac_stitch_new(img1, img2, kp1, kp2, desc1, desc2)
    
    if stitched is not None:
        # finished product to export
        plt.figure(figsize=(15, 8))
        plt.subplot(1, 2, 1)
        plt.imshow(cv2.cvtColor(stitched, cv2.COLOR_BGR2RGB))
        plt.title("naked finished")
        plt.axis('off')
        
        # bounded images
        result_with_boundaries = draw_stitching_boundaries(stitched, corners1, corners2)
        plt.subplot(1, 2, 2)
        plt.imshow(cv2.cvtColor(result_with_boundaries, cv2.COLOR_BGR2RGB))
        plt.title("homography with boundaries!")
        plt.axis('off')
        
        plt.tight_layout()
        plt.savefig("stitched_result.png")
        plt.show()
        
        return stitched
    else:
        print("oh no, stitching failed")
        return None

stitched_result = ransac_and_plot_2(img1_crop, img2_crop, keypoints1, keypoints2, descriptors1, descriptors2)
Matches found: 20
[figure: stitched panorama, plain and with boundaries drawn]
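
Finally, a small hypothetical sanity check (reusing get_best_matches and the keypoints/descriptors from above): ask cv2.findHomography for its RANSAC inlier mask and report how many of the 20 matches actually support the homography.

In [ ]:
# hypothetical sanity check: count the RANSAC inliers behind the homography
matches = get_best_matches(descriptors1, descriptors2)
src = np.float32([keypoints1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
dst = np.float32([keypoints2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
H, inlier_mask = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
print(f"{int(inlier_mask.sum())} / {len(matches)} matches are RANSAC inliers")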