Summary

This work builds a visual search system that retrieves images based on how similar they look to a given query image. It works by converting images into numeric representations that capture their visual features. These representations are then compared to find the most visually similar images from a collection. The system demonstrates how visual similarity can be used for tasks like finding duplicates, recommending related content, or organizing large image datasets. This end-to-end example shows the core steps from image processing to search and result visualization.

Python functions and data files needed to run this notebook are available via this link.

Table of Contents

  • 1  Introduction
    • 1.1  How Visual Search Works
    • 1.2  Representation Learning for Visual Search Systems
      • 1.2.1  Tools & Frameworks for Implementation
  • 2  Image Data Processing & Feature Engineering
    • 2.1  Image Preprocessing (Feature Engineering)
  • 3  User & User-Image Interaction Data Engineering
    • 3.1  User Metadata
    • 3.2  User-Image Interaction Features
    • 3.3  Feature Engineering for User-Image Interactions
  • 4  Example: Implementation of Visual Search System
    • 4.1  Fashion Product Images Dataset
    • 4.2  Augmentation for Test set
    • 4.3  Pre-trained ResNet Model
    • 4.4  Image transformation
    • 4.5  Image Embedding for Train Set
    • 4.6  Example for Prediction
    • 4.7  Get Ground Truth Images for Test set
  • 5  Offline Evaluation Metrics
    • 5.1  Mean Reciprocal Rank (MRR)
    • 5.2  Recall@K
    • 5.3  Precision@K
    • 5.4  Mean Average Precision (mAP)
    • 5.5  Normalized Discounted Cumulative Gain (nDCG)
  • 6  Appendix

Reference: https://medium.com/analytics-vidhya/how-to-build-a-visual-search-engine-64046e58ad2f

Introduction¶

Visual search is a form of similarity search in which the items being searched are images or objects within images. It is commonly used for tasks such as finding similar products on e-commerce websites or identifying rare scenarios for training self-driving cars. The challenge in visual search, and the reason it remains an active area of research, is that users typically prioritize semantic similarity over visual similarity. For example, if I search for an image of a dog, I expect to see other images of dogs in the results, rather than images that merely share similar colors or pixel patterns.

How Visual Search Works¶

To build a visual search engine, the first step is to create an index—a data structure that maps search queries to relevant results. The search engine scans this index to identify the most similar results to a given query.

Since visual search relies on similarity matching, we must define how similarity is measured between images. This is achieved by converting images into numerical representations, known as embedding vectors, which capture various features. These embeddings are typically generated using heuristics or deep learning-based feature extraction models in computer vision. The similarity between images is then quantified using the inner product of their embedding vectors.

During a search, the engine converts a query image into an embedding vector and searches the index for the most similar vectors—those with the highest inner product relative to the query’s embedding. A brute-force search scales linearly with the number of indexed images, making it impractical for large-scale datasets containing billions of images. To address this, we use approximate nearest neighbor search (ANNS), which balances speed and accuracy effectively.

Here's a synthetic example of an index for a visual search engine. We have a small dataset of images with feature vectors extracted using a deep learning model (e.g., ResNet, ViT). We build an index that maps these feature vectors to image IDs.

| Image ID | Feature Vector (Embeddings) | Metadata (Optional) |
|----------|------------------------------|---------------------|
| img_001 | [0.12, 0.85, 0.43, ..., 0.91] | "Red car, outdoor, daytime" |
| img_002 | [0.78, 0.22, 0.56, ..., 0.34] | "Blue truck, highway, night" |
| img_003 | [0.44, 0.67, 0.89, ..., 0.12] | "Black dog, park, running" |
| img_004 | [0.31, 0.77, 0.23, ..., 0.65] | "Sunset over ocean, waves" |
| img_005 | [0.99, 0.05, 0.48, ..., 0.78] | "Person with sunglasses, city" |
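
To make the search step concrete, here is a minimal, self-contained sketch of brute-force retrieval over a toy index like the one above (the vectors and IDs are illustrative, not taken from a real dataset): every indexed embedding is compared with the query via an inner product and the highest-scoring images are returned. The FAISS index used later in this notebook replaces this linear scan with optimized (optionally approximate) search.

import numpy as np

# Toy index: five images with 4-dimensional embeddings (illustrative values only)
image_ids = ["img_001", "img_002", "img_003", "img_004", "img_005"]
index_vectors = np.array([
    [0.12, 0.85, 0.43, 0.91],
    [0.78, 0.22, 0.56, 0.34],
    [0.44, 0.67, 0.89, 0.12],
    [0.31, 0.77, 0.23, 0.65],
    [0.99, 0.05, 0.48, 0.78],
], dtype=np.float32)

# Query embedding (in practice it comes from the same feature extractor as the index)
query = np.array([0.10, 0.80, 0.40, 0.90], dtype=np.float32)

# Brute-force similarity: inner product between the query and every indexed vector
scores = index_vectors @ query

# Rank image IDs by similarity (highest inner product first) and keep the top matches
top_k = 3
for rank, idx in enumerate(np.argsort(-scores)[:top_k], start=1):
    print(f"{rank}. {image_ids[idx]} (score={scores[idx]:.3f})")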
In [1]:
#%pip install torch torchvision torchaudio
#%pip install faiss-cpu --no-build-isolation --no-cache-dir

Representation Learning for Visual Search Systems¶

In a visual search system, the goal is to retrieve images based on a query image (instead of text). Representation learning is crucial for such systems because it allows images to be transformed into compact, meaningful feature vectors that can be compared efficiently.

Step 1: Feature Extraction

  • Use a pretrained CNN (e.g., ResNet, EfficientNet) or a self-supervised model (e.g., SimCLR, DINO) to extract embeddings.

Step 2: Indexing & Storage

  • Store image embeddings in a vector database like FAISS, Annoy, or Milvus for fast similarity search.

Step 3: Similarity Search

  • Use cosine similarity or Euclidean distance to find the closest embeddings to the query image.

Step 4: Retrieval & Ranking

  • Retrieve the top-K similar images and rank them based on relevance (see the sketch after this list).
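
The four steps map onto only a few lines of code. Below is a minimal sketch under stated assumptions: a pretrained ResNet-18 from torchvision (version 0.13 or later, for the weights argument) as the feature extractor, cosine similarity via a FAISS inner-product index on L2-normalized embeddings, and placeholder paths (gallery_images/, query.jpg) that are not part of this notebook's dataset. The full worked example in Section 4 uses L2 distance instead.

import os
import numpy as np
import torch
import faiss
from PIL import Image
import torchvision.models as models
import torchvision.transforms as transforms

# Step 1: feature extractor -- a pretrained ResNet-18 without its classification head
backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
backbone = torch.nn.Sequential(*list(backbone.children())[:-1]).eval()

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def embed(path):
    img = preprocess(Image.open(path).convert("RGB")).unsqueeze(0)
    with torch.no_grad():
        return backbone(img).squeeze().numpy().astype("float32")

# Step 2: index all gallery embeddings (cosine similarity = inner product on unit vectors)
gallery_paths = sorted(os.path.join("gallery_images", f) for f in os.listdir("gallery_images"))
gallery = np.stack([embed(p) for p in gallery_paths])
faiss.normalize_L2(gallery)
index = faiss.IndexFlatIP(gallery.shape[1])
index.add(gallery)

# Step 3: embed the query image and search the index
query = embed("query.jpg")[None, :]
faiss.normalize_L2(query)
scores, ids = index.search(query, 5)

# Step 4: rank and report the top-K results
for rank, (i, s) in enumerate(zip(ids[0], scores[0]), start=1):
    print(f"{rank}. {gallery_paths[i]} (cosine similarity = {s:.3f})")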

Tools & Frameworks for Implementation¶

✅ TensorFlow/Keras or PyTorch – Train deep learning models.
✅ FAISS / Annoy / Milvus – Fast nearest neighbor search.
✅ OpenCV / PIL – Image preprocessing.
✅ Hugging Face Transformers (for CLIP) – Use multimodal search models (a minimal embedding sketch follows).
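
For reference, here is a minimal sketch of extracting an image embedding with CLIP through Hugging Face Transformers; the checkpoint name openai/clip-vit-base-patch32 and the file example.jpg are illustrative choices, and this notebook itself uses a ResNet backbone instead.

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

# Load a CLIP checkpoint and its matching preprocessor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model.eval()

# Preprocess an image and extract its embedding
image = Image.open("example.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    features = model.get_image_features(**inputs)  # shape: (1, 512) for this checkpoint

# L2-normalize so that inner products behave as cosine similarities
features = features / features.norm(dim=-1, keepdim=True)
print(features.shape)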

Image Data Processing & Feature Engineering¶

  • Data Collection
    • Collect images from multiple sources (e.g., e-commerce websites, social media, product catalogs).
    • Store images efficiently in cloud storage (AWS S3, Google Cloud Storage) or databases (PostgreSQL, MongoDB).

Image Preprocessing (Feature Engineering)¶

Before feeding images into a deep learning model, they need to be processed:

  1. Resizing

🔹 Why? Standardizes input dimensions for deep learning models.
🔹 How?

  • CNNs typically require fixed-size inputs (e.g., 224×224 for ResNet and most ViT variants).
  • Use libraries like OpenCV, PIL, or torchvision for resizing.
from PIL import Image
image = Image.open("image.jpg").resize((224, 224))
  2. Normalization

🔹 Why? Ensures pixel values are within a specific range (e.g., [0,1] or [-1,1]) for stable training.
🔹 How?

  • Convert pixel values from [0, 255] to [0,1] or standardize with mean and std deviation.
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),  # Converts to [0,1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ResNet normalization
])
  3. Data Augmentation

🔹 Why? Increases dataset size and diversity, reducing overfitting.
🔹 How? Apply transformations like rotation, flipping, color jitter, and cropping.

transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor()
])
  4. Feature Extraction (Embeddings)
  • Use a pre-trained CNN (e.g., ResNet, ViT, CLIP) to extract deep image features.
  • Store embeddings for fast retrieval.
import torch
import torchvision.models as models

model = models.resnet18(pretrained=True)
model = torch.nn.Sequential(*list(model.children())[:-1])  # Remove last classification layer

# Convert image to embedding
def get_embedding(image):
    image = transform(image).unsqueeze(0)
    with torch.no_grad():
        embedding = model(image).squeeze().numpy()
    return embedding

User & User-Image Interaction Data Engineering¶

Apart from image content, user interaction signals can improve search relevance.

User Metadata¶

| Feature | Description |
|---------|-------------|
| User ID | Unique identifier |
| Preferences | Categories of interest |
| Search History | Past queries and clicks |
| Location & Device | Used for personalization |

User-Image Interaction Features¶

| Feature | Description |
|---------|-------------|
| Clicks | How often a user clicks an image |
| Dwell Time | Time spent viewing an image |
| Purchases | Whether the user bought the item |
| Likes/Saves | Whether the user favorited the image |

Feature Engineering for User-Image Interactions¶

  1. One-Hot Encoding: Convert categorical features (e.g., location, device type) into vectors.
  2. Embedding Learning: Learn user embeddings (e.g., via Word2Vec or deep learning).
  3. Interaction Scores: Compute engagement scores (e.g., a weighted sum of clicks, likes, and dwell time); see the sketch below.
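
A minimal sketch of ideas 1 and 3 on a made-up interaction log (column names and weights are illustrative assumptions; the embedding learning of idea 2 is omitted):

import pandas as pd

# Hypothetical user-image interaction log (values are illustrative only)
interactions = pd.DataFrame({
    "user_id":    ["u1", "u1", "u2", "u3"],
    "image_id":   ["img_001", "img_002", "img_001", "img_003"],
    "device":     ["mobile", "desktop", "mobile", "tablet"],
    "clicks":     [3, 1, 0, 5],
    "dwell_time": [12.0, 3.5, 0.0, 40.0],  # seconds
    "liked":      [1, 0, 0, 1],
    "purchased":  [0, 0, 0, 1],
})

# 1. One-hot encode categorical features such as device type
encoded = pd.get_dummies(interactions, columns=["device"], prefix="device")

# 3. Interaction score: a weighted sum of engagement signals (weights are arbitrary here)
weights = {"clicks": 1.0, "dwell_time": 0.1, "liked": 2.0, "purchased": 5.0}
encoded["engagement"] = sum(encoded[col] * w for col, w in weights.items())

print(encoded[["user_id", "image_id", "engagement"]])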

Example: Implementation of Visual Search System¶

In [2]:
#pip cache purge  # Clears cached versions
#pip install imgaug
#pip install numpy==1.26.4 pandas scikit-learn matplotlib
#conda install -c pytorch faiss-cpu
#conda install pytorch cpuonly -c pytorch
In [3]:
import os
import re
import pandas as pd
import imgaug.augmenters as iaa
import cv2
import numpy as np
In [ ]:
 

Fashion Product Images Dataset¶

In [4]:
#pip install kagglehub
#import kagglehub
#
## Download latest version
#path = kagglehub.dataset_download("paramaggarwal/fashion-product-images-dataset")
#
#print("Path to dataset files:", path)

The Fashion Product Images Dataset is a large-scale dataset designed for visual search, classification, and recommendation tasks in fashion e-commerce. It contains images of fashion products, along with metadata such as category labels, product descriptions, and sometimes attributes like color, pattern, or brand.

Key Features

  • High-quality product images: Typically scraped from e-commerce websites.
  • Multiple categories: Clothing, footwear, accessories, and more.
  • Ground truth labels: Product category, subcategory, brand, and sometimes attributes (e.g., sleeve length, fabric type).
  • Perfect for visual search: Can be used to develop image-based retrieval systems (e.g., finding similar fashion items).
In [5]:
# Load images from a folder and extract embeddings
image_folder = "fashion-product-images-dataset/training_images"  
image_paths = [os.path.join(image_folder, img) for img in os.listdir(image_folder)]
In [6]:
# load test folder
image_folder_test = "fashion-product-images-dataset/test_images"  
image_paths_test = [os.path.join(image_folder_test, img) for img in os.listdir(image_folder_test)]

Augmentation for Test set¶

Augment the test-set images and save the augmented copies into the training folder; these copies later serve as the ground-truth matches for each test query.

In [7]:
def load_images_from_folder(folder):
    images, filenames = [], []
    valid_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
    for filename in os.listdir(folder):
        if filename.lower().endswith(valid_exts):
            path = os.path.join(folder, filename)
            img = cv2.imread(path)
            if img is not None:
                images.append(img)
                filenames.append(filename)
            else:
                print(f"❌ Failed to load: {path}")
    return images, filenames

def apply_augmentation(images):
    augmenter = iaa.Sequential([
        iaa.Fliplr(0.5),
        iaa.Affine(rotate=(-20, 20)),
        iaa.GaussianBlur(sigma=(0.5, 2)),
        iaa.AdditiveGaussianNoise(scale=(5, 25)),
        iaa.Multiply((0.8, 1.2)),
    ])
    return augmenter.augment_images(images)

def save_images(folder, images, filenames, prefix="aug_"):
    os.makedirs(folder, exist_ok=True)
    for img, fname in zip(images, filenames):
        save_path = os.path.join(folder, prefix + fname)
        cv2.imwrite(save_path, img)

def main(input_folder, output_folder):
    images, filenames = load_images_from_folder(input_folder)
    if not images:
        print("❌ No images loaded.")
        return
    print(f"✅ Loaded {len(images)} images. Augmenting...")
    aug_images = apply_augmentation(images)
    save_images(output_folder, aug_images, filenames)
    print(f"✅ Augmented images saved to: {output_folder}")

if __name__ == "__main__":
    input_folder = image_folder_test
    output_folder = image_folder
    main(input_folder, output_folder)
✅ Loaded 100 images. Augmenting...
✅ Augmented images saved to: fashion-product-images-dataset/training_images

Pre-trained ResNet Model¶

In [8]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
import faiss
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt

# Load a pre-trained ResNet model (without the classification head)
model = models.resnet18(pretrained=True)
model = torch.nn.Sequential(*list(model.children())[:-1])  # Remove last layer
model.eval()  # Set model to evaluation mode
C:\Users\mrezv\anaconda3\envs\faiss-env\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
  warnings.warn(
C:\Users\mrezv\anaconda3\envs\faiss-env\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
  warnings.warn(msg)
Out[8]:
Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (5): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (6): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (7): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (8): AdaptiveAvgPool2d(output_size=(1, 1))
)

Image transformation¶

In [9]:
# Image transformation (resize, normalize, and convert to tensor)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

Image Embedding for Train Set¶

In [10]:
# Function to extract embeddings
def get_embedding(image_path):
    image = Image.open(image_path).convert("RGB")
    image = transform(image).unsqueeze(0)  # Add batch dimension
    with torch.no_grad():
        embedding = model(image).squeeze().numpy()  # Extract features
    return embedding.flatten()  # Convert to 1D array

# Get image embedding for training set
embeddings = np.array([get_embedding(img) for img in image_paths])

# Get image embedding for test set
embeddings_test = np.array([get_embedding(img) for img in image_paths_test])

FAISS (Facebook AI Similarity Search) is a high-performance library for efficient similarity search and clustering of dense vectors. It is widely used for nearest neighbor search in large-scale datasets, especially for applications like image retrieval, recommendation systems, and NLP embeddings.

How FAISS Works

  1. Indexing:

    • FAISS stores high-dimensional vectors and allows fast similarity searches.
    • It supports different types of indexes, such as:
      • IndexFlatL2: Exact search with L2 (Euclidean) distance.
      • IndexIVFFlat: Approximate search using inverted file lists for scalability.
      • IndexHNSW: Uses Hierarchical Navigable Small World (HNSW) graphs for efficiency.
  2. Search:

    • Given a query vector, FAISS quickly retrieves the K most similar vectors using approximate or exact nearest neighbor techniques.
    • It supports both Euclidean distance (L2) and inner product (IP), which is equivalent to cosine similarity when the vectors are L2-normalized.
  3. Optimization for Speed:

    • FAISS is optimized for GPU acceleration, multi-threading, and vectorized operations.
    • It's much faster than brute-force nearest neighbor search (e.g., using sklearn.neighbors.KNeighborsClassifier).

FAISS vs. KNN (KNeighborsClassifier)

| Feature | FAISS (IndexFlatL2) | KNeighborsClassifier |
|---------------|----------------------|----------------------|
| Speed | Faster for large datasets | Slower for large datasets |
| Scalability | Handles millions of vectors | Limited scalability |
| Approximation | Supports approximate search (IVF, HNSW) | Always exact |
| Distance Metrics | L2 (Euclidean), Inner Product (Cosine) | Euclidean, Manhattan, etc. |
| GPU Support | Yes | No |

When to Use FAISS?

✅ When dealing with large-scale datasets (millions of vectors).
✅ When fast retrieval is needed (e.g., real-time visual search).
✅ When you can trade some accuracy for speed (e.g., using approximate search); a minimal approximate-index sketch follows.
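
To illustrate the accuracy-for-speed trade-off, here is a minimal sketch of an approximate FAISS index (IndexIVFFlat) on random vectors; the dimensions, nlist, and nprobe values are arbitrary assumptions. The worked example below sticks with the exact IndexFlatL2.

import numpy as np
import faiss

d, n = 512, 20_000                         # embedding dimension, number of vectors
rng = np.random.default_rng(0)
xb = rng.random((n, d), dtype=np.float32)  # stand-in for real image embeddings
xq = rng.random((5, d), dtype=np.float32)  # a few query vectors

# Approximate index: vectors are assigned to nlist clusters ("inverted lists")
nlist = 256
quantizer = faiss.IndexFlatL2(d)           # coarse quantizer used for clustering
index = faiss.IndexIVFFlat(quantizer, d, nlist)
index.train(xb)                            # learn the cluster centroids
index.add(xb)

# At query time only nprobe clusters are scanned: larger nprobe = slower but more accurate
index.nprobe = 16
distances, ids = index.search(xq, 5)
print(ids)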

In [11]:
import random
#from sklearn.neighbors import KNeighborsClassifier

# Assuming embeddings is a numpy array of shape (N, D), and labels contains the image labels.
dimension = embeddings.shape[1]

# Create and build the FAISS (Facebook AI Similarity Search) index
index = faiss.IndexFlatL2(dimension)  # L2 distance for Euclidean similarity
index.add(embeddings)  # Add embeddings to the FAISS index
In [12]:
# FAISS expects np.float32 arrays; passing float64 (NumPy's default) can crash
# or cause undefined behavior. The ResNet embeddings are already float32, so this
# cast is only a safeguard (ideally it would be applied before index.add above).
embeddings = embeddings.astype('float32')
In [13]:
# Number of neighbors to retrieve
k = 5  # Retrieve 5 nearest neighbors for each query

# Perform search for a query embedding (example query_embedding is from a test image)
query_embedding = embeddings[0].reshape(1, -1).astype('float32')  # Example: querying the first image
In [14]:
print("Embeddings shape:", embeddings.shape)
print("Embeddings dtype:", embeddings.dtype)
Embeddings shape: (3432, 512)
Embeddings dtype: float32
In [15]:
distances, indices = index.search(query_embedding, k)

# indices will give us the indices of the 5 nearest neighbors
nearest_neighbors = indices[0]

Example for Prediction¶

In [16]:
for i in range(10):
    # Query a test image and find similar images in the training index
    query_image_path = image_paths_test[i]  # Change this to test different queries
    query_embedding = np.array([get_embedding(query_image_path)])
    
    distances, indices = index.search(query_embedding, k)
    
    # indices gives the positions of the k nearest neighbors in the index
    nearest_neighbors = indices[0]
    
    # Display the query image
    fig, axes = plt.subplots(1, 1, figsize=(10, 3))
    axes.imshow(Image.open(query_image_path))
    axes.set_title("Query Image")
    
    # Display the retrieved matches (j avoids shadowing the outer loop variable i)
    fig, axes = plt.subplots(1, k, figsize=(15, 5))
    for j, idx in enumerate(nearest_neighbors):
        axes[j].imshow(Image.open(image_paths[idx]))
        axes[j].set_title(f"Match {j+1}")
    
    plt.show()
[Output: for each of the 10 test queries, the query image and its top-5 retrieved matches are displayed.]

Get Ground Truth Images for Test set¶

In [17]:
ground_truth = np.zeros((len(image_paths_test), k))
for itest in range(len(image_paths_test)):
    # Query each test image and retrieve its k nearest neighbors from the index
    query_image_path = image_paths_test[itest]
    query_embedding = np.array([get_embedding(query_image_path)])
    
    distances, indices = index.search(query_embedding, k)

    # A retrieved image counts as relevant if it is one of the augmented copies
    # (saved with the "aug_" prefix when the test set was augmented above)
    for i, idx in enumerate(indices[0]):
        if "aug_" in image_paths[idx]:
            ground_truth[itest, i] = 1
In [18]:
ground_truth[:10]
Out[18]:
array([[0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.]])

Offline Evaluation Metrics¶

Reference: https://amitness.com/posts/information-retrieval-evaluation

Evaluating a visual search system based on the k nearest images requires ranking-based metrics that assess how well the retrieved images match the ground truth. Here's how you can evaluate it using different approaches:


Mean Reciprocal Rank (MRR)¶

  • MRR measures how quickly the first relevant image appears in the retrieved list.

  • It is calculated as:

    $MRR = \frac{1}{N} \sum_{i=1}^{N} \frac{1}{rank_i}$

    where $rank_i$ is the position of the first relevant image for query $i$.

A schematic illustration is available at https://amitness.com/posts/information-retrieval-evaluation.

Computation:

In [19]:
def mean_reciprocal_rank(y_true):
    reciprocal_ranks = []
    for row in y_true:
        ranks = np.where(row == 1)[0]
        if ranks.size > 0:
            reciprocal_ranks.append(1.0 / (ranks[0] + 1))  # ranks are 0-based
        else:
            reciprocal_ranks.append(0.0)
    return np.mean(reciprocal_ranks)
In [20]:
mrr = mean_reciprocal_rank(ground_truth)
print(f"mean_reciprocal_rank: {mrr:.4f}")
mean_reciprocal_rank: 0.4260
In [ ]:
 

Recall@K¶

  • Measures the proportion of queries where at least one relevant image appears in the top K retrieved images (since each query here has exactly one relevant image, this matches the standard definition of recall).
  • Formula: $ Recall@K = \frac{\text{# of queries with at least 1 relevant image in top-K}}{\text{Total queries}} $

Schematic illustration: https://amitness.com/posts/information-retrieval-evaluation

Computation:

In [21]:
def recall_at_k(y_true, k):
    """
    y_true: numpy array of shape (n_queries, top_k) with binary labels (1=relevant, 0=not)
    Assumes each query has exactly one relevant item.
    """
    clipped = y_true[:, :k]
    return np.mean(np.sum(clipped, axis=1))  # equivalent to mean of hit rate at K


rec = recall_at_k(ground_truth, k)
print(f"Recall@{k}: {rec:.2f}")
Recall@5: 0.56

Precision@K¶

  • Measures how many of the top K retrieved images are relevant.
  • Formula:

$ Precision@K = \frac{\text{# of relevant images in top-K}}{K} $

Schematic illustration: https://amitness.com/posts/information-retrieval-evaluation

Computation:

In [22]:
def precision_at_k(y_true, k):
    """
    y_true: numpy array of shape (n_queries, top_k) with binary labels (1=relevant, 0=not)
    k: cutoff rank
    """
    clipped = y_true[:, :k]
    return np.mean(np.sum(clipped, axis=1) / k)

p_at_k = precision_at_k(ground_truth, k)
print(f"Precision@{k}: {p_at_k:.2f}")
Precision@5: 0.11

Mean Average Precision (mAP)¶

  • Computes Average Precision (AP) per query and then averages over all queries.
  • AP is computed as: $ AP = \frac{1}{\text{number of relevant items}} \sum_{i} Precision@i \times rel_i $, where $rel_i = 1$ if the item at rank $i$ is relevant and $0$ otherwise.

Schematic illustration: https://amitness.com/posts/information-retrieval-evaluation

Computation:

In [23]:
def average_precision(row):
    precisions = []
    num_hits = 0
    for i, val in enumerate(row):
        if val == 1:
            num_hits += 1
            precisions.append(num_hits / (i + 1))
    return np.mean(precisions) if precisions else 0.0

def mean_average_precision(y_true):
    return np.mean([average_precision(row) for row in y_true])

print("mean_average_precision:", mean_average_precision(ground_truth))
mean_average_precision: 0.4230000000000001

Normalized Discounted Cumulative Gain (nDCG)¶

  • Considers both relevance and ranking position, penalizing relevant images appearing lower in the list.
  • DCG formula:

$ DCG@K = \sum_{i=1}^{K} \frac{rel_i}{\log_2(i+1)} $

  • nDCG is DCG normalized by the ideal DCG (IDCG), which is computed by sorting relevant images in the best possible order.

Schematic illustrations: https://amitness.com/posts/information-retrieval-evaluation

Computation:

In [24]:
def dcg(row):
    return np.sum([
        rel / np.log2(idx + 2)  # +2 because idx starts at 0
        for idx, rel in enumerate(row)
    ])

def ndcg(row):
    ideal_row = sorted(row, reverse=True)
    idcg = dcg(ideal_row)
    return dcg(row) / idcg if idcg > 0 else 0.0

def mean_ndcg(y_true):
    return np.mean([ndcg(row) for row in y_true])

ndcg_score = mean_ndcg(ground_truth)

print(f"nDCG: {ndcg_score:.4f}")
nDCG: 0.4554

Appendix¶

In [25]:
#train_image_name = []
#for i_image in image_paths:
#    # Extract filename without extension
#    filename = os.path.splitext(os.path.basename(i_image))[0]
#
#    # Extract numeric part using regex
#    match = re.search(r'\d+', filename)
#    number = match.group() if match else None
#    train_image_name.append(int(number))
##
#test_image_name = []
#for i_image in image_paths_test:
#    # Extract filename without extension
#    filename = os.path.splitext(os.path.basename(i_image))[0]
#
#    # Extract numeric part using regex
#    match = re.search(r'\d+', filename)
#    number = match.group() if match else None
#    test_image_name.append(int(number))
In [ ]:
 
In [26]:
#styles = pd.read_csv("./fashion-product-images-dataset/styles.csv", on_bad_lines='skip')
#ground_truth_test = []
#for i in range(len(test_image_name)):
#    season = styles[styles.id==test_image_name[i]].season.tolist()[0]
#    gender = styles[styles.id==test_image_name[i]].gender.tolist()[0]
#    articleType = styles[styles.id==test_image_name[i]].articleType.tolist()[0]
#    masterCategory = styles[styles.id==test_image_name[i]].masterCategory.tolist()[0]
#    subCategory = styles[styles.id==test_image_name[i]].subCategory.tolist()[0]
#    baseColour = styles[styles.id==test_image_name[i]].baseColour.tolist()[0]
#    usage = styles[styles.id==test_image_name[i]].usage.tolist()[0]
#    productDisplayName = styles[styles.id==test_image_name[i]].productDisplayName.tolist()[0]
#    
#    id_ = styles[(styles.season==season) &
#        (styles.gender==gender) &
#        (styles.articleType==articleType) &
#       (styles.masterCategory==masterCategory) &
#       (styles.subCategory==subCategory) &
#       (styles.baseColour==baseColour) &
#       (styles.productDisplayName==productDisplayName)]
#    ground_truth_test.append(id_.id.tolist())