FAISS

Multimodal Graph Database with embedded RAG

The product nodes are embedded using CLIP (image + text) and stored in a FAISS index for efficient semantic similarity search. This allows retrieving the most relevant products for a given user query based on content similarity.

Code Snippet

import os
import re

import clip
import numpy as np
import torch
from PIL import Image

# Load CLIP once; ViT-B/32 is an assumption here, any CLIP variant works
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

def get_clip_embedding(row):
    # Remove the _number suffix from product_name (e.g. "shoe_3" -> "shoe")
    sanitized_name = re.sub(r'_\d+$', '', row['product_name'])
    text = f"{sanitized_name}.{row['category']}.{row['description']}"
    text_tokens = clip.tokenize([text]).to(device)

    # Strip the matching _number suffix from the image filename
    # (e.g. "shoe_3.jpg" -> "shoe.jpg")
    image_path = re.sub(r'(_\d+)(\.\w+)$', r'\2', row['image_path'])

    with torch.no_grad():
        text_features = model.encode_text(text_tokens)

        if os.path.exists(image_path):
            try:
                image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
                image_features = model.encode_image(image)
                # Fuse modalities by averaging the image and text embeddings
                combined = (image_features + text_features) / 2
                return combined.cpu().numpy()
            except Exception as e:
                print(f"Skipping image for {row['product_name']} due to error: {e}")
                return text_features.cpu().numpy()
        else:
            print(f"Image not found for {row['product_name']}, using text only.")
            return text_features.cpu().numpy()

# Stack the per-product vectors into an (N, d) float32 matrix, which FAISS requires
embeddings = np.vstack([get_clip_embedding(row) for _, row in df.iterrows()]).astype("float32")
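
The search helper below expects an index named index_with_ids that maps vectors back to product IDs. A minimal sketch of how such an index could be built from the embeddings above, assuming the DataFrame index serves as the product ID:

import faiss

d = embeddings.shape[1]          # CLIP ViT-B/32 produces 512-dim vectors
faiss.normalize_L2(embeddings)   # normalize so inner-product search behaves like cosine similarity

# Wrap a flat inner-product index in an IndexIDMap so each vector is stored
# under its product ID instead of its insertion order
index_with_ids = faiss.IndexIDMap(faiss.IndexFlatIP(d))
product_ids = np.array(df.index, dtype="int64")  # assumption: node IDs come from the DataFrame index
index_with_ids.add_with_ids(embeddings, product_ids)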

def search_faiss(query_embedding, k=5):
    # D holds the similarity scores, I the matched product IDs
    D, I = index_with_ids.search(query_embedding, k)
    return I[0].tolist()
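
At query time, the user query is embedded with the same CLIP text encoder before searching. A brief usage sketch; the query string is illustrative:

query = "red running shoes"
with torch.no_grad():
    query_features = model.encode_text(clip.tokenize([query]).to(device))
query_embedding = query_features.cpu().numpy().astype("float32")
faiss.normalize_L2(query_embedding)  # match the normalization applied at index time

top_product_ids = search_faiss(query_embedding, k=5)
print(top_product_ids)  # IDs of the 5 most similar products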