# Spaces status (page header captured with this listing): Runtime error
import os

import numpy as np
import torch
from datasets import load_dataset, concatenate_datasets
from sentence_transformers import SentenceTransformer
from torchvision import transforms

from indexer import Indexer
from models.encoder import Encoder
# Sentence encoder used to embed titles.
model = SentenceTransformer('intfloat/multilingual-e5-base')

# Project image encoder, restored from a local checkpoint onto the CPU.
encoder = Encoder()
encoder.load_state_dict(torch.load('./models/encoder.bin', map_location=torch.device('cpu')))
# The encoder is only used for inference below; eval mode disables
# training-time behaviour of layers such as dropout / batch norm, if any.
encoder.eval()

# Merge both splits so that a single row index addresses the whole corpus.
dataset = load_dataset("Ransaka/youtube_recommendation_data", token=os.environ.get('HF'))
dataset = concatenate_datasets([dataset['train'], dataset['test']])

# Precomputed vectors. map_location mirrors the checkpoint load above so the
# files still deserialize on a CPU-only host if they were saved from a GPU.
latent_data = torch.load("data/latent_data_final.bin", map_location=torch.device('cpu'))
embeddings = torch.load("data/embeddings.bin", map_location=torch.device('cpu'))
def row_wise_normalize_and_concatenate(array1, array2):
    """L2-normalise each row of both inputs and join them column-wise.

    Args:
        array1: 2-D array-like, one vector per row.
        array2: 2-D array-like with the same number of rows as ``array1``.

    Returns:
        numpy.ndarray: the normalised ``array1`` and ``array2`` side by side
        (concatenated along ``axis=1``). Every non-zero row has unit Euclidean
        length within its own half; all-zero rows stay zero instead of
        turning into NaN.
    """
    return np.concatenate((_unit_rows(array1), _unit_rows(array2)), axis=1)


def _unit_rows(array):
    """Scale every row of a 2-D array-like to unit Euclidean norm."""
    rows = np.asarray(array)
    norms = np.linalg.norm(rows, axis=1, keepdims=True)
    # A zero row has norm 0; dividing by 1 instead keeps it zero (no NaN/inf).
    norms[norms == 0] = 1
    return rows / norms
# Disabled alternative: index the row-normalised concatenation of the image
# latents and the title embeddings instead of the image latents alone.
# result_array = row_wise_normalize_and_concatenate(latent_data, embeddings)
# index = Indexer(result_array)

# Active index: built from the image latents only.
index = Indexer(latent_data)
def get_recommendations(image, title, k):
    """Look up the ``k`` nearest dataset rows for a query image.

    The image is converted to single-channel ("L") mode, turned into a tensor
    and passed through the module-level ``encoder``; the resulting embedding
    is used to query the module-level ``index``.

    Args:
        image: Query picture; must provide ``convert`` (e.g. a PIL image).
        title: Accepted for interface compatibility. It does not influence
            the result, because ``index`` is queried with the image embedding
            only.
        k: Number of neighbours requested from ``index.topk``.

    Returns:
        dict: ``{"image": [...], "title": [...]}`` with the ``image`` and
        ``title`` fields of every retrieved dataset row, in the order the
        index returned them.
    """
    grayscale = image.convert("L")
    pixels = transforms.ToTensor()(grayscale)

    # Inference only: skip autograd bookkeeping rather than detaching after.
    with torch.no_grad():
        image_embeds = encoder(pixels).numpy()

    # Only the first element of the topk result is consumed; presumably it
    # holds the row ids for our single query -- confirm against Indexer.
    neighbours = index.topk(image_embeds, k=k)
    row_ids = [int(row_id) for row_id in neighbours[0]]

    results_dict = {"image": [], "title": []}
    for row_id in row_ids:
        # Row-first access decodes a single record; dataset["image"][i]
        # would materialise the entire image column on every iteration.
        row = dataset[row_id]
        results_dict["image"].append(row["image"])
        results_dict["title"].append(row["title"])
    return results_dict