I have an ML model written in PyTorch that produces an embedding of size 512.

I wrote a custom script that uses the model and inserts the embeddings into the collection (which starts out empty):
```python
import face_recognition as fr
import numpy as np
import torch
from pymongo import MongoClient
from torchvision.transforms import v2


class FaceEmbedding(Model):
    def __init__(self, MONGO_CONNECTION_STRING: str = MONGO_CONNECTION_STRING) -> None:
        super().__init__()
        self.FaceEmbeddingModel = torch.load('Model/model2.pth')
        self.device = next(self.FaceEmbeddingModel.parameters()).device
        self.T = v2.Compose([
            v2.ToImage(),
            v2.ToDtype(torch.float32),
            v2.Resize(256),
            v2.CenterCrop(224),
            v2.Normalize(mean=[0.48235, 0.45882, 0.40784],
                         std=[0.00392156862745098] * 3)  # 1/255 per channel
        ])
        self.dbName = "FaceSimilarity"
        self.collectionName = "Embeddings"
        self.client = MongoClient(MONGO_CONNECTION_STRING)
        self.collection = self.client[self.dbName][self.collectionName]

    def __makeEucEmbeddings(self, img: np.ndarray) -> np.ndarray:
        img_t = self.T(img)
        img_t = torch.unsqueeze(img_t, dim=0)
        embedding = self.FaceEmbeddingModel.pos(img_t.to(self.device))
        del img_t
        torch.cuda.empty_cache()
        return embedding.squeeze().cpu().detach().numpy()

    def makeEmbeddings(self, img: np.ndarray, k: int):
        # sorted() returns a new list, so assign it; sort by face area,
        # largest first, and keep the k largest faces
        face_locations = sorted(
            fr.face_locations(img),
            key=lambda rect: abs(rect[2] - rect[0]) * abs(rect[1] - rect[3]),
            reverse=True,
        )[:k]
        EucEmb = []
        FREmb = []
        for face in face_locations:
            top, right, bottom, left = face
            face_img = img[top:bottom, left:right]
            FREmb.append(fr.face_encodings(img, [face])[0])
            EucEmb.append(self.__makeEucEmbeddings(face_img).tolist())
        return EucEmb, FREmb, face_locations

    def __make_pipeline(self, EucEmb):
        return [{
            "$vectorSearch": {
                "index": "vector_index",
                "path": "EuclidianEmbedding",
                "queryVector": EucEmb,
                "numCandidates": 200,
                "limit": 10,
            }
        }]

    def saveEmbedding(self, embeddings) -> None:
        data = []
        for EucEmb, FREmb in embeddings:
            data.append({
                "EuclidianEmbedding": EucEmb,
                # BSON cannot encode numpy arrays, so convert to a plain list
                "FREmbedding": np.asarray(FREmb).tolist(),
            })
        self.collection.insert_many(data)

    def __vectorSearch(self, img, k):
        EucEmb, FREmb, face_locations = self.makeEmbeddings(img, k)
        # materialise each cursor so the results can be indexed later
        ResEmb = [list(self.collection.aggregate(self.__make_pipeline(emb)))
                  for emb in EucEmb]
        # compare each detected face against the top hit of every
        # non-empty vector-search result set
        non_empty = [res for res in ResEmb if res]
        candidates = [np.array(res[0]['FREmbedding']) for res in non_empty]
        RecFace = []
        NotRecFace = []
        for i in range(len(FREmb)):
            match = fr.compare_faces(candidates, FREmb[i])
            if True in match:
                RecFace.append(non_empty[match.index(True)])
            else:
                NotRecFace.append([EucEmb[i], FREmb[i]])
        return RecFace, NotRecFace, face_locations

    def vectorSearch(self, img, k, SaveNotRecFace=False):
        RecFace, NotRecFace, face_locations = self.__vectorSearch(img, k)
        if SaveNotRecFace and NotRecFace:
            # saveEmbedding expects a list of (EucEmb, FREmb) pairs
            self.saveEmbedding(NotRecFace)
        return RecFace, NotRecFace, face_locations
```
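As a sanity check (not part of the script itself), the field names actually stored in the documents can be printed and compared against the `path` used in the `$vectorSearch` pipeline, since the search only matches that exact path. This assumes the same `MONGO_CONNECTION_STRING` constant as above:

```python
from pymongo import MongoClient

# Hypothetical sanity check: list the field names of one stored document so
# they can be compared against the "path" in the vector index definition.
client = MongoClient(MONGO_CONNECTION_STRING)
doc = client["FaceSimilarity"]["Embeddings"].find_one()
print(sorted(doc.keys()) if doc else "collection is empty")
```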
I get this error when running the script on various images that all share one common face:
```
OperationFailure: PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector,
full error: {'ok': 0.0,
  'errmsg': 'PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector',
  'code': 8, 'codeName': 'UnknownError',
  '$clusterTime': {'clusterTime': Timestamp(1716546450, 2),
    'signature': {'hash': b'\xf4B1N\xc2\xffG\x9d$J}\xea\xad\xfe\xdfz\x83Cx\x80',
    'keyId': 7345064297216606213}},
  'operationTime': Timestamp(1716546450, 2)}
```
[Screenshot: indexes defined in MongoDB Compass for the collection]
In Atlas Vector Search, I defined the index with this JSON:
{"fields": [ {"numDimensions": 512,"path": "EuclideanEmbedding","similarity": "euclidean","type": "vector" } ]}
I searched the MongoDB docs and came across the `knnVector` index type, but changing `"type"` in the JSON above to `"knnVector"` produces an error again.
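For reference, `knnVector` is the legacy vector type from Atlas Search (the `$search`/`knnBeta` side), and it lives in a differently shaped index definition with a `mappings` block rather than a `fields` array, roughly like this (adapted from the docs, with the field name kept as in my definition above):

```json
{
  "mappings": {
    "dynamic": true,
    "fields": {
      "EuclideanEmbedding": {
        "type": "knnVector",
        "dimensions": 512,
        "similarity": "euclidean"
      }
    }
  }
}
```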