Quantcast
Channel: Active questions tagged mongodb-atlas - Stack Overflow
Viewing all articles
Browse latest Browse all 219

PlanExecutor error caused by embedding index not indexed as knnVector

$
0
0

I have an ML model written in torch which produces an embedding of size 512.

I wrote a custom script that uses the model and sends the resulting embeddings to the collection (which is currently empty).

class FaceEmbedding(Model):
    """Embed faces with a torch model and match them via Atlas Vector Search.

    For every face found in an image two vectors are produced: the output of
    the custom torch model (stored under ``"EuclidianEmbedding"`` and used as
    the ``$vectorSearch`` query vector) and the ``face_recognition`` encoding
    (stored under ``"FREmbedding"`` and used to confirm a candidate match).
    """

    def __init__(
        self,
        MONGO_CONNECTION_STRING: str = MONGO_CONNECTION_STRING,
    ) -> None:
        """Load the embedding model and open the MongoDB collection.

        Args:
            MONGO_CONNECTION_STRING: MongoDB connection URI.
        """
        super().__init__()
        # NOTE(review): torch.load unpickles arbitrary objects -- only load
        # checkpoint files from a trusted source.
        self.FaceEmbeddingModel = torch.load('Model/model2.pth')
        # The model is only used for inference here, so switch layers such as
        # dropout / batch-norm to their evaluation behaviour.
        self.FaceEmbeddingModel.eval()
        self.device = next(self.FaceEmbeddingModel.parameters()).device
        self.T = v2.Compose([
            v2.ToImage(),
            v2.ToDtype(torch.float32),
            v2.Resize(256),
            v2.CenterCrop(224),
            v2.Normalize(
                mean=[0.48235, 0.45882, 0.40784],
                std=[
                    0.00392156862745098,
                    0.00392156862745098,
                    0.00392156862745098,
                ],
            ),
        ])
        self.dbName = "FaceSimilarity"
        self.collectionName = "Embeddings"
        self.client = MongoClient(MONGO_CONNECTION_STRING)
        self.collection = self.client[self.dbName][self.collectionName]

    def __makeEucEmbeddings(self, img: np.ndarray) -> np.ndarray:
        """Run one cropped face image through the torch model.

        Args:
            img: Face crop as an image array.

        Returns:
            The model output with singleton dimensions squeezed out, as a
            numpy array on the CPU.
        """
        img_t = torch.unsqueeze(self.T(img), dim=0)
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            embedding = self.FaceEmbeddingModel.pos(img_t.to(self.device))
        del img_t
        torch.cuda.empty_cache()
        return embedding.squeeze().cpu().detach().numpy()

    def makeEmbeddings(self, img: np.ndarray, k: int):
        """Compute both embeddings for up to ``k`` faces found in ``img``.

        Faces are ordered by bounding-box area, largest first, and at most
        ``k`` of them are kept.
        NOTE(review): the original code discarded the result of ``sorted`` so
        no ordering was applied at all; "largest k faces" is the assumed
        intent -- confirm.

        Args:
            img: Image array accepted by ``face_recognition``.
            k: Maximum number of faces to process.

        Returns:
            ``(EucEmb, FREmb, face_locations)`` -- three parallel lists: the
            torch-model embeddings (as plain lists), the ``face_recognition``
            encodings, and the ``(top, right, bottom, left)`` boxes.
        """
        face_locations = sorted(
            fr.face_locations(img),
            key=lambda rect: abs(rect[2] - rect[0]) * abs(rect[1] - rect[3]),
            reverse=True,
        )[:k]
        EucEmb = []
        FREmb = []
        for face in face_locations:
            top, right, bottom, left = face
            face_img = img[top:bottom, left:right]
            FREmb.append(fr.face_encodings(img, [face])[0])
            EucEmb.append(self.__makeEucEmbeddings(face_img).tolist())
        return EucEmb, FREmb, face_locations

    def __make_pipeline(self, EucEmb):
        """Build the ``$vectorSearch`` aggregation pipeline for one vector.

        NOTE(review): the Atlas Vector Search index named ``"vector_index"``
        has to be defined on exactly the path used below
        (``"EuclidianEmbedding"``, spelled with an "i"); an index defined on a
        differently spelled path cannot serve this query -- verify the index
        definition against this string.

        Args:
            EucEmb: Query vector as a list of floats.

        Returns:
            A one-stage aggregation pipeline.
        """
        pipeline = [{
            "$vectorSearch": {
                "index": "vector_index",
                "path": "EuclidianEmbedding",
                "queryVector": EucEmb,
                "numCandidates": 200,
                "limit": 10,
            }
        }]
        return pipeline

    def saveEmbedding(self, embeddings) -> None:
        """Insert ``(EucEmb, FREmb)`` pairs into the collection.

        Args:
            embeddings: Iterable of ``(EucEmb, FREmb)`` pairs. Each vector may
                be a list or a numpy array; both are stored as plain lists
                because numpy arrays cannot be encoded as BSON.
        """
        data = [
            {
                "EuclidianEmbedding": np.asarray(EucEmb).tolist(),
                "FREmbedding": np.asarray(FREmb).tolist(),
            }
            for EucEmb, FREmb in embeddings
        ]
        # insert_many rejects an empty document list, so skip it.
        if data:
            self.collection.insert_many(data)

    def __vectorSearch(self, img, k):
        """Search the collection for every face in ``img``.

        Each face's torch embedding is used as a ``$vectorSearch`` query; the
        returned documents are pooled and every face is then checked against
        the pooled ``"FREmbedding"`` values with ``fr.compare_faces``.

        Returns:
            ``(RecFace, NotRecFace, face_locations)`` where ``RecFace`` holds
            the first matching stored document for each recognised face and
            ``NotRecFace`` holds ``[EucEmb, FREmb]`` pairs for the others.
        """
        EucEmb, FREmb, face_locations = self.makeEmbeddings(img, k)

        # aggregate() returns a cursor; collect the documents it yields
        # rather than the cursor object itself.
        ResEmb = []
        for emb in EucEmb:
            ResEmb.extend(self.collection.aggregate(self.__make_pipeline(emb)))
        known = [np.asarray(doc['FREmbedding']) for doc in ResEmb]

        RecFace = []
        NotRecFace = []
        for euc, enc in zip(EucEmb, FREmb):
            # compare_faces yields one boolean per known encoding.
            match = fr.compare_faces(known, enc) if known else []
            idx = next((i for i, hit in enumerate(match) if hit), None)
            if idx is not None:
                RecFace.append(ResEmb[idx])
            else:
                NotRecFace.append([euc, enc])
        return RecFace, NotRecFace, face_locations

    def vectorSearch(self, img, k, SaveNotRecFace=False):
        """Recognise up to ``k`` faces in ``img``.

        Args:
            img: Image array accepted by ``face_recognition``.
            k: Maximum number of faces to process.
            SaveNotRecFace: When true, the embeddings of faces that matched no
                stored document are inserted into the collection.

        Returns:
            ``(RecFace, NotRecFace, face_locations)`` as produced by the
            internal search.
        """
        RecFace, NotRecFace, face_locations = self.__vectorSearch(img, k)
        if SaveNotRecFace:
            # saveEmbedding expects the whole list of pairs, not one pair.
            self.saveEmbedding(NotRecFace)
        return RecFace, NotRecFace, face_locations

I am getting this error upon running the script on various images with one face common among them.

OperationFailure: PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector, full error: {'ok': 0.0, 'errmsg': 'PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector', 'code': 8, 'codeName': 'UnknownError', '$clusterTime': {'clusterTime': Timestamp(1716546450, 2), 'signature': {'hash': b'\xf4B1N\xc2\xffG\x9d$J}\xea\xad\xfe\xdfz\x83Cx\x80', 'keyId': 7345064297216606213}}, 'operationTime': Timestamp(1716546450, 2)}

Defined Indexes in the Mongo Compass for the collection

In Atlas Vector Search, I defined this JSON

{"fields": [    {"numDimensions": 512,"path": "EuclideanEmbedding","similarity": "euclidean","type": "vector"    }  ]}

I looked this up in the MongoDB docs and came across the knnVector index type in Atlas Vector Search, but changing the type in the JSON gives an error again.


Viewing all articles
Browse latest Browse all 219

Trending Articles





<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>