def test_remove_id_map_binary(self):
    """Check id removal on a binary IDMap2 index, before and after an I/O round trip.

    Builds a 40-bit ``IndexBinaryFlat`` wrapped in ``IndexBinaryIDMap2``,
    adds 10 five-byte vectors under ids 1000..1009 (first byte 100..109),
    removes id 1003, and verifies that id 1004 is still reconstructible
    while reconstructing id 1003 raises. The same checks are repeated
    after writing the index to a temporary file and reading it back.
    """

    def check_after_removal(idx):
        # The neighbour of the removed id must be untouched...
        assert idx.reconstruct(1004)[0] == 104
        # ...and the removed id must no longer be reconstructible.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not silently swallowed.
        try:
            idx.reconstruct(1003)
        except Exception:
            pass
        else:
            assert False, 'should have raised an exception'

    sub_index = faiss.IndexBinaryFlat(40)
    xb = np.zeros((10, 5), dtype='uint8')
    xb[:, 0] = np.arange(10) + 100
    index = faiss.IndexBinaryIDMap2(sub_index)
    index.add_with_ids(xb, np.arange(10) + 1000)
    assert index.reconstruct(1004)[0] == 104

    index.remove_ids(np.array([1003]))
    check_after_removal(index)

    # while we are there, let's test I/O as well...
    # mkstemp() hands back an already-open OS-level descriptor; close it
    # right away so it is not leaked (only the path is needed here).
    fd, tmpnam = tempfile.mkstemp()
    os.close(fd)
    try:
        faiss.write_index_binary(index, tmpnam)
        index = faiss.read_index_binary(tmpnam)
    finally:
        os.remove(tmpnam)

    check_after_removal(index)
def create(
    hashes: t.Iterable[PDQ_HASH_TYPE],
    custom_ids: t.Optional[t.Iterable[int]] = None,
) -> "PDQFlatHashIndex":
    """
    Creates a PDQFlatHashIndex for use searching against the provided hashes.

    Parameters
    ----------
    hashes: sequence of PDQ Hashes
        The PDQ hashes to create the index with
    custom_ids: sequence of custom ids for the PDQ Hashes (optional)
        Optional sequence of custom id values to use for the PDQ hashes for any
        method relating to indexes (e.g., hash_at). If provided, the nth item in
        custom_ids will be used as the id for the nth hash in hashes. If not
        provided then the ids for the hashes will be assumed to be their
        respective index in hashes (i.e., the nth hash would have id n, starting
        from 0).

    Returns
    -------
    a PDQFlatHashIndex of these hashes
    """
    # Decode each hex-encoded hash into a row of raw bytes.
    vectors = [
        numpy.frombuffer(binascii.unhexlify(pdq_hash), dtype=numpy.uint8)
        for pdq_hash in hashes
    ]
    index = faiss.index_binary_factory(BITS_IN_PDQ, "BFlat")
    # Identity check, not `!= None`: an equality comparison against a numpy
    # array is elementwise and would make this `if` raise ValueError.
    if custom_ids is not None:
        # Wrap the flat index so vectors are addressed by the caller's ids.
        index = faiss.IndexBinaryIDMap2(index)
        i64_ids = [uint64_to_int64(custom_id) for custom_id in custom_ids]
        index.add_with_ids(numpy.array(vectors), numpy.array(i64_ids))
    else:
        index.add(numpy.array(vectors))
    return PDQFlatHashIndex(index)
def __init__(self, nhash: int = 16):
    """Set up an id-mapped multi-hash binary index.

    Parameters
    ----------
    nhash: int
        Number of hash maps handed to ``faiss.IndexBinaryMultiHash``; each
        one is given ``BITS_IN_PDQ // nhash`` bits.
    """
    # Split the hash width evenly (integer division) across the hash maps.
    bits_each = BITS_IN_PDQ // nhash
    multi_hash = faiss.IndexBinaryMultiHash(BITS_IN_PDQ, nhash, bits_each)
    id_mapped = faiss.IndexBinaryIDMap2(multi_hash)
    super().__init__(id_mapped)
    # Runs after the base initializer has received the index.
    self.__construct_index_rev_map()
def __init__(self):
    """Set up an id-mapped binary index built from the "BFlat" factory string."""
    flat = faiss.index_binary_factory(BITS_IN_PDQ, "BFlat")
    id_mapped = faiss.IndexBinaryIDMap2(flat)
    super().__init__(id_mapped)