Example #1
 def test_remove_id_map(self):
     sub_index = faiss.IndexFlat(5)
     xb = np.zeros((10, 5), dtype='float32')
     xb[:, 0] = np.arange(10) + 1000
     index = faiss.IndexIDMap2(sub_index)
     index.add_with_ids(xb, np.arange(10) + 100)
     assert index.reconstruct(104)[0] == 1004
     index.remove_ids(np.array([103]))
     assert index.reconstruct(104)[0] == 1004
     # reconstructing a removed id must fail; faiss surfaces this as a RuntimeError
     try:
         index.reconstruct(103)
     except RuntimeError:
         pass
     else:
         assert False, 'should have raised an exception'
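The test removes a single id via a raw numpy array; remove_ids also accepts faiss ID selectors, which is convenient for contiguous ranges. A minimal sketch of the same setup (not part of the original test):

import numpy as np
import faiss

sub_index = faiss.IndexFlat(5)
index = faiss.IndexIDMap2(sub_index)
xb = np.zeros((10, 5), dtype='float32')
xb[:, 0] = np.arange(10) + 1000
index.add_with_ids(xb, np.arange(10) + 100)

# IDSelectorRange covers ids [imin, imax): this removes 100, 101 and 102
index.remove_ids(faiss.IDSelectorRange(100, 103))
print(index.ntotal)  # 7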
Example #2
 def __init__(self, dim: int, save_path: str, num_threads: int = None):
     """
     Constructor.
     :param dim:
     :param save_path:
     :param num_threads
     """
     self.dim = dim
     if num_threads is not None and num_threads > 0:
         faiss.omp_set_num_threads(num_threads)
     if isfile(save_path):
         logging.debug("restore: %s", save_path)
         self._index = faiss.read_index(save_path)
     else:
         self._sub_index = faiss.IndexFlat(dim)
         self._index = faiss.IndexIDMap2(self._sub_index)
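Only the constructor is shown; for the isfile branch to ever fire, something must persist the index. A hypothetical save method, not in the original snippet, could be as simple as:

 def save(self, save_path: str):
     # hypothetical method: faiss.write_index persists the IndexIDMap2
     # wrapper together with its id mapping, so the constructor's
     # faiss.read_index branch can restore it later
     faiss.write_index(self._index, save_path)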
Example #3
    def __init__(self, d, k, use_gpu=False, add_with_ids=False):
        """
        Initialize the class with the dimension of vectors
        :param k: Number of neighbors to search
        :param d: dimension of the database and query vectors
        """
        self.d = d
        self.index = faiss.IndexFlatL2(self.d)
        self.add_with_ids = add_with_ids
        if self.add_with_ids:
            self.index = faiss.IndexIDMap2(self.index)
        self.use_gpu = use_gpu
        if self.use_gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = "0"
            self.convert_to_gpu()
            # self.index = faiss.GpuIndexFlatL2(res, self.d, flat_config)  # Does brute force neighbor search

        # self.index = faiss.IndexFlatIP(d)
        self.k = k
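convert_to_gpu is called but not defined in the excerpt. With a GPU build of faiss it would plausibly be a thin wrapper over index_cpu_to_gpu; a sketch under that assumption:

    def convert_to_gpu(self):
        # sketch of the undefined method, assuming the faiss GPU build:
        # allocates GPU resources and moves the CPU index onto device 0
        res = faiss.StandardGpuResources()
        self.index = faiss.index_cpu_to_gpu(res, 0, self.index)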
Example #4
def create_faiss_model(item_embedding,
                       item_list,
                       faiss_path,
                       size=128,
                       mode="train"):
    item_embedding = np.array(item_embedding, dtype=np.float32)
    ids = np.array(item_list).astype("int")
    if mode == "train":
        index = faiss.index_factory(size, "IVF100,Flat",
                                    faiss.METRIC_INNER_PRODUCT)
        index.nprobe = 20
        index.train(item_embedding)
        # enable the direct map so that reconstruct() can rebuild vectors from the IVF index
        index.make_direct_map()
        index_id = faiss.IndexIDMap2(index)
    elif mode == "update":
        index_id = faiss.read_index(faiss_path)
    index_id.add_with_ids(item_embedding, ids)
    # persist the index
    faiss.write_index(index_id, faiss_path)

    return index_id
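A hedged usage sketch for both modes; the embeddings, ids and file path below are made up for illustration:

# illustrative data only
emb = np.random.rand(5000, 128).astype(np.float32)
create_faiss_model(emb, list(range(5000)), "items.faiss", mode="train")

# later: append fresh vectors to the saved index
new_emb = np.random.rand(50, 128).astype(np.float32)
create_faiss_model(new_emb, list(range(5000, 5050)), "items.faiss", mode="update")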
Example #5
 def __init__(self, d=64, index_path='/workspace/zhiyi/data/faiss.index'):
     self.faiss_sub_index = faiss.IndexFlatL2(d)
     self.faiss_index = faiss.IndexIDMap2(self.faiss_sub_index)
     self.index_path = index_path
Example #6
 def __init__(self, num_dimensions):
     self.num_dimensions = num_dimensions
     self.index = faiss.IndexFlatL2(num_dimensions)
     self.index_id_map = faiss.IndexIDMap2(self.index)
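In both constructors, the IndexIDMap2 wrapper makes search return caller-assigned labels instead of sequential positions, and (unlike plain IndexIDMap) it supports reconstruct by id. A minimal sketch:

import numpy as np
import faiss

index = faiss.IndexFlatL2(4)
index_id_map = faiss.IndexIDMap2(index)
xb = np.random.rand(3, 4).astype('float32')
index_id_map.add_with_ids(xb, np.array([42, 7, 99], dtype=np.int64))

D, I = index_id_map.search(xb[:1], 2)
print(I)  # labels drawn from {42, 7, 99}, not positions 0..2
print(index_id_map.reconstruct(42))  # reconstruct by the stored id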
Example #7
import numpy as np

d = 64
nb = 100
np.random.seed(1234)
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.

import faiss

_sub_index = faiss.IndexFlatL2(d)
index = faiss.IndexIDMap2(_sub_index)
print(index.is_trained)
index.add_with_ids(xb, np.arange(start=10, stop=10 + nb))  # type: ignore
print(index.ntotal)

k = 4
D, I = index.search(xb[:5], k)  # type: ignore

ids = faiss.vector_to_array(index.id_map)  # the stored ids live in the id_map vector
print(ids)
print(ids[0])
Example #8
def delete_products(current_products, products_to_update):
    """
    Удаление продуктов, которых нет в новом xml или которые удалили из директории с дополнительными изображениями.
    Удаление индексов продуктов из модели с faiss индексами
    :param current_products: датафрейм из нового xml-файла
    :param products_to_update: текущий датафрейм
    :return: датафрейм products_to_update с удаленными продуктами
    """
    logging.info('Удаление продуктов')
    idx_to_remove = []

    # remove products that are absent from the new xml:
    # a product is dropped when its vendor code in the current model is missing from the new xml
    products_to_update_vendor_code = set(products_to_update['vendor_code'])
    current_products_vendor_code = set(current_products['vendor_code'])
    products_to_update_vendor_code.difference_update(
        current_products_vendor_code)
    if products_to_update_vendor_code:
        for index, row in products_to_update.iterrows():
            if row['vendor_code'] in products_to_update_vendor_code:
                idx_to_remove.append(index)
                products_to_update.drop(index, inplace=True)

    # remove rows whose image files no longer exist in the directory
    current_product_files = set(
        glob.glob(config.PATH_TO_PRODUCT_FOLDER + '/*/*/*'))
    product_files_to_update = set(
        products_to_update[~products_to_update['picture'].str.
                           startswith('http')]['picture'].values)
    product_files_to_update.difference_update(current_product_files)
    if product_files_to_update:
        for file in product_files_to_update:
            index = products_to_update[products_to_update['picture'] ==
                                       file].index[0]
            products_to_update.drop(index, inplace=True)
            idx_to_remove.append(index)

    products_to_update.reset_index(inplace=True, drop=True)

    # if nothing was removed, return the dataframe unchanged
    if not idx_to_remove:
        logging.info('No products to remove')
        return products_to_update

    # remove the corresponding vectors from faiss
    logging.info(f'Removing indices from faiss [{len(idx_to_remove)} rows]')
    index = faiss.read_index(config.PATH_TO_FAISS_INDEX)
    vectors = [index.reconstruct(i) for i in range(index.ntotal)]
    vectors_without_removed = [
        vectors[i] for i in range(len(vectors)) if i not in idx_to_remove
    ]

    updated_index = faiss.IndexFlatL2(2048)
    updated_index = faiss.IndexIDMap2(updated_index)
    updated_index.add_with_ids(
        np.vstack(vectors_without_removed),
        np.arange(len(vectors_without_removed), dtype=np.int64))

    logging.info('Saving')
    faiss.write_index(updated_index, config.PATH_TO_FAISS_INDEX)
    products_to_update.to_pickle(config.PATH_TO_PRODUCT_DATASET)
    assert updated_index.ntotal == products_to_update.shape[0]

    logging.info(f'Removed {len(idx_to_remove)} rows')
    return products_to_update
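Rebuilding the flat index from reconstructed vectors works, but since the stored index is an IndexIDMap2 after the first rewrite, the same deletion could be done in place with remove_ids, provided the stored faiss ids match the pre-deletion row positions collected in idx_to_remove. A sketch under that assumption; note the surviving ids keep their old values, so the position == id invariant would have to be re-established:

# sketch: in-place removal instead of a full rebuild; assumes the stored ids
# equal the pre-deletion row positions in idx_to_remove
index = faiss.read_index(config.PATH_TO_FAISS_INDEX)
index.remove_ids(np.array(idx_to_remove, dtype=np.int64))
faiss.write_index(index, config.PATH_TO_FAISS_INDEX)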
Example #9
from pydantic import BaseModel
from fastapi import FastAPI, File, Form, HTTPException, Response, status
import faiss
from os import listdir
import numpy as np
from tqdm import tqdm
import cv2
import sqlite3
import io
conn = sqlite3.connect('rgb_histograms.db')
IMAGE_PATH = "./../../../public/images"
sub_index = faiss.IndexFlat(4096, faiss.METRIC_L1)
index_id_map = faiss.IndexIDMap2(sub_index)


def init_index():
    # index_id_map is module-level; add_with_ids mutates it in place,
    # so no global declaration is needed
    all_ids = get_all_ids()
    for image_id in tqdm(all_ids):
        features = convert_array(get_rgb_histogram_by_id(image_id))
        index_id_map.add_with_ids(np.array([features]), np.int64([image_id]))
    print("Index is ready")


def read_img_file(image_data):
    # decode raw bytes into a uint8 array (np.frombuffer; np.fromstring is deprecated)
    return np.frombuffer(image_data, np.uint8)
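The excerpt ends before any route is registered. A hypothetical search endpoint over the module-level index_id_map could look like the sketch below; the 16x16x16 RGB histogram matches the IndexFlat(4096) dimensionality, but the route, parameters and feature extraction are assumptions, not the original code:

app = FastAPI()

@app.post("/search")
def search(file: bytes = File(...), k: int = Form(5)):
    # hypothetical endpoint: decode the upload and build the same kind of
    # RGB histogram feature the index was filled with (16*16*16 = 4096 bins)
    img = cv2.imdecode(read_img_file(file), cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=400, detail="not a decodable image")
    hist = cv2.calcHist([img], [0, 1, 2], None, [16, 16, 16],
                        [0, 256, 0, 256, 0, 256]).flatten()
    distances, ids = index_id_map.search(np.array([hist], dtype=np.float32), k)
    return {"ids": ids[0].tolist(), "distances": distances[0].tolist()}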

Example #10
    def build(self, config):
        '''
            build index from scratch
        '''
        operation_method = config.get("index_operation", "new").lower()

        gallery_images, gallery_docs = split_datafile(
            config['data_file'], config['image_root'], config['delimiter'])

        # when removing data from the index, there is no need to extract features
        if operation_method != "remove":
            gallery_features = self._extract_features(gallery_images, config)
        assert operation_method in [
            "new", "remove", "append"
        ], "Only append, remove and new operation are supported"

        # vector.index: faiss index file
        # id_map.pkl: use this file to map id to image_doc
        if operation_method in ["remove", "append"]:
            # if remove or append, vector.index and id_map.pkl must exist
            assert os.path.exists(
                os.path.join(config["index_dir"], "vector.index")
            ), "The vector.index does not exist in {} when 'index_operation' is not None".format(
                config["index_dir"])
            assert os.path.exists(
                os.path.join(config["index_dir"], "id_map.pkl")
            ), "The id_map.pkl does not exist in {} when 'index_operation' is not None".format(
                config["index_dir"])
            index = faiss.read_index(
                os.path.join(config["index_dir"], "vector.index"))
            with open(os.path.join(config["index_dir"], "id_map.pkl"),
                      'rb') as fd:
                ids = pickle.load(fd)
            assert index.ntotal == len(
                ids), "the number of vectors in the index does not match id_map"
        else:
            if not os.path.exists(config["index_dir"]):
                os.makedirs(config["index_dir"], exist_ok=True)
            index_method = config.get("index_method", "HNSW32")

            # for the IVF method, calculate the number of IVF lists automatically
            if index_method == "IVF":
                index_method = index_method + str(
                    min(int(len(gallery_images) // 8), 65536)) + ",Flat"

            # for a binary index, prepend 'B' to index_method
            if config["dist_type"] == "hamming":
                index_method = "B" + index_method

            # dist_type
            dist_type = faiss.METRIC_INNER_PRODUCT if config[
                "dist_type"] == "IP" else faiss.METRIC_L2

            # build index
            if config["dist_type"] == "hamming":
                index = faiss.index_binary_factory(config["embedding_size"],
                                                   index_method)
            else:
                index = faiss.index_factory(config["embedding_size"],
                                            index_method, dist_type)
                index = faiss.IndexIDMap2(index)
            ids = {}

        if config["index_method"] == "HNSW32":
            logger.warning(
                "The HNSW32 method does not support the 'remove' operation")

        if operation_method != "remove":
            # calculate id for new data
            start_id = max(ids.keys()) + 1 if ids else 0
            ids_now = (
                np.arange(0, len(gallery_images)) + start_id).astype(np.int64)

            # only train when new index file
            if operation_method == "new":
                if config["dist_type"] == "hamming":
                    index.add(gallery_features)
                else:
                    index.train(gallery_features)

            if config["dist_type"] != "hamming":
                index.add_with_ids(gallery_features, ids_now)

            for i, d in zip(list(ids_now), gallery_docs):
                ids[i] = d
        else:
            if config["index_method"] == "HNSW32":
                raise RuntimeError(
                    "The index_method: HNSW32 dose not support 'remove' operation"
                )
            # remove ids in id_map, remove index data in faiss index
            remove_ids = list(
                filter(lambda k: ids.get(k) in gallery_docs, ids.keys()))
            remove_ids = np.asarray(remove_ids)
            index.remove_ids(remove_ids)
            for k in remove_ids:
                del ids[k]

        # store faiss index file and id_map file
        if config["dist_type"] == "hamming":
            faiss.write_index_binary(
                index, os.path.join(config["index_dir"], "vector.index"))
        else:
            faiss.write_index(
                index, os.path.join(config["index_dir"], "vector.index"))

        with open(os.path.join(config["index_dir"], "id_map.pkl"), 'wb') as fd:
            pickle.dump(ids, fd)
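For reference, a sketch of a config that drives the "new" branch above; the keys are exactly those read in build, while the values and the builder instance are illustrative only:

config = {
    "data_file": "gallery/list.txt",   # illustrative paths and values
    "image_root": "gallery/images",
    "delimiter": "\t",
    "index_operation": "new",
    "index_dir": "./index_result",
    "index_method": "HNSW32",
    "dist_type": "IP",                 # or "L2" / "hamming"
    "embedding_size": 512,
}
builder.build(config)  # 'builder' stands in for an instance of the class above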