Пример #1
0
 def test01_small_invalid_dimension(self):
     # Loading the saved model into an index created with dimension 30 is
     # expected to raise. NOTE(review): the test name implies 30 differs
     # from the dimension the model file was built with -- the fixture that
     # writes self.model_fname is not visible here, confirm.
     index = HnswIndex(30)
     try:
         # assertRaises(Exception, ...) replaces the former bare
         # ``except: pass``: KeyboardInterrupt / SystemExit now propagate
         # instead of being counted as the "expected" failure, and the test
         # still fails if load() returns normally.
         self.assertRaises(Exception, index.load, self.model_fname)
     finally:
         # Drop the local reference whether or not the assertion held.
         del index
Пример #2
0
 def test04_batch_search_by_ids(self):
     # Batched id search must return exactly what 100 individual
     # search_by_id calls return for the same randomly drawn ids.
     top_k = 10
     index = HnswIndex(self.dim)
     index.load(self.model_fname)

     # Draw the query ids first so both code paths see the same inputs.
     query_ids = []
     for _ in xrange(100):
         query_ids.append(random.randrange(0, self.data_num))

     batched = index.batch_search_by_ids(
         query_ids, top_k, num_threads=12, include_distances=True)

     one_by_one = []
     for query_id in query_ids:
         one_by_one.append(
             index.search_by_id(query_id, top_k, include_distances=True))

     self.assertEqual(batched, one_by_one)
Пример #3
0
 def test04_batch_search_by_vectors(self):
     # Batched vector search must return exactly what 100 individual
     # search_by_vector calls return for the same random query vectors.
     top_k = 10
     index = HnswIndex(self.dim)
     index.load(self.model_fname)

     # Each query is self.dim samples drawn from gauss(0, 1).
     queries = []
     for _ in xrange(100):
         query = [random.gauss(0, 1) for _ in xrange(self.dim)]
         queries.append(query)

     batched = index.batch_search_by_vectors(
         queries, top_k, num_threads=12, include_distances=True)

     one_by_one = []
     for query in queries:
         one_by_one.append(
             index.search_by_vector(query, top_k, include_distances=True))

     self.assertEqual(batched, one_by_one)
Пример #4
0
 def test03_small_add_data_after_loading(self):
     # Calling add_data on an index that was populated through load() is
     # expected to raise.
     index = HnswIndex(self.dim)
     index.load(self.model_fname)

     # Build the vector outside the guarded call: previously it sat inside
     # a bare ``except: pass``, so any unrelated error raised while
     # generating it would have made the test pass silently.
     v = [random.gauss(0, 1) for _ in xrange(self.dim)]
     try:
         # Only add_data itself is expected to fail; KeyboardInterrupt and
         # SystemExit are no longer swallowed.
         self.assertRaises(Exception, index.add_data, v)
     finally:
         # Drop the local reference whether or not the assertion held.
         del index
Пример #5
0
class N2(BaseANN):
    """Benchmark adapter around ``n2.HnswIndex``.

    The built index is cached on disk under ``INDEX_DIR``; the cache file
    name encodes ``m``, ``ef_construction`` and ``n_threads`` so that
    different build settings do not share a file.
    """

    def __init__(self, m, ef_construction, n_threads, ef_search, metric):
        """Store the build/search parameters and prepare the cache directory.

        Args:
            m: forwarded to ``HnswIndex.build`` as ``m``; ``max_m0`` is set
                to ``2 * m``.
            ef_construction: forwarded to ``HnswIndex.build``.
            n_threads: forwarded to ``HnswIndex.build``.
            ef_search: passed as the third positional argument of
                ``search_by_vector`` in :meth:`query`.
            metric: ``'euclidean'`` selects the ``'L2'`` index; any other
                value uses the ``HnswIndex`` default.

        Raises:
            OSError: if the cache directory cannot be created.
        """
        self._m = m
        self._m0 = m * 2
        self._ef_construction = ef_construction
        self._n_threads = n_threads
        self._ef_search = ef_search
        self._index_name = os.path.join(
            INDEX_DIR, "youtube_n2_M%d_efCon%d_n_thread%s" %
            (m, ef_construction, n_threads))
        self.name = "N2_M%d_efCon%d_n_thread%s_efSearch%d" % (
            m, ef_construction, n_threads, ef_search)
        self._metric = metric

        d = os.path.dirname(self._index_name)
        # Create-then-tolerate instead of exists()-then-create: another
        # process may create the directory between the check and makedirs.
        try:
            os.makedirs(d)
        except OSError:
            if not os.path.isdir(d):
                raise

    def fit(self, X):
        """Load the cached index if its file exists, else build and save it.

        Args:
            X: data to index; ``X.shape[1]`` is used as the index dimension
                and every row must provide ``tolist()``.
        """
        from n2 import HnswIndex
        if self._metric == 'euclidean':
            self._n2 = HnswIndex(X.shape[1], 'L2')
        else:
            self._n2 = HnswIndex(X.shape[1])
        if os.path.exists(self._index_name):
            logging.debug("Loading index from file")
            self._n2.load(self._index_name)
        else:
            logging.debug("Index file is not exist: {0}".format(
                self._index_name))
            logging.debug("Start fitting")

            for x in X:
                self._n2.add_data(x.tolist())
            self._n2.build(m=self._m,
                           max_m0=self._m0,
                           ef_construction=self._ef_construction,
                           n_threads=self._n_threads)
            # Persist the index so later runs take the load branch above.
            self._n2.save(self._index_name)

    def query(self, v, n):
        """Return ``search_by_vector(v.tolist(), n, ef_search)`` from the index."""
        return self._n2.search_by_vector(v.tolist(), n, self._ef_search)

    def __str__(self):
        """Return the descriptive name assembled in ``__init__``."""
        return self.name
Пример #6
0
class N2(BaseANN):
    """Benchmark adapter around ``HnswIndex`` with single and batch queries.

    The built index is cached as a file under ``CACHE_DIR``; the file name
    encodes the dataset name and the build parameters.
    """

    def __init__(self, m, ef_construction, n_threads, ef_search, metric, batch):
        """Record the parameters and derive the display name and cache path.

        ``batch`` only affects ``self.name`` (a ``_batch`` suffix is added
        when it is truthy).
        """
        self._metric = metric
        self._m, self._m0 = m, m * 2
        self._ef_construction = ef_construction
        self._ef_search = ef_search
        self._n_threads = n_threads

        batch_suffix = '_batch' if batch else ''
        self.name = "N2_M%d_efCon%d_n_thread%s_efSearch%d%s" % (
            m, ef_construction, n_threads, ef_search, batch_suffix)

        index_file = "index_n2_%s_M%d_efCon%d_n_thread%s" % (
            args.dataset, m, ef_construction, n_threads)
        self._index_name = os.path.join(CACHE_DIR, index_file)

    def fit(self, X):
        """Load the cached index if its file exists, else build and save it."""
        dim = X.shape[1]
        # Benchmark metric name -> metric string handed to HnswIndex; any
        # other metric falls through to the HnswIndex default.
        for metric, n2_metric in (('euclidean', 'L2'), ('dot', 'dot')):
            if self._metric == metric:
                self._n2 = HnswIndex(dim, n2_metric)
                break
        else:
            self._n2 = HnswIndex(dim)

        if not os.path.exists(self._index_name):
            n2_logger.info("Create Index")
            for row in X:
                self._n2.add_data(row)
            self._n2.build(
                m=self._m,
                max_m0=self._m0,
                ef_construction=self._ef_construction,
                n_threads=self._n_threads,
            )
            self._n2.save(self._index_name)
        else:
            n2_logger.info("Loading index from file")
            self._n2.load(self._index_name, use_mmap=False)

    def query(self, v, n):
        """Return ``search_by_vector(v, n, ef_search)`` from the index."""
        index = self._n2
        return index.search_by_vector(v, n, self._ef_search)

    def batch_query(self, X, n):
        """Run a batched search over ``X`` and keep the result on ``self.b_res``."""
        index = self._n2
        self.b_res = index.batch_search_by_vectors(
            X, n, self._ef_search, self._n_threads)

    def get_batch_results(self):
        """Return whatever the last ``batch_query`` call stored."""
        return self.b_res

    def __str__(self):
        """Return the descriptive name assembled in ``__init__``."""
        return self.name
Пример #7
0
from n2 import HnswIndex
import random

# Demo: build an index of random vectors, save it, reload it, then query it
# once by stored id and once by an arbitrary vector.
f = 3  # length of every vector added to the index
t = HnswIndex(f)  # HnswIndex(f, "L2 or angular")
for _row in xrange(1000):
    t.add_data([random.gauss(0, 1) for _axis in xrange(f)])

t.build(m=5, max_m0=10, n_threads=4)
t.save('test.n2')

# Load the saved file into a second index object and query that one.
u = HnswIndex(f, "angular")
u.load('test.n2')

search_id = 1
k = 3
neighbor_ids = u.search_by_id(search_id, k)
id_message = "[search_by_id]: Nearest neighborhoods of id {}: {}".format(
    search_id, neighbor_ids)
print(id_message)

example_vector_query = [random.gauss(0, 1) for _axis in xrange(f)]
nns = u.search_by_vector(example_vector_query, k, include_distances=True)
vector_message = (
    "[search_by_vector]: Nearest neighborhoods of vector {}: {}".format(
        example_vector_query, nns))
print(vector_message)