Exemplo n.º 1
0
 def fit(self, X):
     """Create ``self.index`` as a ``puffinn.Index`` and insert every row of X.

     The index is constructed from ``self.metric``, ``X.shape[1]`` and
     ``self.space``, plus the configured hash function and hash source.
     ``hash_args`` is forwarded only when it is truthy. Each row is
     inserted as a plain Python list, then the index is rebuilt with
     ``rebuild(10)``.
     """
     index_options = {
         'hash_function': self.hash_function,
         'hash_source': self.hash_source,
     }
     # Leave hash_args out entirely when unset so the library default applies.
     if self.hash_args:
         index_options['hash_args'] = self.hash_args

     self.index = puffinn.Index(
         self.metric, X.shape[1], self.space, **index_options)

     for row in X:
         self.index.insert(row.tolist())
     self.index.rebuild(10)
Exemplo n.º 2
0
 def fit(self, X):
     """Create ``self.index`` as a ``puffinn.Index`` and insert every item of X.

     The index is constructed from ``self.metric``, ``len(X[0])`` and
     ``self.space``, plus the configured hash function and hash source.
     ``hash_args`` is forwarded only when it is truthy. Items are converted
     with ``tolist()`` only for the 'angular' metric; for any other metric
     they are inserted as given. Finally the index is rebuilt with
     ``rebuild(10)``.
     """
     index_options = {
         'hash_function': self.hash_function,
         'hash_source': self.hash_source,
     }
     # Leave hash_args out entirely when unset so the library default applies.
     if self.hash_args:
         index_options['hash_args'] = self.hash_args

     self.index = puffinn.Index(
         self.metric, len(X[0]), self.space, **index_options)

     for item in X:
         self.index.insert(
             item.tolist() if self.metric == 'angular' else item)
     self.index.rebuild(10)
Exemplo n.º 3
0
    def fit(self, X):
        """Create ``self.index`` as a ``puffinn.Index`` and insert every item of X.

        For the 'angular' metric the dimension passed to the index is
        ``len(X[0])``. For any other metric it is the largest value found in
        any item plus one, and never less than 0. ``hash_args`` is forwarded
        only when it is truthy. Every item is inserted as a plain Python
        list and the index is then rebuilt.
        """
        if self.metric == 'angular':
            dimensions = len(X[0])
        else:
            # Seeding with 0 keeps the result at 0 for an empty X.
            dimensions = max([0, *(max(item) + 1 for item in X)])

        index_options = {
            'hash_function': self.hash_function,
            'hash_source': self.hash_source,
        }
        # Leave hash_args out entirely when unset so the library default applies.
        if self.hash_args:
            index_options['hash_args'] = self.hash_args

        self.index = puffinn.Index(
            self.metric, dimensions, self.space, **index_options)

        for item in X:
            self.index.insert(item.tolist())
        self.index.rebuild()
Exemplo n.º 4
0
    def fit(self, X, y=None) -> PuffinnLSH:
        """ Validate the input, build a puffinn LSH index and insert the rows of X.

        Parameters
        ----------
        X: np.array
            Data to be indexed
        y: any
            Optional. When given, it is validated together with X and
            stored in ``y_train_``; it is not inserted into the index.

        Returns
        -------
        self: PuffinnLSH
            This instance, with the built index stored in ``index_``
        """
        if y is not None:
            X, y = check_X_y(X, y)
            self.y_train_ = y
        else:
            X = check_array(X)

        # Unknown metric names are replaced by euclidean, with a warning
        if self.metric not in self.valid_metrics:
            warnings.warn(
                f'Invalid metric "{self.metric}". Using "euclidean" instead')
            self.metric = 'euclidean'

        # Translate the metric name via metric_map; unmapped names pass through
        try:
            effective = self.metric_map[self.metric]
        except KeyError:
            effective = self.metric
        self.effective_metric = effective

        # Reduce default memory consumption for unit tests
        if "pytest" in sys.modules:
            self.memory = 3 * 1024**2

        # Construct the index
        lsh_index = puffinn.Index(self.effective_metric, X.shape[1], self.memory)

        # Wrap the rows in a progress bar only when verbose output is requested
        rows = tqdm(X, desc='Indexing', total=len(X)) if self.verbose else X
        for row in rows:
            lsh_index.insert(row.tolist())
        lsh_index.rebuild(num_threads=self.n_jobs)

        self.index_ = lsh_index
        self.X_train_ = X  # remove, once we can retrieve vectors from the index itself

        return self
Exemplo n.º 5
0
    def fit(self, X, y=None):
        """ Validate the input, build a puffinn LSH index and insert the rows of X.

        Parameters
        ----------
        X: np.array
            Data to be indexed
        y: any
            Optional. When given, it is validated together with X and
            stored in ``y_train_``; it is not inserted into the index.

        Returns
        -------
        self: Puffinn
            This instance, with the built index stored in ``index_``, the
            number of indexed rows in ``n_indexed_`` and the per-row 2-norms
            of X (as a column) in ``X_indexed_norm_``
        """
        if y is not None:
            X, y = check_X_y(X, y)
            self.y_train_ = y
        else:
            X = check_array(X)

        # Unknown metric names are replaced by euclidean, with a warning
        if self.metric not in self.valid_metrics:
            warnings.warn(
                f'Invalid metric "{self.metric}". Using "euclidean" instead')
            self.metric = 'euclidean'

        # Translate the metric name via metric_map; unmapped names pass through
        try:
            effective = self.metric_map[self.metric]
        except KeyError:
            effective = self.metric
        self._effective_metric = effective

        # Larger memory means many iterations (time-recall trade-off).
        # The budget scales with the number of entries in X and has a
        # floor of 1024**2; a user-supplied memory can only raise it.
        data_based_budget = np.multiply(*X.shape) * 8 * 500
        memory = max(data_based_budget, 1024**2)
        if self.memory is not None:
            memory = max(self.memory, memory)

        # Construct the index
        lsh_index = puffinn.Index(self._effective_metric, X.shape[1], memory)

        # The progress bar is shown only when verbose output is requested
        for row in tqdm(X, desc='Indexing', disable=not self.verbose):
            lsh_index.insert(row.tolist())
        lsh_index.rebuild()

        self.index_ = lsh_index
        self.n_indexed_ = X.shape[0]
        row_norms = np.linalg.norm(X, ord=2, axis=1)
        self.X_indexed_norm_ = row_norms.reshape(-1, 1)

        return self
Exemplo n.º 6
0
def create_index():
    """Construct a puffinn 'angular' index, print it and return True.

    The index is created with the fixed arguments ``('angular', 10, 1024**2)``
    and is not returned to the caller.
    """
    memory_limit = 1024**2
    angular_index = puffinn.Index('angular', 10, memory_limit)
    print(angular_index)
    return True