示例#1
0
 def build_item_projector(self, v):
     """Build and store the left/right item projectors derived from ``v``.

     Nothing happens when ``self.item_cholesky_factor`` is ``None``.
     Otherwise ``self.factors['items_projector_left']`` receives
     ``factor.T.solve(v)`` and ``self.factors['items_projector_right']``
     receives ``factor.dot(v)``; both steps are timed with ``track_time``.
     """
     factor = self.item_cholesky_factor
     if factor is None:
         return  # no Cholesky factor available, so there is nothing to build

     if self.verbose:
         print(
             f'Building {self.data.fields.itemid} projector for {self.method}'
         )

     solve_msg = Template('    Solving triangular system: $time')
     with track_time(verbose=self.verbose, message=solve_msg):
         left_projector = factor.T.solve(v)
         self.factors['items_projector_left'] = left_projector

     apply_msg = Template('    Applying Cholesky factor: $time')
     with track_time(verbose=self.verbose, message=apply_msg):
         right_projector = factor.dot(v)
         self.factors['items_projector_right'] = right_projector
示例#2
0
    def build(self, *args, **kwargs):
        """Train the model with ``self.optimizer`` and store the two factors.

        The training matrix is requested in COO format with ``'f8'`` dtype
        and handed to the optimizer as an ``(rows, cols, values)`` tuple
        together with per-row / per-column non-zero counts. Extra positional
        and keyword arguments are forwarded to the optimizer unchanged.

        Side effects: resets ``self.rmse_history`` and
        ``self.iterations_time`` (both are passed to the optimizer as
        ``iter_errors`` / ``iter_time``) and writes the returned factors into
        ``self.factors`` under the user and item field names.
        """
        train = self.get_training_matrix(sparse_format='coo', dtype='f8')
        rows, cols = train.nonzero()
        triplets = (rows, cols, train.data)
        nnz_per_axis = (train.getnnz(axis=1), train.getnnz(axis=0))

        # positional hyper-parameters, in the order the optimizer expects them
        hyperparams = (self.rank, self.learn_rate, self.sigma,
                       self.num_epochs, self.tolerance)

        # fresh containers for this run; the optimizer receives these objects
        self.rmse_history = []
        self.iterations_time = []
        reporting = {
            'seed': self.seed,
            'verbose': self.show_rmse,
            'iter_errors': self.rmse_history,
            'iter_time': self.iterations_time,
        }

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            user_factors, item_factors = self.optimizer(
                triplets, train.shape, nnz_per_axis, *hyperparams, *args,
                **kwargs, **reporting)

        fields = self.data.fields
        self.factors[fields.userid] = user_factors
        self.factors[fields.itemid] = item_factors
示例#3
0
    def build(self):
        """Fit the model via ``LCE`` and store the resulting factors.

        Stacks ``self.item_data`` into an item-features sparse matrix, builds
        the matrix ``A`` with ``construct_A`` and runs ``LCE`` under
        ``track_time``. Stores ``Hu.T`` under the user field, ``W`` under the
        item field and ``Hs.T`` under ``'item_features'`` in ``self.factors``;
        the feature labels go to ``self.feature_labels``.
        """
        # item-features sparse matrix and the labels of its columns
        features, labels = stack_features(self.item_data, normalize=False)
        # item-user sparse matrix (transposed training matrix)
        item_user = self.get_training_matrix().T

        # neighbourhood size is capped by sqrt of the number of feature rows
        n_neighbours = min(self.max_neighbours,
                           int(math.sqrt(features.shape[0])))
        adjacency = construct_A(features,
                                n_neighbours,
                                binary=self.binary_features)

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            solver_options = dict(k=self.rank,
                                  alpha=self.alpha,
                                  beta=self.beta,
                                  lamb=self.regularization,
                                  epsilon=self.tolerance,
                                  maxiter=self.max_iterations,
                                  seed=self.seed,
                                  verbose=self.show_error)
            W, Hu, Hs = LCE(features, item_user, adjacency, **solver_options)

        fields = self.data.fields
        self.factors[fields.userid] = Hu.T
        self.factors[fields.itemid] = W
        self.factors['item_features'] = Hs.T
        self.feature_labels = labels
示例#4
0
    def build(self):
        """Compute the item-to-item matrix from the training data.

        The result is the product of the transposed training matrix with
        itself, with the diagonal zeroed out and explicit zeros removed; it
        is stored in ``self._i2i_matrix``. When ``self.implicit`` is set, the
        stored values of the training matrix are first replaced by their
        signs.
        """
        train = self.get_training_matrix()
        if self.implicit:
            # np.sign (rather than a plain binarization) keeps negative
            # feedback distinguishable from positive one
            train.data = np.sign(train.data)

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            item_links = train.T.dot(train)  # gives CSC format
            item_links.setdiag(0)  # an item must not be linked to itself
            item_links.eliminate_zeros()

        self._i2i_matrix = item_links
示例#5
0
    def _update_cholesky_inplace(self, entity):
        """Update the stored Cholesky factor of ``entity`` in place.

        Fetches the relations matrix for ``entity`` from ``self.data`` and
        calls ``update_inplace`` on ``self._cholesky[entity]`` with it and
        ``beta = (1 - w) / w``, where ``w`` is ``self.features_weight``.

        Raises:
            NotImplementedError: when ``self._sparse_mode`` is falsy.
        """
        similarity = self.data.get_relations_matrix(entity)
        if not self._sparse_mode:
            raise NotImplementedError

        w = self.features_weight
        beta = (1.0 - w) / w

        if self.verbose:
            notice = 'Updating Cholesky decomposition inplace for {} similarity'
            print(notice.format(entity))

        timer_msg = Template('    Cholesky decomposition update time: $time')
        with track_time(verbose=self.verbose, message=timer_msg):
            self._cholesky[entity].update_inplace(similarity, beta)
示例#6
0
    def build(self):
        """Create a ``LightFM`` model and fit it on the training matrix.

        The fitting routine is looked up on the model by the name stored in
        ``self.fit_method``. When ``self.item_features`` /
        ``self.user_features`` are provided, they are reindexed against the
        corresponding index's ``old`` values (missing rows are filled with
        empty lists) and stacked with ``stack_features``; the stacked
        matrices and their labels are stored on the instance. The feature
        matrices read at fit time are whatever ``self._item_features_csr`` /
        ``self._user_features_csr`` hold at that point.
        """
        model_config = dict(no_components=self.rank,
                            item_alpha=self.item_alpha,
                            user_alpha=self.user_alpha,
                            loss=self.loss,
                            learning_rate=self.learning_rate,
                            learning_schedule=self.learning_schedule,
                            max_sampled=self.max_sampled,
                            random_state=self.seed)
        self._model = LightFM(**model_config)
        fit = getattr(self._model, self.fit_method)

        # COO layout, as required by LightFM
        matrix = self.get_training_matrix(sparse_format='coo')

        # the item index may or may not expose a separate `training` part
        itemid_index = self.data.index.itemid
        item_index = getattr(itemid_index, 'training', itemid_index)

        if self.item_features is not None:
            aligned_items = self.item_features.reindex(item_index.old.values,
                                                       fill_value=[])
            stacked_items = stack_features(
                aligned_items,
                add_identity=self.item_identity,
                normalize=self.normalize_item_features,
                dtype='f4')
            self._item_features_csr, self.item_features_labels = stacked_items

        if self.user_features is not None:
            user_index = self.data.index.userid.training
            aligned_users = self.user_features.reindex(user_index.old.values,
                                                       fill_value=[])
            stacked_users = stack_features(
                aligned_users,
                add_identity=self.user_identity,
                normalize=self.normalize_user_features,
                dtype='f4')
            self._user_features_csr, self.user_features_labels = stacked_users

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            fit(matrix,
                item_features=self._item_features_csr,
                user_features=self._user_features_csr,
                **self.fit_params)
示例#7
0
 def build(self):
     """Create an ``implicit`` BPR model and fit it on the training matrix.

     The model is stored in ``self._model``. The user-by-item training
     matrix is transposed before fitting, and the fit is timed with
     ``track_time``.
     """
     model_config = dict(factors=self.rank,
                         learning_rate=self.learning_rate,
                         regularization=self.regularization,
                         iterations=self.num_epochs,
                         num_threads=self.num_threads)
     self._model = implicit.bpr.BayesianPersonalizedRanking(**model_config)
     # random_state is not accepted by the constructor yet, so it is
     # attached as a plain attribute for future releases
     self._model.random_state = self.random_state

     user_item = self.get_training_matrix()  # user_by_item sparse matrix
     with track_time(self.training_time,
                     verbose=self.verbose,
                     model=self.method):
         # implicit expects an item_by_user matrix, hence the transpose
         item_user = user_item.T
         self._model.fit(item_user, show_progress=self.show_progress)
示例#8
0
    def build(self):
        """Decompose the training tensor with ``hooi`` and store the result.

        The data is exported via ``self.data.to_coo(tensor_mode=True)`` as
        indices, values and shape. The four outputs of ``hooi`` are stored in
        ``self.factors`` under the user, item and feedback field names and
        under ``'core'``.
        """
        indices, values, shape = self.data.to_coo(tensor_mode=True)

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            decomposition = hooi(indices,
                                 values,
                                 shape,
                                 self.mlrank,
                                 growth_tol=self.growth_tol,
                                 num_iters=self.num_iters,
                                 verbose=self.show_output,
                                 parallel_ttm=self.parallel_ttm,
                                 seed=self.seed)
            users_part, items_part, feedback_part, core_part = decomposition

        fields = self.data.fields
        self.factors[fields.userid] = users_part
        self.factors[fields.itemid] = items_part
        self.factors[fields.feedback] = feedback_part
        self.factors['core'] = core_part
示例#9
0
    def _update_cholesky_factor(self, entity):
        """Recompute and store the Cholesky factor for ``entity``.

        When ``self.data`` has no relations matrix for ``entity``, ``None``
        is stored in ``self._cholesky[entity]``. Otherwise the matrix is
        decomposed with ``cholesky_decomp_sparse`` using
        ``beta = (1 - w) / w`` (``w`` is ``self.features_weight``) and the
        result is wrapped into ``CholeskyFactor``.

        Raises:
            NotImplementedError: when a relations matrix exists but
                ``self._sparse_mode`` is falsy.
        """
        similarity = self.data.get_relations_matrix(entity)
        if similarity is None:
            self._cholesky[entity] = None
            return

        if not self._sparse_mode:
            raise NotImplementedError
        decompose, mode = cholesky_decomp_sparse, 'sparse'

        w = self.features_weight
        beta = (1.0 - w) / w

        if self.verbose:
            notice = 'Performing {} Cholesky decomposition for {} similarity'
            print(notice.format(mode, entity))

        timer_msg = Template('Cholesky decomposition computation time: $time')
        with track_time(verbose=self.verbose, message=timer_msg):
            decomposition = decompose(similarity, beta=beta)
            self._cholesky[entity] = CholeskyFactor(decomposition)
示例#10
0
    def build(self):
        """Create an ``implicit`` ALS model and fit it on the training matrix.

        The stored values of the user-by-item training matrix are replaced
        by the output of ``self.confidence`` before fitting. The matrix is
        transposed for the fit, which is timed with ``track_time``.
        """
        model_config = dict(factors=self.rank,
                            regularization=self.regularization,
                            iterations=self.num_epochs,
                            num_threads=self.num_threads)
        self._model = implicit.als.AlternatingLeastSquares(**model_config)

        user_item = self.get_training_matrix()  # user_by_item sparse matrix
        user_item.data = self.confidence(user_item.data,
                                         alpha=self.alpha,
                                         weight=self.weight_func,
                                         epsilon=self.epsilon)

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            # implicit expects an item_by_user matrix, hence the transpose
            item_user = user_item.T
            self._model.fit(item_user)
示例#11
0
    def build(self, operator=None, return_factors='vh'):
        """Compute a truncated SVD of rank ``self.rank`` and store its parts.

        Args:
            operator: object to decompose with ``svds``; when ``None`` the
                training matrix (requested as ``np.float64``) is used.
            return_factors: forwarded to ``svds`` as
                ``return_singular_vectors``.

        Each returned part that is not ``None`` has the order of its
        components reversed (presumably to get descending singular values;
        confirm against the ``svds`` ordering). The item factors are also
        transposed. Results go to ``self.factors`` under the user field, the
        item field and ``'singular_values'``.
        """
        svd_matrix = (operator if operator is not None else
                      self.get_training_matrix(dtype=np.float64))

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            user_factors, sigma, item_factors = svds(
                svd_matrix,
                k=self.rank,
                return_singular_vectors=return_factors)

        # flip the component order; parts that svds did not return stay None
        if user_factors is not None:
            user_factors = np.ascontiguousarray(user_factors[:, ::-1])
        if item_factors is not None:
            flipped_items = np.ascontiguousarray(item_factors[::-1, :])
            item_factors = flipped_items.T
        if sigma is not None:
            sigma = np.ascontiguousarray(sigma[::-1])

        fields = self.data.fields
        self.factors[fields.userid] = user_factors
        self.factors[fields.itemid] = item_factors
        self.factors['singular_values'] = sigma
示例#12
0
def mf_sgd_boilerplate(interactions,
                       shape,
                       nonzero_count,
                       rank,
                       lrate,
                       lambd,
                       num_epochs,
                       tol,
                       sgd_sweep_func=None,
                       transform=None,
                       transform_params=None,
                       adjust_gradient=None,
                       adjustment_params=None,
                       seed=None,
                       verbose=False,
                       iter_errors=None,
                       iter_time=None):
    """Run epoch-wise SGD sweeps for a two-factor matrix factorization.

    Both factor matrices are initialized from a normal distribution with
    scale 0.1, then ``sgd_sweep_func`` is called once per epoch until either
    ``num_epochs`` sweeps were made or the relative change of the error
    reported by the sweep drops below ``tol``.

    Args:
        interactions: tuple unpacked into the leading positional arguments
            of the sweep function; the length of its last element is used as
            the number of observations when computing RMSE.
        shape: ``(nrows, ncols)`` of the factorized matrix.
        nonzero_count: tuple unpacked into the sweep function arguments.
        rank: number of columns in each factor matrix.
        lrate: learning rate forwarded to the sweep function.
        lambd: regularization value forwarded to the sweep function.
        num_epochs: maximum number of sweeps.
        tol: threshold on the relative error change used for early stopping.
        sgd_sweep_func: sweep routine; defaults to ``generalized_sgd_sweep``.
        transform: forwarded to the sweep function; defaults to ``identity``.
        transform_params: forwarded to the sweep function; defaults to
            ``((), ())``.
        adjust_gradient: forwarded to the sweep function; defaults to
            ``identity``. For ``adagrad``, ``rmsprop``, ``gnprop`` and
            ``adam`` the ``adjustment_params`` are replaced by freshly
            zeroed buffers at the start of every epoch.
        adjustment_params: forwarded to the sweep function; defaults to
            ``((), ())``.
        seed: seed for a dedicated ``RandomState``; with ``None`` the global
            ``np.random`` state is used.
        verbose: print epoch number and RMSE after every sweep.
        iter_errors: optional list that receives the RMSE of every epoch.
        iter_time: optional list extended with per-epoch timings collected
            by ``track_time`` once the loop is over.

    Returns:
        Tuple ``(row_factors, col_factors)`` with shapes ``(nrows, rank)``
        and ``(ncols, rank)``; presumably updated in place by the sweep
        function (confirm against its implementation).
    """
    assert isinstance(interactions, tuple)  # required by numba
    assert isinstance(nonzero_count, tuple)  # required by numba

    nrows, ncols = shape
    row_shp = (nrows, rank)
    col_shp = (ncols, rank)

    rnds = np.random if seed is None else np.random.RandomState(seed)
    row_factors = rnds.normal(scale=0.1, size=row_shp)
    col_factors = rnds.normal(scale=0.1, size=col_shp)

    sgd_sweep_func = sgd_sweep_func or generalized_sgd_sweep
    transform = transform or identity
    transform_params = transform_params or ((), ())
    adjust_gradient = adjust_gradient or identity
    adjustment_params = adjustment_params or ((), ())

    nnz = len(interactions[-1])
    last_err = np.finfo('f8').max
    training_time = []
    for epoch in range(num_epochs):
        # adaptive schemes start every epoch from zeroed accumulators
        if adjust_gradient in [adagrad, rmsprop]:
            adjustment_params = ((np.zeros(row_shp, dtype='f8'), ),
                                 (np.zeros(col_shp, dtype='f8'), ))
        if adjust_gradient is gnprop:
            adjustment_params = ((np.zeros(nrows, dtype='f8'), ),
                                 (np.zeros(ncols, dtype='f8'), ))
        if adjust_gradient is adam:
            adjustment_params = ((np.zeros(row_shp, dtype='f8'),
                                  np.zeros(row_shp, dtype='f8'),
                                  np.zeros(nrows, dtype='intp')),
                                 (np.zeros(col_shp, dtype='f8'),
                                  np.zeros(col_shp, dtype='f8'),
                                  np.zeros(ncols, dtype='intp')))

        with track_time(training_time, verbose=False):
            new_err = sgd_sweep_func(*interactions, row_factors, col_factors,
                                     lrate, lambd, *nonzero_count, transform,
                                     transform_params, adjust_gradient,
                                     adjustment_params)

        if last_err != 0:
            refined = abs(last_err - new_err) / last_err
        else:
            # The previous sweep reported exactly zero error, so the relative
            # change is undefined (a plain division would raise
            # ZeroDivisionError). Staying at zero counts as converged, any
            # departure from zero counts as an unbounded change.
            refined = 0.0 if new_err == 0 else float('inf')
        last_err = new_err
        rmse = sqrt(new_err / nnz)
        if iter_errors is not None:
            iter_errors.append(rmse)
        if verbose:
            print('Epoch: {}. RMSE: {}'.format(epoch, rmse))
        if refined < tol:
            break
    if iter_time is not None:
        iter_time.extend(training_time)
    return row_factors, col_factors