def format_URM_positive_user_compressed(URM: csr_matrix):
    """
    Format positive interactions of an URM in the way that is needed for the FM model.
    Here, however, users information are grouped w.r.t. items, meaning that, we will have:
    - We have #warm_items @row
    - We have #users+items+1 @cols
    - We have #(interactions)+(warm_items*2) @data

    Each row is representing a warm item and all users that interacted with that item are stored in that row.

    :param URM: URM to be preprocessed
    :return: preprocessed URM in sparse matrix csr format
    """

    warm_items_mask = np.ediff1d(URM.tocsc().indptr) > 0
    warm_items = np.arange(URM.shape[1])[warm_items_mask]

    new_train = URM.copy().tocoo()
    fm_matrix = coo_matrix((warm_items.size, URM.shape[0] + URM.shape[1] + 1),
                           dtype=np.int8)

    # Index offset
    item_offset = URM.shape[0]

    # Set up initial vectors (integer dtype so they can be used as COO indices)
    row_v = np.zeros(new_train.data.size + (warm_items.size * 2), dtype=np.int32)
    col_v = np.zeros(new_train.data.size + (warm_items.size * 2), dtype=np.int32)
    data_v = np.zeros(new_train.data.size + (warm_items.size * 2))  # Filled with 1s in the loop

    # For all the items, set up its content
    j = 0  # Index to scan and modify the vectors
    URM_train_csc = URM.copy().tocsc()
    for i, item in enumerate(warm_items):
        # Find all users who liked that item
        users_who_liked_item = URM_train_csc[:, item].indices
        offset = users_who_liked_item.size
        if offset > 0:
            col_v[j:j + offset] = users_who_liked_item
            row_v[j:j + offset] = i
            data_v[j:j + offset] = 1

            col_v[j + offset] = item + item_offset
            row_v[j + offset] = i
            data_v[j + offset] = 1

            col_v[j + offset + 1] = fm_matrix.shape[1] - 1
            row_v[j + offset + 1] = i
            data_v[j + offset + 1] = 1

            j = j + offset + 2
        else:
            raise RuntimeError("Illegal state")

    # Setting new information
    fm_matrix.row = row_v
    fm_matrix.col = col_v
    fm_matrix.data = data_v

    return fm_matrix.tocsr()
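A minimal usage sketch of the layout this produces, on a toy URM invented for illustration (assumes the function above and the usual numpy/scipy imports are in scope):

import numpy as np
from scipy.sparse import csr_matrix

URM_toy = csr_matrix(np.array([[1, 0],
                               [1, 1],
                               [0, 0]], dtype=np.int8))  # toy matrix: 3 users x 2 items
fm = format_URM_positive_user_compressed(URM_toy)
print(fm.shape)      # (2, 6): one row per warm item, #users + #items + 1 columns
print(fm.toarray())
# [[1 1 0 1 0 1]     item 0: users 0 and 1, then the item column, then the rating column
#  [0 1 0 0 1 1]]    item 1: user 1, then the item column, then the rating column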
Example #2
def search_hyperparameter_to_recommenders(urm_train_split: csr_matrix,
                                          urm_validation_split: csr_matrix,
                                          urm_test_split: csr_matrix,
                                          urm_impressions: csr_matrix,
                                          recommender: Type[BaseRecommender]):
    URM_train = urm_train_split.copy()
    URM_validation = urm_validation_split.copy()
    URM_test = urm_test_split.copy()
    URM_impressions = urm_impressions.copy()

    if any(not isspmatrix_csr(split) for split in
           [URM_train, URM_validation, URM_test, URM_impressions]):
        raise ValueError("The matrices are not all CSR matrices.")

    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    if recommender.RECOMMENDER_NAME == Random.RECOMMENDER_NAME:
        evaluator_validation = EvaluatorHoldout(URM_validation,
                                                cutoff_list=[10],
                                                parallel=False)

        evaluator_test = EvaluatorHoldout(URM_test,
                                          cutoff_list=[5, 10, 20],
                                          parallel=False)
    else:
        evaluator_validation = EvaluatorHoldout(URM_validation,
                                                cutoff_list=[10],
                                                parallel=True,
                                                num_workers=NUM_WORKERS)

        evaluator_test = EvaluatorHoldout(URM_test,
                                          cutoff_list=[5, 10, 20],
                                          parallel=True,
                                          num_workers=NUM_WORKERS)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=METRIC_TO_OPTIMIZE,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=EXPERIMENTS_FOLDER_PATH,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=NUM_CASES,
        n_random_starts=NUM_RANDOM_STARTS,
        URM_impressions=URM_impressions)

    try:
        runParameterSearch_Collaborative_partial(recommender)
    except Exception as e:
        logging.exception(f"Exception on recommender {recommender}: {e}")
Example #3
def _distance_to_connectivity(distances: csr_matrix,
                              *,
                              max_value: float = None) -> csr_matrix:
    """Get a weighted adjacency matrix from a distance matrix.

    A distance of 1 (in the sparse matrix) corresponds to an actual distance of 0.
    An actual distance of 0 corresponds to a connectivity of 1.

    A distance of 0 (in the sparse matrix) corresponds to an actual distance of
    infinity. An actual distance of infinity corresponds to a connectivity of 0.

    Parameters
    ----------
    distances
        sparse distance matrix
    max_value
        Used to normalize the distances, i.e. distances are divided by
        this value. If not specified, the maximum of the input matrix
        is used.
    """
    if not isinstance(distances, csr_matrix):
        raise ValueError("Distance matrix must be in CSR format.")

    if max_value is None:
        max_value = np.max(distances)

    connectivities = distances.copy()
    d = connectivities.data - 1

    # structure of the matrix stays the same, we can safely change the data only
    connectivities.data = (max_value - d) / max_value
    connectivities.eliminate_zeros()

    return connectivities
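A quick check of the encoding on a toy matrix (stored value 1 encodes an actual distance of 0):

import numpy as np
from scipy.sparse import csr_matrix

dist = csr_matrix(np.array([[0.0, 1.0],
                            [3.0, 0.0]]))  # toy matrix; stored 3.0 = actual distance 2
conn = _distance_to_connectivity(dist)
print(conn.toarray())
# [[0.         1.        ]
#  [0.33333333 0.        ]]   # (max_value - (3 - 1)) / max_value with max_value = 3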
Example #4
def sparse_clip(x: csr_matrix, min_, max_, inplace=False):
    if inplace:
        out = x
    else:
        out = x.copy()
    out.data = np.clip(x.data, min_, max_)
    return out
Example #5
def dense_sparse_mul(a: np.ndarray, b: csr_matrix, inplace=False):
    if inplace:
        out = b
    else:
        out = b.copy()
    out.data *= a[b.indices]
    return out
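Usage sketch for the two helpers above, on toy inputs:

import numpy as np
from scipy.sparse import csr_matrix

X = csr_matrix(np.array([[0.0, 5.0],
                         [-2.0, 1.0]]))
print(sparse_clip(X, 0.0, 3.0).toarray())  # [[0. 3.] [0. 1.]]

# dense_sparse_mul scales each stored value by the dense entry of its
# column (in CSR, b.indices holds column indices)
w = np.array([10.0, 100.0])
print(dense_sparse_mul(w, X).toarray())    # [[0. 500.] [-20. 100.]]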
Example #6
def sparse_reshape(adj_matrix: sp.csr_matrix,
                   shape: tuple = None) -> sp.csr_matrix:
    """

    Parameters
    ----------
        adj_matrix: Scipy matrix or Numpy array or a list of them 
            Single or a list of Scipy sparse matrices or Numpy arrays.
        shape: new shape.

    Returns
    -------
        Single or a list of Scipy sparse matrix or Numpy matrices.

    See also
    ----------
        graphgallery.functional.SparseReshape          

    """
    if shape is None:
        return adj_matrix.copy()
    else:
        M1, N1 = shape
        M2, N2 = adj_matrix.shape
        assert (M1 >= M2) and (N1 >= N2)
        edge_index, edge_weight = sparse_adj_to_edge(adj_matrix)
        return sp.csr_matrix((edge_weight, edge_index), shape=shape)
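Usage sketch, padding a 2x2 adjacency matrix to 3x3 (assumes graphgallery's sparse_adj_to_edge is in scope, as required by the function above):

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0, 1],
                              [1, 0]]))
bigger = sparse_reshape(adj, shape=(3, 3))
print(bigger.shape)      # (3, 3)
print(bigger.toarray())  # same two edges, plus an empty third row and column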
Example #7
    def augmentURM(cls, URM_train: csr_matrix, W_sparse: csr_matrix,
                   threshold_interactions: int, threshold_similarity: float):
        """
        Augmentation of the URM train.

        :param threshold_interactions: here a threshold on the similarity is considered.
        Similarity matrix W_sparse will be considered for this purpose
        :param threshold_similarity: threshold used to insert a new row.
        In this case it is specified as the minimum number of interactions required to insert a new
        row in the URM train
        :param W_sparse: similarity matrix
        :param URM_train: URM train that will be augmented
        :return: a csr_matrix with augmented interactions according to the threshold
        """
        print("Augmenting URM")
        URM_train = URM_train.copy()

        # Number of common interactions between each pair of users
        count_W_sparse = URM_train.dot(URM_train.transpose())

        # Selecting new candidate user pairs
        print("Selecting new candidates")
        users = np.arange(URM_train.shape[0])
        new_rows_list = []
        for i in range(0, users.size):
            if i % 5000 == 0:
                print("{} of {} done".format(i, users.size))
            candidates = count_W_sparse[i].indices  # users candidates
            data = count_W_sparse[i].data  # data for the candidates

            for j, candidate in enumerate(candidates):
                if (candidate > i
                        and data[j] > threshold_interactions
                        and W_sparse[i, candidate] > threshold_similarity):
                    new_rows_list.append([i, candidate])

        print("Candidate list size: {}".format(len(new_rows_list)))

        # Creating the new matrix
        print("Creating new URM...", end="")
        new_URM = None
        for candidate in new_rows_list:
            new_row = URM_train[[candidate[0], candidate[1]]].sum(axis=0)
            new_row = csr_matrix(new_row)
            new_row.data[new_row.data > 1] = 1

            if new_URM is None:
                new_URM = new_row
            else:
                new_URM = vstack([new_URM, new_row], format="csr")

        if new_URM is None:
            new_URM = URM_train
        else:
            new_URM = vstack([URM_train, new_URM], format="csr")

        print("Done")

        return new_URM
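A hypothetical usage sketch (`Augmenter` is a placeholder name for the enclosing class, which is not shown above; the toy matrices are invented):

import numpy as np
from scipy.sparse import csr_matrix

URM_train = csr_matrix(np.array([[1, 1, 0],
                                 [1, 1, 1],
                                 [0, 0, 1]], dtype=np.float64))
W_sparse = URM_train.dot(URM_train.T)  # stand-in similarity matrix
augmented = Augmenter.augmentURM(URM_train, W_sparse,  # hypothetical class name
                                 threshold_interactions=1,
                                 threshold_similarity=1.0)
print(augmented.shape)  # (4, 3): users 0 and 1 pass both thresholds, one merged row added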
Example #8
def print_results(urm_test_split: csr_matrix):

    urm_test = urm_test_split.copy()

    n_test_users = np.sum(np.ediff1d(urm_test.indptr) >= 1)

    result_loader = ResultFolderLoader(EXPERIMENTS_FOLDER_PATH,
                                       base_algorithm_list=None,
                                       other_algorithm_list=None,
                                       KNN_similarity_list=KNN_SIMILARITY_LIST,
                                       ICM_names_list=None,
                                       UCM_names_list=None)

    article_metrics_latex_results_filename = os.path.join(RESULTS_EXPORT_FOLDER_PATH,
                                                          "article_metrics_latex_results.txt")
    result_loader.generate_latex_results(article_metrics_latex_results_filename,
                                         metrics_list=["RECALL", "MAP"],
                                         cutoffs_list=METRICS_CUTOFF_TO_REPORT_LIST,
                                         table_title=None,
                                         highlight_best=True)

    beyond_accuracy_metrics_latex_results_filename = os.path.join(RESULTS_EXPORT_FOLDER_PATH,
                                                                  "beyond_accuracy_metrics_latex_results.txt")
    result_loader.generate_latex_results(beyond_accuracy_metrics_latex_results_filename,
                                         metrics_list=["DIVERSITY_MEAN_INTER_LIST",
                                                       "DIVERSITY_HERFINDAHL",
                                                       "COVERAGE_ITEM",
                                                       "DIVERSITY_GINI",
                                                       "SHANNON_ENTROPY"],
                                         cutoffs_list=OTHERS_CUTOFF_TO_REPORT_LIST,
                                         table_title=None,
                                         highlight_best=True)

    all_metrics_latex_results_filename = os.path.join(RESULTS_EXPORT_FOLDER_PATH,
                                                      "all_metrics_latex_results.txt")
    result_loader.generate_latex_results(all_metrics_latex_results_filename,
                                         metrics_list=["PRECISION",
                                                       "RECALL",
                                                       "MAP",
                                                       "MRR",
                                                       "NDCG",
                                                       "F1",
                                                       "HIT_RATE",
                                                       "ARHR",
                                                       "NOVELTY",
                                                       "DIVERSITY_MEAN_INTER_LIST",
                                                       "DIVERSITY_HERFINDAHL",
                                                       "COVERAGE_ITEM",
                                                       "DIVERSITY_GINI",
                                                       "SHANNON_ENTROPY"],
                                         cutoffs_list=OTHERS_CUTOFF_TO_REPORT_LIST,
                                         table_title=None,
                                         highlight_best=True)

    time_latex_results_filename = os.path.join(RESULTS_EXPORT_FOLDER_PATH,
                                               "time_latex_results.txt")
    result_loader.generate_latex_time_statistics(time_latex_results_filename,
                                                 n_evaluation_users=n_test_users,
                                                 table_title=None)
Example #9
    def _scale_X(cls, X: sps.csr_matrix, scheme: IALSConfigScaling,
                 epsilon: float) -> sps.csr_matrix:
        if scheme is IALSConfigScaling.none:
            return X
        else:
            X_ret: sps.csr_matrix = X.copy()
            X_ret.data = np.log(1 + X_ret.data / epsilon)
            return X_ret
Example #10
    def __init__(self, URM: sp.csr_matrix, ICM, exclude_seen=True):
        if not sp.isspmatrix_csr(URM):
            raise TypeError(f"We expected a CSR matrix, we got {type(URM)}")
        self.URM = URM.copy()
        self.ICM = ICM.copy()
        self.predicted_URM = None
        self.exclude_seen = exclude_seen
        self.recommendations = None
Example #11
def format_URM_positive_non_compressed(URM: csr_matrix):
    """
    Format positive interactions of an URM in the way that is needed for the FM model.
    - We have #num_ratings row
    - The last column with all the ratings (for implicit dataset it just a col full of 1
    - In each row there are 3 interactions: 1 for the user, 1 for the item, and 1 for the rating
    - Only positive samples are encoded here

    Note: this method works only for implicit dataset

    :param URM: URM to be preprocessed
    :return: csr_matrix containing the URM preprocessed in the described way
    """
    new_train = URM.copy().tocoo()
    fm_matrix = sps.coo_matrix(
        (URM.data.size, URM.shape[0] + URM.shape[1] + 1), dtype=np.int8)

    # Index offset
    item_offset = URM.shape[0]

    # Last col
    last_col = URM.shape[0] + URM.shape[1]

    # Set up initial vectors (integer dtype for the row/col index vectors)
    row_v = np.zeros(new_train.data.size * 3, dtype=np.int32)  # rows come as (i, i, i) for each rating
    col_v = np.zeros(new_train.data.size * 3, dtype=np.int32)  # the "hardest" one to set
    data_v = np.ones(new_train.data.size * 3)  # already correct, nothing to be added

    # Setting row vector
    for i in range(0, new_train.data.size):
        row_v[3 * i] = i
        row_v[(3 * i) + 1] = i
        row_v[(3 * i) + 2] = i

    # Setting col vector
    for i in range(0, new_train.data.size):
        # Retrieving information
        user = new_train.row[i]
        item = new_train.col[i]

        # Fixing col indices to be added to the new matrix
        user_index = user
        item_index = item + item_offset

        col_v[3 * i] = user_index
        col_v[(3 * i) + 1] = item_index
        col_v[(3 * i) + 2] = last_col

    # Setting new information
    fm_matrix.row = row_v
    fm_matrix.col = col_v
    fm_matrix.data = data_v

    return fm_matrix.tocsr()
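A toy layout check, assuming the function above and its imports are in scope:

import numpy as np
from scipy.sparse import csr_matrix

URM_toy = csr_matrix(np.array([[1, 0],
                               [0, 1]], dtype=np.int8))  # toy matrix: 2 users x 2 items
fm = format_URM_positive_non_compressed(URM_toy)
print(fm.shape)      # (2, 5): one row per rating, #users + #items + 1 columns
print(fm.toarray())
# [[1 0 1 0 1]       user 0, item 0, rating flag
#  [0 1 0 1 1]]      user 1, item 1, rating flag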
Example #12
    def get_sorted_best_item_indices(self, URM: sps.csr_matrix,
                                     target_column: np.ndarray,
                                     item_idx: int) -> np.ndarray:
        if self.sorted_indices is None:
            c_URM = URM.copy()
            c_URM.data **= 2
            # Var[X] = E[X^2] - E[X]^2, computed column-wise on the sparse matrix
            variances = np.array(
                c_URM.mean(axis=0) - np.power(URM.mean(axis=0), 2)).flatten()
            sorted_indices = np.argsort(variances)[::-1]
            return sorted_indices
        return self.sorted_indices
Example #13
def test_jaccard(X: sps.csr_matrix) -> None:
    rec = JaccardKNNRecommender(X, shrinkage=0, top_k=X.shape[1], n_threads=1)
    rec.learn()
    sim = rec.W.toarray()
    X_bin = X.copy()
    X_bin.sort_indices()
    X_bin.data[:] = 1
    manual = X_bin.T.toarray()  # I x U
    norm = manual.sum(axis=1)
    manual = manual.dot(manual.T)
    denom = norm[:, None] + norm[None, :] - manual + 1e-6
    denom[denom <= 1e-10] = 1e-10
    manual = manual / denom
    np.fill_diagonal(manual, 0)
    np.testing.assert_allclose(sim, manual)
Example #14
File: sparse.py  Project: ylytkin/myutils
def normalize_vectors(mx: sparse.csr_matrix, axis: int) -> sparse.csr_matrix:
    """Performs normalization of vectors (i.e. divide each vector
    by its corresponding Euclidean norm).

    Parameter `axis` can be 0 (column-vectors) or 1 (row-vectors)

    :param mx: sparse matrix
    :param axis: 0 or 1
    :return: sparse matrix
    """

    if axis not in {0, 1}:
        raise ValueError('Axis must be either 0 or 1.')

    mx = mx.copy().astype(np.float64)

    mx_norms = mx.copy()
    mx_norms.data **= 2
    mx_norms = mx_norms.sum(axis=axis).A.flatten()**0.5
    mx_norms = mx_norms[mx.nonzero()[1 - axis]]

    mx.data /= mx_norms

    return mx
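A quick check that row vectors come out with unit Euclidean norm:

import numpy as np
from scipy import sparse

mx = sparse.csr_matrix(np.array([[3.0, 4.0],
                                 [0.0, 2.0]]))
normed = normalize_vectors(mx, axis=1)
print(normed.toarray())
# [[0.6 0.8]
#  [0.  1. ]]
print(np.linalg.norm(normed.toarray(), axis=1))  # [1. 1.]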
Example #15
def add_UCM_info(fm_matrix: csr_matrix, UCM: csr_matrix, user_offset):
    """
    Given a matrix in the format needed to FM, it adds information concerning the UCM

    Note: no group by items should be applied in this case

    :param fm_matrix: matrix containing the dataset for FM models (its last column is the rating column)
    :param UCM: UCM information about users
    :param user_offset: starting column index for users in fm_matrix (should be 0)
    :return: new matrix containing also information about the UCM
    """
    fm_matrix_copy = fm_matrix.copy()
    user_fm_matrix = fm_matrix[:,
                               user_offset:user_offset + UCM.shape[0]].copy()
    UCM_fm_matrix = user_fm_matrix.dot(UCM)
    merged_fm = sps.hstack([fm_matrix_copy, UCM_fm_matrix], format="csr")
    return merged_fm
Example #16
def add_ICM_info(fm_matrix: csr_matrix, ICM: csr_matrix, item_offset):
    """
    Given a matrix in the format needed for FM, it adds information concerning the ICM

    Note: no group by users should be applied in this case

    :param fm_matrix: matrix containing the dataset for FM models (its last column is the rating column)
    :param ICM: ICM information about items
    :param item_offset: starting column index for items in fm_matrix (it should be the number of
                        users, i.e. shape[0] of the URM used to construct the fm_matrix)
    :return: new matrix integrating ICM data
    """
    fm_matrix_copy = fm_matrix.copy()
    item_fm_matrix = fm_matrix[:,
                               item_offset:item_offset + ICM.shape[0]].copy()
    ICM_fm_matrix = item_fm_matrix.dot(ICM)
    merged_fm = sps.hstack([fm_matrix_copy, ICM_fm_matrix], format="csr")
    return merged_fm
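Usage sketch chaining the FM formatting and the ICM enrichment (toy matrices, assuming both functions above are in scope):

import numpy as np
from scipy.sparse import csr_matrix

URM_toy = csr_matrix(np.array([[1, 0],
                               [0, 1]], dtype=np.int8))
ICM_toy = csr_matrix(np.array([[1, 0, 1],      # item 0 -> features 0 and 2
                               [0, 1, 0]]))    # item 1 -> feature 1
fm = format_URM_positive_non_compressed(URM_toy)
fm_with_icm = add_ICM_info(fm, ICM_toy, item_offset=URM_toy.shape[0])
print(fm_with_icm.shape)  # (2, 5 + 3): ICM feature columns appended on the right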
Example #17
def _reorder_empty(
        X: sparse.csr_matrix,
        rows: bool = True,
        cols: bool = True,
        copy: bool = True) -> Tuple[sparse.csr_matrix, np.ndarray, np.ndarray]:
    if copy:
        Y = X.copy()
    else:
        Y = X
    p_row = None
    p_col = None
    if rows:
        p_row = _perm_first_axis(Y)
    if cols:
        p_col = _perm_first_axis(Y.tocsc())

    Y = _apply_perm(Y, p_row=p_row, p_col=p_col)

    return Y, p_row, p_col
Example #18
    def precompute_best_item_indices(self, URM: sps.csr_matrix):
        URM = URM.copy()
        if self.feature_weighting == "BM25":
            URM = URM.astype(np.float32)
            URM = okapi_BM_25(URM)
            URM = check_matrix(URM, 'csr')

        elif self.feature_weighting == "TF-IDF":
            URM = URM.astype(np.float32)
            URM = TF_IDF(URM)
            URM = check_matrix(URM, 'csr')

        similarity = Compute_Similarity(URM,
                                        shrink=self.shrink,
                                        topK=self.topK,
                                        normalize=self.normalize,
                                        similarity="cosine")
        similarity_matrix = similarity.compute_similarity()
        self.sorted_indices = np.array(
            np.argsort(-similarity_matrix.todense(), axis=1))
Example #19
    def _initialize_parameters(self, X: csr_matrix):
        n, d = X.shape
        if self.use_biases:
            self.mu = X.data.mean()
        else:
            self.mu = 0
        if self.init_method == "svd":
            R = X.copy()
            if self.use_biases:
                R.data -= R.data.mean()
            u, s, vt = svds(R, k=self.K)
            self.U = u
            self.V = vt.T
        else:
            self.U = np.random.normal(scale=0.1, size=(n, self.K))
            self.V = np.random.normal(scale=0.1, size=(d, self.K))

        self.user_bias = np.zeros(n)
        self.item_bias = np.zeros(d)
        self.is_initialized = True
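The 'svd' branch in isolation, as a minimal sketch of the warm start on a toy ratings matrix:

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

X = csr_matrix(np.array([[5.0, 3.0, 0.0],
                         [4.0, 0.0, 1.0],
                         [0.0, 2.0, 4.0]]))
R = X.copy()
R.data -= R.data.mean()  # remove the global mean, as in the use_biases case above
u, s, vt = svds(R, k=2)  # K = 2 latent factors
U, V = u, vt.T           # initial user and item factors
print(U.shape, V.shape)  # (3, 2) (3, 2)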
Example #20
def bandedLU(M: csr, ml, mu):
    """
    Computes standard LU decomposition of a class 'scipy.sparse.csr.csr_matrix'
    banded square matrix M with lower and upper bandwidths ml and mu, respectively.
    Returns L and U as sparse CSR matrices.
    """

    m = M.shape[0]
    u = M.copy()  # can remove to act directly on M

    # Allocating memory to store nnzl number of non-zero entries of L
    nnzl = int(m * (ml + 1) - ml * (ml + 1) / 2)
    l_row = np.zeros(nnzl).astype(np.int_)
    l_val = np.ones(nnzl).astype(M.dtype)

    l_row[:m] = np.arange(m)  # unit diagonal of L
    l_col = l_row.copy()
    count = m  # counter for the next entry of L

    for k in range(m - 1):
        # Data positions of the column-k entries in rows k..k+ml (the pivot first)
        column_entries_ind = u.indptr[k] + (
            u.indices[u.indptr[k]:u.indptr[min(k + ml + 1, m)]]
            == k).nonzero()[0]
        for i, ind in enumerate(column_entries_ind[1:]):
            l = u.data[ind] / u.data[column_entries_ind[0]]
            l_val[count] = l
            l_col[count] = k
            l_row[count] = int(k + i + 1)
            count += 1

            b = min(mu + 1, m - k)
            u.data[ind + 1:ind + b] -= l * u.data[column_entries_ind[0] +
                                                  1:column_entries_ind[0] + b]
            u.data[ind] = 0.

    u.eliminate_zeros()
    l = csr((l_val, (l_row, l_col)))

    return l, u
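A verification sketch on a small tridiagonal system (ml = mu = 1), assuming the function above is in scope:

import numpy as np
from scipy.sparse import csr_matrix as csr

M = csr(np.array([[2.0, 1.0, 0.0],
                  [1.0, 3.0, 1.0],
                  [0.0, 1.0, 4.0]]))
L, U = bandedLU(M, ml=1, mu=1)
print(np.allclose((L @ U).toarray(), M.toarray()))  # True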
Example #21
def format_URM_negative_sampling_user_compressed(URM: csr_matrix,
                                                 negative_rate=1,
                                                 check_replacement=False,
                                                 sampling_function=None):
    """
    Format negative interactions of an URM in the way that is needed for the FM model. Here, however, users
    and compressed w.r.t. the items they liked in the negative samples sampled

    In particular you will have:
    - #different_items_sampled @row
    - #users+items+1 @cols
    - #(negative_sample_size)*(different_items_sampled*2) @data

    :param URM: URM to be preprocessed  and from which negative samples are taken
    :param negative_rate: how much negatives samples do you want in proportion to the negative one
    :param check_replacement: whether to check for replacement or not. Checking costs time
    :param sampling_function: sampling function that takes in input the negative sample size
    and the URM from which samples are taken. If None, uniform sampling will be applied
    :return: csr_matrix containing the negative interactions:
    """
    negative_sample_size = int(URM.data.size * negative_rate)
    new_train = URM.copy().tocoo()
    item_offset = URM.shape[0]

    print("Start sampling...")

    if sampling_function is None:
        collected_samples = uniform_sampling_strategy(
            negative_sample_size=negative_sample_size,
            URM=URM,
            check_replacement=check_replacement)
    else:
        collected_samples = sampling_function(
            negative_sample_size=negative_sample_size,
            URM=URM,
            check_replacement=check_replacement)
    # Different items sampled
    different_items_sampled = np.unique(collected_samples[1])

    fm_matrix = coo_matrix(
        (different_items_sampled.size, URM.shape[0] + URM.shape[1] + 1),
        dtype=np.int8)

    # Integer dtype so the vectors can be used as COO indices. Sized by the number of
    # drawn samples (negative_sample_size), which equals new_train.data.size only when
    # negative_rate == 1.
    row_v = np.zeros(negative_sample_size + (different_items_sampled.size * 2), dtype=np.int32)
    col_v = np.zeros(negative_sample_size + (different_items_sampled.size * 2), dtype=np.int32)
    data_v = np.zeros(negative_sample_size + (different_items_sampled.size * 2))

    print("Matrix builiding...", end="")

    # For all the items, set up its content
    j = 0  # Index to scan and modify the vectors
    URM_train_csc = URM.copy().tocsc()
    for i, item in enumerate(different_items_sampled):
        # Find all users sampled for that item
        item_mask = collected_samples[1] == item
        users_sampled_for_that_item = np.unique(
            collected_samples[0][item_mask])

        offset = users_sampled_for_that_item.size
        if offset > 0:
            col_v[j:j + offset] = users_sampled_for_that_item
            row_v[j:j + offset] = i
            data_v[j:j + offset] = 1

            col_v[j + offset] = item + item_offset
            row_v[j + offset] = i
            data_v[j + offset] = 1

            col_v[j + offset + 1] = fm_matrix.shape[1] - 1
            row_v[j + offset + 1] = i
            data_v[j + offset + 1] = 1

            j = j + offset + 2
        else:
            raise RuntimeError("Illegal state")

    print("Done")

    # Setting new information; trim to the entries actually filled, since
    # np.unique above drops duplicate (user, item) samples
    fm_matrix.row = row_v[:j]
    fm_matrix.col = col_v[:j]
    fm_matrix.data = data_v[:j]

    return fm_matrix.tocsr()
Example #22
    def __init__(self, URM: sp.csr_matrix, ICM, exclude_seen=True, k=3):
        super().__init__(URM.copy(), ICM, exclude_seen)
        self.k = k
Example #23
def sparse_sub_with_clip(a: csr_matrix, c):
    out = a.copy()
    out.data -= c
    return sparse_pos_clip(out)
Example #24
 def __init__(self, P: csr_matrix):
     self.P = P.copy()
     self.exact_errors = []
     self.bh_errors = []
     self.fft_errors = []
Example #25
    def _eliminate(matrix: sp.csr_matrix, user_indices, item_indices):
        matrix = matrix.copy()
        matrix[user_indices, item_indices] = 0
        matrix.eliminate_zeros()
        return matrix
Example #26
    def _eliminate(matrix: sp.csr_matrix, user_indices, item_indices):
        matrix = matrix.copy()
        # `lil_matrix` is too slow
        matrix[list(user_indices), list(item_indices)] = 0
        matrix.eliminate_zeros()
        return matrix
Example #27
    def precompute_best_item_indices(self, URM: sps.csr_matrix):
        c_URM = URM.copy()
        c_URM.data **= 2
        # Var[X] = E[X^2] - E[X]^2, computed column-wise on the sparse matrix
        variances = np.array(
            c_URM.mean(axis=0) - np.power(URM.mean(axis=0), 2)).flatten()
        self.sorted_indices = np.argsort(variances)[::-1]
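A quick numeric check of the E[X^2] - E[X]^2 trick above against np.var on the densified matrix:

import numpy as np
import scipy.sparse as sps

URM_toy = sps.csr_matrix(np.array([[1.0, 0.0],
                                   [3.0, 2.0]]))
c_URM = URM_toy.copy()
c_URM.data **= 2
variances = np.array(
    c_URM.mean(axis=0) - np.power(URM_toy.mean(axis=0), 2)).flatten()
print(variances)                          # [1. 1.]
print(np.var(URM_toy.toarray(), axis=0))  # [1. 1.]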