Пример #1
0
def calculate_Q(X: csc_matrix, update_in_place: bool = True) -> np.ndarray:
    """
    Compute the dense word-word matrix Q from a features-by-observations matrix.

    An observation (column) with entries x and total s = sum(x) contributes
    x x^T / (s * (s - 1)) to Q and x / (s * (s - 1)) to a diagonal correction;
    both are averaged over the number of observations and the correction is
    subtracted from the diagonal of Q.

    :param X: a CSC matrix of shape (rows=features, cols=observations).
    :param update_in_place: if True (the default) the caller's X is overwritten
        with its column-normalised values (a non-float data array is replaced by
        a float64 one so the values are not truncated); if False a copy is
        normalised and the caller's X is left untouched.
    :return: the word-word correlation matrix Q as a dense Numpy ndarray of
        shape (features, features).
    :raises TypeError: if X is not stored in CSC format.
    :raises ValueError: if a non-empty column has s * (s - 1) <= 0 (for count
        data: an observation with fewer than two tokens), for which the
        normalisation is undefined.
    """
    if getattr(X, "format", None) != "csc":
        raise TypeError("X must be a sparse matrix in CSC format")

    n_features, n_observations = X.shape

    if not update_in_place:
        X = X.copy()

    # Only the first indptr[-1] slots of data/indices hold live entries.
    nnz = int(X.indptr[-1])
    values = X.data[:nnz]
    rows = X.indices[:nnz]

    # Column id of every stored entry, so per-column quantities can be
    # broadcast to the entries without a Python-level loop.
    col_of_entry = np.repeat(np.arange(n_observations), np.diff(X.indptr))

    col_sums = np.bincount(col_of_entry, weights=values,
                           minlength=n_observations)
    entry_norms = (col_sums * (col_sums - 1))[col_of_entry]

    # Written as "not all > 0" so that NaN norms are rejected as well.
    if not np.all(entry_norms > 0):
        raise ValueError(
            "every non-empty column of X must satisfy sum * (sum - 1) > 0 "
            "(observations need at least two tokens)")

    # bincount accumulates repeated row indices, which `a[idx] += v` does not.
    diagonal = np.bincount(rows, weights=values / entry_norms,
                           minlength=n_features)

    normalised = values / np.sqrt(entry_norms)
    if not np.issubdtype(X.data.dtype, np.floating):
        # Writing floats into an integer buffer would truncate them.
        X.data = X.data.astype(np.float64)
    X.data[:nnz] = normalised

    # `@` is always a matrix product; toarray() yields a plain ndarray.
    Q = ((X @ X.T) / n_observations).toarray()

    return Q - np.diag(diagonal / n_observations)
def SLIM_parallel(A: sp.csc_matrix, alpha: float, l1_ratio: float,
                  max_iter: int):
    """
    SLIM: Sparse Linear Methods for Top-N Recommender Systems -  Xia Ning ; George Karypis
    https://ieeexplore.ieee.org/document/6137254
    Run Sparse Linear Models over the rating matrix A.
    (code from https://github.com/ruhan/toyslim/blob/master/slim_parallel.py)

    The item range is split with ``generate_slices`` and every slice is handed
    to ``work`` in a ``multiprocessing.Pool``; the (rows, cols, data) triples
    returned by the workers are concatenated into one sparse matrix.

    Warnings are suppressed only while this function runs; the caller's
    warning filters are restored on return.

    :param A: Rating matrix nxm where m is the number of items. Must be a csc_matrix
    :param alpha: a + b (a and b are the multipliers of the L1 and L2 penalties, respectively)
    :param l1_ratio: a / (a + b) (a and b are the multipliers of the L1 and L2 penalties, respectively)
    :param max_iter: maximum number of iterations to run for each item
    :return: W weight matrix, a csr_matrix of shape (n_items, n_items).
    """
    n_items = A.shape[1]

    # A is passed as-is: the pool pickles every task before sending it to a
    # worker process, so each worker already operates on its own copy.
    separated_tasks = [
        [from_j, to_j, A, alpha, l1_ratio, max_iter]
        for from_j, to_j in generate_slices(n_items)
    ]

    # Scope the blanket "ignore" filter to this call instead of leaving it
    # installed in the caller's process. The pool is created inside the
    # context so the workers are started while the filter is active.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with multiprocessing.Pool() as pool:
            results = pool.map(work, separated_tasks)

    W_rows_idxs = list(itertools.chain.from_iterable(r[0] for r in results))
    W_cols_idxs = list(itertools.chain.from_iterable(r[1] for r in results))
    W_data = list(itertools.chain.from_iterable(r[2] for r in results))

    W = sp.csr_matrix((W_data, (W_rows_idxs, W_cols_idxs)),
                      shape=(n_items, n_items))

    return W
Пример #3
0
    def __init__(self, x: np.ndarray, sg: np.ndarray, hess: np.ndarray,
                 scaling: csc_matrix, g_dscaling: csc_matrix, delta: float,
                 theta: float, ub: np.ndarray, lb: np.ndarray, logger: Logger):
        """
        Store the inputs, build the scaled Hessian and reset all other
        attributes to their initial values.

        :param x:
            Reference point
        :param sg:
            Gradient in rescaled coordinates (stored as a copy)
        :param hess:
            Hessian in unscaled coordinates
        :param scaling:
            Matrix that defines scaling transformation (stored as a copy)
        :param g_dscaling:
            Unscaled gradient multiplied by derivative of scaling
            transformation
        :param delta:
            Trust region radius in scaled coordinates
        :param theta:
            Stepback parameter that controls how close steps are allowed to
            get to the boundary
        :param ub:
            Upper boundary
        :param lb:
            Lower boundary
        :param logger:
            Logger stored on the instance as ``self.logger``
        """
        dim = sg.shape

        # Inputs kept by reference.
        self.x: np.ndarray = x
        self.lb: np.ndarray = lb
        self.ub: np.ndarray = ub
        self.delta: float = delta
        self.theta: float = theta
        self.logger: Logger = logger

        # Inputs stored as private copies.
        self.sg: np.ndarray = sg.copy()
        self.scaling: csc_matrix = scaling.copy()

        # B_hat (Eq 2.5) [ColemanLi1996]: scaled Hessian plus the
        # gradient/scaling-derivative term, forced to a plain ndarray.
        scaled_hess = scaling * hess * scaling
        self.shess: np.ndarray = np.asarray(scaled_hess + g_dscaling)

        # Attributes that start out unset.
        self.s: Union[np.ndarray, None] = None
        self.sc: Union[np.ndarray, None] = None
        self.ss: Union[np.ndarray, None] = None
        self.og_s: Union[np.ndarray, None] = None
        self.og_sc: Union[np.ndarray, None] = None
        self.og_ss: Union[np.ndarray, None] = None
        self.cg: Union[np.ndarray, None] = None
        self.chess: Union[np.ndarray, None] = None
        self.subspace: Union[np.ndarray, None] = None

        # Arrays sized like the gradient.
        self.br: np.ndarray = np.ones(dim)
        self.s0: np.ndarray = np.zeros(dim)
        self.ss0: np.ndarray = np.zeros(dim)

        # Scalar and container defaults.
        self.minbr: float = 1.0
        self.alpha: float = 1.0
        self.iminbr: np.ndarray = np.array([])
        self.qpval: float = 0.0
        self.reflection_indices: set = set()
        self.truncation_indices: set = set()