Пример #1
0
    def apply_zca(x, U=None, S=None, ldj=None):
        """Whiten ``x`` with a ZCA transform; ``x`` must already be zero-centered.

        ``x`` is flattened to ``(x.shape[0], D)`` where ``D`` is the product of
        the remaining dimensions, transformed, and reshaped back.

        Parameters
        ----------
        x : array whose first axis indexes samples.
        U, S : optional rotation matrix and spectrum from a previous call.
            When ``U`` is None both are estimated from the covariance of ``x``.
        ldj : optional precomputed log-determinant of the ZCA matrix; computed
            with ``np.linalg.slogdet`` when None.

        Returns
        -------
        tuple ``(z, U, S, ldj)`` with ``z`` shaped like the input ``x``.

        Raises
        ------
        AssertionError
            If ``S`` is given without ``U``, or if the determinant of the ZCA
            matrix is not positive.
        """
        original_shape = x.shape
        flat = x.reshape(original_shape[0], -1)

        # Estimate the rotation/spectrum only when the caller did not supply them.
        if U is None:
            assert S is None, "ZCA rotation matrix is None, but scale vector is not None"
            # Columns are the variables, so the covariance is (D, D).
            cov = np.cov(flat, rowvar=False).astype(np.float32)
            U, S, _ = scipy_svd(cov, overwrite_a=True)  # U is (D, D), S is (D,)

        # ZCA matrix: U diag(1 / sqrt(S + eps)) U^T, shape (D, D).
        epsilon = 1e-5
        inv_scale = np.diag(1.0 / np.sqrt(S + epsilon))
        zca_matrix = np.dot(U, np.dot(inv_scale, U.T))

        # Whitened samples, still flattened: (x.shape[0], D).
        z = np.dot(flat, zca_matrix)

        if ldj is None:
            sgn, ldj = np.linalg.slogdet(zca_matrix)
            assert sgn == 1, "Sign of logdetjacobian of zca matrix is not positive"

        return z.reshape(original_shape), U, S, ldj
Пример #2
0
    def update(self, x_bat):
        """Fold a batch of rows into the running sketch matrix.

        Rows of ``x_bat`` (with ``self.mean`` subtracted) are written into the
        free rows of ``self.sketch_mat``. Whenever the batch does not fit, the
        sketch is filled to capacity and then compressed: its singular values
        are shrunk by the value at index ``sketch_len // 2`` and only the
        leading ``sketch_len // 2`` directions are kept, which frees the
        remaining rows (a Frequent-Directions-style shrinkage step).

        Parameters
        ----------
        x_bat : 2-D array with one sample per row and as many columns as
            ``self.sketch_mat``.

        Side effects
        ------------
        Updates ``self.sketch_mat`` and ``self.empty_indx`` in place.
        """
        n_samples = x_bat.shape[0]
        half_ell = self.sketch_len // 2
        x_bat_indx = 0

        # Compare against the rows *still to be inserted*. Using the full
        # batch size here made the condition permanently true once
        # n_samples > sketch_len - half_ell, so the loop ran past the end of
        # the batch and failed with a broadcasting error.
        while self.empty_indx + (n_samples - x_bat_indx) > self.sketch_len:
            n_inserted = self.sketch_len - self.empty_indx
            x_bat_indx_new = x_bat_indx + n_inserted
            self.sketch_mat[self.empty_indx:, :] = x_bat[
                x_bat_indx:x_bat_indx_new, :] - self.mean
            x_bat_indx = x_bat_indx_new

            try:
                _, s, Vh = svd(self.sketch_mat, full_matrices=False)
            except Exception:
                # Fall back to scipy's driver if the primary SVD fails, but do
                # not swallow KeyboardInterrupt/SystemExit as a bare except did.
                _, s, Vh = scipy_svd(self.sketch_mat, full_matrices=False)

            s_len = s.shape[0]
            if s_len > half_ell:
                # s is sorted in descending order, so the difference is >= 0.
                n_kept = half_ell
                kept = np.sqrt(s[:half_ell]**2 - s[half_ell]**2)
            else:
                # Fewer singular values than half the sketch: there is no
                # s[half_ell] to shrink by, and the rotated sketch already
                # fits in s_len rows.
                n_kept = s_len
                kept = s
            # Row-wise scaling, equivalent to diag(kept) @ Vh[:n_kept].
            self.sketch_mat[:n_kept, :] = kept[:, None] * Vh[:n_kept, :]
            self.sketch_mat[n_kept:, :] = 0
            self.empty_indx = n_kept

        # Whatever is left now fits into the free rows.
        n_inserted = n_samples - x_bat_indx
        if n_inserted > 0:
            empty_indx_new = self.empty_indx + n_inserted
            self.sketch_mat[self.empty_indx:empty_indx_new, :] = x_bat[
                x_bat_indx:, :] - self.mean
            self.empty_indx = empty_indx_new
Пример #3
0
def svd(X):
    """
    Compute the thin singular value decomposition of a matrix.

    Dispatches on the module-level ``use_gpu`` flag: scipy is used when it is
    falsy, otherwise PyTorch is used.

    Returns
    -------
    tuple
        ``(U, s, Vh)``. On the scipy path these are numpy arrays; on the
        PyTorch path they are tensors.
    """
    if use_gpu:
        # The previous code called svd(X) here, i.e. this very function, which
        # could only end in RecursionError. torch.linalg.svd with
        # full_matrices=False returns (U, S, Vh), matching scipy's convention.
        # NOTE(review): the GPU helper originally intended here is not visible;
        # confirm that callers expect Vh (not V) and handle device placement.
        import torch
        U, S, Vh = torch.linalg.svd(torch.as_tensor(X), full_matrices=False)
        return U, S, Vh
    return scipy_svd(X, full_matrices=False, check_finite=False)
Пример #4
0
    def get(self, rotate=False, take_root=True):
        """Return the first ``self.d`` rows of the sketch.

        With ``rotate=False`` the raw rows ``self._sketch[:self.d]`` are
        returned. With ``rotate=True`` the sketch is decomposed by SVD and the
        result is ``diag(w) @ Vt[:self.d]``, where ``w`` is the leading
        singular values, or their square roots when ``take_root`` is true.
        """
        if not rotate:
            return self._sketch[:self.d, :]

        try:
            _, sing_vals, right = np.linalg.svd(self._sketch,
                                                full_matrices=False)
        except np.linalg.LinAlgError:
            # numpy's driver did not converge; retry with scipy's.
            _, sing_vals, right = scipy_svd(self._sketch, full_matrices=False)

        weights = sing_vals[:self.d]
        if take_root:
            weights = np.sqrt(weights)
        return np.diag(weights) @ right[:self.d, :]
Пример #5
0
    def __rotate__(self):
        """Compress the sketch in place so that rows from ``self.d`` on are free.

        The sketch is decomposed by SVD, the squared singular values are
        reduced by the square of the ``self.d``-th one, and the first
        ``self.d`` rows are rewritten as ``diag(shrunk) @ Vt[:self.d]``. The
        remaining rows are zeroed and ``self.nextZeroRow`` is set to
        ``self.d``.
        """
        try:
            _, sing_vals, Vt = np.linalg.svd(self._sketch, full_matrices=False)
        except np.linalg.LinAlgError:
            # numpy's driver did not converge; retry with scipy's.
            _, sing_vals, Vt = scipy_svd(self._sketch, full_matrices=False)

        keep = self.d
        cutoff = sing_vals[keep - 1]**2
        shrunk = np.sqrt(sing_vals[:keep]**2 - cutoff)

        self._sketch[:keep, :] = np.dot(np.diag(shrunk), Vt[:keep, :])
        self._sketch[keep:, :] = 0
        self.nextZeroRow = keep
Пример #6
0
def reconstruct(data, n):
    """ Reconstructs data from a selection of its singular components.

    Parameters
    ----------
    data : np.array
        Data matrix subjected to SVD. Assuming *m x n* with m as frequency
        and n as time. But it is actually not important.
    n : int, list, tuple or np.array
        Singular components to use.
        An integer selects the first ``n`` components.
        A list, tuple or array selects individual components using 1-based
        numbering, i.e. the first component is number 1.

    Returns
    -------
    res : *mysvd.results*
        Results object with attributes ``data``, ``u``, ``s``, ``vt``, ``n``
        and ``svddata`` (the reconstructed matrix). For sequence input
        ``res.n`` holds the selection as a numpy array.

    Raises
    ------
    TypeError
        If ``n`` is neither an integer nor a list/tuple/array.
    ValueError
        If a selected component number is below 1 or is given twice.
    """

    # noinspection PyTupleAssignmentBalance
    u, s, vt = scipy_svd(data)

    # isinstance also accepts numpy integers and subclasses, which the former
    # exact type comparison rejected and silently turned into an empty
    # selection (an all-zero reconstruction).
    if isinstance(n, (int, np.integer)):
        nlist = list(range(n))
    elif isinstance(n, (list, tuple, np.ndarray)):
        n = np.asarray(n)
        nlist = n - 1  # convert 1-based component numbers to 0-based indices
    else:
        raise TypeError('n must be an int, list, tuple or np.ndarray, got %s'
                        % type(n).__name__)

    if any(i < 0 for i in nlist):
        raise ValueError('Please chose just positive singular values')
    if len(set(nlist)) != len(nlist):
        raise ValueError('Please choose different singular values.')

    # create m x n singular values matrix
    sigma = np.zeros((u.shape[0], vt.shape[0]))
    sigma[:s.shape[0], :s.shape[0]] = np.diag(s)

    # reconstruct data from the selected columns of u and rows of sigma
    svddata = u[:, nlist].dot(sigma[nlist, :].dot(vt))

    res = Results()
    res.data = data
    res.u = u
    res.s = s
    res.vt = vt
    res.n = n
    res.svddata = svddata

    return res
Пример #7
0
    def add(self, vector):
        """Append ``vector`` as the next row of the sketch matrix.

        All-zero vectors are ignored. When no empty row is left, the SVD of
        the sketch is stored in ``self.U``, ``self.S`` and ``self.Vt`` and
        ``self.reduceRank()`` is called before the vector is written.
        """
        # An all-zero row contributes nothing to the sketch.
        if not count_nonzero(vector):
            return

        # Sketch is full: decompose it and let reduceRank() make room.
        if self.emptyRows <= 0:
            self.U, self.S, self.Vt = scipy_svd(self.sketchMatrix,
                                                full_matrices=True)
            self.reduceRank()

        # Write into the next free row and advance the bookkeeping counters.
        target_row = self.nextZeroRow
        self.sketchMatrix[target_row, :] = vector
        self.nextZeroRow = target_row + 1
        self.emptyRows -= 1
Пример #8
0
    def __rotate__(self):
        """Compress the sketch in place and record the first free row.

        The sketch is decomposed by SVD. When at least ``self.ell`` singular
        values exist, the leading ``self.ell`` are shrunk by the
        ``self.ell``-th one and kept; otherwise all of them are kept
        unchanged. The kept rows are rewritten as ``diag(weights) @ Vt``, the
        rest are zeroed, and ``self.nextZeroRow`` is set to the number kept.
        """
        try:
            _, sing_vals, Vt = svd(self._sketch, full_matrices=False)
        except LinAlgError:
            # Primary driver did not converge; retry with scipy's.
            _, sing_vals, Vt = scipy_svd(self._sketch, full_matrices=False)

        n_values = len(sing_vals)
        if n_values >= self.ell:
            kept = self.ell
            weights = sqrt(sing_vals[:kept]**2 - sing_vals[kept - 1]**2)
        else:
            # Too few singular values to shrink: keep every direction as is.
            kept = n_values
            weights = sing_vals

        self._sketch[:kept, :] = dot(diag(weights), Vt[:kept, :])
        self._sketch[kept:, :] = 0
        self.nextZeroRow = kept
    def __rotate__(self):
        """Rotate and shrink the sketch so that its trailing rows become free.

        After an SVD of the sketch, either the top ``self.ell`` singular
        values are shrunk by the ``self.ell``-th one (when that many exist) or
        all singular values are kept as they are. The retained rows become
        ``diag(values) @ Vt``, every later row is cleared, and
        ``self.nextZeroRow`` points at the first cleared row.
        """
        try:
            _, s, Vt = svd(self._sketch, full_matrices=False)
        except LinAlgError:
            # Retry with scipy's driver when the first one fails to converge.
            _, s, Vt = scipy_svd(self._sketch, full_matrices=False)

        enough_values = len(s) >= self.ell
        n_rows = self.ell if enough_values else len(s)
        if enough_values:
            values = sqrt(s[:n_rows]**2 - s[n_rows - 1]**2)
        else:
            values = s

        self._sketch[:n_rows, :] = dot(diag(values), Vt[:n_rows, :])
        self._sketch[n_rows:, :] = 0
        self.nextZeroRow = n_rows
Пример #10
0
    def fit(self, df: pd.DataFrame, remove_evaluated_items: bool = True):
        """Fit a rank-``self.rank`` SVD approximation of the user/item ratings.

        Parameters
        ----------
        df : pd.DataFrame
            One rating per row; the user, item and rating columns are named by
            ``self.user_col``, ``self.item_col`` and ``self.rate_col``.
        remove_evaluated_items : bool
            When true, the stored prediction matrix is masked with
            ``(predictions > 0) - (ratings > 0)`` and explicit zeros are
            dropped.

        Returns
        -------
        self, with ``user_id2idx``, ``item_idx2id`` and the sparse prediction
        matrix ``r`` (users x items) set.
        """
        # set values: map raw ids to consecutive indices in order of appearance
        self.user_id2idx = {
            user_id: index
            for index, user_id in enumerate(df[self.user_col].unique())
        }
        item_id2idx = {
            item_id: index
            for index, item_id in enumerate(df[self.item_col].unique())
        }
        self.item_idx2id = {
            index: item_id
            for item_id, index in item_id2idx.items()
        }

        row = [self.user_id2idx[user_id] for user_id in df[self.user_col]]
        col = [item_id2idx[item_id] for item_id in df[self.item_col]]
        ratings = sparse.coo_matrix((df[self.rate_col], (row, col)))

        # fill values: per item, replace missing (zero) ratings with the mean
        # of that item's positive ratings
        rating_df = pd.DataFrame(ratings.T.toarray())
        for index in rating_df.index:
            row = rating_df.loc[index]
            row[row == 0] = row[row > 0].mean()
            rating_df.loc[index] = row

        # calc relations
        U, s, Vh = scipy_svd(rating_df.values.T)
        del rating_df
        gc.collect()

        # Keep the leading components by slicing. ndarray.resize reinterprets
        # the flat memory buffer, so whether it keeps the leading columns/rows
        # depends on the array's memory order; it is not a truncation.
        n_components = min(self.rank, s.shape[0])
        U = U[:, :n_components]
        s = sparse.diags(s[:n_components])
        Vh = Vh[:n_components, :]

        self.r = sparse.csr_matrix(U.dot(s.dot(Vh)))
        del U
        del s
        del Vh
        gc.collect()

        # remove evaluated items
        if remove_evaluated_items:
            self.r = self.r.multiply((self.r > 0) - (ratings > 0))
            self.r.eliminate_zeros()

        return self
Пример #11
0
def wrapper_svd(data):
    """ Thin convenience wrapper around *scipy.linalg.svd()*.

    Parameters
    ----------
    data : np.array
        Matrix to decompose. Assumed to be *m x n* with m as frequency and
        n as time, although the decomposition does not depend on that.

    Returns
    -------
    u : np.array
        U matrix (left singular vectors). Represents abstract spectra.
    s : np.array
        Singular values.
    vt : np.array
        Transposed V matrix. Represents abstract time traces.
    """
    factors = scipy_svd(data)
    # noinspection PyTupleAssignmentBalance
    left, sing_vals, right_t = factors
    return left, sing_vals, right_t
Пример #12
0
    def fit(self, X, Y, target, output_dimensions):
        """Learn paired projections ``self.wx`` / ``self.wy`` for two views.

        After ``self.preprocessing``, ``X`` and ``Y`` are used as
        (features, samples) matrices with ``len(target)`` columns each.

        Parameters
        ----------
        X, Y : the two views; passed through ``self.preprocessing`` first.
        target : sequence of class labels, one per sample.
            NOTE(review): the block-diagonal matrix below assumes samples of
            the same class are contiguous and appear in order of first
            occurrence - presumably guaranteed by ``preprocessing``; confirm.
        output_dimensions : number of projection directions to keep.

        Side effects
        ------------
        Sets ``self.output_dimensions``, ``self.C_W``, ``self.C_B``,
        ``self.wx`` and ``self.wy``; prints the preprocessed shapes.

        Returns
        -------
        None
        """
        self.output_dimensions = output_dimensions

        X, Y = self.preprocessing(X, Y, target)

        # Zero-mean every feature (row) of X and Y
        X_hat = X - X.mean(axis=1, keepdims=True)
        Y_hat = Y - Y.mean(axis=1, keepdims=True)

        class_freq = Counter(target)
        N = len(target)
        print(X.shape, Y.shape)

        # Block-diagonal matrix A: one all-ones (n_c x n_c) block per class,
        # in order of first occurrence of the class. Filling a preallocated
        # array avoids re-copying the matrix for every sample and keeps A
        # two-dimensional even for a single sample.
        A = np.zeros((N, N))
        start = 0
        for count in class_freq.values():
            stop = start + count
            A[start:stop, start:stop] = 1.0
            start = stop

        self.C_W = np.matmul(np.matmul(X_hat, A),
                             Y_hat.transpose())  # within-class similarity
        self.C_B = -(self.C_W)  # between-class similarity

        Sigma_xy = self.C_W / N

        # Regularised covariance matrices of each view
        rx = 1e-4  # regularisation coefficient for x
        ry = 1e-4  # regularisation coefficient for y
        Sigma_xx = np.matmul(X_hat, X_hat.T) / N + rx * np.identity(X.shape[0])
        Sigma_yy = np.matmul(Y_hat, Y_hat.T) / N + ry * np.identity(Y.shape[0])

        # Inverse square roots via M^(-1/2) = P diag(lambda^(-1/2)) P^T, where
        # the columns of P are the eigenvectors returned by eigh.
        eigen_values_xx, eigen_vectors_matrix_xx = np.linalg.eigh(Sigma_xx)
        eigen_values_yy, eigen_vectors_matrix_yy = np.linalg.eigh(Sigma_yy)
        Sigma_xx_root_inverse = np.dot(
            np.dot(eigen_vectors_matrix_xx, np.diag(eigen_values_xx**-0.5)),
            eigen_vectors_matrix_xx.T)
        Sigma_yy_root_inverse = np.dot(
            np.dot(eigen_vectors_matrix_yy, np.diag(eigen_values_yy**-0.5)),
            eigen_vectors_matrix_yy.T)

        T = np.matmul(np.matmul(Sigma_xx_root_inverse, Sigma_xy),
                      Sigma_yy_root_inverse)

        # scipy returns V transposed (Vh); the right singular vectors are its
        # rows, so transpose before selecting the leading columns.
        U, _, Vh = scipy_svd(T)

        self.wx = np.dot(Sigma_xx_root_inverse, U[:, 0:self.output_dimensions])
        self.wy = np.dot(Sigma_yy_root_inverse,
                         Vh.T[:, 0:self.output_dimensions])

        return None
Пример #13
0
# Derive an integer movieId column from movies_df.id; ids that cannot be
# parsed as numbers become -1.
movies_df = movies_df.assign(movieId=pd.to_numeric(
    movies_df.id, errors='coerce').fillna(-1).astype('int64'))

# Attach the movie columns to every rating that has a matching movieId.
movies_df = ratings_df.merge(movies_df, on='movieId')

# The pivot of the merged frame is disabled because the column names could
# not be obtained as movie names; the plain ratings are pivoted instead.
#movies_pivoted = movies_df.pivot(index='userId', columns='movieId', values='rating')
# One row per userId, one column per movieId, cell = rating (NaN if unrated).
ratings_pivoted = ratings_df.pivot(index='userId',
                                   columns='movieId',
                                   values='rating')

#movie_df_pivoted = movies_pivoted.fillna(0)
# Unrated movies are treated as rating 0.
ratings_df_pivoted = ratings_pivoted.fillna(0)

#U, Sigma, VT = scipy_svd(movie_df_pivoted)
# Full SVD of the user x movie matrix; row i of U belongs to the i-th user
# of the pivot index.
U, Sigma, VT = scipy_svd(ratings_df_pivoted)

# Reference user: the first row of U.
user1 = U[0]

# cosine_similarity is called with 2-D inputs, hence the (1, -1) reshapes.
user_v = user1.reshape(1, -1)
heap = []
# Collect (similarity to the reference user, row index) for every user.
for i, row in enumerate(U):
    v_row = row.reshape(1, -1)
    heappush(heap, (cosine_similarity(user_v, v_row)[0][0], i))

# NOTE(review): nsmallest returns the 10 pairs with the LOWEST similarity,
# i.e. the least similar users - confirm this is intended rather than the
# most similar ones.
print(nsmallest(10, heap))

############################################################################

# User whose ratings are looked up in the section that follows.
user_id = 236
user_seen_movies_df = ratings_df.groupby('userId').get_group(user_id)[[
Пример #14
0
def show_svs(data, time, wn):
    """ Plots singular values, explained variance and the first eight
    singular vectors of each side.

    Three figures are created: the leading (at most 15) singular values next
    to the cumulative variance they explain, the first eight columns of U
    plotted against ``wn``, and the first eight rows of V^T plotted against
    ``time`` on a logarithmic x axis. The number of singular values needed to
    reach 80 %, 95 % and 99.5 % explained variance is printed.

    Parameters
    ----------
    data : np.array
        Data matrix subjected to SVD. Assuming *m x n* with m as frequency
        and n as time.
    time : np.array
        Time array.
    wn : np.array
        Frequency array.

    Raises
    ------
    RuntimeError
        If the decomposition yields fewer than eight singular values.
    """

    data, time, wn = pclasses.check_input(data, time, wn)

    # noinspection PyTupleAssignmentBalance
    u, s, vt = scipy_svd(data)
    # Share of the total variance carried by each singular value.
    eig = s**2 / np.sum(s**2)

    if s.size < 8:
        raise RuntimeError('Too less singular values!')
    num = min(s.size, 15)
    numlist = list(range(1, num + 1))
    varlimits = [0.8, 0.95, 0.995]
    colors = ['red', 'orange', 'forestgreen']

    # Figure 1: singular values and cumulative explained variance.
    fig, axs = plt.subplots(1, 2)
    fig.suptitle('First %i singular values' % num)
    sv_axis = axs[0]
    sv_axis.plot(numlist, s[:num], 'o-')
    sv_axis.set_title('Singular values')
    sv_axis.set_ylabel('|s|')
    var_axis = axs[1]
    var_axis.plot(numlist, np.cumsum(eig[:num]) * 100, 'o-')
    var_axis.set_title('Cummulative variance explained')
    var_axis.set_ylabel('variance explained / %')
    cumulative = np.cumsum(eig)
    for limit, color in zip(varlimits, colors):
        var_axis.plot(numlist, np.ones(num) * limit * 100, '--', color=color)
        # First component count whose cumulative variance reaches the limit.
        svs = np.where(cumulative >= limit)[0][0] + 1
        print('%.1f %% variance explained by %i singular values' %
              (limit * 100, svs))

    # Figure 2: first eight left singular vectors on a 2 x 4 grid.
    fig, axs = plt.subplots(2, 4)
    fig.suptitle('Abstract spectra')
    for i in range(8):
        grid_row, grid_col = divmod(i, 4)
        axs[grid_row, grid_col].plot(wn, u[:, i])

    # Figure 3: first eight right singular vectors on a 2 x 4 grid.
    fig, axs = plt.subplots(2, 4)
    fig.suptitle('Abstract time traces')
    for i in range(8):
        grid_row, grid_col = divmod(i, 4)
        panel = axs[grid_row, grid_col]
        panel.plot(time.T, vt[i, :])
        panel.set_xscale('log')