示例#1
0
    def fit(self, keyToSeriesDF):
        """Fit the model: de-noise the data matrix, score it, learn the weights.

        Steps, in order:

        1. ``self._assignData(keyToSeriesDF)`` loads the data (it is expected
           to populate ``self.matrix``, which is read right afterwards).
        2. ``self.matrix`` is decomposed and the factors returned by
           ``SVD.reconstructMatrix`` are stored in ``self.sk``, ``self.Uk``
           and ``self.Vk``. If ``self.kSingularValues`` was None it is set to
           the number of singular values actually returned.
        3. ``self.matrix`` is overwritten with the matrix rebuilt from those
           factors by ``tsUtils.matrixFromSVD``.
        4. For every ``i`` in ``range(self.no_ts)`` the original observations
           in columns ``i, i + no_ts, ...`` are compared, via ``r2_score``,
           with ``self.denoisedTS(ts=i)`` and the result is stored in
           ``self.imputation_model_score[i]``.
        5. ``self._computeWeights()`` is called.

        Args:
            keyToSeriesDF: Input data, forwarded unchanged to
                ``self._assignData``.
                NOTE(review): presumably a DataFrame with one column per
                series key -- confirm against ``_assignData``.
        """
        # Assign data to class variables.
        self._assignData(keyToSeriesDF)

        # Keep the observations as loaded: self.matrix is overwritten with its
        # de-noised reconstruction below, and the scoring loop needs both.
        observed = self.matrix.copy()

        # Produce a thresholded/de-noised matrix. This overwrites the original
        # data matrix.
        svdMod = SVD(self.matrix, method='numpy')
        self.sk, self.Uk, self.Vk = svdMod.reconstructMatrix(
            self.kSingularValues, returnMatrix=False)
        if self.kSingularValues is None:
            self.kSingularValues = len(self.sk)

        if self.SSVT:
            self.soft_threshold = svdMod.next_sigma

        self.matrix = tsUtils.matrixFromSVD(self.sk,
                                            self.Uk,
                                            self.Vk,
                                            self.soft_threshold,
                                            probability=self.p)

        # Score the reconstruction of each column group against what was
        # actually observed for it.
        for i in range(self.no_ts):
            obs = observed[:, i::self.no_ts].flatten('F')
            self.imputation_model_score[i] = r2_score(obs,
                                                      self.denoisedTS(ts=i))

        self._computeWeights()
示例#2
0
    def reconstructMatrix(self, kSingularValues, returnMatrix=False):
        """Return the top-k SVD factors, or the matrix rebuilt from them.

        Args:
            kSingularValues: Forwarded unchanged to ``self.decomposeTopK``.
            returnMatrix: When equal to ``True`` the result of
                ``tsUtils.matrixFromSVD(sk, Uk, Vk)`` is returned; otherwise
                the factors themselves are returned.

        Returns:
            The tuple ``(sk, Uk, Vk)`` produced by ``self.decomposeTopK``, or
            the matrix built from that tuple.
        """
        sk, Uk, Vk = self.decomposeTopK(kSingularValues)

        # The explicit comparison with True is deliberate: truthy values that
        # do not compare equal to True still select the factor tuple.
        if not (returnMatrix == True):
            return (sk, Uk, Vk)

        return tsUtils.matrixFromSVD(sk, Uk, Vk)
示例#3
0
    def updateSVD(self, D, method='UP'):
        """Incrementally update the stored SVD factors with new observations.

        ``D`` is cleaned of NaNs, reshaped column-major into an
        ``(self.N, len(D) // self.N)`` block and handed to the ``tsUtils``
        update routine selected by ``method``. Two sets of factors are
        updated: ``(Uk, sk, Vk)`` with the full block and ``(Ukw, skw, Vkw)``
        with every row of the block except the last. Afterwards
        ``self.matrix``, ``self.lastRowObservations``, ``self.M``,
        ``self.TimesUpdated`` and ``self.weights`` are refreshed.

        Args:
            D: New observations. ``len(D)`` must be a multiple of ``self.N``
                and must yield no more than ``self.N`` columns. The caller's
                object is not modified.
                NOTE(review): assumed to be a 1-D float ndarray -- confirm
                against callers.
            method: ``'UP'`` uses ``tsUtils.updateSVD2``; ``'folding-in'``
                uses ``tsUtils.updateSVD``.

        Raises:
            AssertionError: If the length/shape requirements on ``D`` are not
                met.
            ValueError: If ``method`` is not one of the supported names.
        """
        assert (len(D) % self.N == 0)
        if self.fill_in_missing:
            # Forward-fill, then back-fill: the second pass is what fills NaNs
            # that occur before the first observed value.
            D = pd.DataFrame(D).ffill().bfill().values
        else:
            # np.where builds a new array, so the caller's data keeps its NaNs.
            D = np.where(np.isnan(D), 0, D)
        D = D.reshape([self.N, len(D) // self.N], order='F')

        assert D.shape[0] == self.N
        assert D.shape[1] <= D.shape[0]

        if method == 'UP':
            update = tsUtils.updateSVD2
        elif method == 'folding-in':
            update = tsUtils.updateSVD
        else:
            raise ValueError(
                "Unknown SVD update method {!r}; expected 'UP' or "
                "'folding-in'.".format(method))

        self.Uk, self.sk, self.Vk = update(D, self.Uk, self.sk, self.Vk)
        self.M = self.Vk.shape[0]
        # The second set of factors is updated without the last row of D.
        self.Ukw, self.skw, self.Vkw = update(D[:-1, :], self.Ukw, self.skw,
                                              self.Vkw)

        self.matrix = tsUtils.matrixFromSVD(self.sk,
                                            self.Uk,
                                            self.Vk,
                                            self.soft_threshold,
                                            probability=self.p)
        self.lastRowObservations = self.matrix[-1, :]
        self.TimesUpdated += 1
        newMatrixPInv = tsUtils.pInverseMatrixFromSVD(
            self.skw,
            self.Ukw,
            self.Vkw,
            soft_threshold=self.soft_threshold,
            probability=self.p)
        self.weights = np.dot(newMatrixPInv.T, self.lastRowObservations.T)
示例#4
0
 def denoisedTS(self, ind=None, range=True, return_=True, ts=None):
     """Return values of ``self.matrix`` flattened in column-major order.

     If ``self.matrix`` is None it is first rebuilt from the stored SVD
     factors with ``tsUtils.matrixFromSVD`` and cached on the instance.

     Args:
         ind: None returns the whole flattened vector. Otherwise a pair
             ``(start, stop)`` when ``range`` is truthy, or an index
             expression applied directly when ``range`` is falsy.
         range: Selects how ``ind`` is interpreted (see above).
         return_: When falsy nothing is returned; the call only makes sure
             ``self.matrix`` exists.
         ts: None uses every column; otherwise only columns
             ``ts, ts + self.no_ts, ...`` are used.

     Returns:
         The selected part of the flattened matrix, or None when
         ``return_`` is falsy.

     Raises:
         AssertionError: If ``range`` is truthy and ``ind`` does not have
             exactly two entries.
     """
     if self.matrix is None:
         self.matrix = tsUtils.matrixFromSVD(
             self.sk, self.Uk, self.Vk, self.soft_threshold,
             probability=self.p)

     if not return_:
         return

     # Pick the columns first, then flatten column by column.
     columns = self.matrix if ts is None else self.matrix[:, ts::self.no_ts]
     flat = columns.flatten('F')

     if ind is None:
         return flat
     if not range:
         return flat[ind]

     assert len(ind) == 2
     start, stop = ind[0], ind[1]
     return flat[start:stop]
示例#5
0
    def _computeWeights(self):
        """Learn ``self.weights`` from a row subset of ``self.matrix``.

        A subset of the rows of ``self.matrix`` is selected, decomposed with
        ``SVD`` (factors stored in ``self.skw``, ``self.Ukw``, ``self.Vkw``),
        and the pseudo-inverse built from those factors is multiplied with
        ``self.lastRowObservations`` to give ``self.weights``. Finally, for
        every ``i`` in ``range(self.no_ts)`` an ``r2_score`` is stored in
        ``self.forecast_model_score[i]``.

        Row selection:

        * ``self.includePastDataOnly == False``: the first
          ``N * len(otherSeriesKeysArray) + N - 1`` rows are used as they are.
        * otherwise: ``self.matrix`` is read in consecutive groups of ``N``
          rows and only the first ``N - 1`` rows of each group are kept
          (``len(otherSeriesKeysArray) + 1`` groups in total).

        The SVD is recomputed here because the selected rows are not the same
        matrix as the full ``self.matrix``.

        Raises:
            Exception: If ``self.lastRowObservations`` is None, i.e. the
                method was called before the instance state was prepared.
        """
        if self.lastRowObservations is None:
            raise Exception(
                'Do not call _computeWeights() directly. It should only be accessed via class methods.'
            )

        row_count = (self.N * len(self.otherSeriesKeysArray)) + self.N - 1
        col_count = np.shape(self.matrix)[1]

        if self.includePastDataOnly == False:
            newMatrix = self.matrix[0:row_count, :]
        else:
            kept_per_group = self.N - 1
            group_count = len(self.otherSeriesKeysArray) + 1
            row_count = (kept_per_group *
                         len(self.otherSeriesKeysArray)) + self.N - 1

            newMatrix = np.zeros([row_count, col_count])
            for group in range(group_count):
                # Destination rows are packed back to back; source groups are
                # N rows apart, so the last row of every group is skipped.
                dst = group * kept_per_group
                src = group * self.N
                newMatrix[dst:dst + kept_per_group] = \
                    self.matrix[src:src + kept_per_group]

        svdMod = SVD(newMatrix, method='numpy')
        self.skw, self.Ukw, self.Vkw = svdMod.reconstructMatrix(
            self.kSingularValues, returnMatrix=False)

        soft_threshold = svdMod.next_sigma if self.SSVT else 0
        shared_kwargs = dict(soft_threshold=soft_threshold,
                             probability=self.p)
        matrix = tsUtils.matrixFromSVD(self.skw, self.Ukw, self.Vkw,
                                       **shared_kwargs)
        newMatrixPInv = tsUtils.pInverseMatrixFromSVD(self.skw, self.Ukw,
                                                      self.Vkw,
                                                      **shared_kwargs)

        self.weights = np.dot(newMatrixPInv.T, self.lastRowObservations)

        for i in range(self.no_ts):
            target = self.lastRowObservations[i::self.no_ts] / self.p
            predicted = np.dot(matrix[:, i::self.no_ts].T, self.weights)
            self.forecast_model_score[i] = r2_score(target, predicted)