def fit(self, keyToSeriesDF):
    # assign data to class variables
    self._assignData(keyToSeriesDF)
    obs_matrix = self.matrix.copy()

    # produce a thresholded/de-noised matrix; this will over-write the original data matrix
    svdMod = SVD(self.matrix, method='numpy')
    (self.sk, self.Uk, self.Vk) = svdMod.reconstructMatrix(self.kSingularValues, returnMatrix=False)
    if self.kSingularValues is None:
        self.kSingularValues = len(self.sk)
    if self.SSVT:
        self.soft_threshold = svdMod.next_sigma

    self.matrix = tsUtils.matrixFromSVD(self.sk, self.Uk, self.Vk, self.soft_threshold, probability=self.p)

    # per-series imputation score: R^2 between the observed and the de-noised entries
    for i in range(self.no_ts):
        obs = obs_matrix[:, i::self.no_ts].flatten('F')
        self.imputation_model_score[i] = r2_score(obs, self.denoisedTS(ts=i))

    # set weights
    self._computeWeights()
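# Illustrative sketch (hypothetical helper, not part of the original API): what fit() does
# conceptually, assuming a fully observed matrix and a known rank k. It reconstructs a rank-k,
# optionally soft-thresholded (SSVT-style) matrix from a noisy observation matrix and reports
# the R^2 between the observed and de-noised entries, mirroring imputation_model_score above.
def _example_denoise_and_score(noisy_matrix, k, soft_threshold=0.0):
    import numpy as np
    from sklearn.metrics import r2_score

    U, s, Vt = np.linalg.svd(noisy_matrix, full_matrices=False)
    # keep the top-k singular values, optionally shrunk by the soft threshold
    s_k = np.maximum(s[:k] - soft_threshold, 0.0)
    denoised = (U[:, :k] * s_k) @ Vt[:k, :]
    return denoised, r2_score(noisy_matrix.flatten('F'), denoised.flatten('F'))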
def reconstructMatrix(self, kSingularValues, returnMatrix=False):
    (sk, Uk, Vk) = self.decomposeTopK(kSingularValues)
    if returnMatrix:
        return tsUtils.matrixFromSVD(sk, Uk, Vk)
    return (sk, Uk, Vk)
def updateSVD(self, D, method='UP'):
    assert len(D) % self.N == 0
    if self.fill_in_missing:
        # impute missing values by filling forward, then backward (so leading NaNs are also filled)
        D = pd.DataFrame(D).fillna(method='ffill').values
        D = pd.DataFrame(D).fillna(method='bfill').values
    else:
        # treat missing entries as zeros
        D[np.isnan(D)] = 0
    D = D.reshape([self.N, int(len(D) / self.N)], order='F')
    assert D.shape[0] == self.N
    assert D.shape[1] <= D.shape[0]

    if method == 'UP':
        self.Uk, self.sk, self.Vk = tsUtils.updateSVD2(D, self.Uk, self.sk, self.Vk)
        self.M = self.Vk.shape[0]
        self.Ukw, self.skw, self.Vkw = tsUtils.updateSVD2(D[:-1, :], self.Ukw, self.skw, self.Vkw)
    elif method == 'folding-in':
        self.Uk, self.sk, self.Vk = tsUtils.updateSVD(D, self.Uk, self.sk, self.Vk)
        self.M = self.Vk.shape[0]
        self.Ukw, self.skw, self.Vkw = tsUtils.updateSVD(D[:-1, :], self.Ukw, self.skw, self.Vkw)
    # elif method == 'Full':  # recompute the SVD of the full matrix -- currently disabled
    #     self.matrix = np.concatenate((self.matrix, D), 1)
    #     U, S, V = np.linalg.svd(self.matrix, full_matrices=False)
    #     self.sk = S[0:self.kSingularValues]
    #     self.Uk = U[:, 0:self.kSingularValues]
    #     self.Vk = V[0:self.kSingularValues, :]
    #     self.Vk = self.Vk.T
    #     self.M = self.Vk.shape[0]
    else:
        raise ValueError("method must be 'UP' or 'folding-in'")

    self.matrix = tsUtils.matrixFromSVD(self.sk, self.Uk, self.Vk, self.soft_threshold, probability=self.p)
    self.lastRowObservations = self.matrix[-1, :]
    self.TimesUpdated += 1

    newMatrixPInv = tsUtils.pInverseMatrixFromSVD(self.skw, self.Ukw, self.Vkw,
                                                  soft_threshold=self.soft_threshold, probability=self.p)
    self.weights = np.dot(newMatrixPInv.T, self.lastRowObservations.T)
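# Illustrative sketch (hypothetical helper, not the tsUtils implementation): the classic
# "folding-in" update projects the new columns D onto the existing left singular vectors and
# appends the projections to Vk, leaving Uk and sk untouched. tsUtils.updateSVD is assumed to
# follow this idea, while updateSVD2 is assumed to be a more accurate rank-updating variant.
def _example_folding_in(Uk, sk, Vk, D):
    import numpy as np

    # new rows of Vk: D^T Uk diag(1/sk), one row per appended column of D
    Vk_new = (D.T @ Uk) / sk
    return Uk, sk, np.vstack([Vk, Vk_new])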
def denoisedTS(self, ind=None, range=True, return_=True, ts=None):
    # lazily rebuild the de-noised matrix from the stored SVD factors if needed
    if self.matrix is None:
        self.matrix = tsUtils.matrixFromSVD(self.sk, self.Uk, self.Vk, self.soft_threshold, probability=self.p)
    if not return_:
        return

    # ts selects one of the no_ts column-interleaved time series; otherwise flatten everything
    if ts is None:
        NewColsDenoised = self.matrix.flatten('F')
    else:
        NewColsDenoised = self.matrix[:, ts::self.no_ts].flatten('F')

    if ind is None:
        return NewColsDenoised
    if range:
        # ind is interpreted as a [start, end) pair
        assert len(ind) == 2
        return NewColsDenoised[ind[0]:ind[1]]
    return NewColsDenoised[ind]
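# Illustrative sketch (hypothetical, not part of the original API): denoisedTS() above relies on
# the columns of self.matrix interleaving the no_ts time series, so matrix[:, ts::no_ts] selects
# the columns belonging to series `ts`, and flatten('F') stitches them back into a 1-D series.
def _example_interleaved_columns():
    import numpy as np

    no_ts = 2
    # columns alternate: series 0, series 1, series 0, series 1
    matrix = np.array([[0, 100, 2, 102],
                       [1, 101, 3, 103]])
    series0 = matrix[:, 0::no_ts].flatten('F')   # -> [0, 1, 2, 3]
    series1 = matrix[:, 1::no_ts].flatten('F')   # -> [100, 101, 102, 103]
    return series0, series1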
def _computeWeights(self):
    ### This is now the same as ALS.
    # This is an expensive step because we compute the SVD all over again. However, there is
    # currently no way around it: this is NOT the same matrix as the full self.matrix, i.e. it
    # has fewer rows (or just one less).
    if self.lastRowObservations is None:
        raise Exception('Do not call _computeWeights() directly. It should only be accessed via class methods.')

    # Decide how to produce the weights based on whether the N'th data point is included for the
    # other time series or not. For seriesToPredictKey we only look at the past; for the others
    # we may also use the current point in time.
    matrixDim1 = (self.N * len(self.otherSeriesKeysArray)) + self.N - 1
    matrixDim2 = np.shape(self.matrix)[1]
    eachTSRows = self.N

    if not self.includePastDataOnly:
        newMatrix = self.matrix[0:matrixDim1, :]
    else:
        matrixDim1 = ((self.N - 1) * len(self.otherSeriesKeysArray)) + self.N - 1
        eachTSRows = self.N - 1
        newMatrix = np.zeros([matrixDim1, matrixDim2])
        rowIndex = 0
        matrixInd = 0
        while rowIndex < matrixDim1:
            newMatrix[rowIndex:rowIndex + eachTSRows] = self.matrix[matrixInd:matrixInd + eachTSRows]
            rowIndex += eachTSRows
            matrixInd += self.N

    svdMod = SVD(newMatrix, method='numpy')
    (self.skw, self.Ukw, self.Vkw) = svdMod.reconstructMatrix(self.kSingularValues, returnMatrix=False)
    soft_threshold = 0
    if self.SSVT:
        soft_threshold = svdMod.next_sigma

    matrix = tsUtils.matrixFromSVD(self.skw, self.Ukw, self.Vkw,
                                   soft_threshold=soft_threshold, probability=self.p)
    newMatrixPInv = tsUtils.pInverseMatrixFromSVD(self.skw, self.Ukw, self.Vkw,
                                                  soft_threshold=soft_threshold, probability=self.p)
    self.weights = np.dot(newMatrixPInv.T, self.lastRowObservations)

    # in-sample fit of the forecasting model: one R^2 score per time series
    for i in range(self.no_ts):
        self.forecast_model_score[i] = r2_score(
            self.lastRowObservations[i::self.no_ts] / self.p,
            np.dot(matrix[:, i::self.no_ts].T, self.weights))
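# Illustrative sketch (hypothetical, standalone numpy): the linear forecaster solves
# lastRowObservations ≈ newMatrix^T · weights in the least-squares sense, which is roughly what
# pInverseMatrixFromSVD followed by the dot product above computes (up to soft-thresholding and
# the missing-data probability scaling). A new column of past values is then forecast as its
# dot product with the learned weights.
def _example_forecast_weights(feature_matrix, last_row):
    import numpy as np

    # feature_matrix: (num_features x num_columns); last_row: (num_columns,)
    weights = np.linalg.pinv(feature_matrix.T) @ last_row
    in_sample_forecast = feature_matrix.T @ weights
    return weights, in_sample_forecast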