def core(self, input_ds):
    """Perform sliding-window MSSA decomposition and prediction of each input feature.

    An MSSA model is fitted on every window of ``2 * window_size`` rows of
    the mean-centred input (the final window is extended to the end of the
    dataset).  From each fit two values per feature are kept: the forecast
    ``forward_ticks`` ahead and the sum of the components at row
    ``2 * window_size - 1`` of the window (the "denoised" value).

    Reads ``window_size``, ``forward_ticks``, ``num_components``,
    ``plot_prefix`` and ``show_error`` from ``self.conf``.  Sets
    ``self.rows_d``, ``self.cols_d``, ``self.output_ds`` and ``self.error``
    (the R2 score of the first feature).

    Args:
        input_ds: 1-D or 2-D array; a 1-D array is treated as one column.

    Returns:
        ``self.output_ds``, with one row per fitted window and one column
        per feature.

    Raises:
        SystemExit: if ``window_size`` exceeds one fifth of the rows, or if
            the dataset is too short to hold a single window plus the
            forecast horizon.
    """
    window_size = self.conf.window_size
    forward_ticks = self.conf.forward_ticks
    window_span = 2 * window_size

    # Treat a 1-D input as a single-column dataset.
    if input_ds.ndim == 1:
        input_ds = input_ds.reshape(-1, 1)
    self.rows_d, self.cols_d = input_ds.shape

    if window_size > self.rows_d // 5:
        print("The window_size must be at maximum 1/5th of the rows of the input dataset")
        sys.exit()

    # Number of sliding windows that fit in the dataset.
    segments = self.rows_d - (window_span + forward_ticks)
    if segments <= 0:
        # Without this guard no window is ever fitted and the metrics below
        # would be computed on an uninitialised buffer.
        print("The input dataset is too short for the configured window_size and forward_ticks")
        sys.exit()

    # Center the data before fitting.
    input_ds = input_ds - np.nanmean(input_ds, axis=0)

    rank = self.conf.num_components
    forecast_rows = []
    denoised_rows = []
    for i in range(segments):
        # The last window is extended up to the end of the dataset.
        last = i + window_span if i != segments - 1 else self.rows_d
        s_data_w = input_ds[i:last, :]

        if i == 0 and rank == 0:
            # SVHT selects the number of components once, on the first
            # window; every later window reuses that rank.
            mssa = MSSA(n_components='svht', window_size=window_size, verbose=False)
            mssa.fit(s_data_w)
            print("Automatically Selected Rank (number of components)= ", str(mssa.rank_))
            rank = int(mssa.rank_)
        else:
            mssa = MSSA(n_components=rank, window_size=window_size, verbose=False)
            mssa.fit(s_data_w)

        # TODO: with the components, generate the prediction and then the
        # plots for each feature of input_ds.
        fc = mssa.forecast(forward_ticks, timeseries_indices=None)
        # Keep only the furthest forecast tick of every feature.
        forecast_rows.append(fc[:, forward_ticks - 1])
        # Sum of all components of every feature at row window_span - 1.
        denoised_rows.append(mssa.components_[:, window_span - 1, :].sum(axis=1))

    # Stack once instead of concatenating inside the loop.
    self.output_ds = np.vstack(forecast_rows)
    denoised = np.vstack(denoised_rows)
    rows_o = self.output_ds.shape[0]

    # TODO: calculate error per feature.
    # The error is measured on the first feature only, over a span of
    # rows_d // 2 rows ending forward_ticks before the end of the data.
    half = self.rows_d // 2
    actual_end = self.rows_d - forward_ticks - 1
    predicted_end = rows_o - forward_ticks
    actual = input_ds[actual_end - half:actual_end, 0]
    predicted = self.output_ds[predicted_end - half:predicted_end, 0]
    r2 = r2_score(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    self.error = r2

    # Plot the original, predicted and denoised data of every feature.
    if self.conf.plot_prefix is not None:
        for feature in range(self.cols_d):
            fig, ax = plt.subplots(figsize=(18, 7))
            ax.plot(self.output_ds[:rows_o - forward_ticks, feature], lw=3, c='steelblue', alpha=0.8, label='predicted')
            ax.plot(denoised[forward_ticks:, feature], lw=3, c='darkgoldenrod', alpha=0.6, label='denoised')
            ax.plot(input_ds[window_span + forward_ticks - 1:self.rows_d - forward_ticks - 1, feature], lw=3, alpha=0.2, c='k', label='original')
            # NOTE: the metrics in the title are those of feature 0.
            ax.set_title('Forecast R2 = {:.3f} MSE = {:.3f} MAE = {:.3f}'.format(r2, mse, mae))
            ax.legend()
            fig.savefig(self.conf.plot_prefix + str(feature) + '.png', dpi=600)
            # Release the figure; otherwise one stays open per feature.
            plt.close(fig)

    # Show the error.  Equality (not truthiness) is kept on purpose so that
    # non-boolean configuration values behave exactly as before.
    if self.conf.show_error == True:  # noqa: E712
        for feature in range(self.cols_d):
            # NOTE: the same feature-0 R2 is printed for every feature.
            print("Feature = ", str(feature), "R2 score = ", str(r2))
    return self.output_ds
def perform_mssa(self, fVectors, n_components):
    """Fit an MSSA model on ``fVectors`` and plot its components.

    The model keeps the components explaining 99% of the variance, with a
    fixed window of 150 ticks.  For the last three columns of ``fVectors``
    one figure per component is saved under ``./results/test3/program<idx>``
    showing the original series, the running sum of the components and the
    current component.  Finally the w-correlation matrix of the components
    of the first column is saved and shown.

    Args:
        fVectors: 2-D array indexed as ``[tick, column]``.
        n_components: unused; kept so existing callers keep working.

    Returns:
        The transpose of ``mssa.component_ranks_``.
    """
    window_length = 150  # Length of the time window
    mssa = MSSA(n_components='variance_threshold',
                variance_explained_threshold=0.99,
                window_size=window_length,
                verbose=True)
    mssa.fit(fVectors)

    indexes = np.arange(mssa.components_.shape[1])
    base_dir = "./results/test3"
    self.create_directory(base_dir)

    for idx in [-1, -2, -3]:
        program_dir = base_dir + "/program{}".format(idx)
        self.create_directory(program_dir)
        cumulative_recon = np.zeros_like(fVectors[:, idx])
        for comp in range(mssa.components_.shape[2]):
            fig, ax = plt.subplots(figsize=(18, 7))
            current_component = mssa.components_[idx, :, comp]
            cumulative_recon = cumulative_recon + current_component
            # The label used to be hard-coded to "program 3" although the
            # plotted column is idx.
            ax.plot(indexes, fVectors[:, idx], lw=3, alpha=0.2, c='k',
                    label="program {}".format(idx))
            ax.plot(indexes, cumulative_recon, lw=3, c='darkgoldenrod',
                    alpha=0.6, label='cumulative')
            ax.plot(indexes, current_component, lw=3, c='steelblue',
                    alpha=0.8, label='component={}'.format(comp))
            ax.legend()
            fig.savefig(
                program_dir + "/cumulation_of_{}_components_for_index{}_2"
                .format(comp, idx))
            plt.show()
            # Release the figure; on non-interactive backends show() does
            # not close it.
            plt.close(fig)

    print(mssa.component_ranks_[0:10])
    print(mssa.component_ranks_explained_variance_[0:10])

    total_comps = mssa.components_[0, :, :]
    print(total_comps.shape)
    total_wcorr_abs = np.abs(mssa.w_correlation(total_comps))
    fig, ax = plt.subplots(figsize=(12, 9))
    sns.heatmap(total_wcorr_abs, cmap='coolwarm', ax=ax)
    ax.set_title('component w-correlations')
    # Save before showing: after show() returns the current figure may
    # already be gone, and savefig would write an empty image.
    fig.savefig(base_dir + "/correlation_matrix")
    plt.show()
    plt.close(fig)

    print(mssa.component_ranks_.shape)
    return mssa.component_ranks_.T
# Decompose the data in consecutive segments of 2 * p_window_size ticks and
# join the components of all segments along the tick axis (axis 1).
segments = num_ticks // (2 * p_window_size)
for i in range(0, segments):
    # The last segment is extended up to the last observation.
    first = i * (2 * p_window_size)
    if i != segments - 1:
        last = (i + 1) * (2 * p_window_size)
    else:
        last = num_ticks
    # Slice the data in segments of 2 * p_window_size ticks.
    s_data_w = s_data[first:last, :]
    if i == 0:
        # SVHT runs only on the first segment.  Its rank is just reported;
        # the number of components used afterwards is p_n_components.
        mssa = MSSA(n_components='svht', window_size=p_window_size, verbose=True)
        mssa.fit(s_data_w)
        print("Selected Rank = ", str(mssa.rank_))
        rank = int(p_n_components)
    else:
        mssa = MSSA(n_components=rank, window_size=p_window_size, verbose=True)
        mssa.fit(s_data_w)
    # Append the new components to the output array.
    if i == 0:
        output = copy.deepcopy(mssa.components_)
    else:
        # np.concatenate returns a new array; the result must be kept,
        # otherwise output only ever holds the first segment.
        output = np.concatenate((output, mssa.components_), axis=1)
def core(self, input_ds):
    """Perform MSSA decomposition of ``input_ds`` segment by segment.

    The dataset is split in consecutive segments of ``2 * window_size``
    rows (the last segment is extended to the end of the dataset) and an
    MSSA model is fitted on each one.  When ``self.conf.group_file`` is set,
    the component groups stored in that JSON file are applied to every
    feature and the grouped components are returned; otherwise the plain
    components of all segments are returned.

    Reads ``window_size``, ``num_components``, ``group_file``,
    ``plot_correlations`` and ``plot_prefix`` from ``self.conf``; sets
    ``self.rows_d``, ``self.cols_d`` and ``self.output_ds``.

    Args:
        input_ds: 2-D array indexed as ``[row, feature]``.

    Returns:
        ``self.output_ds``: with a group file, a list holding one array per
        feature (segments joined along axis 0); without one, the components
        of all segments joined along axis 1.

    Raises:
        ValueError: if the dataset has fewer than ``2 * window_size`` rows.
    """
    # Get the size of the input dataset.
    self.rows_d, self.cols_d = input_ds.shape

    segment_len = 2 * self.conf.window_size
    segments = self.rows_d // segment_len
    if segments < 1:
        raise ValueError(
            "input dataset has {} rows, but at least {} (2 * window_size) "
            "are required".format(self.rows_d, segment_len))

    # Load the groups once; the same groups are used for all features.
    # The file is only read: it used to be overwritten with a hard-coded
    # example on every segment, destroying the user's groups.
    ts0_groups = None
    if self.conf.group_file is not None:
        with open(self.conf.group_file) as json_file:
            ts0_groups = json.load(json_file)

    rank = self.conf.num_components
    component_blocks = []
    grouped_output = []
    for i in range(segments):
        # The last segment is extended up to the last observation.
        first = i * segment_len
        last = (i + 1) * segment_len if i != segments - 1 else self.rows_d
        s_data_w = input_ds[first:last, :]

        if i == 0 and rank == 0:
            # SVHT selects the number of components once, on the first
            # segment; every later segment reuses that rank.
            mssa = MSSA(n_components='svht', window_size=self.conf.window_size, verbose=True)
            mssa.fit(s_data_w)
            print("Automatically Selected Rank (number of components)= ", str(mssa.rank_))
            rank = int(mssa.rank_)
        else:
            mssa = MSSA(n_components=rank, window_size=self.conf.window_size, verbose=True)
            mssa.fit(s_data_w)

        if ts0_groups is None:
            component_blocks.append(mssa.components_)
        else:
            print("Grouping correlated components (manually set list)")
            for j in range(self.cols_d):
                mssa.set_ts_component_groups(j, ts0_groups)
                ts0_grouped = mssa.grouped_components_[j]
                # Extend the output of feature j with the new segment.
                if i == 0:
                    grouped_output.append(copy.deepcopy(ts0_grouped))
                else:
                    grouped_output[j] = np.concatenate((grouped_output[j], ts0_grouped), axis=0)
                # Save the correlation matrix only for the first segment.
                if i == 0 and self.conf.plot_correlations is not None:
                    ts0_grouped_wcor = mssa.w_correlation(ts0_grouped)
                    fig, ax = plt.subplots(figsize=(12, 9))
                    sns.heatmap(np.abs(ts0_grouped_wcor), cmap='coolwarm', ax=ax)
                    ax.set_title('grouped component w-correlations')
                    fig.savefig(self.conf.plot_correlations + str(j) + 'grouped.png', dpi=200)
                    plt.close(fig)

        # Show progress.
        progress = i * 100 / segments
        print("Segment: ", i, "/", segments, " Progress: ", progress, " %")

    if ts0_groups is None:
        # Join the segments along the tick axis.  The result of
        # np.concatenate used to be discarded, so an uninitialised buffer
        # was returned instead.
        self.output_ds = np.concatenate(component_blocks, axis=1)
    else:
        self.output_ds = grouped_output
    grouped_output = self.output_ds

    if self.conf.plot_prefix is not None:
        # For every component of the first feature, save a plot with the
        # original series, the cumulative sum of the components so far and
        # the current component.
        cumulative_recon = np.zeros_like(input_ds[:, 0])
        for comp in range(len(grouped_output[0][0])):
            fig, ax = plt.subplots(figsize=(18, 7))
            current_component = grouped_output[0][:, comp]
            cumulative_recon = cumulative_recon + current_component
            ax.plot(input_ds[:, 0], lw=3, alpha=0.2, c='k', label='original')
            ax.plot(cumulative_recon, lw=3, c='darkgoldenrod', alpha=0.6, label='cumulative')
            ax.plot(current_component, lw=3, c='steelblue', alpha=0.8, label='component={}'.format(comp))
            ax.legend()
            fig.savefig(self.conf.plot_prefix + '_' + str(comp) + '.png', dpi=600)
            plt.close(fig)
    return self.output_ds
def core(self, input_ds):
    """Perform MSSA decomposition of ``input_ds`` segment by segment.

    The mean-centred dataset is split in consecutive segments of
    ``2 * window_size`` rows (the last segment is extended to the end of the
    dataset) and an MSSA model is fitted on each one.  When
    ``self.conf.group_file`` is set, the component groups stored in that
    JSON file are applied to every feature; otherwise the plain components
    are used.  The result is flattened from (features, ticks, channels) to
    (ticks, features * channels).

    Reads ``window_size``, ``num_components``, ``group_file``, ``w_prefix``
    and ``plot_prefix`` from ``self.conf``; sets ``self.rows_d``,
    ``self.cols_d`` and ``self.output_ds``.

    Args:
        input_ds: 1-D or 2-D array; a 1-D array is treated as one column.

    Returns:
        ``self.output_ds``, a 2-D array with one row per tick.

    Raises:
        ValueError: if the dataset has fewer than ``2 * window_size`` rows.
    """

    def save_wcorr_heatmap(model, components, title, path):
        # Save the absolute w-correlation matrix of components as a heatmap.
        wcorr = model.w_correlation(components)
        fig, ax = plt.subplots(figsize=(12, 9))
        sns.heatmap(np.abs(wcorr), cmap='coolwarm', ax=ax)
        ax.set_title(title)
        fig.savefig(path, dpi=200)
        plt.close(fig)

    # Treat a 1-D input as a single-column dataset.
    if input_ds.ndim == 1:
        input_ds = input_ds.reshape(-1, 1)
    self.rows_d, self.cols_d = input_ds.shape

    segment_len = 2 * self.conf.window_size
    segments = self.rows_d // segment_len
    if segments < 1:
        raise ValueError(
            "input dataset has {} rows, but at least {} (2 * window_size) "
            "are required".format(self.rows_d, segment_len))

    # Center the data before fitting.
    input_ds = input_ds - np.nanmean(input_ds, axis=0)

    # Load the groups once; the same groups are used for all features.
    ts0_groups = None
    if self.conf.group_file is not None:
        with open(self.conf.group_file) as json_file:
            ts0_groups = json.load(json_file)

    rank = self.conf.num_components
    component_blocks = []
    grouped_output = []
    for i in range(segments):
        # The last segment is extended up to the last observation.
        first = i * segment_len
        last = (i + 1) * segment_len if i != segments - 1 else self.rows_d
        s_data_w = input_ds[first:last, :]

        if i == 0 and rank == 0:
            # SVHT selects the number of components once, on the first
            # segment; every later segment reuses that rank.
            mssa = MSSA(n_components='svht', window_size=self.conf.window_size, verbose=True)
            mssa.fit(s_data_w)
            print("Automatically Selected Rank (number of components)= ", str(mssa.rank_))
            rank = int(mssa.rank_)
        else:
            mssa = MSSA(n_components=rank, window_size=self.conf.window_size, verbose=True)
            mssa.fit(s_data_w)

        # Correlation matrices are saved only for the first segment.
        save_plots = i == 0 and self.conf.w_prefix is not None

        if ts0_groups is None:
            component_blocks.append(mssa.components_)
            if save_plots:
                for j in range(self.cols_d):
                    save_wcorr_heatmap(mssa, mssa.components_[j, :, :],
                                       'component w-correlations',
                                       self.conf.w_prefix + str(j) + '.png')
        else:
            print("Grouping correlated components (manually set list)")
            for j in range(self.cols_d):
                mssa.set_ts_component_groups(j, ts0_groups)
                ts0_grouped = mssa.grouped_components_[j]
                # Extend the output of feature j with the new segment.
                if i == 0:
                    grouped_output.append(copy.deepcopy(ts0_grouped))
                else:
                    grouped_output[j] = np.concatenate((grouped_output[j], ts0_grouped), axis=0)
                if save_plots:
                    save_wcorr_heatmap(mssa, ts0_grouped,
                                       'grouped component w-correlations',
                                       self.conf.w_prefix + str(j) + '_grouped.png')
            if save_plots and self.cols_d > 0:
                # Kept for backward compatibility: the grouped matrix of the
                # last feature is also written without the '_grouped'
                # suffix.  This used to run in ungrouped mode as well, where
                # it failed because ts0_grouped did not exist.
                save_wcorr_heatmap(mssa, ts0_grouped,
                                   'grouped component w-correlations',
                                   self.conf.w_prefix + str(j) + '.png')

        # Show progress.
        progress = i * 100 / segments
        print("Segment: ", i, "/", segments, " Progress: ", progress, " %")

    # Build the (features, ticks, channels) array once, after the loop.
    if ts0_groups is None:
        self.output_ds = np.concatenate(component_blocks, axis=1)
    else:
        self.output_ds = np.array(grouped_output)

    if self.conf.plot_prefix is not None:
        # For every component of the first feature, save a plot with the
        # original series, the cumulative sum of the components so far and
        # the current component.
        cumulative_recon = np.zeros_like(input_ds[:, 0])
        for comp in range(self.output_ds.shape[2]):
            fig, ax = plt.subplots(figsize=(18, 7))
            current_component = self.output_ds[0, :, comp]
            cumulative_recon = cumulative_recon + current_component
            ax.plot(input_ds[:, 0], lw=3, alpha=0.2, c='k', label='original')
            ax.plot(cumulative_recon, lw=3, c='darkgoldenrod', alpha=0.6, label='cumulative')
            ax.plot(current_component, lw=3, c='steelblue', alpha=0.8, label='component={}'.format(comp))
            ax.legend()
            fig.savefig(self.conf.plot_prefix + '_' + str(comp) + '.png', dpi=600)
            plt.close(fig)

    print("pre self.output_ds.shape = ", self.output_ds.shape)
    # Transform the dimensions from (features, ticks, channels) to
    # (ticks, features * channels): every row lists all the channels of the
    # first feature, then all the channels of the second one, and so on.
    n_feats, n_ticks, n_channels = self.output_ds.shape
    self.output_ds = self.output_ds.transpose(1, 0, 2).reshape(n_ticks, n_feats * n_channels)
    print("new self.output_ds.shape = ", self.output_ds.shape)
    return self.output_ds
# Fit the PCA once.  The earlier extra pca.fit(df_small) call was dropped
# because this statement fits the same model on the same data again.
fit = pca.fit(df_small)
trans = pca.fit_transform(df_small)
# Plot the first column of the transformed data.
trans.iloc[:, 0].plot()
plt.show()
print(fit.column_correlations(df_small))

from pyts.decomposition import SingularSpectrumAnalysis
from pymssa import MSSA

# Single-channel SSA of the reduced frame.
window_size = 20
groups = [np.arange(i, i+5) for i in range(0, 20, 5)]
ssa = SingularSpectrumAnalysis(window_size= window_size)
X_ssa = ssa.fit_transform(df_small)

# MSSA of the adjusted close series.
# NOTE(review): this window (21) differs from window_size (20) above;
# confirm that the difference is intended.
mssa = MSSA(n_components=5, window_size=21, verbose=True)
mssa.fit(_df.adjclose)
# Plot the components of the first (only) series, then the series itself.
pd.DataFrame(mssa.components_[0,:,:], index=_df.index).plot()
plt.show()
_df.adjclose.plot()
plt.show()
def get_ssa(self, ncomp: int = None, wsize=60):
    """Fit an MSSA model on ``self.data`` and keep it in ``self.model_ssa``.

    Args:
        ncomp: passed to ``MSSA`` as ``n_components``.
        wsize: passed to ``MSSA`` as ``window_size``.
    """
    mssa_options = {
        "n_components": ncomp,
        "window_size": wsize,
        "verbose": True,
    }
    fitted = MSSA(**mssa_options)
    fitted.fit(self.data)
    # Assigned only after a successful fit, so a failing fit leaves any
    # previous model untouched.
    self.model_ssa = fitted