Python MSSA примеры использования

Язык программирования: Python

Пространство имен/Пакет: pymssa

Класс/Тип: MSSA

Примеров на hotexamples.com: 7

Python MSSA — найдено 7 примеров. Это лучшие примеры Python-кода для pymssa.MSSA, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

MSSA(7)

fit(7)

set_ts_component_groups(3)

w_correlation(3)

forecast(1)

Пример #1

Показать файл

Файл: mssa_predictor.py Проект: harveybc/feature-extractor

    def core(self, input_ds):
        """Forecast every input feature with sliding-window MSSA.

        A window of ``2 * window_size`` rows slides over the mean-centred
        input one row at a time.  An MSSA model is fitted on each window, a
        forecast of ``forward_ticks`` steps is requested and its last step is
        kept for every feature.  The kept forecasts are stacked into
        ``self.output_ds`` (one row per window).  R2, MSE and MAE are computed
        on the first feature only; R2 is stored in ``self.error``.

        Reads from ``self.conf``: ``window_size``, ``forward_ticks``,
        ``num_components`` (``0`` selects the rank with SVHT on the first
        window and reuses it afterwards), ``plot_prefix`` and ``show_error``.

        Args:
            input_ds: array of shape ``(rows,)`` or ``(rows, features)``.

        Returns:
            ``self.output_ds``, a 2-D array with one forecast row per window.

        Raises:
            SystemExit: with status 1 when ``window_size`` exceeds one fifth
                of the rows, or when the dataset is too short to build at
                least one window.
        """
        # A 1-D input has a one-element shape, so the two-name unpacking
        # raises ValueError; treat such input as a single feature column.
        try:
            self.rows_d, self.cols_d = input_ds.shape
        except ValueError:
            (self.rows_d,) = input_ds.shape
            self.cols_d = 1
            input_ds = input_ds.reshape(self.rows_d, self.cols_d)

        window_size = self.conf.window_size
        forward_ticks = self.conf.forward_ticks
        if window_size > self.rows_d // 5:
            print("The window_size must be at maximum 1/5th of the rows of the input dataset")
            # Non-zero status so that shells and callers can see the failure.
            sys.exit(1)

        # Number of sliding windows that fit in the dataset.
        span = 2 * window_size
        segments = self.rows_d - (span + forward_ticks)
        if segments < 1:
            # Without this guard the metrics below would be computed on an
            # output that was never filled in.
            print("The input dataset is too short for the configured window_size and forward_ticks")
            sys.exit(1)

        # center the input_ds before fitting
        in_means = np.nanmean(input_ds, axis=0)
        input_ds = input_ds - in_means

        use_svht = self.conf.num_components == 0
        rank = self.conf.num_components
        forecast_rows = []   # one (1, features) forecast row per window
        denoised_rows = []   # one (1, features) component-sum row per window
        for i in range(segments):
            # The final window is extended up to the end of the dataset.
            last = i + span if i != segments - 1 else self.rows_d
            s_data_w = input_ds[i:last, :]

            # SVHT rank selection runs on the first window only; every later
            # window reuses the same number of components.
            if i == 0 and use_svht:
                mssa = MSSA(n_components='svht', window_size=window_size, verbose=False)
                mssa.fit(s_data_w)
                print("Automatically Selected Rank (number of components)= ",str(mssa.rank_))
                rank = int(mssa.rank_)
            else:
                mssa = MSSA(n_components=rank, window_size=window_size, verbose=False)
                mssa.fit(s_data_w)

            fc = mssa.forecast(forward_ticks, timeseries_indices=None)
            # keep only the last forecast step, one value per feature
            forecast_rows.append(fc[:, forward_ticks - 1].reshape(1, -1))
            # sum of all components at row (2 * window_size - 1) of the window
            denoised_rows.append(
                mssa.components_[:, span - 1, :].sum(axis=1).reshape(1, -1))
            # TODO: calculate error per feature

        # Stack once after the loop instead of re-concatenating per window.
        self.output_ds = np.concatenate(forecast_rows, axis=0)
        denoised = np.concatenate(denoised_rows, axis=0)
        rows_o = self.output_ds.shape[0]

        # Error metrics over a span of rows_d // 2 rows, first feature only.
        half = self.rows_d // 2
        true_stop = self.rows_d - forward_ticks - 1
        pred_stop = rows_o - forward_ticks
        y_true = input_ds[true_stop - half:true_stop, 0]
        y_pred = self.output_ds[pred_stop - half:pred_stop, 0]
        r2 = r2_score(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        self.error = r2

        # Plot original, predicted and denoised data, one figure per feature.
        # NOTE: the title metrics are those of feature 0 for every figure.
        if self.conf.plot_prefix is not None:
            for feature in range(self.cols_d):
                fig, ax = plt.subplots(figsize=(18, 7))
                ax.plot(self.output_ds[:rows_o - forward_ticks, feature], lw=3, c='steelblue', alpha=0.8, label='predicted')
                ax.plot(denoised[forward_ticks:, feature], lw=3, c='darkgoldenrod', alpha=0.6, label='denoised')
                ax.plot(input_ds[span + forward_ticks - 1:self.rows_d - forward_ticks - 1, feature], lw=3, alpha=0.2, c='k', label='original')
                ax.set_title('Forecast R2 = {:.3f}   MSE = {:.3f}   MAE = {:.3f}'.format(r2,mse,mae))
                ax.legend()
                fig.savefig(self.conf.plot_prefix + str(feature) + '.png', dpi=600)
                # Release the figure; otherwise one stays open per feature.
                plt.close(fig)

        # shows error (the same feature-0 R2 is printed for each feature)
        if self.conf.show_error == True:
            for feature in range(self.cols_d):
                print("Feature = ", str(feature), "R2 score = ", str(r2))
        return self.output_ds

Пример #2

Показать файл

Файл: functional_analyzer.py Проект: tomneutens/log_data_analysis

    def perform_mssa(self, fVectors, n_components):
        """Decompose ``fVectors`` with MSSA and plot the resulting components.

        The model uses a window of 150 and keeps the components selected by a
        0.99 explained-variance threshold.  For the last three columns of
        ``fVectors`` one figure per component is saved under
        ``./results/test3/program<idx>/`` showing the original column, the
        cumulative reconstruction and the current component.  A heatmap of
        the absolute w-correlations of the first series' components is saved
        as ``./results/test3/correlation_matrix``.

        Args:
            fVectors: 2-D array whose columns are the individual series.
            n_components: unused; kept so existing callers keep working.

        Returns:
            ``mssa.component_ranks_`` transposed.
        """
        L = 150  # Length of the time window
        mssa = MSSA(n_components='variance_threshold',
                    variance_explained_threshold=0.99,
                    window_size=L,
                    verbose=True)
        mssa.fit(fVectors)
        indexes = np.arange(mssa.components_.shape[1])

        base_dir = "./results/test3"
        self.create_directory(base_dir)

        for idx in [-1, -2, -3]:
            program_dir = base_dir + "/program{}".format(idx)
            self.create_directory(program_dir)
            cumulative_recon = np.zeros_like(fVectors[:, idx])
            for comp in range(mssa.components_.shape[2]):
                fig, ax = plt.subplots(figsize=(18, 7))
                current_component = mssa.components_[idx, :, comp]
                cumulative_recon = cumulative_recon + current_component

                ax.plot(indexes,
                        fVectors[:, idx],
                        lw=3,
                        alpha=0.2,
                        c='k',
                        label="program 3")
                ax.plot(indexes,
                        cumulative_recon,
                        lw=3,
                        c='darkgoldenrod',
                        alpha=0.6,
                        label='cumulative')
                ax.plot(indexes,
                        current_component,
                        lw=3,
                        c='steelblue',
                        alpha=0.8,
                        label='component={}'.format(comp))

                ax.legend()
                fig.savefig(
                    program_dir +
                    "/cumulation_of_{}_components_for_index{}_2".format(
                        comp, idx))
                plt.show()
                # Release the figure; one is created per component.
                plt.close(fig)

        print(mssa.component_ranks_[0:10])
        print(mssa.component_ranks_explained_variance_[0:10])

        total_comps = mssa.components_[0, :, :]
        print(total_comps.shape)

        total_wcorr_abs = np.abs(mssa.w_correlation(total_comps))
        fig, ax = plt.subplots(figsize=(12, 9))
        sns.heatmap(total_wcorr_abs, cmap='coolwarm', ax=ax)
        ax.set_title('component w-correlations')

        # Save before showing: after plt.show() returns, the current figure
        # may be a new empty one, so saving afterwards can write a blank image.
        fig.savefig(base_dir + "/correlation_matrix")
        plt.show()
        plt.close(fig)
        print(mssa.component_ranks_.shape)
        return mssa.component_ranks_.T

Пример #3

Показать файл

    # Split the series into consecutive, non-overlapping segments of
    # 2 * p_window_size ticks; the last segment also takes any remainder.
    segments = (num_ticks // (2 * p_window_size))

    for i in range(0, segments):
        first = i * (2 * p_window_size)
        if (i != segments - 1):
            last = (i + 1) * (2 * p_window_size)
        else:
            # last segment: run up to the final observation
            last = num_ticks
        # slice the data in 2*p_window_size ticks segments
        s_data_w = s_data[first:last, :]

        if i == 0:
            # First segment only: SVHT selects a rank, which is reported.
            # Later segments use the caller-supplied p_n_components instead.
            # NOTE(review): if the SVHT rank differs from p_n_components, the
            # first segment's components have a different last dimension and
            # the concatenation below fails -- confirm this is intended.
            mssa = MSSA(n_components='svht',
                        window_size=p_window_size,
                        verbose=True)
            mssa.fit(s_data_w)
            print("Selected Rank = ", str(mssa.rank_))
            rank = int(p_n_components)
        else:
            mssa = MSSA(n_components=rank,
                        window_size=p_window_size,
                        verbose=True)
            mssa.fit(s_data_w)
        # Append this segment's components along axis 1 of the output.
        if i == 0:
            output = copy.deepcopy(mssa.components_)
        else:
            # np.concatenate returns a new array; the result must be kept,
            # otherwise `output` only ever holds the first segment.
            output = np.concatenate((output, mssa.components_), axis=1)

Пример #4

Показать файл

Файл: mssa_decomposer.py Проект: andracin/feature-eng

    def core(self, input_ds):
        """Decompose ``input_ds`` with MSSA, one segment at a time.

        The rows are split into consecutive segments of ``2 * window_size``
        rows (the last segment also takes any remainder).  An MSSA model is
        fitted per segment and the per-segment results are joined along the
        row (tick) axis.

        Reads from ``self.conf``: ``window_size``, ``num_components`` (``0``
        selects the rank with SVHT on the first segment and reuses it),
        ``group_file`` (JSON list of component groups applied to every
        feature), ``plot_correlations`` and ``plot_prefix``.

        Args:
            input_ds: 2-D array of shape ``(rows, features)``.

        Returns:
            ``self.output_ds``.  With ``group_file`` set it is a list holding
            one array of grouped components per feature; otherwise it is the
            array of components joined along axis 1.

        Raises:
            ValueError: if ``input_ds`` has fewer than ``2 * window_size``
                rows, so that no segment can be built.
        """
        # get the size of the input dataset
        self.rows_d, self.cols_d = input_ds.shape
        window_size = self.conf.window_size
        span = 2 * window_size
        segments = self.rows_d // span
        if segments < 1:
            raise ValueError(
                "input_ds needs at least 2 * window_size = {} rows, got {}"
                .format(span, self.rows_d))

        # The same groups are used for all features, so load them once.
        # The file is only read; it is never overwritten with sample groups.
        use_groups = self.conf.group_file is not None
        if use_groups:
            with open(self.conf.group_file) as json_file:
                ts0_groups = json.load(json_file)

        rank = self.conf.num_components
        component_chunks = []                              # one entry per segment
        grouped_chunks = [[] for _ in range(self.cols_d)]  # per feature, per segment
        for i in range(segments):
            first = i * span
            # the last segment runs up to the final observation
            last = (i + 1) * span if i != segments - 1 else self.rows_d
            s_data_w = input_ds[first:last, :]

            # SVHT rank selection runs on the first segment only; later
            # segments reuse the same number of components.
            if i == 0 and self.conf.num_components == 0:
                mssa = MSSA(n_components='svht', window_size=window_size, verbose=True)
                mssa.fit(s_data_w)
                print("Automatically Selected Rank (number of components)= ",str(mssa.rank_))
                rank = int(mssa.rank_)
            else:
                mssa = MSSA(n_components=rank, window_size=window_size, verbose=True)
                mssa.fit(s_data_w)

            if use_groups:
                print("Grouping correlated components (manually set list)")
                for j in range(self.cols_d):
                    mssa.set_ts_component_groups(j, ts0_groups)
                    ts0_grouped = mssa.grouped_components_[j]
                    grouped_chunks[j].append(copy.deepcopy(ts0_grouped))
                    # save the correlation matrix only for the first segment
                    if i == 0 and self.conf.plot_correlations is not None:
                        ts0_grouped_wcor = mssa.w_correlation(ts0_grouped)
                        fig, ax = plt.subplots(figsize=(12,9))
                        sns.heatmap(np.abs(ts0_grouped_wcor), cmap='coolwarm', ax=ax)
                        ax.set_title('grouped component w-correlations')
                        fig.savefig(self.conf.plot_correlations + str(j) + 'grouped.png', dpi=200)
                        plt.close(fig)
            else:
                component_chunks.append(mssa.components_)

            # show progress
            progress = i*100/segments
            print("Segment: ",i,"/",segments, "     Progress: ", progress," %" )

        # Join the per-segment pieces once, after the loop.
        if use_groups:
            # grouped components are joined along axis 0 for each feature
            grouped_output = [np.concatenate(chunks, axis=0)
                              for chunks in grouped_chunks]
        else:
            # components are joined along axis 1
            grouped_output = np.concatenate(component_chunks, axis=1)
        self.output_ds = grouped_output

        if self.conf.plot_prefix is not None:
            # For the first data column, save one plot per component showing
            # the original series, the running sum of components and the
            # current component.
            cumulative_recon = np.zeros_like(input_ds[:, 0])
            for comp in range(len(grouped_output[0][0])):
                fig, ax = plt.subplots(figsize=(18, 7))
                current_component = grouped_output[0][:, comp]
                cumulative_recon = cumulative_recon + current_component
                ax.plot(input_ds[:, 0], lw=3, alpha=0.2, c='k', label='original')
                ax.plot(cumulative_recon, lw=3, c='darkgoldenrod', alpha=0.6, label='cumulative')
                ax.plot(current_component, lw=3, c='steelblue', alpha=0.8, label='component={}'.format(comp))
                ax.legend()
                fig.savefig(self.conf.plot_prefix + '_' + str(comp) + '.png', dpi=600)
                plt.close(fig)

        return self.output_ds

Пример #5

Показать файл

    def core(self, input_ds):
        """Decompose the mean-centred ``input_ds`` with MSSA, per segment.

        The rows are split into consecutive segments of ``2 * window_size``
        rows (the last segment also takes any remainder).  An MSSA model is
        fitted per segment; its components -- or, when ``group_file`` is set,
        its grouped components -- are joined along the tick axis.  The
        ``(features, ticks, channels)`` result is finally flattened to
        ``(ticks, features * channels)``.

        Reads from ``self.conf``: ``window_size``, ``num_components`` (``0``
        selects the rank with SVHT on the first segment and reuses it),
        ``group_file`` (JSON list of component groups applied to every
        feature), ``w_prefix`` (prefix for first-segment w-correlation
        heatmaps) and ``plot_prefix`` (prefix for per-component plots).

        Args:
            input_ds: array of shape ``(rows,)`` or ``(rows, features)``.

        Returns:
            ``self.output_ds``, a 2-D array of shape
            ``(ticks, features * channels)``.

        Raises:
            ValueError: if ``input_ds`` has fewer than ``2 * window_size``
                rows, so that no segment can be built.
        """
        # A 1-D input has a one-element shape, so the two-name unpacking
        # raises ValueError; treat such input as a single feature column.
        try:
            self.rows_d, self.cols_d = input_ds.shape
        except ValueError:
            (self.rows_d, ) = input_ds.shape
            self.cols_d = 1
            input_ds = input_ds.reshape(self.rows_d, self.cols_d)

        window_size = self.conf.window_size
        span = 2 * window_size
        segments = self.rows_d // span
        if segments < 1:
            raise ValueError(
                "input_ds needs at least 2 * window_size = {} rows, got {}"
                .format(span, self.rows_d))

        # center the input_ds before fitting
        in_means = np.nanmean(input_ds, axis=0)
        input_ds = input_ds - in_means

        # The same groups are used for all features, so load them once.
        use_groups = self.conf.group_file is not None
        if use_groups:
            with open(self.conf.group_file) as json_file:
                ts0_groups = json.load(json_file)
        save_wcorr = self.conf.w_prefix is not None

        rank = self.conf.num_components
        component_chunks = []                              # one entry per segment
        grouped_chunks = [[] for _ in range(self.cols_d)]  # per feature, per segment
        for i in range(segments):
            first = i * span
            # the last segment runs up to the final observation
            last = (i + 1) * span if i != segments - 1 else self.rows_d
            s_data_w = input_ds[first:last, :]

            # SVHT rank selection runs on the first segment only; later
            # segments reuse the same number of components.
            if i == 0 and self.conf.num_components == 0:
                mssa = MSSA(n_components='svht',
                            window_size=window_size,
                            verbose=True)
                mssa.fit(s_data_w)
                print(
                    "Automatically Selected Rank (number of components)= ",
                    str(mssa.rank_))
                rank = int(mssa.rank_)
            else:
                mssa = MSSA(n_components=rank,
                            window_size=window_size,
                            verbose=True)
                mssa.fit(s_data_w)

            if use_groups:
                print("Grouping correlated components (manually set list)")
                for j in range(self.cols_d):
                    mssa.set_ts_component_groups(j, ts0_groups)
                    ts0_grouped = mssa.grouped_components_[j]
                    grouped_chunks[j].append(copy.deepcopy(ts0_grouped))
                    # save the correlation matrix only for the first segment
                    if i == 0 and save_wcorr:
                        ts0_grouped_wcor = mssa.w_correlation(ts0_grouped)
                        fig, ax = plt.subplots(figsize=(12, 9))
                        sns.heatmap(np.abs(ts0_grouped_wcor),
                                    cmap='coolwarm',
                                    ax=ax)
                        ax.set_title('grouped component w-correlations')
                        fig.savefig(self.conf.w_prefix + str(j) +
                                    '_grouped.png',
                                    dpi=200)
                        plt.close(fig)
            else:
                component_chunks.append(mssa.components_)
                # save the correlation matrix only for the first segment
                if i == 0 and save_wcorr:
                    for j in range(self.cols_d):
                        total_comps = mssa.components_[j, :, :]
                        ts0_wcor = mssa.w_correlation(total_comps)
                        fig, ax = plt.subplots(figsize=(12, 9))
                        sns.heatmap(np.abs(ts0_wcor), cmap='coolwarm', ax=ax)
                        ax.set_title('component w-correlations')
                        fig.savefig(self.conf.w_prefix + str(j) + '.png',
                                    dpi=200)
                        plt.close(fig)

            # show progress
            progress = i * 100 / segments
            print("Segment: ", i, "/", segments, "     Progress: ", progress,
                  " %")

        # Join the per-segment pieces once, after the loop; both branches
        # give an array laid out as (features, ticks, channels).
        if use_groups:
            self.output_ds = np.array(
                [np.concatenate(chunks, axis=0) for chunks in grouped_chunks])
        else:
            self.output_ds = np.concatenate(component_chunks, axis=1)

        if self.conf.plot_prefix is not None:
            # For the first data column, save one plot per channel showing
            # the original series, the running sum of channels and the
            # current channel.
            cumulative_recon = np.zeros_like(input_ds[:, 0])
            for comp in range(self.output_ds.shape[2]):
                fig, ax = plt.subplots(figsize=(18, 7))
                current_component = self.output_ds[0, :, comp]
                cumulative_recon = cumulative_recon + current_component
                ax.plot(input_ds[:, 0],
                        lw=3,
                        alpha=0.2,
                        c='k',
                        label='original')
                ax.plot(cumulative_recon,
                        lw=3,
                        c='darkgoldenrod',
                        alpha=0.6,
                        label='cumulative')
                ax.plot(current_component,
                        lw=3,
                        c='steelblue',
                        alpha=0.8,
                        label='component={}'.format(comp))
                ax.legend()
                fig.savefig(self.conf.plot_prefix + '_' + str(comp) + '.png',
                            dpi=600)
                plt.close(fig)
        print("pre self.output_ds.shape = ", self.output_ds.shape)

        # Transform (features, ticks, channels) into (ticks, feats*channels):
        # each output row lists, feature by feature, all channels of one tick.
        n_feats, n_ticks, n_channels = self.output_ds.shape
        self.output_ds = self.output_ds.transpose(1, 0, 2).reshape(
            n_ticks, n_feats * n_channels)
        print("new self.output_ds.shape = ", self.output_ds.shape)
        return self.output_ds

Пример #6

Показать файл

Файл: get_iex_data.py Проект: smalik/stock_price_prediction

# Exploratory script: PCA on df_small, followed by SSA and MSSA decompositions.
# NOTE(review): `pca`, `df_small`, `_df`, `np`, `pd` and `plt` are not defined
# in this excerpt; presumably they are set up earlier in the file -- confirm.
pca.fit(df_small)
# Fit again, this time keeping whatever fit() returns for the report below.
fit = pca.fit(df_small)
# Fit once more and keep the transformed data (indexed with .iloc below).
trans = pca.fit_transform(df_small)
#_df.adjclose.plot()
# Plot the first column of the transformed data.
trans.iloc[:, 0].plot()
plt.show()
print(fit.column_correlations(df_small))




from pyts.decomposition import SingularSpectrumAnalysis
from pymssa import MSSA

window_size = 20
# Four index groups of five consecutive indices each (0-4, 5-9, 10-14, 15-19).
# `groups` is not used anywhere in this excerpt.
groups = [np.arange(i, i+5) for i in range(0, 20, 5)]

# Single-channel SSA on df_small; X_ssa is not used further in this excerpt.
ssa = SingularSpectrumAnalysis(window_size= window_size)
X_ssa = ssa.fit_transform(df_small)

# MSSA on the adjclose series with 5 components.
# NOTE(review): window_size here is 21, not the `window_size` variable (20)
# used for SSA above -- confirm the difference is intended.
mssa = MSSA(n_components=5,
            window_size=21,
            verbose=True)
mssa.fit(_df.adjclose)

# Plot every component of the first (index 0) series against _df's index.
pd.DataFrame(mssa.components_[0,:,:], index=_df.index).plot()
plt.show()

# Plot the original adjclose series for comparison.
_df.adjclose.plot()
plt.show()

Пример #7

Показать файл

Файл: TickerTransform.py Проект: smalik/stock_price_prediction

    def get_ssa(self, ncomp: int = None, wsize=60):
        """Fit an MSSA model on ``self.data`` and store it in ``self.model_ssa``.

        Args:
            ncomp: passed to ``MSSA`` as ``n_components`` (default ``None``).
            wsize: passed to ``MSSA`` as ``window_size`` (default 60).
        """
        model = MSSA(n_components=ncomp, window_size=wsize, verbose=True)
        model.fit(self.data)

        # Keep the fitted model on the instance.
        self.model_ssa = model