Exemplo n.º 1
0
    def explore_eqdiff_fitting(self, derivative_in_y, derivatives2explore, poly2explore, rational=False, getXfunc=get_x_operator_func):
        """Fit the equation for every (derivative order, polynomial degree) pair and chart the fit quality.

        For each combination a data manager is obtained from ``self.get_data_manager()``, its X operator
        is built with ``getXfunc`` and its y operator with ``get_y_operator_func``. The model is fitted
        with ``self.fit_eqdifff`` and the mean of the values returned by
        ``self.get_rsquare_of_eqdiff_fit`` is stored in a table. Three diagnostic figures are saved per
        combination; at the end the table is written with ``save_csv`` and drawn as a heatmap.

        :param derivative_in_y: forwarded to ``self.get_derivative_in_y``; when it equals -1 the table
            index is shifted by one before saving.
        :param derivatives2explore: derivative orders to try (rows of the table).
        :param poly2explore: polynomial degrees to try (columns of the table).
        :param rational: forwarded to ``getXfunc`` as the keyword ``rational``.
        :param getXfunc: callable ``(derivative_depth, poly_degree, rational=...)`` producing the X operator.
        :return: None
        """
        # ---------- record the parameters of this run ----------
        run_params = {
            'date': datetime.now(),
            'derivative_in_y': derivative_in_y,
            'derivatives2explore': derivatives2explore,
            'poly2explore': poly2explore,
        }
        self.experiments.append({'explore_eqdiff_fitting': run_params})

        y_folder = 'derivative_in_y_{}'.format(derivative_in_y)
        scores = pd.DataFrame(np.nan, columns=poly2explore, index=derivatives2explore)

        for degree in poly2explore:
            print("\n---------------------")
            print("Polynomial degree: {}".format(degree))
            print("Derivative order:", end='')

            for depth in derivatives2explore:
                print(" {}".format(depth), end='')

                dm = self.get_data_manager()
                x_operator = getXfunc(depth, degree, rational=rational)
                dm.set_X_operator(x_operator)
                y_operator = get_y_operator_func(self.get_derivative_in_y(derivative_in_y, depth))
                dm.set_y_operator(y_operator)

                finder = self.fit_eqdifff(dm)
                fit_quality = self.get_rsquare_of_eqdiff_fit(finder, dm)
                scores.loc[depth, degree] = np.mean(fit_quality.values)

                tag = '_y{}_der_x{}_pol{}'.format(derivative_in_y, depth, degree)

                # (file-name template, sub-folder, drawing callback). The callbacks are invoked
                # inside this same iteration, so closing over the loop locals is safe.
                # The feature-importance figure is intentionally not produced here.
                figures = (
                    ('fit_vs_real_{}', 'fit_vs_real',
                     lambda: self.plot_fitted_vs_real(finder, dm)),
                    ('fit_and_real_{}', 'fit_and_real',
                     lambda: self.plot_fitted_and_real(finder, dm)),
                    ('zoom_fit_and_real{}', 'fit_and_real_zoom',
                     lambda: self.plot_fitted_and_real(finder, dm, subinit=self.sub_set_init,
                                                       sublen=self.sub_set_len)),
                )
                for name_template, folder, draw in figures:
                    with savefig(name_template.format(tag), self.experiment_name,
                                 subfolders=[y_folder, folder]):
                        draw()

        if derivative_in_y == -1:
            # In this mode the row label should show the maximum derivative value.
            scores.index = scores.index + 1

        output_name = 'rsquares_eqfit_der_y{}_rational{}'.format(derivative_in_y, rational)
        save_csv(scores, output_name, self.experiment_name)

        # ---------- heatmap of the scores ----------
        with savefig(output_name, self.experiment_name):
            plt.close('all')
            # Scores that are not > 0 are multiplied by zero before plotting.
            masked_scores = scores * (scores > 0)
            sns.heatmap(masked_scores, annot=True)
            plt.xlabel("Polynomial max order")
            plt.ylabel("Derivative max order")
            plt.title("rsquares for derivative in y {}".format(derivative_in_y))
Exemplo n.º 2
0
    def explore_phase_diagram_delayed(self, prediction_methods, max_delay_inl_x, poly_degree, delay_in_y=0,
                                      rational=False, getXfunc=get_x_operator_func_delay):
        """Fit a delay-based model and save phase diagrams of real and predicted series.

        The X operator comes from ``getXfunc(max_delay, poly_degree, rational)`` and the y operator is
        a ``Delay`` along axis ``'t'`` applied to the field. After fitting with ``self.fit_eqdifff``,
        predictions are produced by ``self.do_predictions`` (seeded with whatever ``load_csv`` returns
        for previously stored predictions), both tables are saved with ``save_csv``, and one phase
        diagram per variable is saved for the real data and for every method found in the predictions.

        :param prediction_methods: iterable of method names; sorted before use.
        :param max_delay_inl_x: maximum delay passed to ``getXfunc``. The misspelled name is kept so
            that existing keyword callers continue to work.
        :param poly_degree: polynomial degree passed to ``getXfunc``.
        :param delay_in_y: delay used for the y operator.
        :param rational: passed positionally to ``getXfunc``.
        :param getXfunc: callable ``(max_delay, poly_degree, rational)`` producing the X operator.
        :return: None
        """
        # The body used to reference ``max_delay_in_x`` while the parameter is spelled
        # ``max_delay_inl_x``, which raised NameError on every call. Alias it locally.
        max_delay_in_x = max_delay_inl_x

        # ---------- save params of experiment ----------
        # NOTE(review): the key below is the same one explore_phase_diagram uses; left unchanged in
        # case something reads the experiments log by that key - confirm whether it is intentional.
        self.experiments.append({'explore_phase_diagram': {
            'date': datetime.now(),
            'prediction_methods': prediction_methods,
            'delay_in_y': delay_in_y,
            'max_delay_in_x': max_delay_in_x,
            'poly_degree': poly_degree}
        })

        # ----------------------------------------
        prediction_methods = sorted(prediction_methods)

        data_manager = self.get_data_manager()
        data_manager.set_X_operator(getXfunc(max_delay_in_x, poly_degree, rational))
        data_manager.set_y_operator(lambda field: Delay(axis_name='t', delay=delay_in_y) * field)

        # ---------- fit eqdiff ----------
        pde_finder = self.fit_eqdifff(data_manager)

        # ---------- predictions ----------
        subfolders = ['phase_diagram']
        predictions = load_csv('phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)
        real, predictions = self.do_predictions(prediction_methods, pde_finder, data_manager,
                                                self.phase_diagram_horizon, num_evaluations=1,
                                                predictions=predictions)
        print(predictions)
        save_csv(real, 'phase_diagram_real_data', self.experiment_name, subfolders=subfolders)
        save_csv(predictions, 'phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)

        # The predictions table may hold more methods than were requested in this call;
        # plot (and colour) every method present in it.
        prediction_methods = predictions.method.unique()
        method_colors = {pred_method: self.colors[i] for i, pred_method in enumerate(prediction_methods)}

        # ---------- phase diagrams, one set per variable ----------
        for var in data_manager.field.data:
            var_name = var.get_full_name()

            with savefig('phase_diagram_{}_real'.format(var_name), self.experiment_name, subfolders=subfolders):
                self.plot_phase_diagram(real[var_name].values.ravel(), dx=data_manager.domain.step_width['t'],
                                        method='real', var_name=var_name, color='black')

            for method, df in predictions.groupby('method'):
                with savefig('phase_diagram_{}_{}'.format(var_name, method), self.experiment_name,
                             subfolders=subfolders):
                    self.plot_phase_diagram(df[var_name].values.ravel(), dx=data_manager.domain.step_width['t'],
                                            method=method, var_name=var_name,
                                            color=method_colors[method])

            plt.close("all")
Exemplo n.º 3
0
    def explore_phase_diagram(self, prediction_methods, derivative_in_y, derivatives_in_x, poly_degree, rational=False,
                              method_label_dict={}, reload=True, starting_point={"t": 0}, prediction_methods2plot=None,
                              getXfunc=get_x_operator_func):
        """Fit the equation, integrate it with each prediction method and plot phase diagrams and series.

        Steps: configure the data manager operators, fit with ``self.fit_eqdifff``, save a coefficient
        plot, obtain predictions via ``pde_finder.integrate2`` (optionally reusing the stored CSV),
        obtain the real values via ``evaluator.get_real_values``, save both tables, and finally save
        per-variable figures with one subplot per plotted method.

        :param prediction_methods: iterable of method names; sorted before use.
        :param derivative_in_y: forwarded to ``self.get_derivative_in_y``; when negative, the ``dery``
            handed to ``integrate2`` is ``derivatives_in_x - derivative_in_y``, otherwise it is
            ``derivative_in_y`` itself.
        :param derivatives_in_x: derivative depth passed to ``getXfunc``.
        :param poly_degree: polynomial degree passed to ``getXfunc``.
        :param rational: passed positionally to ``getXfunc``.
        :param method_label_dict: not used by this method.
        :param reload: if True, methods already present in the stored predictions CSV are not
            recomputed; if False, stored rows for the requested methods are dropped and recomputed.
        :param starting_point: mapping forwarded to ``integrate2`` and ``evaluator.get_real_values``.
        :param prediction_methods2plot: methods to draw; defaults to ``prediction_methods``.
        :param getXfunc: callable ``(derivatives_in_x, poly_degree, rational)`` producing the X operator.
        :return: None
        """

        def padded_limits(values):
            """Return the (min, max) of ``values`` widened by half the value span on each side."""
            low, high = np.min(values), np.max(values)
            half_span = (high - low) / 2
            return low - half_span, high + half_span

        # Work on a private copy so the shared default dict can never be altered through a helper.
        starting_point = dict(starting_point)

        # ---------- save params of experiment ----------
        self.experiments.append({'explore_phase_diagram': {
            'date': datetime.now(),
            'prediction_methods': prediction_methods,
            'derivative_in_y': derivative_in_y,
            'derivatives_in_x': derivatives_in_x,
            'poly_degree': poly_degree}
        })

        subfolders = ['phase_diagram']

        # ----------------------------------------
        prediction_methods = sorted(prediction_methods)
        if prediction_methods2plot is None:
            prediction_methods2plot = prediction_methods

        data_manager = self.get_data_manager()
        data_manager.set_X_operator(getXfunc(derivatives_in_x, poly_degree, rational))
        data_manager.set_y_operator(get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

        # ---------- fit eqdiff ----------
        pde_finder = self.fit_eqdifff(data_manager)

        base_name = 'dery{}_derx{}_poly{}'.format(derivative_in_y, derivatives_in_x, poly_degree)
        with savefig('coeficients_{}_{}'.format('_'.join(prediction_methods), base_name), self.experiment_name,
                     subfolders=subfolders):
            self.plot_coefficients(pde_finder)

        # ---------- predictions ----------
        predictions = load_csv('phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)
        if predictions is not None and not reload and predictions.method.isin(prediction_methods).any():
            # Recompute requested: discard the stored rows of the methods about to be recalculated.
            predictions = predictions.loc[~predictions.method.isin(prediction_methods), :]

        dery = (derivatives_in_x - derivative_in_y) if derivative_in_y < 0 else derivative_in_y
        for prediction_method in prediction_methods:
            # ``predictions`` is None when nothing was stored yet; the previous code dereferenced it
            # unconditionally under reload=True and crashed on the first run.
            already_stored = predictions is not None and prediction_method in predictions.method.unique()
            if reload and already_stored:
                continue

            df_predictions = pde_finder.integrate2(dm=data_manager,
                                                   dery=dery,
                                                   starting_point=starting_point,
                                                   horizon=self.phase_diagram_horizon,
                                                   method=prediction_method)
            df_predictions['method'] = prediction_method
            # Avoid concatenating with an empty placeholder frame (its object-dtype columns can
            # degrade the dtypes of the result); the first result simply becomes the table.
            if predictions is None:
                predictions = df_predictions
            else:
                predictions = pd.concat([predictions, df_predictions])

        real = evaluator.get_real_values([Identity()],
                                         dm=data_manager,
                                         starting_point=starting_point,
                                         domain_variable2predict='t',
                                         horizon=self.phase_diagram_horizon)
        real = pd.concat(real)
        real = real.reset_index()
        real['method'] = 'real'

        print(predictions)
        save_csv(real, 'phase_diagram_real_data', self.experiment_name, subfolders=subfolders)
        save_csv(predictions, 'phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)

        # Methods to draw: those available in the table AND requested for plotting. Sorted so that
        # file names and colour assignment are reproducible (set iteration order is arbitrary) and
        # line up with the sorted order in which groupby yields the methods.
        prediction_methods = sorted(set(predictions.method.unique()).intersection(prediction_methods2plot))
        method_colors = {pred_method: self.colors[i] for i, pred_method in enumerate(prediction_methods)}
        methods_tag = '-'.join(prediction_methods)
        time_step = data_manager.domain.step_width['t']

        # ---------- figures, one set per variable ----------
        for var in data_manager.field.data:
            var_name = var.get_full_name()
            selected = predictions.loc[predictions.method.isin(prediction_methods), :]

            # ---------- plot phase diagram ----------
            with savefig('phase_diagram_{}_{}'.format(var_name, methods_tag), self.experiment_name,
                         subfolders=subfolders):
                _, allax = plt.subplots(nrows=len(prediction_methods),
                                        figsize=(15, len(prediction_methods) * 15), sharex=True)
                for i, (method, df) in enumerate(selected.groupby('method')):
                    # plt.subplots returns a single Axes (not an array) when nrows == 1.
                    ax = allax if len(prediction_methods) == 1 else allax[i]
                    x, dx = self.plot_phase_diagram(real[var_name].values.ravel(),
                                                    dx=time_step,
                                                    method='real', var_name=var_name, color='black', ax=ax)
                    # Axis limits are fixed from the real trajectory before the model curve is drawn.
                    ax.set_xlim(padded_limits(x))
                    ax.set_ylim(padded_limits(dx))
                    self.plot_phase_diagram(df[var_name].values.ravel(), dx=time_step,
                                            method=method, var_name=var_name,
                                            color=method_colors[method], ax=ax)
                    ax.legend()
            plt.close("all")

            # ---------- plot series ----------
            with savefig('predictions_{}_{}'.format(var_name, methods_tag), self.experiment_name,
                         subfolders=subfolders):
                _, allax = plt.subplots(nrows=len(prediction_methods),
                                        figsize=(15, len(prediction_methods) * 15), sharex=True)
                for i, (method, df) in enumerate(selected.groupby('method')):
                    ax = allax if len(prediction_methods) == 1 else allax[i]
                    real_series = real[var_name].values.ravel()
                    time_axis = real['index'].values.ravel() * time_step
                    ax.plot(time_axis, real_series, label='real', c='black')
                    ax.set_ylim(padded_limits(real_series))
                    ax.plot(time_axis, df[var_name].values.ravel(), label='model',
                            c=method_colors[method])

                    ax.set_xlabel(data_manager.domain.axis_names[0], fontsize=20)
                    ax.set_ylabel(varname2latex(var_name, derivative=0), fontsize=20, rotation=0)
                    ax.legend()
            plt.close("all")
Exemplo n.º 4
0
    def explore_predictions(self, prediction_methods, derivative_in_y, derivatives_in_x, poly_degree,
                            method_label_dict={}, getXfunc=get_x_operator_func):
        """Fit the equation, predict with each method and save per-step R2 / MAPE statistics and plots.

        Predictions come from ``self.do_predictions`` (seeded with the stored predictions returned by
        ``load_csv``). For every variable, R2, MAPE and the MAPE standard deviation are computed per
        ``'index'`` level group with ``evaluator``; R2 and MAPE tables are saved with ``save_csv`` and
        reused on later runs for the methods they already contain.

        :param prediction_methods: iterable of method names; sorted before use.
        :param derivative_in_y: forwarded to ``self.get_derivative_in_y``.
        :param derivatives_in_x: derivative depth passed to ``getXfunc``; ``derivatives_in_x + 1`` is
            passed to ``do_predictions`` as ``dery``.
        :param poly_degree: polynomial degree passed to ``getXfunc``.
        :param method_label_dict: optional mapping method name -> legend label. Methods missing from
            it (or an empty / None mapping) are labelled with the method name itself.
        :param getXfunc: callable ``(derivatives_in_x, poly_degree)`` producing the X operator.
        :return: None
        """
        # ---------- save params of experiment ----------
        self.experiments.append({'explore_predictions': {
            'date': datetime.now(),
            'prediction_methods': prediction_methods,
            'derivative_in_y': derivative_in_y,
            'derivatives_in_x': derivatives_in_x,
            'poly_degree': poly_degree}
        })

        # ----------------------------------------
        prediction_methods = sorted(prediction_methods)

        data_manager = self.get_data_manager()
        data_manager.set_X_operator(getXfunc(derivatives_in_x, poly_degree))
        data_manager.set_y_operator(get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

        # ---------- fit eqdiff ----------
        pde_finder = self.fit_eqdifff(data_manager)

        # ---------- predictions ----------
        subfolders = ['predictions']
        predictions = load_csv('future_predictions_data', self.experiment_name, subfolders=subfolders)
        real, predictions = self.do_predictions(prediction_methods=prediction_methods,
                                                pde_finder=pde_finder,
                                                dery=derivatives_in_x + 1,
                                                data_manager=data_manager,
                                                horizon=self.horizon,
                                                num_evaluations=self.num_evaluations,
                                                predictions=predictions)
        save_csv(real, 'future_real_data', self.experiment_name, subfolders=subfolders)
        save_csv(predictions, 'future_predictions_data', self.experiment_name, subfolders=subfolders)

        # The predictions table may hold more methods than were requested in this call.
        prediction_methods = predictions.method.unique()
        method_colors = {pred_method: self.colors[i] for i, pred_method in enumerate(prediction_methods)}

        # Legend labels fall back to the method name, so an empty, None or partial mapping no
        # longer raises. The shared ``{}`` default is only read, never mutated.
        labels = method_label_dict or {}

        # ---------- evaluate statistics ----------
        for var in data_manager.field.data:
            var_name = var.get_full_name()

            # ---------- statistics ----------
            rsq = pd.DataFrame(np.nan, columns=prediction_methods, index=np.arange(self.horizon))
            mape = pd.DataFrame(np.nan, columns=prediction_methods, index=np.arange(self.horizon))
            mape_std = pd.DataFrame(np.nan, columns=prediction_methods, index=np.arange(self.horizon))

            # Reuse statistics of methods that were already calculated in a previous run.
            old_rsq = load_csv('r2_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
            old_mape = load_csv('mape_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
            if old_rsq is None:
                old_methods = []
            else:
                old_methods = old_rsq.columns
                rsq[old_methods] = old_rsq
                mape[old_methods] = old_mape
                # NOTE(review): mape_std is never stored, so methods taken from the cache keep NaN
                # there and their shaded band is not drawn below - confirm whether that is intended.

            # calculate statistics for the methods that are not cached
            for method, df in predictions.groupby('method'):
                if method in old_methods:
                    continue

                for (ix_p, dfp), (ix_r, dfr) in zip(df.groupby(level='index'), real.groupby(level='index')):
                    assert ix_p == ix_r
                    rsq.loc[ix_p, method] = evaluator.rsquare(dfp[var_name], dfr[var_name])
                    mape.loc[ix_p, method] = evaluator.mape(dfp[var_name], dfr[var_name])
                    mape_std.loc[ix_p, method] = evaluator.mape_sd(dfp[var_name], dfr[var_name])

            # save
            save_csv(rsq, 'r2_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
            save_csv(mape, 'mape_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)

            # ---------- plot statistics ----------
            with savefig('R2_{}'.format(var_name), self.experiment_name, subfolders=subfolders):
                _, ax = plt.subplots()
                for method in rsq.columns:
                    positive = rsq[method] > 0  # only strictly positive R2 values are drawn
                    ax.plot(rsq.index[positive], rsq[method][positive], '.-', c=method_colors[method],
                            label=labels.get(method, method))
                plt.legend()

            with savefig('mape_{}'.format(var_name), self.experiment_name, subfolders=subfolders):
                _, ax = plt.subplots()
                for method in rsq.columns:
                    below_one = mape[method] < 1  # only MAPE values below 1 are drawn
                    steps = mape.index[below_one]
                    center = mape[method][below_one]
                    spread = mape_std[method][below_one]
                    ax.plot(steps, center, c=method_colors[method],
                            label=labels.get(method, method))
                    ax.fill_between(steps, center - spread, center + spread,
                                    color=method_colors[method], alpha=0.4)
                plt.legend()
            plt.close("all")
Exemplo n.º 5
0
    def explore_noise_discretization(self, noise_range, discretization_range, derivative_in_y, derivatives_in_x,
                                     poly_degree, std_of_discrete_grad=False):
        """Measure how the equation fit degrades with added noise and coarser sampling.

        For every sampling stride in ``discretization_range`` and every noise factor in
        ``noise_range`` the series are subsampled (every ``measure_dt``-th point), Gaussian noise with
        scale ``std * noise`` is added, the model is fitted on the noisy data with
        ``self.fit_eqdifff`` and scored with ``evaluator.rsquare`` against the y values of the
        noise-free subsampled data. Diagnostic figures are saved per combination and the score table
        is saved with ``save_csv`` and drawn as a heatmap.

        :param noise_range: noise factors (rows of the result); each multiplies the std of the
            absolute gradient of the series.
        :param discretization_range: integer sampling strides (columns of the result).
        :param derivative_in_y: forwarded to ``self.get_derivative_in_y``.
        :param derivatives_in_x: derivative depth passed to ``get_x_operator_func``.
        :param poly_degree: polynomial degree passed to ``get_x_operator_func``.
        :param std_of_discrete_grad: True to take the std of the gradient from the subsampled
            series; otherwise the std of the gradient of the original series is used.
        :return: DataFrame of scores indexed by noise (index rounded to 2 decimals) with one column
            per sampling stride.
        """
        # ---------- save params of experiment ----------
        self.experiments.append({'explore_noise_discretization': {
            'date': datetime.now(),
            'noise_range': noise_range,
            'discretization_range': discretization_range,
            'derivative_in_y': derivative_in_y,
            'derivatives_in_x': derivatives_in_x,
            'poly_degree': poly_degree}
        })

        # ----------------------------------------
        rsquares = pd.DataFrame(np.nan, index=noise_range, columns=discretization_range)
        rsquares.index.name = "Noise"
        rsquares.columns.name = "Discretization"

        # ----------------------------------------
        data_manager = self.get_data_manager()
        noise_folder = 'noise_derivative_in_y_{}'.format(derivative_in_y)

        # Reference noise scale per variable: std of the absolute gradient of the full series.
        std_of_vars = []
        for var in data_manager.field.data:
            series_grad = np.abs(np.gradient(var.data))
            std_of_vars.append(np.std(series_grad))
            with savefig('Distribution_series_differences_{}'.format(var.get_full_name()), self.experiment_name,
                         subfolders=[noise_folder]):
                sns.distplot(series_grad, bins=int(np.sqrt(len(var.data))))
                plt.axvline(x=std_of_vars[-1])

        # ---------- Noise evaluation ----------
        for measure_dt in discretization_range:
            print("\n---------------------")
            print("meassure dt: {}".format(measure_dt))
            print("Noise: ", end='')
            for noise in noise_range:
                # Separate consecutive values; they used to be printed run together.
                print(" {}".format(noise), end='')

                # Keep every measure_dt-th time step and describe the coarser time domain.
                new_t = data_manager.domain.get_range("t")['t'][::measure_dt]
                domain_temp = Domain(lower_limits_dict={"t": np.min(new_t)},
                                     upper_limits_dict={"t": np.max(new_t)},
                                     step_width_dict={"t": data_manager.domain.step_width['t'] * measure_dt})

                data_manager_temp = DataManager()           # noisy, subsampled data (used for fitting)
                data_manager_original_temp = DataManager()  # noise-free, subsampled data (reference y)
                for reference_std, var in zip(std_of_vars, data_manager.field.data):
                    data_original = var.data[::measure_dt]

                    if std_of_discrete_grad:
                        noise_std = np.std(np.abs(np.gradient(data_original)))
                    else:
                        noise_std = reference_std

                    data = data_original + np.random.normal(loc=0, scale=noise_std * noise,
                                                            size=len(data_original))
                    data_manager_temp.add_variables(
                        Variable(data, domain_temp, domain2axis={"t": 0}, variable_name=var.name))
                    data_manager_original_temp.add_variables(
                        Variable(data_original, domain_temp, domain2axis={"t": 0}, variable_name=var.name))

                # Both managers get the identical configuration.
                for manager in (data_manager_temp, data_manager_original_temp):
                    manager.add_regressors([])
                    manager.set_domain()
                    manager.set_X_operator(get_x_operator_func(derivatives_in_x, poly_degree))
                    manager.set_y_operator(
                        get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

                pde_finder = self.fit_eqdifff(data_manager_temp)

                # Predict from the noisy features but score against the noise-free target.
                y = data_manager_original_temp.get_y_dframe(self.testSplit)
                yhat = pd.DataFrame(pde_finder.transform(data_manager_temp.get_X_dframe(self.testSplit)),
                                    columns=y.columns)
                # Average over target columns, as explore_eqdiff_fitting does; assigning the raw
                # ``.values`` array into a single cell only works when there is exactly one column.
                rsquares.loc[noise, measure_dt] = np.mean(evaluator.rsquare(yhat=yhat, y=y).values)

                noise_tag = str(noise).replace('.', '')

                with savefig('fit_vs_real_der_y{}_noise{}_dt{}'.format(derivative_in_y, noise_tag, measure_dt),
                             self.experiment_name,
                             subfolders=[noise_folder, 'fit_vs_real']):
                    self.plot_fitted_vs_real(pde_finder, data_manager_temp)

                with savefig('fit_and_real_der_y{}_noise{}_dt{}'.format(derivative_in_y, noise_tag, measure_dt),
                             self.experiment_name,
                             subfolders=[noise_folder, 'fit_and_real']):
                    self.plot_fitted_and_real(pde_finder, data_manager_temp)

                with savefig('zoom_fit_and_real_der_y{}_dt{}_noise{}'.format(derivative_in_y, measure_dt, noise_tag),
                             self.experiment_name,
                             subfolders=[noise_folder, 'fit_and_real_zoom']):
                    self.plot_fitted_and_real(pde_finder, data_manager_temp, subinit=self.sub_set_init,
                                              sublen=self.sub_set_len)

        save_csv(rsquares, 'noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name)

        # ---------- plot heatmap of rsquares ----------
        with savefig('noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name):
            # Rounded after the CSV is written, so only the plot and the returned frame are affected.
            rsquares.index = np.round(rsquares.index, decimals=2)
            plt.close('all')
            # Scores that are not > 0 are multiplied by zero before plotting.
            sns.heatmap(rsquares * (rsquares > 0), annot=True)
            plt.xlabel("Discretization (dt)")
            plt.ylabel("Noise (k*std)")
            plt.title("Noise and discretization for derivative in y {}".format(derivative_in_y))

        return rsquares