def explore_eqdiff_fitting(self, derivative_in_y, derivatives2explore, poly2explore, rational=False,
                           getXfunc=get_x_operator_func):
    # ---------- save params of experiment ----------
    self.experiments.append({'explore_eqdiff_fitting': {
        'date': datetime.now(),
        'derivative_in_y': derivative_in_y,
        'derivatives2explore': derivatives2explore,
        'poly2explore': poly2explore}
    })

    rsquares = pd.DataFrame(np.nan, columns=poly2explore, index=derivatives2explore)
    for poly_degree in poly2explore:
        print("\n---------------------")
        print("Polynomial degree: {}".format(poly_degree))
        print("Derivative order:", end='')
        for derivative_depth in derivatives2explore:
            print(" {}".format(derivative_depth), end='')

            data_manager = self.get_data_manager()
            data_manager.set_X_operator(getXfunc(derivative_depth, poly_degree, rational=rational))
            data_manager.set_y_operator(
                get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivative_depth)))

            pde_finder = self.fit_eqdifff(data_manager)
            rsquares.loc[derivative_depth, poly_degree] = np.mean(
                self.get_rsquare_of_eqdiff_fit(pde_finder, data_manager).values)

            subname = '_y{}_der_x{}_pol{}'.format(derivative_in_y, derivative_depth, poly_degree)
            # with savefig('feature_importance_der{}'.format(subname), self.experiment_name,
            #              subfolders=['derivative_in_y_{}'.format(derivative_in_y), 'feature_importances']):
            #     self.plot_feature_importance(pde_finder)

            with savefig('fit_vs_real_{}'.format(subname), self.experiment_name,
                         subfolders=['derivative_in_y_{}'.format(derivative_in_y), 'fit_vs_real']):
                self.plot_fitted_vs_real(pde_finder, data_manager)

            with savefig('fit_and_real_{}'.format(subname), self.experiment_name,
                         subfolders=['derivative_in_y_{}'.format(derivative_in_y), 'fit_and_real']):
                self.plot_fitted_and_real(pde_finder, data_manager)

            with savefig('zoom_fit_and_real{}'.format(subname), self.experiment_name,
                         subfolders=['derivative_in_y_{}'.format(derivative_in_y), 'fit_and_real_zoom']):
                self.plot_fitted_and_real(pde_finder, data_manager, subinit=self.sub_set_init,
                                          sublen=self.sub_set_len)

    if derivative_in_y == -1:  # because we want to plot the maximum derivative value.
        rsquares.index = rsquares.index + 1

    save_csv(rsquares, 'rsquares_eqfit_der_y{}_rational{}'.format(derivative_in_y, rational), self.experiment_name)

    # ---------- plot heatmap of rsquares ----------
    with savefig('rsquares_eqfit_der_y{}_rational{}'.format(derivative_in_y, rational), self.experiment_name):
        plt.close('all')
        sns.heatmap(rsquares * (rsquares > 0), annot=True)
        plt.xlabel("Polynomial max order")
        plt.ylabel("Derivative max order")
        plt.title("rsquares for derivative in y {}".format(derivative_in_y))
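# --- Usage sketch (not part of the original source). A minimal example of how this sweep might be
# --- invoked, assuming `exp` is an already-constructed instance of this experiment class; the
# --- concrete ranges below are illustrative values, not taken from the source.
def run_eqdiff_fitting_sweep(exp):
    # Fit using the highest explored derivative as target (derivative_in_y=-1) and scan a small grid
    # of derivative orders and polynomial degrees; per-cell R^2 values are saved and plotted as a heatmap.
    exp.explore_eqdiff_fitting(derivative_in_y=-1,
                               derivatives2explore=[1, 2, 3],
                               poly2explore=[1, 2, 3],
                               rational=False)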
def explore_phase_diagram_delayed(self, prediction_methods, max_delay_in_x, poly_degree, delay_in_y=0,
                                  rational=False, getXfunc=get_x_operator_func_delay):
    # ---------- save params of experiment ----------
    self.experiments.append({'explore_phase_diagram': {
        'date': datetime.now(),
        'prediction_methods': prediction_methods,
        'delay_in_y': delay_in_y,
        'max_delay_in_x': max_delay_in_x,
        'poly_degree': poly_degree}
    })

    # ----------------------------------------
    prediction_methods = list(sorted(prediction_methods))

    data_manager = self.get_data_manager()
    data_manager.set_X_operator(getXfunc(max_delay_in_x, poly_degree, rational))
    data_manager.set_y_operator(lambda field: Delay(axis_name='t', delay=delay_in_y) * field)

    # ---------- fit eqdiff ----------
    pde_finder = self.fit_eqdifff(data_manager)

    # ---------- predictions ----------
    subfolders = ['phase_diagram']
    predictions = load_csv('phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)
    real, predictions = self.do_predictions(prediction_methods, pde_finder, data_manager,
                                            self.phase_diagram_horizon, num_evaluations=1,
                                            predictions=predictions)
    print(predictions)
    save_csv(real, 'phase_diagram_real_data', self.experiment_name, subfolders=subfolders)
    save_csv(predictions, 'phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)

    # if we want to append new methods.
    prediction_methods = predictions.method.unique()
    method_colors = {pred_method: self.colors[i] for i, pred_method in enumerate(prediction_methods)}

    # ---------- evaluate statistics ----------
    for var in data_manager.field.data:
        var_name = var.get_full_name()

        # ---------- plot phase diagram ----------
        with savefig('phase_diagram_{}_real'.format(var_name), self.experiment_name, subfolders=subfolders):
            self.plot_phase_diagram(real[var_name].values.ravel(), dx=data_manager.domain.step_width['t'],
                                    method='real', var_name=var_name, color='black')

        for method, df in predictions.groupby('method'):
            with savefig('phase_diagram_{}_{}'.format(var_name, method), self.experiment_name,
                         subfolders=subfolders):
                self.plot_phase_diagram(df[var_name].values.ravel(), dx=data_manager.domain.step_width['t'],
                                        method=method, var_name=var_name, color=method_colors[method])

        plt.close("all")
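# --- Usage sketch (not from the original source), assuming `exp` is an instance of this experiment
# --- class. It illustrates the delayed variant: X is built from up to `max_delay_in_x` lagged copies
# --- of the field and y is the field delayed by `delay_in_y` steps. The method name passed below is
# --- a hypothetical placeholder, not a value confirmed by the source.
def run_delayed_phase_diagram(exp):
    exp.explore_phase_diagram_delayed(prediction_methods=['Euler'],  # hypothetical prediction-method name
                                      max_delay_in_x=3,
                                      poly_degree=2,
                                      delay_in_y=1)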
def explore_phase_diagram(self, prediction_methods, derivative_in_y, derivatives_in_x, poly_degree, rational=False,
                          method_label_dict={}, reload=True, starting_point={"t": 0},
                          prediction_methods2plot=None, getXfunc=get_x_operator_func):
    # ---------- save params of experiment ----------
    self.experiments.append({'explore_phase_diagram': {
        'date': datetime.now(),
        'prediction_methods': prediction_methods,
        'derivative_in_y': derivative_in_y,
        'derivatives_in_x': derivatives_in_x,
        'poly_degree': poly_degree}
    })

    subfolders = ['phase_diagram']

    # ----------------------------------------
    prediction_methods = list(sorted(prediction_methods))
    if prediction_methods2plot is None:
        prediction_methods2plot = prediction_methods

    data_manager = self.get_data_manager()
    data_manager.set_X_operator(getXfunc(derivatives_in_x, poly_degree, rational))
    data_manager.set_y_operator(get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

    # ---------- fit eqdiff ----------
    pde_finder = self.fit_eqdifff(data_manager)
    base_name = 'dery{}_derx{}_poly{}'.format(derivative_in_y, derivatives_in_x, poly_degree)
    # pde_finder = self.load_fitsave_eqdifff(self, data_manager)

    with savefig('coeficients_{}_{}'.format('_'.join(prediction_methods), base_name),
                 self.experiment_name, subfolders=subfolders):
        self.plot_coefficients(pde_finder)

    # ---------- predictions ----------
    predictions = load_csv('phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)
    # when not reloading, drop cached rows for the requested methods so they are recomputed.
    if predictions is not None and not reload and predictions.method.isin(prediction_methods).any():
        predictions = predictions.loc[~predictions.method.isin(prediction_methods), :]

    for i, prediction_method in enumerate(prediction_methods):
        # compute unless reload is requested and this method is already cached (None guard avoids a
        # crash when no cached predictions file exists).
        if predictions is None or not reload or prediction_method not in predictions.method.unique():
            df_predictions = pde_finder.integrate2(
                dm=data_manager,
                dery=derivatives_in_x - derivative_in_y if derivative_in_y < 0 else derivative_in_y,
                starting_point=starting_point,
                horizon=self.phase_diagram_horizon,
                method=prediction_method)
            df_predictions['method'] = prediction_method
            predictions = pd.concat(
                [predictions if predictions is not None else pd.DataFrame([], columns=df_predictions.columns)]
                + [df_predictions])

    real = evaluator.get_real_values([Identity()], dm=data_manager, starting_point=starting_point,
                                     domain_variable2predict='t', horizon=self.phase_diagram_horizon)
    real = pd.concat(real)
    real = real.reset_index()
    real['method'] = 'real'

    print(predictions)
    save_csv(real, 'phase_diagram_real_data', self.experiment_name, subfolders=subfolders)
    save_csv(predictions, 'phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)

    # if we want to append new methods.
    prediction_methods = set(predictions.method.unique()).intersection(set(prediction_methods2plot))
    method_colors = {pred_method: self.colors[i] for i, pred_method in enumerate(prediction_methods)}

    # ---------- evaluate statistics ----------
    for var in data_manager.field.data:
        var_name = var.get_full_name()

        # ---------- plot phase diagram ----------
        with savefig('phase_diagram_{}_{}'.format(var_name, '-'.join(prediction_methods)), self.experiment_name,
                     subfolders=subfolders):
            fig, allax = plt.subplots(nrows=len(prediction_methods),
                                      figsize=(15, len(prediction_methods) * 15), sharex=True)
            for i, (method, df) in enumerate(
                    predictions.loc[predictions.method.isin(prediction_methods), :].groupby('method')):
                ax = allax if len(prediction_methods) == 1 else allax[i]
                x, dx = self.plot_phase_diagram(real[var_name].values.ravel(),
                                                dx=data_manager.domain.step_width['t'],
                                                method='real', var_name=var_name, color='black', ax=ax)
                ax.set_xlim((np.min(x) - (np.max(x) - np.min(x)) / 2,
                             np.max(x) + (np.max(x) - np.min(x)) / 2))
                ax.set_ylim((np.min(dx) - (np.max(dx) - np.min(dx)) / 2,
                             np.max(dx) + (np.max(dx) - np.min(dx)) / 2))
                self.plot_phase_diagram(df[var_name].values.ravel(), dx=data_manager.domain.step_width['t'],
                                        method=method, var_name=var_name, color=method_colors[method], ax=ax)
                ax.legend()
        plt.close("all")

        # ---------- plot series ----------
        with savefig('predictions_{}_{}'.format(var_name, '-'.join(prediction_methods)), self.experiment_name,
                     subfolders=subfolders):
            fig, allax = plt.subplots(nrows=len(prediction_methods),
                                      figsize=(15, len(prediction_methods) * 15), sharex=True)
            for i, (method, df) in enumerate(
                    predictions.loc[predictions.method.isin(prediction_methods), :].groupby('method')):
                ax = allax if len(prediction_methods) == 1 else allax[i]
                real_series = real[var_name].values.ravel()
                ax.plot(real['index'].values.ravel() * data_manager.domain.step_width['t'],
                        real[var_name].values.ravel(), label='real', c='black')
                ax.set_ylim((np.min(real_series) - (np.max(real_series) - np.min(real_series)) / 2,
                             np.max(real_series) + (np.max(real_series) - np.min(real_series)) / 2))
                ax.plot(real['index'].values.ravel() * data_manager.domain.step_width['t'],
                        df[var_name].values.ravel(), label='model', c=method_colors[method])
                ax.set_xlabel(data_manager.domain.axis_names[0], fontsize=20)
                ax.set_ylabel(varname2latex(var_name, derivative=0), fontsize=20, rotation=0)
                ax.legend()
        plt.close("all")
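# --- Usage sketch (not from the original source), assuming `exp` is an instance of this experiment
# --- class. Cached predictions are reused when reload=True and a method is already in the saved CSV,
# --- while prediction_methods2plot restricts plotting without refitting. Method names below are
# --- hypothetical placeholders, not values confirmed by the source.
def run_phase_diagram(exp):
    exp.explore_phase_diagram(prediction_methods=['Euler', 'RK45'],  # hypothetical method names
                              derivative_in_y=-1,
                              derivatives_in_x=2,
                              poly_degree=3,
                              reload=True,
                              starting_point={"t": 0},
                              prediction_methods2plot=['RK45'])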
def explore_predictions(self, prediction_methods, derivative_in_y, derivatives_in_x, poly_degree,
                        method_label_dict={}, getXfunc=get_x_operator_func):
    # ---------- save params of experiment ----------
    self.experiments.append({'explore_predictions': {
        'date': datetime.now(),
        'prediction_methods': prediction_methods,
        'derivative_in_y': derivative_in_y,
        'derivatives_in_x': derivatives_in_x,
        'poly_degree': poly_degree}
    })

    # ----------------------------------------
    prediction_methods = list(sorted(prediction_methods))

    data_manager = self.get_data_manager()
    data_manager.set_X_operator(getXfunc(derivatives_in_x, poly_degree))
    data_manager.set_y_operator(get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

    # ---------- fit eqdiff ----------
    pde_finder = self.fit_eqdifff(data_manager)

    # ---------- predictions ----------
    subfolders = ['predictions']
    predictions = load_csv('future_predictions_data', self.experiment_name, subfolders=subfolders)
    real, predictions = self.do_predictions(prediction_methods=prediction_methods, pde_finder=pde_finder,
                                            dery=derivatives_in_x + 1, data_manager=data_manager,
                                            horizon=self.horizon, num_evaluations=self.num_evaluations,
                                            predictions=predictions)
    save_csv(real, 'future_real_data', self.experiment_name, subfolders=subfolders)
    save_csv(predictions, 'future_predictions_data', self.experiment_name, subfolders=subfolders)

    # if we want to append new methods.
    prediction_methods = predictions.method.unique()
    method_colors = {pred_method: self.colors[i] for i, pred_method in enumerate(prediction_methods)}
    if method_label_dict == {}:
        method_label_dict = {method: method for method in prediction_methods}

    # ---------- evaluate statistics ----------
    for var in data_manager.field.data:
        var_name = var.get_full_name()

        # ---------- statistics ----------
        rsq = pd.DataFrame(np.nan, columns=prediction_methods, index=np.arange(self.horizon))
        mape = pd.DataFrame(np.nan, columns=prediction_methods, index=np.arange(self.horizon))
        mape_std = pd.DataFrame(np.nan, columns=prediction_methods, index=np.arange(self.horizon))

        # check if there are methods already calculated
        old_rsq = load_csv('r2_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
        old_mape = load_csv('mape_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
        if old_rsq is None:
            old_methods = []
        else:
            old_methods = old_rsq.columns
            rsq[old_methods] = old_rsq
            mape[old_methods] = old_mape

        # calculate statistics
        for method, df in predictions.groupby('method'):
            if method in old_methods:
                continue
            for (ix_p, dfp), (ix_r, dfr) in zip(df.groupby(level='index'), real.groupby(level='index')):
                assert ix_p == ix_r
                rsq.loc[ix_p, method] = evaluator.rsquare(dfp[var_name], dfr[var_name])
                mape.loc[ix_p, method] = evaluator.mape(dfp[var_name], dfr[var_name])
                mape_std.loc[ix_p, method] = evaluator.mape_sd(dfp[var_name], dfr[var_name])

        # save
        save_csv(rsq, 'r2_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
        save_csv(mape, 'mape_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)

        # ---------- plot statistics ----------
        with savefig('R2_{}'.format(var_name), self.experiment_name, subfolders=subfolders):
            fig, ax = plt.subplots()
            for method in rsq.columns:
                ax.plot(rsq.index[rsq[method] > 0], rsq[method][rsq[method] > 0], '.-',
                        c=method_colors[method], label=method_label_dict[method])
            plt.legend()

        with savefig('mape_{}'.format(var_name), self.experiment_name, subfolders=subfolders):
            fig, ax = plt.subplots()
            for method in rsq.columns:
                ax.plot(mape.index[mape[method] < 1], mape[method][mape[method] < 1],
                        c=method_colors[method], label=method_label_dict[method])
                ax.fill_between(mape.index[mape[method] < 1],
                                mape[method][mape[method] < 1] - mape_std[method][mape[method] < 1],
                                mape[method][mape[method] < 1] + mape_std[method][mape[method] < 1],
                                color=method_colors[method], alpha=0.4)
            plt.legend()

        plt.close("all")
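# --- Usage sketch (not from the original source), assuming `exp` is an instance of this experiment
# --- class. method_label_dict maps internal method names to legend labels; if left empty the method
# --- names themselves are used. The method names below are hypothetical placeholders.
def run_prediction_exploration(exp):
    exp.explore_predictions(prediction_methods=['Euler', 'RK45'],  # hypothetical method names
                            derivative_in_y=-1,
                            derivatives_in_x=2,
                            poly_degree=3,
                            method_label_dict={'Euler': 'explicit Euler', 'RK45': 'Runge-Kutta 4(5)'})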
def explore_noise_discretization(self, noise_range, discretization_range, derivative_in_y, derivatives_in_x,
                                 poly_degree, std_of_discrete_grad=False):
    """
    :param noise_range:
    :param discretization_range:
    :param derivative_in_y:
    :param derivatives_in_x:
    :param poly_degree:
    :param std_of_discrete_grad: True to compute the std of the gradient of the series from the
        discretized (sub-sampled) version; otherwise it is computed from the original series.
    :return:
    """
    # ---------- save params of experiment ----------
    self.experiments.append({'explore_noise_discretization': {
        'date': datetime.now(),
        'noise_range': noise_range,
        'discretization_range': discretization_range,
        'derivative_in_y': derivative_in_y,
        'derivatives_in_x': derivatives_in_x,
        'poly_degree': poly_degree}
    })

    # ----------------------------------------
    rsquares = pd.DataFrame(np.nan, index=noise_range, columns=discretization_range)
    rsquares.index.name = "Noise"
    rsquares.columns.name = "Discretization"

    # ----------------------------------------
    data_manager = self.get_data_manager()

    std_of_vars = []
    for var in data_manager.field.data:
        series_grad = np.abs(np.gradient(var.data))
        std_of_vars.append(np.std(series_grad))
        with savefig('Distribution_series_differences_{}'.format(var.get_full_name()), self.experiment_name,
                     subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y)]):
            sns.distplot(series_grad, bins=int(np.sqrt(len(var.data))))
            plt.axvline(x=std_of_vars[-1])

    # ---------- Noise evaluation ----------
    for measure_dt in discretization_range:
        print("\n---------------------")
        print("measure dt: {}".format(measure_dt))
        print("Noise:", end='')
        for noise in noise_range:
            print(" {}".format(noise), end='')

            # choose steps with bigger dt; and add normal noise.
            new_t = data_manager.domain.get_range("t")['t'][::measure_dt]
            domain_temp = Domain(lower_limits_dict={"t": np.min(new_t)},
                                 upper_limits_dict={"t": np.max(new_t)},
                                 step_width_dict={"t": data_manager.domain.step_width['t'] * measure_dt})

            data_manager_temp = DataManager()
            data_manager_original_temp = DataManager()
            for std, var in zip(std_of_vars, data_manager.field.data):
                data_original = var.data[::measure_dt]
                if std_of_discrete_grad:
                    series_grad = np.abs(np.gradient(data_original))
                    std = np.std(series_grad)
                data = data_original + np.random.normal(loc=0, scale=std * noise, size=len(data_original))
                data_manager_temp.add_variables(
                    Variable(data, domain_temp, domain2axis={"t": 0}, variable_name=var.name))
                data_manager_original_temp.add_variables(
                    Variable(data_original, domain_temp, domain2axis={"t": 0}, variable_name=var.name))

            data_manager_temp.add_regressors([])
            data_manager_temp.set_domain()
            data_manager_original_temp.add_regressors([])
            data_manager_original_temp.set_domain()

            data_manager_temp.set_X_operator(get_x_operator_func(derivatives_in_x, poly_degree))
            data_manager_temp.set_y_operator(
                get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))
            data_manager_original_temp.set_X_operator(get_x_operator_func(derivatives_in_x, poly_degree))
            data_manager_original_temp.set_y_operator(
                get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

            pde_finder = self.fit_eqdifff(data_manager_temp)

            y = data_manager_original_temp.get_y_dframe(self.testSplit)
            yhat = pd.DataFrame(pde_finder.transform(data_manager_temp.get_X_dframe(self.testSplit)),
                                columns=y.columns)
            rsquares.loc[noise, measure_dt] = evaluator.rsquare(yhat=yhat, y=y).values
            # rsquares.loc[noise, measure_dt] = self.get_rsquare_of_eqdiff_fit(pde_finder, data_manager_temp).values

            with savefig('fit_vs_real_der_y{}_noise{}_dt{}'.format(derivative_in_y,
                                                                   str(noise).replace('.', ''), measure_dt),
                         self.experiment_name,
                         subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_vs_real']):
                self.plot_fitted_vs_real(pde_finder, data_manager_temp)

            with savefig('fit_and_real_der_y{}_noise{}_dt{}'.format(derivative_in_y,
                                                                    str(noise).replace('.', ''), measure_dt),
                         self.experiment_name,
                         subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_and_real']):
                self.plot_fitted_and_real(pde_finder, data_manager_temp)

            with savefig('zoom_fit_and_real_der_y{}_dt{}_noise{}'.format(derivative_in_y, measure_dt,
                                                                         str(noise).replace('.', '')),
                         self.experiment_name,
                         subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_and_real_zoom']):
                self.plot_fitted_and_real(pde_finder, data_manager_temp, subinit=self.sub_set_init,
                                          sublen=self.sub_set_len)

    save_csv(rsquares, 'noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name)

    # plt.pcolor(rsquares * (rsquares > 0), cmap='autumn')
    # plt.yticks(np.arange(0.5, len(rsquares.index), 1), np.round(rsquares.index, decimals=2))
    # plt.xticks(np.arange(0.5, len(rsquares.columns), 1), rsquares.columns)

    # ---------- plot heatmap of rsquares ----------
    with savefig('noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name):
        rsquares.index = np.round(rsquares.index, decimals=2)
        plt.close('all')
        sns.heatmap(rsquares * (rsquares > 0), annot=True)
        plt.xlabel("Discretization (dt)")
        plt.ylabel("Noise (k*std)")
        plt.title("Noise and discretization for derivative in y {}".format(derivative_in_y))

    return rsquares
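# --- Standalone sketch (not from the original source) of the corruption model used above: sub-sample
# --- a series every `measure_dt` steps and add Gaussian noise whose scale is `noise` times the std of
# --- the absolute gradient of either the original or the sub-sampled series. Relies on the module's
# --- numpy import (np); the function name is illustrative.
def corrupt_series(series, measure_dt, noise, std_of_discrete_grad=False):
    data_original = np.asarray(series)[::measure_dt]
    # pick the reference series whose gradient std sets the noise scale
    reference = data_original if std_of_discrete_grad else np.asarray(series)
    std = np.std(np.abs(np.gradient(reference)))
    return data_original + np.random.normal(loc=0, scale=std * noise, size=len(data_original))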