def plot_2d_rmse_heatmap(self, cols):
    """Plot RMSE as a heatmap over two columns of ``self.flat_results``.

    Args:
        cols: Indexable whose entries 0 and 1 name the x and y columns.

    A column for which ``self.is_log_param`` is true is plotted as its
    log10 values and its axis label is prefixed with "log10". The
    drawing itself is delegated to ``PlotHelper.plot_2d_rmse_heatmap``;
    afterwards a line naming the plot is appended to
    ``self.readme_list``.
    """
    x_col, y_col = cols[0], cols[1]

    def axis_for(col):
        # Raw values by default; log parameters are re-read as floats
        # and converted to log10.
        values = np.array(self.flat_results[col])
        label = col
        if self.is_log_param(col):
            as_float = np.array(self.flat_results[col].values, 'float')
            values = np.log10(as_float)
            label = "log10 %s" % col
        return values, label

    x_values, x_label = axis_for(x_col)
    y_values, y_label = axis_for(y_col)

    plot_name = "rmse_heatmap"
    helper_kwargs = {
        'xlabel': x_label,
        'ylabel': y_label,
        'labellist': ['xy', 'rmse'],
        # Series 0 is the (x, y) grid; series 1 is RMSE against x.
        'xdatalist': [x_values, x_values],
        'ydatalist': [y_values, self.flat_results['rmse']],
        'xerrlist': [None, None],
        'yerrlist': [None, None],
        'notelist': [],
        'guideline': 0,
        'plotlabel': plot_name,
        'save_path': self.save_path,
    }
    PlotHelper(**helper_kwargs).plot_2d_rmse_heatmap()
    self.readme_list.append("Plot %s.png created\n" % plot_name)
    return
def plot(self):
    """Plot the RMSE of every CV test against the group it left out.

    Reads ``self.cvtest_dict`` (each value must provide ``'group'`` and
    ``'rmse'``), ``self.statistics['avg_rmse']``, the axis labels,
    ``self.save_path`` and ``self.mark_outlying_points``. The plot is
    produced by ``PlotHelper.plot_rmse_vs_text`` and progress lines are
    appended to ``self.readme_list``.
    """
    self.readme_list.append("----- Plotting -----\n")
    kwargs = {
        'xlabel': self.xlabel,
        'ylabel': self.ylabel,
        'notelist': [
            "Mean RMSE: {:.2f}".format(self.statistics['avg_rmse']),
        ],
        'save_path': self.save_path,
        'marklargest': self.mark_outlying_points,
    }
    # One (group, rmse) pair per CV test. Tuples sort by group first
    # (ties fall back to rmse).
    group_rms_list = sorted(
        (cvtest['group'], cvtest['rmse'])
        for cvtest in self.cvtest_dict.values())
    # Stacking mixed (group, rmse) pairs yields one common dtype, which
    # is why the RMSE column is explicitly cast back to float below.
    group_rms_array = np.array(group_rms_list)
    kwargs['xdatalist'] = [group_rms_array[:, 0]]
    kwargs['ydatalist'] = [np.array(group_rms_array[:, 1], 'float')]
    kwargs['xerrlist'] = [None]
    kwargs['yerrlist'] = [None]
    kwargs['labellist'] = ['predicted_rmse']
    kwargs['plotlabel'] = "leave_out_group"
    myph = PlotHelper(**kwargs)
    myph.plot_rmse_vs_text()
    self.readme_list.append("Plot leave_out_group.png created.\n")
    return
def plot_single_rmse(self, col):
    """Plot RMSE against one column of ``self.flat_results``.

    Args:
        col: Name of the column to use for the x axis.

    When ``self.is_log_param(col)`` is true the x values are converted
    to log10 and the label is prefixed with "log10". The plot is drawn
    with ``PlotHelper.multiple_overlay`` and a line naming it is
    appended to ``self.readme_list``.
    """
    x_values = self.flat_results[col]
    x_label = col
    if self.is_log_param(col):
        import numpy as np
        as_float = np.array(self.flat_results[col].values, 'float')
        x_values = np.log10(as_float)
        x_label = "log10 %s" % col

    # Periods in the column name are masked with underscores in the
    # plot label.
    plot_name = ("rmse_vs_%s" % col).replace(".", "_")
    helper_kwargs = {
        'xlabel': x_label,
        'ylabel': 'RMSE',
        'labellist': [x_label],
        'xdatalist': [x_values],
        'ydatalist': [self.flat_results['rmse']],
        'xerrlist': [None],
        'yerrlist': [None],
        'notelist': [],
        'guideline': 0,
        'plotlabel': plot_name,
        'save_path': self.save_path,
    }
    PlotHelper(**helper_kwargs).multiple_overlay()
    self.readme_list.append("Plot %s.png created\n" % plot_name)
    return
def plot_results(self, notelist=None):
    """Plot leave-one-out predictions against the testing target data.

    Args:
        notelist: Optional iterable of note strings handed to the plot.
            It is copied, never modified. ``None`` (the default) means
            no notes.

    Raises:
        ValueError: If ``self.mark_outlying_points`` is set but the
            testing dataset has no labeling features.

    The plot is drawn with ``PlotHelper.multiple_overlay`` and two
    lines describing it are appended to ``self.readme_list``.
    """
    # A None default replaces the former mutable ``list()`` default.
    notes = [] if notelist is None else list(notelist)
    kwargs2 = {
        'xlabel': self.xlabel,
        'ylabel': self.ylabel,
        'labellist': ["loo_prediction"],
        'xdatalist': [self.testing_dataset.target_data],
        # Only one cvtest, with number of folds equal to the number of
        # data points.
        'ydatalist': [self.cvtest_dict[0]['prediction_array']],
        'xerrlist': [self.testing_dataset.target_error_data],
        'yerrlist': [None],
        'notelist': notes,
        'guideline': 1,
        'plotlabel': "loo_results",
        'save_path': self.save_path,
    }
    if self.mark_outlying_points is not None:
        kwargs2['marklargest'] = self.mark_outlying_points
        if self.testing_dataset.labeling_features is None:
            raise ValueError(
                "Must specify some labeling features if you want to mark the largest outlying points"
            )
        # Outlying points are labeled using the first labeling feature.
        labels = self.testing_dataset.data[
            self.testing_dataset.labeling_features[0]]
        kwargs2['mlabellist'] = [labels]
    myph = PlotHelper(**kwargs2)
    myph.multiple_overlay()
    self.readme_list.append("Plot loo_results.png created,\n")
    self.readme_list.append(" showing results of all LOO tests.\n")
    return
def plot_meancv_overlay(self, notelist=None):
    """Plot the mean CV prediction against the testing target data.

    Args:
        notelist: Optional iterable of note strings handed to the plot.
            It is copied, never modified. ``None`` (the default) means
            no notes.

    Raises:
        ValueError: If ``self.mark_outlying_points`` is set but the
            testing dataset has no labeling features.

    y values come from ``self.statistics['average_prediction']`` with
    ``self.statistics['std_err']`` as y error. The plot is drawn with
    ``PlotHelper.multiple_overlay`` and two lines describing it are
    appended to ``self.readme_list``.
    """
    # A None default replaces the former mutable ``list()`` default.
    notes = [] if notelist is None else list(notelist)
    kwargs2 = {
        'xlabel': self.xlabel,
        'ylabel': self.ylabel,
        'labellist': ["Mean CV test"],
        'xdatalist': [self.testing_dataset.target_data],
        'ydatalist': [self.statistics['average_prediction']],
        'xerrlist': [None],
        'yerrlist': [self.statistics['std_err']],
        'notelist': notes,
        'guideline': 1,
        'plotlabel': "mean_cv_overlay",
        'save_path': self.save_path,
    }
    if self.mark_outlying_points is not None:
        kwargs2['marklargest'] = self.mark_outlying_points
        if self.testing_dataset.labeling_features is None:
            raise ValueError(
                "Must specify some labeling features if you want to mark the largest outlying points"
            )
        # Outlying points are labeled using the first labeling feature.
        labels = self.testing_dataset.data[
            self.testing_dataset.labeling_features[0]]
        # NOTE(review): two label entries are passed although only one
        # series is plotted; kept as-is -- confirm against PlotHelper.
        kwargs2['mlabellist'] = [labels, labels]
    myph = PlotHelper(**kwargs2)
    myph.multiple_overlay()
    self.readme_list.append("Plot mean_cv_overlay.png created,\n")
    self.readme_list.append(" showing the mean cv of %i tests.\n" %
                            self.num_cvtests)
    return
def plot_best_worst_overlay(self, notelist=None):
    """Overlay the best and worst CV test predictions on one plot.

    Args:
        notelist: Optional iterable of note strings handed to the plot.
            It is copied, never modified. ``None`` (the default) means
            no notes.

    Raises:
        ValueError: If ``self.mark_outlying_points`` is set but the
            testing dataset has no labeling features.

    The two series are the ``'prediction_array'`` entries of
    ``self.cvtest_dict`` at ``self.best_test_index`` and
    ``self.worst_test_index``, both plotted against the testing target
    data. The plot is drawn with ``PlotHelper.multiple_overlay`` and two
    lines describing it are appended to ``self.readme_list``.
    """
    # A None default replaces the former mutable ``list()`` default.
    notes = [] if notelist is None else list(notelist)
    target = self.testing_dataset.target_data
    kwargs2 = {
        'xlabel': self.xlabel,
        'ylabel': self.ylabel,
        'labellist': ["Best test", "Worst test"],
        'xdatalist': [target, target],
        'ydatalist': [
            self.cvtest_dict[self.best_test_index]['prediction_array'],
            self.cvtest_dict[self.worst_test_index]['prediction_array'],
        ],
        'xerrlist': [None, None],
        'yerrlist': [None, None],
        'notelist': notes,
        'guideline': 1,
        'plotlabel': "best_worst_overlay",
        'save_path': self.save_path,
    }
    if self.mark_outlying_points is not None:
        kwargs2['marklargest'] = self.mark_outlying_points
        if self.testing_dataset.labeling_features is None:
            raise ValueError(
                "Must specify some labeling features if you want to mark the largest outlying points"
            )
        # Both series share the labels of the first labeling feature.
        labels = self.testing_dataset.data[
            self.testing_dataset.labeling_features[0]]
        kwargs2['mlabellist'] = [labels, labels]
    myph = PlotHelper(**kwargs2)
    myph.multiple_overlay()
    self.readme_list.append("Plot best_worst_overlay.png created,\n")
    self.readme_list.append(
        " showing the best and worst of %i tests.\n" % self.num_cvtests)
    return
def one_plot(self, group=None):
    """Overlay target data vs. the feature-plot feature for all datasets.

    Args:
        group: Optional group value. When given, every dataset in
            ``self.testing_datasets`` is restricted to the rows where
            ``group_data == group`` and the plot label is the group
            itself; otherwise all rows are used and the label is "all".

    One series per testing dataset is passed to
    ``PlotHelper.multiple_overlay``, with ``target_error_data`` as the
    y error and no x error.
    """
    xdatalist = []
    xerrlist = []
    ydatalist = []
    yerrlist = []
    for testing_dataset in self.testing_datasets:
        xdata = testing_dataset.data[self.feature_plot_feature]
        ydata = testing_dataset.target_data
        yerrdata = testing_dataset.target_error_data
        if group is not None:
            # Build the row mask once and reuse it for x, y and y-error.
            in_group = testing_dataset.group_data == group
            xdata = xdata[in_group]
            ydata = ydata[in_group]
            # Empty selections are replaced by plain empty lists (and a
            # missing error series by None) before plotting.
            if len(xdata) == 0:
                xdata = []
            if len(ydata) == 0:
                ydata = []
            if yerrdata is not None:
                yerrdata = yerrdata[in_group]
                if len(yerrdata) == 0:
                    yerrdata = None
        xdatalist.append(xdata)
        xerrlist.append(None)  # no x error is ever plotted here
        ydatalist.append(ydata)
        yerrlist.append(yerrdata)
    kwargs = {
        'plotlabel': "all" if group is None else group,
        'xdatalist': xdatalist,
        'ydatalist': ydatalist,
        'xerrlist': xerrlist,
        'yerrlist': yerrlist,
        'labellist': self.data_labels,
        'save_path': self.save_path,
        'xlabel': self.xlabel,
        'ylabel': self.ylabel,
        'notelist': [],
    }
    myph = PlotHelper(**kwargs)
    myph.multiple_overlay()
    return
def plot_group_splits_with_outliers(self,
                                    group_dict=None,
                                    outlying_groups=None,
                                    label="group_splits",
                                    group_notelist=None):
    """Delegate a per-group split plot to ``PlotHelper``.

    Args:
        group_dict: Per-group plot data, forwarded unchanged.
        outlying_groups: Optional iterable of groups to forward as
            ``outlying_groups``; copied. ``None`` means none.
        label: Plot label; also the subfolder of ``self.save_path``
            used as the helper's save path.
        group_notelist: Optional iterable of note strings; copied.
            ``None`` means no notes.
    """
    # None defaults replace the former mutable ``list()`` defaults.
    outliers = [] if outlying_groups is None else list(outlying_groups)
    notes = [] if group_notelist is None else list(group_notelist)
    addl_kwargs = {
        'xlabel': self.xlabel,
        'ylabel': self.ylabel,
        'save_path': os.path.join(self.save_path, label),
        'guideline': 1,
        'group_dict': group_dict,
        'outlying_groups': outliers,
        'plotlabel': label,
        'notelist': notes,
    }
    myph = PlotHelper(**addl_kwargs)
    myph.plot_group_splits_with_outliers()
    return
def plot_results(self, addl_plot_kwargs=None):
    """Plot predicted vs. measured values for a single fit.

    Args:
        addl_plot_kwargs: Optional mapping whose entries are copied
            over the default plot keyword arguments (label, guideline,
            notes, save path, axis labels).

    If the testing dataset has no target data, a warning is logged, the
    readme is updated and nothing is plotted. Otherwise statistics
    notes are assembled, optional filter information is recorded, and
    the plot is drawn with ``PlotHelper.multiple_overlay``. y error
    bars are only supplied when the trained model's class is named
    "GaussianProcessRegressor".
    """
    readme = self.readme_list
    readme.append("----- Plotting -----\n")
    dataset = self.testing_dataset

    # Guard clause: nothing to compare against without target data.
    if dataset.target_data is None:
        logger.warning(
            "No testing target data. Predicted vs. measured plot will not be plotted."
        )
        readme.append("No target data.\n")
        readme.append(
            "No plot comparing predicted vs. measured data was made.\n")
        return

    stats = self.statistics
    notes = [
        "RMSE: %3.3f" % stats['rmse'],
        "R-squared: %3.3f" % stats['rsquared'],
        "R-squared (no int): %3.3f" % stats['rsquared_noint'],
        "Mean error: %3.3f" % stats['mean_error'],
        "Mean abs error: %3.3f" % stats['mean_absolute_error'],
    ]
    plot_kwargs = {
        'xlabel': self.xlabel,
        'ylabel': self.ylabel,
        'plotlabel': "single_fit",
        'guideline': 1,
        'notelist': notes,
        'save_path': self.save_path,
    }
    if addl_plot_kwargs is not None:
        for key in addl_plot_kwargs:
            plot_kwargs[key] = addl_plot_kwargs[key]

    if self.plot_filter_out is not None:
        readme.append("Plot filtering out:\n")
        readme.extend([
            " %s %s %s\n" % (feature, symbol, threshold)
            for (feature, symbol, threshold) in self.plot_filter_out
        ])
        # These go to the local notes list, which is the plotted list
        # unless addl_plot_kwargs replaced 'notelist'.
        notes.append("Shown-only RMSE: %3.3f" % stats['filtered_rmse'])
        notes.append("Data not shown:")
        notes.extend([
            " %s %s %s" % (feature, symbol, threshold)
            for (feature, symbol, threshold) in self.plot_filter_out
        ])

    # Data should already have been filtered by now.
    plot_kwargs['xdatalist'] = [dataset.target_data]
    plot_kwargs['ydatalist'] = [dataset.target_prediction]
    x_err = (None if dataset.target_error_feature is None else
             dataset.target_error_data)
    plot_kwargs['xerrlist'] = [x_err]
    is_gpr = (
        self.trained_model.__class__.__name__ == "GaussianProcessRegressor")
    y_err = dataset.target_prediction_sigma if is_gpr else None
    plot_kwargs['yerrlist'] = [y_err]
    plot_kwargs['labellist'] = ["predicted_vs_measured"]

    PlotHelper(**plot_kwargs).multiple_overlay()
    readme.append("Plot single_fit.png created.\n")
    readme.append(" Plotted data is in the data_... csv file.\n")
    readme.append(" Error column of all zeros indicates no error.\n")
    return
#!/usr/bin/env python
# Manual test script for PlotHelper: builds a sine/cosine overlay
# configuration, constructs a helper from it, then runs the helper's
# own test_all() on a default-constructed instance.
from plot_data.PlotHelper import PlotHelper
import numpy as np
import os

# Two series sharing the same integer x grid from -10 to 9.
xtest = np.arange(-10, 10, 1)
ytest = np.sin(xtest)
ytest2 = np.cos(xtest)

xdatalist = [xtest, xtest]
ydatalist = [ytest, ytest2]
xerrlist = [None, None]
# Constant y errors: 0.01 for the sine series, 0.02 for the cosine.
yerrlist = [0.01 * np.ones(len(ytest)), 0.02 * np.ones(len(ytest2))]

kwargs = {
    'xdatalist': xdatalist,
    'ydatalist': ydatalist,
    'xerrlist': xerrlist,
    'yerrlist': yerrlist,
    'labellist': ['sine', 'cosine'],
    'marklargest': "2,3",
    'mlabellist': None,
    'guideline': 1,
    'xlabel': "Number",
    'ylabel': "Function value",
    'plotlabel': "sine_cos_overlay",
    'save_path': os.path.join(os.getcwd(), "save_testing"),
    'notelist': ["Test plot"],
}

# The overlay helper is constructed but its plot call is disabled:
# myph.multiple_overlay()
myph = PlotHelper(**kwargs)

myph2 = PlotHelper()
myph2.test_all()
def make_series_feature_plot(self, group=None):
    """Make series feature plot.

    For every test set in ``self.sf_dict`` (in sorted key order) up to
    two series are collected against ``self.feature_plot_feature``:
    "<testset> measured" (skipped when there is no target data or it is
    empty) and "<testset> predicted" (skipped when empty).

    Args:
        group: Optional group value. When given, each test set is
            restricted to the rows whose grouping-feature column equals
            ``group``; otherwise all rows are used.

    If no series result, an info message is logged and nothing is
    plotted. Otherwise the series are handed to
    ``PlotHelper.plot_group_splits_with_outliers`` with a save path of
    ``<self.save_path>/<plot label>``. The plot label is always
    appended to ``self.readme_list``.
    """
    plabel = "feature_plot_group_%s" % str(group)
    self.readme_list.append(" %s\n" % plabel)

    group_notelist = []
    if self.plot_filter_out is not None:
        group_notelist.append("Data not displayed:")
        for (feature, symbol, threshold) in self.plot_filter_out:
            group_notelist.append(" %s %s %s" %
                                  (feature, symbol, threshold))

    pdict = {}
    for testset in sorted(self.sf_dict):
        ts_sf_td = self.sf_dict[testset].testing_dataset
        measured = ts_sf_td.target_data
        feature_data = ts_sf_td.data[self.feature_plot_feature]
        predicted = ts_sf_td.target_prediction
        if group is not None:
            # Build the row mask once and apply it to every series.
            gfeat = self.training_dataset.grouping_feature
            in_group = ts_sf_td.data[gfeat] == group
            if measured is not None:
                measured = measured[in_group]
            feature_data = feature_data[in_group]
            predicted = predicted[in_group]

        if measured is not None and len(measured) > 0:
            pdict["%s measured" % testset] = {
                'xdata': feature_data,
                'xerrdata': None,
                'ydata': measured,
            }
        if len(predicted) > 0:
            pdict["%s predicted" % testset] = {
                'xdata': feature_data,
                'xerrdata': None,
                'ydata': predicted,
            }

    series_list = list(pdict.keys())
    if not series_list:
        logging.info("No series for plot.")
        return

    addl_kwargs = {
        'guideline': 0,
        'save_path': os.path.join(self.save_path, plabel),
        'xlabel': self.feature_plot_xlabel,
        'ylabel': self.feature_plot_ylabel,
        'markers': self.markers,
        'outlines': self.outlines,
        'linestyles': self.linestyles,
        'legendloc': self.legendloc,
        'sizes': self.sizes,
        # One "None" face entry per configured marker.
        'faces': ["None"] * len(self.markers),
        'group_dict': pdict,
        'plotlabel': plabel,
        # Every collected series is passed as an "outlying group".
        'outlying_groups': series_list,
        'notelist': list(group_notelist),
    }
    myph = PlotHelper(**addl_kwargs)
    myph.plot_group_splits_with_outliers()
    return