Example #1
def plot_feats(df, cols, target, hue):
    """Plot the relationship between the target and highly correlated features,
    optionally split by a hue variable.
    cols: list of feature columns to plot"""
    
    if hue in cols:
        cols.remove(hue)
    if target in cols:
        cols.remove(target)
    sns.reset_defaults()
    sns.set(style="ticks", color_codes=True, font_scale=1.0)
    if hue is None:
        fig_s = plt.figure(figsize=(15, 25))
        for i, c in enumerate(cols):
            fig_s.add_subplot(421 + i)  # 4 x 2 grid of subplots
            sns.scatterplot(x=df[c], y=df[target])  # palette requires a hue, so it is dropped here
        plt.show()
    else:
        # box plot of target grouped by hue
        fig = plt.figure(figsize=(15, 10))
        fig.add_subplot(221)
        sns.boxplot(x=hue, y=target, data=df[[target, hue]])
        plt.show()

        fig_s = plt.figure(figsize=(15, 25))
        for i, c in enumerate(cols):
            fig_s.add_subplot(421 + i)
            sns.scatterplot(x=df[c], y=df[target], hue=df[hue], palette='Spectral')
        plt.show()
def ramachandran_plot(atomgroup,
                      selection,
                      outputfile1,
                      outputfile2,
                      image_format='png'):
    # plot standard mdanalysis and seaborn 2D with kde
    R = Ramachandran(atomgroup).run()
    fig, ax = plt.subplots(figsize=plt.figaspect(1))
    R.plot(ax=ax, color='k', marker='.', ref=True)

    # flatten (n_frames, n_residues, 2) -> (n_frames * n_residues, 2)
    a = R.angles.reshape(np.prod(R.angles.shape[:2]), 2)
    # append the phi/psi angles to the HDF5 output file (args is a module-level CLI namespace)
    with h5py.File(args.o_data1, 'a') as f:
        f["/" + selection + "/ramachandran/phi"] = a[:, 0]
        f["/" + selection + "/ramachandran/psi"] = a[:, 1]
    plt.tight_layout()
    # svg is better but sticking with png for now
    plt.savefig(outputfile1, format=image_format)

    sns.reset_defaults()
    importlib.reload(plt)  # heavy-handed reset of matplotlib/seaborn state before restyling
    importlib.reload(sns)
    with sns.axes_style("white"):
        h = sns.jointplot(x=a[:, 0], y=a[:, 1], kind="kde", space=0)
        h.set_axis_labels(r'$\phi$ (deg)', r'$\psi$ (deg)')
        h.ax_joint.set_xlim(-180, 180)
        h.ax_joint.set_ylim(-180, 180)
        h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
        h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
        plt.savefig(outputfile2, format=image_format, bbox_inches='tight')
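For reference, a sketch of reading the dihedral angles back out of the HDF5 layout written above; the file path and selection name here are illustrative:

import h5py
import numpy as np

with h5py.File('analysis.h5', 'r') as f:        # path is whatever args.o_data1 pointed at
    phi = f['protein/ramachandran/phi'][:]      # 'protein' = the selection string
    psi = f['protein/ramachandran/psi'][:]
angles = np.column_stack([phi, psi])            # same shape as `a` above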
Example #3
    def plot_forecasts(self, series, forecasts, test):
        n_test = test.shape[0] + 2
        sns.set()
        # plot the entire dataset in blue
        warnings.filterwarnings("ignore")
        plt.figure(0, figsize=[12, 6])
        plt.plot(series.values, label='True time-series')

        # plot the forecasts with their uncertainty (std) as error bars
        for i in range(len(forecasts)):
            off_s = len(series) - n_test + i
            off_e = off_s + len(forecasts[i])
            xaxis = list(range(off_s, off_e))
            lbs = 'Forecast + uncertainty score (std)' if i == 0 else None
            plt.errorbar(x=xaxis, y=forecasts[i], yerr=self._stds[i],
                         linestyle='None', marker='^', color='r', label=lbs)
        plt.legend()
        # show the plot
        plt.title('Forecasting in testing set of time-series')
        plt.xlabel('timestep')
        plt.ylabel('Value')
        plt.show()
        sns.reset_defaults()
Example #4
 def setsea(self):
     if self.ch.isChecked():
         sns.set()
     else:
         sns.reset_defaults()
     
     plt.style.use(self.cb.currentText())
     self.setGraph()
    def make_fuzziness_histo(self, distance_list_series, plot_name):
        sns.set(style="darkgrid")
        sns.set_color_codes()
        # sns.distplot is deprecated; sns.histplot(..., stat="density", kde=True) is the modern equivalent
        sns.distplot(distance_list_series.dropna(), norm_hist=True, color="r")

        plt.savefig(plot_name)
        sns.reset_defaults()
        sns.reset_orig()
        plt.clf()
def plotMu(mus, cantonPop):
    sns.reset_defaults()
    sns.set(rc={"figure.figsize": (7, 5)}, style="white")  # nicer layout
    ax = sns.histplot(mus, kde=False)
    ax.set(xlabel="mu", ylabel="count", title="Mus for all Cantons")
    sns.despine()

    plt.figure()  # new figure so the scatter does not draw over the histogram axes
    ax = sns.scatterplot(x=mus, y=cantonPop)
    ax.set(xlabel="mu", ylabel="canton population", title="Canton Population vs Mu")
    sns.despine()
    plt.show()
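A usage sketch for plotMu with synthetic inputs (26 values, one per Swiss canton):

import numpy as np

rng = np.random.default_rng(42)
mus = rng.lognormal(mean=0.0, sigma=0.5, size=26)      # synthetic mu values
cantonPop = rng.integers(16_000, 1_500_000, size=26)   # synthetic canton populations
plotMu(mus, cantonPop)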
Example #7
def hist(data, bins=10, spacing=True, axis="on"):
    import matplotlib.pyplot as plt
    if spacing:
        import seaborn as sns
        sns.set(rc={'figure.figsize': (11.7, 8.27)})
    fig, axs = plt.subplots(1, 1, sharey=True)
    # We can set the number of bins with the `bins` kwarg
    plt.axis(axis)
    axs.hist(data, bins=bins)
    if spacing:
        sns.reset_defaults()
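A quick usage sketch; hist does not call plt.show() itself, so scripts need to do that:

import numpy as np
import matplotlib.pyplot as plt

data = np.random.default_rng(1).normal(size=1_000)
hist(data, bins=30)                             # seaborn figure sizing, axes on
hist(data, bins=30, spacing=False, axis="off")  # plain matplotlib, axes hidden
plt.show()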
Example #8
 def _report_pipeline_set_confusion_matrix(self, pipeline_name: str,
                                           perf: ClassificationPerformance,
                                           set_: Set):
     sns.reset_defaults()
     cm = ConfusionMatrixDisplay(
         confusion_matrix(perf.labels,
                          perf.predictions,
                          labels=perf.unique_labels),
         display_labels=perf.unique_labels,
     )
     cm.plot(cmap=plt.cm.Blues, values_format=".4g")
     self._mf.figure(cm.figure_, f"{pipeline_name}_{set_}_cm.png")
     sns.set()
def plot_resid(resid, resid_test, folder_path):
    '''
    creates plots of means and standard deviations of training and testing residuals
    
    Parameters
    ----------
    resid: array of observations - gmpe predictions for training data
    resid_test: array of observations - gmpe predictions for testing data
    folder_path: path for saving png files

    Returns
    -------
    creates pngs of standard deviation of residuals and average of residuals

    '''
    import numpy as np
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.reset_defaults()
    sns.set_style('whitegrid')
    sns.set_context('talk', font_scale=0.7)
    
    period = [10, 7.5, 5, 4, 3, 2, 1, 0.5, 0.2, 0.1]

    diff = np.std(resid, axis=0)
    difftest = np.std(resid_test, axis=0)
    f22 = plt.figure('Std of residuals vs Period')
    plt.semilogx(period, diff, label='Training')
    plt.semilogx(period, difftest, label='Testing')
    plt.xlabel('Period')
    plt.ylabel('Total Standard Deviation')
    plt.legend()
    plt.ylim(.25,.85)
    plt.savefig(folder_path + 'resid_T.png')
    plt.show()
    
    diffmean = np.mean(resid, axis=0)
    diffmeantest = np.mean(resid_test, axis=0)
    f23 = plt.figure('Mean of residuals vs Period')
    plt.semilogx(period, diffmean, label='Training')
    plt.semilogx(period, diffmeantest, label='Testing')
    plt.xlabel('Period')
    plt.ylabel('Mean residual')
    plt.legend()
    plt.savefig(folder_path + 'mean_T.png')
    plt.show()
    plt.close('all')
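A hedged usage sketch for plot_resid; the residual arrays are synthetic, and the output directory (trailing slash, must already exist) is illustrative:

import numpy as np

rng = np.random.default_rng(0)
resid = rng.normal(scale=0.6, size=(500, 10))        # 10 columns = the 10 periods above
resid_test = rng.normal(scale=0.65, size=(200, 10))
plot_resid(resid, resid_test, './figures/')          # folder_path is concatenated with the file names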
def target_correlation_plot(dframe):
    """
    Plots a bar graph of the correlation of every other feature with the target column.
    This visualization is chosen because the processed dataframe for this problem contains 518 feature
    columns, so plots like a pairplot or a full correlation-matrix heatmap would be too large to render.

    :param dframe: dataframe to visualize
    :return: an object of seaborn figure
    """
    # sns.set resets previous settings, so pass everything in one call
    sns.set(rc={'figure.figsize': (7, 100)}, font_scale=0.6)
    figure = sns.barplot(x=dframe.corr()[constants.RESULT_COLUMN_NAME],
                         y=preprocess.get_headers(dframe)).get_figure()
    sns.reset_defaults()
    return figure
Example #11
def plot_plant_metrics(metrics: pd.DataFrame,
                       sens_vars: Collection[str],
                       act_vars: Collection[str],
                       out_path: str,
                       fname_prefix: str = ''):
    out_path = Path(out_path)
    assert out_path.is_dir()

    metrics = metrics.copy()

    # use relative timestamp
    metrics['timestamp'] = metrics['timestamp'] - metrics['timestamp'].min()

    sns.set_theme(context='paper', palette='Dark2')
    with sns.color_palette('Dark2', len(metrics.columns)) as colors:
        colors = iter(colors)
        # sensor readings
        fig, axes = plt.subplots(nrows=len(sens_vars),
                                 sharex='all',
                                 squeeze=False)
        for ax, var in zip(axes, sens_vars):
            __plot_raw_proc_values(df=metrics,
                                   var_name=var,
                                   prefix='sens_',
                                   colors=colors,
                                   proc_label='Sensor Reading',
                                   ax=ax.item())
        fig.suptitle('Monitored values & sensor readings')
        fig.tight_layout()
        fig.savefig(out_path / f'{fname_prefix}_sensors.png')

        # actuator outputs
        fig, axes = plt.subplots(nrows=len(act_vars),
                                 sharex='all',
                                 squeeze=False)
        for ax, var in zip(axes, act_vars):
            __plot_raw_proc_values(df=metrics,
                                   var_name=var,
                                   prefix='act_',
                                   colors=colors,
                                   proc_label='Actuator Output',
                                   ax=ax.item())
        fig.suptitle('Actuated values & actuator outputs')
        fig.tight_layout()
        fig.savefig(out_path / f'{fname_prefix}_actuators.png')

    plt.close('all')
    sns.reset_defaults()
def plot_rawinputs(x_raw, mean_x_allT, y, feature_names, period, folder_path):
    '''
    plots model predictions vs. raw (untransformed) input features
    
    Parameters
    ----------
    x_raw: numpy array of untransformed data
    mean_x_allT: 2d array of model predictions for data
    y: 2d array numpy array of targets
    feature_names: array or list of feature names
    period: list of periods
    folder_path: path for saving png files
    
    Returns
    -------
    creates png scatterplots of predicted ground motions vs. each input feature (before transformation)
    '''
    
    import matplotlib.pyplot as plt
    import numpy as np
    import os
    import seaborn as sns
    sns.reset_defaults()
    sns.set_style('whitegrid')
    sns.set_context('talk', font_scale=0.7)

    folderlist = ['T10s','T7_5s','T5s','T4s','T3s','T2s','T1s','T_5s','T_2s','T_1s']
    for j in range(len(period)):
        mean_x_test = mean_x_allT[:, j]  # predictions for period j
        if not os.path.exists(folder_path + folderlist[j]):
            os.makedirs(folder_path + folderlist[j])
        for i in range(len(x_raw[0])):
            fig, axes = plt.subplots(2, 1, figsize=(10, 8))
            fig.suptitle('T = ' + str(period[j]) + ' s')  # title the whole figure, not just the bottom axes
            ylim = max(np.abs(y[:,j]))
            axes[0].set_ylim(-1*ylim,ylim)
            axes[1].set_ylim(-1*ylim,ylim)
            axes[0].scatter(x_raw[:,i], mean_x_test,s=1, label='predictions', color='blue')
            axes[1].scatter(x_raw[:,i], y[:,j], s=1, label='targets', color='green')
            axes[1].set_xlabel(feature_names[i])
            axes[0].set_ylabel('prediction')
            axes[1].set_ylabel('target')
            axes[0].legend(loc = 'upper left')
            axes[1].legend(loc = 'upper left')
            plt.savefig(folder_path + folderlist[j] + '/predictions_vs_' + feature_names[i] + '.png')
            plt.show()
Example #13
def plot_controller_network_metrics(metrics: pd.DataFrame,
                                    out_path: str,
                                    fname_prefix: str = ''):
    out_path = Path(out_path)
    assert out_path.is_dir()

    # plot processing time distributions and rates
    metrics = metrics.copy()
    metrics['process_time'] *= 1000.0
    metrics['timestamp'] = \
        metrics['recv_timestamp'] - metrics['recv_timestamp'].min()

    sns.set_theme(context='paper', palette='Dark2')
    with sns.color_palette('Dark2') as colors:
        colors = iter(colors)
        fig, ax = plt.subplots(nrows=2)

        sns.histplot(data=metrics,
                     x='process_time',
                     stat='density',
                     kde=True,
                     color=next(colors),
                     ax=ax[0])
        ax[0].set_title('Distribution of sample processing times.')
        ax[0].set_xlabel('Processing time (bins) [ms]')

        __plot_rate_per_time_unit(df=metrics,
                                  x='timestamp',
                                  timestamp='recv_timestamp',
                                  ax=ax[1],
                                  label='Receive rate',
                                  color=next(colors))

        __plot_rate_per_time_unit(df=metrics,
                                  x='timestamp',
                                  timestamp='send_timestamp',
                                  ax=ax[1],
                                  label='Send rate',
                                  color=next(colors))
        ax[1].set_xlabel('Time [s]')
        ax[1].set_ylabel('Packets / second')
        ax[1].set_title('Packet rates over time.')

        fig.tight_layout()
        fig.savefig(out_path / f'{fname_prefix}controller_metrics.png')

    plt.close('all')
    sns.reset_defaults()
def obs_pre(y_train, y_test, pre, pre_test, period, folder_path):
    '''
    creates scatterplots of observed ground motion residuals vs. model predicted ground motion data for training and testing data
    
    Parameters
    ----------
    y_train: 2d numpy array of observed ground motion residuals for training data
    y_test: 2d numpy array of observed ground motion residuals for testing data
    pre: numpy array of model predictions for training data
    pre_test: numpy array of model predictions for testing data
    period: list of periods 
    folder_path: path for saving png files
    
    Returns
    -------
    creates png scatterplots of observed ground motions vs. predicted for each period
    '''
    import numpy as np
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.reset_defaults()
    sns.set_style('whitegrid')
    sns.set_context('talk', font_scale=0.7)
    
    for i in range(len(period)):
        T = period[i]
        y = pre.T[i]
        x = y_train.T[i]
        y_testplot = pre_test.T[i]
        x_test = y_test.T[i]
        plt.figure(figsize=(6, 6))
        lim = np.max(np.asarray([abs(x), abs(y)]).flatten())
        plt.scatter(x, y, s=1, label='Training')
        plt.scatter(x_test, y_testplot, s=1, label='Testing')
        plt.xlabel('observed')
        plt.ylabel('predicted')
        plt.title('T ' + str(T) + ' s')
        plt.xlim(-1*lim, lim)
        plt.ylim(-1*lim, lim)
        plt.legend()
        plt.savefig(folder_path + 'obs_pre_T_' + str(T) + '.png')
        plt.show()
    plt.close('all')
Example #15
def delta_plot(delta_df,
               x,
               y,
               name,
               minmax=True,
               hline=[-0.5, 0.5],
               vline=[-3, 3]):
    xy_df = delta_df[delta_df['measure'] == x].melt('measure')
    y_df = delta_df[delta_df['measure'] == y].melt('measure')
    xy_df['value2'] = y_df['value'].values

    xy_df['type'] = xy_df['variable'].apply(lambda x: x.split('_')[0])
    xy_df['depth'] = xy_df['variable'].apply(
        lambda x: float(x.split('_')[1])).astype('float')

    sns.reset_defaults()

    #     unique_tags = xy_df['variable'].unique()
    #     p = sns.cubehelix_palette(len(unique_tags), light=.8, start=.5, rot=-.75)
    #     ax = sns.scatterplot(x='value', y='value2', hue='variable', style='type', palette=p, data=xy_df)

    ax = sns.scatterplot(x='value',
                         y='value2',
                         hue='depth',
                         style='type',
                         legend='brief',
                         data=xy_df)

    ax.set_xlabel('delta ' + x.replace('_vec', ''))
    ax.set_ylabel('delta ' + y.replace('_vec', ''))
    if minmax:
        ax.hlines(0, xy_df['value'].min() - .01, xy_df['value'].max() + .01)
        ax.set_xlim(xy_df['value'].min() - .01, xy_df['value'].max() + .01)
        ax.vlines(0, xy_df['value2'].min() - .01, xy_df['value2'].max() + .01)
        ax.set_ylim(xy_df['value2'].min() - .01, xy_df['value2'].max() + .01)
    else:
        ax.hlines(0, *hline)
        ax.set_xlim(*hline)
        ax.vlines(0, *vline)
        ax.set_ylim(*vline)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    ax.set_title('{}, d{} vs d{}'.format(name, x, y))
    return xy_df, ax
def plotR0(R0: np.ndarray, cantonNames: list, ax=None):
    sns.reset_defaults()
    sns.set(rc={"figure.figsize": (7, 5)}, style="white")  # nicer layout
    if ax is not None:
        ax.set(ylim=(0, 5))
        sns.lineplot(data=R0.T, ax=ax, legend=None, dashes=False, palette=colors)
    else:
        ax = sns.lineplot(data=R0.T, palette=colors, dashes=False)
        ax.set(ylim=(0, 5))
        ax.legend(
            cantonNames,
            frameon=False,
            bbox_to_anchor=(1.0, 1),
            loc="upper left",
            fontsize="xx-small",
        )
    ax.set(xlabel="day", ylabel="R0", title="R0 over time per Canton")
    ax.axhline(1, color="grey", dashes=[6, 2])
    sns.despine()
def plot_confusion_matrix(predicted_Y_loaded_model,
                          true_Y,
                          saving_path,
                          recall=0,
                          precision=0,
                          f1_score=0):
    predicted_Y_loaded_model = list(
        map(np.round, predicted_Y_loaded_model))  # round probabilities to 0/1 labels
    cm = confusion_matrix(true_Y, predicted_Y_loaded_model)
    plt.figure(figsize=(5, 5))
    seaborn.heatmap(cm, annot=True, fmt="d")
    plt.title("recall:" + str(recall) + ", precision:" + str(precision) +
              ",f1_score:" + str(f1_score))
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.savefig(saving_path)
    plt.clf()
    seaborn.reset_defaults()
Example #18
def plot_confusion_matrix(y_true,
                          y_pred,
                          labels=None,
                          title=None,
                          save_dir=None,
                          is_percentage=False,
                          is_show=False):
    """
    绘制混淆矩阵
    :param y_true: (list or numpy.array) 标签
    :param y_pred: (list or numpy.array) 预测值
    :param labels: (list) 标签列表, 默认是二分类, 即(0, 1)
    :param title: (str) 标题, 默认是None
    :param save_dir: (str) 保存目录, 默认是None
    :param is_percentage: (bool) 是否以百分比的形式, 默认是Fasle
    :param is_show: (bool) 是否展示, 默认是Fasle
    :return:
    """
    y_true = data_transform(y_true)
    y_pred = data_transform(y_pred)
    if title is None:
        title = "confusion matrix"

    matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=labels)
    if is_percentage:
        matrix = (matrix.T / np.sum(matrix, axis=1)).T
        fmt = ".4g"
    else:
        fmt = ".20g"
    sns.set()
    f, ax = plt.subplots()
    sns.heatmap(matrix, annot=True, ax=ax, fmt=fmt)
    ax.set_title(title)
    ax.set_xlabel("predict")
    ax.set_ylabel("true")

    if save_dir is not None:
        plt.savefig(os.path.join(save_dir, f"{title}.png"))
    if is_show:
        plt.show()
    plt.close()
    sns.reset_defaults()
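A hypothetical call with toy labels, assuming data_transform passes plain integer lists through unchanged:

y_true = [0, 0, 1, 1, 1, 0, 1, 0]
y_pred = [0, 1, 1, 1, 0, 0, 1, 0]
plot_confusion_matrix(y_true, y_pred, labels=[0, 1],
                      title="toy confusion matrix",
                      save_dir=".", is_percentage=True)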
Example #19
File: speed.py  Project: triwahyuu/vortex
 def plot_time_and_percentile(results: dict, filename: str, unit: str):
     plt.cla()
     plt.clf()
     sns.set(style="whitegrid")
     fig, (ax1, ax2) = plt.subplots(2)
     plt.gcf().set_size_inches((6.4, 9.6))
     ax1.set_title("timedata")
     ax2.set_title("percentile")
     ax1.set(xlabel='iteration')
     ax1.set(ylabel='time ({})'.format(unit))
     ax2.set(xlabel='n-th percentile')
     ax2.set(ylabel='time ({})'.format(unit))
     p = np.arange(100)
     percentile, data = results['percentile'], results['data']
     median, mean = results['median'], results['mean']
     mean_x = np.abs(percentile - mean).argmin()  # percentile index closest to the mean
     p_90 = percentile[90]
     timedata = pd.DataFrame(data,
                             index=range(len(data)),
                             columns=["timedata"])
     sns.lineplot(data=timedata, palette="tab10", linewidth=0.15, ax=ax1)
     ax2.plot(p, percentile, label='percentile')
     ax2.scatter(50,
                 median,
                 linestyle=':',
                 label='median ({0:.2f})'.format(median))
     ax2.scatter(90,
                 p_90,
                 linestyle=':',
                 label='90-th percentile ({0:.2f})'.format(p_90))
     ax2.scatter(mean_x,
                 mean,
                 label='mean ({0:.2f})'.format(mean))
     ax2.legend()
     plt.autoscale()
     plt.tight_layout()
     plt.savefig(filename)
     plt.gcf().set_size_inches(
         (6.4, 4.8))  ## reset back to matplotlib default
     sns.reset_defaults()
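A usage sketch with synthetic timings, assuming the function is accessible at module level; the results dict layout mirrors what the function reads (a percentile array indexed 0-99, plus data, median, and mean):

import numpy as np

data = np.random.default_rng(7).gamma(shape=2.0, scale=5.0, size=500)
results = {
    'data': data,
    'percentile': np.percentile(data, np.arange(100)),
    'median': float(np.median(data)),
    'mean': float(np.mean(data)),
}
plot_time_and_percentile(results, 'speed.png', unit='ms')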
Example #20
    def plot_forecasts(self, series, forecasts, test):
        n_test = test.shape[0] + 2
        sns.set()
        # plot the entire dataset in blue
        warnings.filterwarnings("ignore")
        plt.figure(0, figsize=[12, 6])
        plt.plot(series.values, label='True time-series')

        # plot the forecasts
        for i in range(len(forecasts)):
            off_s = len(series) - n_test + i
            off_e = off_s + len(forecasts[i])
            xaxis = list(range(off_s, off_e))
            lb = 'Forecasted time-series' if i == 0 else None
            if self._n_seq > 1:
                sns.lineplot(x=xaxis, y=forecasts[i], label=lb, color='r')
            else:
                sns.scatterplot(x=xaxis, y=forecasts[i], label=lb, color='r')
            #plt.plot(xaxis, forecasts[i], color='red',label='Forecasted time-series')
        # show the plot
        plt.title('Forecasting in testing set of time-series')
        plt.xlabel('timestep')
        plt.ylabel('Value')

        plt.show()
        sns.reset_defaults()
Example #21
    def plot(self, output_directory, *args, **kwargs):
        results = self.report()
        output_directory = Path(output_directory)
        output_filename = output_directory / '{}.png'.format(self.name)
        plt.cla()
        plt.clf()
        sns.set(style="whitegrid")
        fig, (ax1, ax2) = plt.subplots(2)
        plt.gcf().set_size_inches((6.4, 9.6))
        cpu_data_array = np.asarray(self.cpu_percent_data)
        n_cpu = 1 if cpu_data_array.ndim == 1 else cpu_data_array.shape[-1]
        columns = ['cpu{}'.format(i)
                   for i in range(n_cpu)] if n_cpu > 1 else ['cpu']
        linewidth = 0.75
        cpu_data = pd.DataFrame(cpu_data_array, columns=columns)
        sns.lineplot(data=cpu_data,
                     palette="tab10",
                     linewidth=linewidth,
                     dashes=False,
                     ax=ax1)
        ax1.set(xlabel='time (x{0:.2f}s)'.format(self.dt),
                ylabel='Utilization (%)')
        ax1.set_title("CPU Utilization (%)")

        ax2.set_title("CPU Utilization (%)")
        ax2.boxplot(cpu_data_array, showfliers=False)
        plt.autoscale()
        plt.tight_layout()
        plt.savefig(output_filename)

        plt.gcf().set_size_inches(
            (6.4, 4.8))  ## reset back to matplotlib default
        sns.reset_defaults()

        return {'cpu_percent': output_filename}
    def make_density_histo(self):
        print('Making histogram...')
        mean_density = self.filtered_density_pandas['Density'].mean()
        min_val = mean_density - (3 * self.st_dev_density)
        max_val = mean_density + (3 * self.st_dev_density)

        all_density_list = self.filtered_density_pandas['Density'].to_list()
        # filtered to within 3 stdevs, as otherwise plot is a bit meh
        filtered_densities = [
            a for a in all_density_list if min_val < a < max_val
        ]
        filtered_density_series = pd.Series(data=filtered_densities,
                                            name="Points per sq m")

        sns.set(style="darkgrid")
        sns.set_color_codes()
        # plt.axes(xbound=(0, 100))
        sns.distplot(filtered_density_series.dropna(),
                     norm_hist=True,
                     color="r")
        plt.savefig(self.histo_out)
        sns.reset_defaults()
        sns.reset_orig()
        plt.clf()
Example #23
def createChartsForUnknown():
    sns.reset_defaults()
    plt.clf()
    print "\nCreating charts for Unknown...",
    sql = "SELECT Component, count(*) FROM Tests WHERE Author = 'UnKnown' and StreamId = " + str(
        streamid
    ) + " and Date BETWEEN '" + StartDate + "' AND '" + EndDate + "' group by Component;"
    c.execute(sql)
    data = c.fetchall()
    if len(data) == 0:
        print "No Test added prior to " + StartDate
    df2 = pd.DataFrame.from_records(data, columns=['Component', 'Count'])

    X = np.array(df2.Component)
    Y = np.array(df2.Count)

    size = np.shape(X)[0]

    sns.set_style("whitegrid")

    colors = sns.color_palette("cubehelix", len(df2.Component.unique()) + 5)

    # print size
    for i in range(size):
        g = sns.barplot(y=X[i:i + 1],
                        x=Y[i:i + 1],
                        color=colors[i],
                        order=X,
                        url=X[i] + '-unknown.html',
                        orient='h')
        g.text(Y[i] + 0.5, i, Y[i], color='black', ha="center", weight="bold")
        g.tick_params(labelsize=20)

    # sns.despine(left=True)

    plt.title("Tests added by Unknown Authors between " + StartDate + " and " +
              EndDate,
              fontsize=20,
              fontweight=0.5,
              color='Black')
    plt.savefig("Report/" + 'Unknown.svg', dpi=300, bbox_inches="tight")

    for i in range(size):

        sql = "SELECT Date, Component, Test, TestCase, File FROM Tests WHERE Author = 'UnKnown' and StreamId = " + str(
            streamid
        ) + " and Component='" + X[
            i] + "' AND Date BETWEEN '" + StartDate + "' AND '" + EndDate + "';"
        c.execute(sql)
        data = c.fetchall()
        if len(data) == 0:
            print "Unable to get data fo unknown test " + X[i]
        df2 = pd.DataFrame.from_records(
            data, columns=['Date', 'Component', 'Test', 'TestCase', 'File'])
        #     print df2.columns

        htmlString = '<table style="width: 50%;" border="3" cellpadding="20"><tbody><tr style="font-weight: bold; background-color: black; color: white;"><td>Index</td><td>Date</td><td>Component</td><td>Test</td><td>Test Case</td><td>File</td></tr>'

        for j, row in df2.iterrows():
            htmlString += "<tr><td>" + str(
                j + 1) + "</td>" + "<td>" + row.values[
                    0] + "</td>" + "<td>" + row.values[
                        1] + "</td>" + "<td>" + row.values[
                            2] + "</td>" + "<td>" + row.values[
                                3] + "</td>" + "<td>" + row.values[
                                    4] + "</td></tr>"
        htmlString += "</tbody></table>"
        with open("Report/" + X[i] + '-unknown.html', 'w') as html_file:
            html_file.write(htmlString)
        print ".",
Example #24
def plot_cv_results(in_cvresult, mname, save_fig=False):
    plt.close('all')

    # Extract scores of best alpha parameter and drop training scores
    in_cvresult = in_cvresult.copy()
    in_cvresult['best_alpha'] = ((in_cvresult.set_index('param_alpha').groupby(
        'param_encode')['mean_test_score'].transform('idxmax').rename(
            'best_alpha').reset_index(drop=True)))

    in_cvresult = (in_cvresult.query("param_alpha == best_alpha").drop(
        columns=[
            'param_alpha', 'best_alpha', 'mean_train_score', 'std_train_score'
        ]))

    for name_old, name_new in [('atchley_cluster', 'Atchley clust.'),
                               ('atchley', 'Atchley'), ('onehot', 'One-Hot'),
                               ('reduced_alphabet', 'Reduced Alphabet'),
                               ('word_embedding_cluster', 'Word2Vec Clust.'),
                               ('word_embedding', 'Word2Vec'),
                               ('elmo_embedding_summed', 'ELMo summed'),
                               ('elmo_embedding', 'ELMo')]:
        in_cvresult['param_encode'].replace(name_old, name_new, inplace=True)

    # fixed row order of the encodings for the final figure
    in_cvresult = in_cvresult.loc[[3, 0, 14, 13, 4, 6, 11, 8]]

    sns.reset_defaults()

    plt.close('all')
    fig, ax = plt.subplots(figsize=[10, 7])
    ax: plt.Axes
    plt.plot('param_encode',
             'mean_test_score',
             'b.',
             markersize=25,
             data=in_cvresult)
    plt.errorbar('param_encode',
                 'mean_test_score',
                 'std_test_score',
                 linewidth=4,
                 data=in_cvresult,
                 capsize=10,
                 capthick=4)
    plt.xticks(rotation=10)
    ax.grid()
    ax.set_yticks(
        np.arange(round(ax.get_ylim()[0], 2),
                  round(ax.get_ylim()[1], 2) + 0.01, 0.01))
    ax.set_xlabel('Encoding')
    ax.set_ylabel('Accuracy')

    [
        ax.text(x - 0.4, y_, f"{s:.3f}") for x, y_, s in zip(
            range(len(in_cvresult['param_encode'])),
            in_cvresult.mean_test_score, in_cvresult.mean_test_score)
    ]

    if save_fig:
        fig.tight_layout()
        fig.savefig(f"paper/figures/CV_score_{mname}")

    else:
        plt.show()
def plot_fit_3D(fitted_model,
                column1,
                column2,
                data=None,
                points=100,
                scolor='C3',
                fcolor='C0',
                cicolor='C1',
                salpha=0.4,
                cialpha=0.2,
                cmap='Oranges',
                figsize=(12, 9),
                show_ci=True):
    """Produce 3D scatter plot and overlay fitted model surface.
    
    
    Make a 3D scatter plot of the response versus two specified predictors and
    overlay the fit result surface.  The distributions of the other predictors
    are marginalised out, ie. they are set to the mean values of their
    respective distributions.

    NOTE: This resets matplotlib graphics options to the defaults. 

    Returns the matplotlib figure and Axes3D objects.
    """

    model = fitted_model.model

    if data is None:
        data = pd.DataFrame(model.exog, columns=model.exog_names)

    marg = utils.marginalised_range((column1, column2), data, points=points)

    sns.reset_defaults()

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(projection='3d')  # Axes3D(fig) without adding to the figure is deprecated in modern matplotlib

    # prepare point grids from the ranges of the scatter plot
    xs = marg[column1]
    ys = marg[column2]
    xv, yv = np.meshgrid(xs, ys)
    zv = np.zeros((ys.size, xs.size))
    lv = np.zeros((ys.size, xs.size))
    uv = np.zeros((ys.size, xs.size))

    # compute predictions and CI bounds for the rows in the point grids
    for idx, y in enumerate(yv):
        marg[column2] = y
        pred = fitted_model.get_prediction(marg).summary_frame()
        zv[idx] = pred['mean']
        lv[idx] = pred['mean_ci_lower']
        uv[idx] = pred['mean_ci_upper']

    # 3D scatter plot of the raw data
    ax.scatter(data[column1], data[column2], model.endog, color=scolor)

    # plot the prediction & CI boundary surfaces
    ax.plot_surface(xv, yv, zv, alpha=salpha, color=fcolor)
    if show_ci:
        ax.plot_surface(xv, yv, lv, alpha=cialpha, color=cicolor)
        ax.plot_surface(xv, yv, uv, alpha=cialpha, color=cicolor)

        # add contour plot of the CI width to the bottom of the figure
        ax.contourf(xv,
                    yv,
                    uv - lv,
                    zdir='z',
                    offset=ax.get_zlim()[0],
                    levels=50,
                    antialiased=True,
                    alpha=cialpha * 2,
                    cmap=cmap)

    ax.set_xlabel(column1)
    ax.set_ylabel(column2)
    ax.set_zlabel(model.endog_names)
    try:
        fig.suptitle(f'Fit vs {column1} & {column2}\n{model.formula}')
    except AttributeError:
        fig.suptitle(f'Fit vs {column1} & {column2}')

    return fig, ax
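A hedged usage sketch with statsmodels' formula API on synthetic data; it assumes the module's utils.marginalised_range dependency is importable alongside the function:

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(3)
df = pd.DataFrame({'x1': rng.normal(size=200), 'x2': rng.normal(size=200)})
df['y'] = 1.5 * df['x1'] - 0.8 * df['x2'] + rng.normal(scale=0.5, size=200)

fitted = smf.ols('y ~ x1 + x2', data=df).fit()
fig, ax = plot_fit_3D(fitted, 'x1', 'x2', data=df, points=40)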
@author: raulv
"""


import Windprof2 as wp
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sbn
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpecFromSubplotSpec as gssp
from rv_utilities import add_colorbar, discrete_cmap
from datetime import datetime
import Meteoframes as mf

sbn.reset_defaults()


from matplotlib import rcParams
rcParams['xtick.major.pad'] = 3
rcParams['ytick.major.pad'] = 3
rcParams['xtick.labelsize'] = 15
rcParams['ytick.labelsize'] = 15
rcParams['axes.labelsize'] = 15
rcParams['legend.handletextpad'] = 0.1
rcParams['legend.handlelength'] = 1.
rcParams['legend.fontsize'] = 15
rcParams['mathtext.default'] = 'sf'

def cosd(array):
    return np.cos(np.radians(array))
Example #27
def sns_reset():
    """Call this function to toggle back to the sns plotting environment from the matplotlib environment."""
    sns.reset_defaults()
    sns.set_style("white")
    sns.set_style("ticks")
    sns.set_context("notebook")
Example #28
def create_ANN(x_train, y_train, x_test, y_test, feature_names, numlayers,
               units, epochs, transform_method, folder_pathmod):
    '''
    build, compiles, and fits ANN model
    saves trained model files with keras
    saves error figure and model details text file
    
    Parameters
    ----------
    x_train: 2d numpy array of transformed training data	
    y_train: 2d numpy array of training targets
    x_test: 2d numpy array of transformed testing data 
    y_test: 2d numpy array of testing targets
    feature_names: array or list of feature names
    numlayers: integer for number of layers
    units: list of hidden units per layer
    epochs: integer number of epochs
    transform_method: name of the transformation method, recorded in the model details file
    folder_pathmod: path for saving png files and model detail text file
    
    Returns
    -------
    resid_train: array of observations - gmpe predictions for training data
    resid_test: array of observations - gmpe predictions for testing data
    pre_train: 2d array of model predictions for training data
    pre_test: 2d array of model predictions for testing data
    '''
    import numpy as np
    import pandas as pd
    from keras.models import Sequential
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from keras import layers
    from keras import optimizers
    import tensorflow.compat.v2 as tf
    tf.enable_v2_behavior()
    import seaborn as sns
    sns.reset_defaults()
    sns.set_style('whitegrid')
    sns.set_context('talk', font_scale=0.7)

    batch_size = 256

    def build_model():
        model = Sequential()
        model.add(
            layers.Dense(units[0],
                         activation='sigmoid',
                         input_shape=(x_train.shape[1], )))
        for i in range(1, numlayers):
            # a sigmoid activation could be added here (values between 0 and 1)
            model.add(layers.Dense(units[i]))
        model.add(layers.Dense(y_train.shape[1]))
        model.compile(optimizer=optimizers.Adam(learning_rate=2e-3),
                      loss='mse',
                      metrics=['mae', 'mse'])
        return model

    model = build_model()

    # fit the model
    history = model.fit(x_train,
                        y_train,
                        validation_data=(x_test, y_test),
                        epochs=epochs,
                        batch_size=batch_size,
                        verbose=1)

    model.save(folder_pathmod + 'model')

    mae_history = history.history['val_mae']
    mae_history_train = history.history['mae']
    test_mse_score, test_mae_score, _ = model.evaluate(x_test, y_test)  # returns [loss (mse), mae, mse]
    # dataframe for saving purposes
    hist_df = pd.DataFrame(history.history)

    f10 = plt.figure('Overfitting Test')
    plt.plot(mae_history_train, label='Training Data')
    plt.plot(mae_history, label='Testing Data')
    plt.xlabel('Epoch')
    plt.ylabel('Mean Absolute Error')
    plt.title('Overfitting Test')
    plt.legend()
    print(test_mae_score)
    plt.grid()
    plt.savefig(folder_pathmod + 'error.png')
    plt.show()

    pre_test = np.array(model.predict(x_test))
    pre_train = np.array(model.predict(x_train))

    # test data
    mean_x_test_allT = pre_test

    # training data
    mean_x_train_allT = pre_train

    resid_train = y_train - mean_x_train_allT
    resid_test = y_test - mean_x_test_allT

    diff = np.std(y_train - mean_x_train_allT, axis=0)
    difftest = np.std(y_test - mean_x_test_allT, axis=0)
    # write model details to a file
    file = open(folder_pathmod + 'model_details.txt', "w+")
    file.write('number training samples ' + str(len(x_train)) + '\n')
    file.write('number testing samples ' + str(len(x_test)) + '\n')
    file.write('data transformation method ' + str(transform_method) + '\n')
    file.write('input feature names ' + str(feature_names) + '\n')
    file.write('number of epochs ' + str(epochs) + '\n')
    model.summary(print_fn=lambda x: file.write(x + '\n'))
    file.write('model fit history\n' + hist_df.to_string() + '\n')
    file.write('stddev train' + str(diff) + '\n')
    file.write('stddev test' + str(difftest) + '\n')
    file.close()

    # write predictions to a file
    period = [10, 7.5, 5, 4, 3, 2, 1, 0.5, 0.2, 0.1]
    cols = ['obs_' + str(period[i]) for i in range(len(period))
            ] + ['pre_' + str(period[i]) for i in range(len(period))]
    out = np.concatenate((y_train, pre_train), axis=1)
    df_out = pd.DataFrame(out, columns=cols)
    df_out.to_csv(folder_pathmod + 'train_obs_pre.csv')

    out = np.concatenate((y_test, pre_test), axis=1)
    df_out = pd.DataFrame(out, columns=cols)
    df_out.to_csv(folder_pathmod + 'test_obs_pre.csv')

    return resid_train, resid_test, pre_train, pre_test
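A hedged invocation sketch with random data; shapes follow the function's expectations, all names and sizes are made up, and folder_pathmod must be an existing directory with a trailing slash:

import numpy as np

rng = np.random.default_rng(0)
x_train = rng.normal(size=(800, 12))
y_train = rng.normal(size=(800, 10))   # 10 targets = the 10 periods
x_test = rng.normal(size=(200, 12))
y_test = rng.normal(size=(200, 10))

resid_train, resid_test, pre_train, pre_test = create_ANN(
    x_train, y_train, x_test, y_test,
    feature_names=[f'f{i}' for i in range(12)],
    numlayers=3, units=[64, 32, 16], epochs=5,
    transform_method='StandardScaler', folder_pathmod='./run1/')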
# Plot number of reviews for companies
company = data["company"].value_counts()
# brands.count()
plt.figure(figsize=(12,8))
company.plot(kind='bar')
plt.title("Number of Reviews for the 6 Companies")

"""We can see here that amazon has the most number of reviews in the data set , while Netflix hs the least number of reviews submitted."""

# Plot distribution of ratings for each category of rating given to companies
rating_cols = ["overall-ratings", "work-balance-stars", "culture-values-stars",
       "carrer-opportunities-stars", "comp-benefit-stars",
       "senior-mangemnet-stars"]

sns.reset_defaults()

xcol = "company"
xlabel = "Company"
ylabel = "Count"
title = "Vote Count Per Company"
nrows = 3
ncols = 2
sns.countplot(x=xcol, data=data)
#plt.subplot(nrows,ncols, i+1)              
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.show()

feature_count = len(rating_cols)
Example #30
def getVariantRatioTabInFamily(d, max_ratio_cutoff=0.1, mean_read_cutoff=0, draw_fig=False):
    '''
    Some post-transcriptional modifications on tRNAs result in mismatches in NGS data.
    The function
        1) generates a tsv file containing details about mismatch information for tRNA families across samples,
        2) creates bar charts of the distribution of mismatch ratio for each sample,
        3) creates a mismatch ratio matrix for each tRNA family (mismatch location vs. samples).
    @param d: the data object generated by data_loader.py
    @param max_ratio_cutoff: only mismatch sites with a mismatch ratio above the cutoff in at
                            least one sample will be reported
    @param mean_read_cutoff: the minimum mean read number across samples, used to
                            filter out sites with very few reads
    @param draw_fig: whether to draw bar charts of the distribution of mismatch ratio for each sample
    @return: the filtered per-family mismatch DataFrame (fv)
    '''
    v = pd.read_csv(d["variants"], sep="\t")
    # Combine mutations for each tRNA: sum the mutation reads and keep total_reads unchanged.
    gv = v.groupby(['#SampleID', 'family', 'tRNA_ID', 'loc', 'ref']).sum()
    gv['mut_reads'] = v.groupby(['#SampleID', 'family', 'tRNA_ID', 'loc', 'ref'])['mut_reads'].sum()
    gv['total_reads'] = v.groupby(['#SampleID', 'family', 'tRNA_ID', 'loc', 'ref'])['total_reads'].mean()
    gv['muts'] = v.groupby(['#SampleID', 'family', 'tRNA_ID', 'loc', 'ref'])['mut'].apply(','.join)
    gv['mut_num'] = v.groupby(['#SampleID', 'family', 'tRNA_ID', 'loc', 'ref'])['mut_reads'].apply(
        lambda x: ','.join(x.astype(int).astype(str)))
    gv = gv.reset_index()
    # print(gv)
    # Combine mutations for each tRNA family: sum both the mutation reads and the total reads.
    fv = gv.groupby(['#SampleID', 'family', 'loc', 'ref']).sum()
    fv['mut_reads'] = gv.groupby(['#SampleID', 'family', 'loc', 'ref'])['mut_reads'].sum()
    fv['total_reads'] = gv.groupby(['#SampleID', 'family', 'loc', 'ref'])['total_reads'].sum()
    fv['ratio'] = fv['mut_reads'] / fv['total_reads']
    fv['muts'] = gv.groupby(['#SampleID', 'family', 'loc', 'ref'])['muts'].apply(','.join)
    fv['mut_num'] = gv.groupby(['#SampleID', 'family', 'loc', 'ref'])['mut_num'].apply(','.join)
    fv['tRNA_IDs'] = gv.groupby(['#SampleID', 'family', 'loc', 'ref'])['tRNA_ID'].apply(','.join)
    fv['tRNA_num'] = gv.groupby(['#SampleID', 'family', 'loc', 'ref'])['tRNA_ID'].count()
    fv['uni_reads'] = fv['mut_num'].apply(lambda x: len(set(x.split(','))))
    fv = fv.reset_index()

    # The pandas transform function is explained at https://pbpython.com/pandas_transform.html
    fv['ratio_max'] = fv.groupby(['family', 'loc', 'ref'])['ratio'].transform('max')
    fv['mut_read_mean'] = fv.groupby(['family', 'loc', 'ref'])['mut_reads'].transform('mean')

    # Drop rows with location -1
    fv = fv.loc[fv['loc'] >= 0]
    # Filter: keep sites above both the ratio cutoff and the mean-read cutoff
    fv = fv.loc[(fv['ratio_max'] >= max_ratio_cutoff)
                & (fv['mut_read_mean'] >= mean_read_cutoff)]
    # Add sample description
    fv['SampleDes'] = fv['#SampleID'].apply(getSampleDes, d=d)
    # Draw mutation matrix

    print("Download tsv here:")
    dl.csv_download_link(fv, 'family_mut.tsv', delete_prompt=False)
    if draw_fig:
        sns.reset_defaults()
        g = sns.FacetGrid(fv, row="SampleDes", height=1.7, aspect=4)
        g.map(sns.distplot, 'ratio', kde=False, bins=20)
        for ax in g.axes.flatten():
            ax.set_ylabel('Site Number')
        plt.xlim(0, 1)
        plt.figure()
        g = sns.FacetGrid(fv, row="SampleDes", height=1.7, aspect=4)
        g.map(sns.distplot, 'loc', kde=False, bins=75)
        #g.ax_joint.set(xlabel="Mismatch Locations", ylabel="Numbers")
        axes = g.axes.flatten()
        index =0
        for ax in axes:
            #ax.set_title(fv['SampleDes'][index])
            #ax.set_xlabel('Mutation Locations')
            ax.set_ylabel('Site Number')
            index+=1
        plt.xlim(0, 75)
        plt.show()
    return fv
    def save_metrics(self, output_directory):
        results = pd.DataFrame(self.results)
        n_classes = len(results['truths'])  # one entry per class

        ## compute confusion matrix
        y_true_flat, y_pred_flat = [], []
        for i in range(n_classes):
            y_true_flat.extend(results['truths'][i])
            y_pred_flat.extend(results['predictions'][i])
        cm = confusion_matrix(
            y_true=y_true_flat,
            y_pred=y_pred_flat,
        )
        cm = cm / cm.sum(axis=1, keepdims=True)
        df_cm = pd.DataFrame(cm, range(cm.shape[0]), range(cm.shape[1]))

        to_list = lambda mapping: list(mapping[i] for i in range(len(mapping)))
        truths = np.array(to_list(results['truths']))
        predictions = np.array(to_list(results['predictions']))
        scores = np.array(to_list(results['scores']))

        ## truths, predictions, scores are mappings
        ## truths : class_label -> class_label,
        ## predictions : class_label -> prediction,
        ## scores : class_label -> score,
        ## each size is (n_classes, n_samples)
        average_precisions, precisions, recalls = [], [], []
        roc_aucs, fprs, tprs = [], [], []
        for class_truths, class_predictions, class_scores in zip(
                truths, predictions, scores):
            n_samples = len(class_predictions)
            scores_mat = np.zeros((n_samples, n_classes))
            truths_mat = np.zeros_like(scores_mat)
            ## one hot encoding, fill with scores and label
            scores_mat[np.arange(n_samples), class_predictions] = class_scores
            truths_mat[np.arange(n_samples), class_truths] = 1

            average_precisions.append(
                average_precision_score(truths_mat.flatten(),
                                        scores_mat.flatten()))
            precision, recall, _ = precision_recall_curve(
                truths_mat.flatten(),
                scores_mat.flatten(),
            )
            precisions.append(precision)
            recalls.append(recall)

            roc_aucs.append(
                roc_auc_score(truths_mat.flatten(), scores_mat.flatten()))
            fpr, tpr, _ = roc_curve(truths_mat.flatten(), scores_mat.flatten())
            fprs.append(fpr)
            tprs.append(tpr)

        assets = {}

        ## plot confusion matrix
        plt.clf()
        plt.cla()
        plt.gcf().set_size_inches((6.4, 4.8))
        ax = plt.gca()
        sn.set(font_scale=1.4)  # for label size
        sn.heatmap(
            df_cm,
            annot=True,
            ax=ax,
            annot_kws={"size": 10}  # font size
        )
        plt.autoscale()
        plt.tight_layout()
        filename = self.output_directory / '{}_{}.png'.format(
            self.experiment_name, self.predictor_name)
        plt.savefig(filename)
        sn.reset_defaults()
        assets.update({
            'Confusion Matrix': filename,
        })

        ## plot pr curve
        plt.clf()
        plt.cla()
        ax = plt.gca()
        plt.gcf().set_size_inches((6.4, 4.8))
        lines, labels = [], []
        colors = cycle(
            ['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])
        for i, (precision, recall, ap, color) in enumerate(
                zip(precisions, recalls, average_precisions, colors)):
            l, = ax.plot(recall, precision, color=color)
            class_name = 'class_{}'.format(
                i) if self.class_names is None else self.class_names[i]
            label = '{} (ap: {:.2f})'.format(class_name, ap)
            lines.append(l)
            labels.append(label)
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.grid()
        plt.xlabel('recall')
        plt.ylabel('precision')
        plt.title("Precision Recall Curve")
        plt.legend(lines,
                   labels,
                   loc='center left',
                   prop=dict(size=8),
                   bbox_to_anchor=(1., 0.5))
        plt.autoscale()
        plt.tight_layout()
        filename = self.output_directory / '{}_{}_pr_curve.png'.format(
            self.experiment_name, self.predictor_name)
        plt.savefig(filename)
        assets.update({
            'Precision Recall': filename,
        })

        ## plot roc auc curve
        plt.clf()
        plt.cla()
        ax = plt.gca()
        plt.gcf().set_size_inches((6.4, 4.8))
        lines, labels = [], []
        for i, (fpr, tpr, auc,
                color) in enumerate(zip(fprs, tprs, roc_aucs, colors)):
            l, = ax.plot(fpr, tpr, color=color)
            class_name = 'class_{}'.format(
                i) if self.class_names is None else self.class_names[i]
            label = '{} (auc :{:.2f})'.format(class_name, auc)
            lines.append(l)
            labels.append(label)
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.grid()
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC) Curve')
        plt.legend(lines,
                   labels,
                   loc='center left',
                   prop=dict(size=8),
                   bbox_to_anchor=(1., 0.5))
        plt.autoscale()
        plt.tight_layout()
        filename = self.output_directory / '{}_{}_roc_curve.png'.format(
            self.experiment_name, self.predictor_name)
        plt.savefig(filename)
        assets.update({
            'ROC Curve': filename,
        })

        return assets