def eval_model_ensemble(models,x,y_ref=None,is_class=False,verbose=False):
    """Evaluate a single model or an ensemble of models.

    Parameters
    ----------
    models : single or iterable set of scikit-learn model instances
        model(s) to be evaluated

    x : numpy.array
        model inputs: either one observation (1-d, n features) or m-row
        observations with n-column features (2-d)

    y_ref : numpy.array (Default value = None)
        reference target output for observations in x

    is_class : bool (Default value = False)
        indication if classification problem (only used when y_ref is given)

    verbose : bool (Default value = False)
        if True, print the mean model error (only used when y_ref is given)

    Returns
    -------
    y_pred : numpy.array
        model output, returned on its own if y_ref is None. For several
        models and 2-d input it has one row per observation and one column
        per model; for several models and 1-d input one entry per model.

    [y_pred, y_err] : list of length 2
        returned if y_ref is given: model output and its mean error to y_ref
        (mean absolute error for regression, share of mismatches for
        classification)

    Raises
    ------
    ValueError
        if x is neither 1- nor 2-dimensional

    """

    # model evaluation
    n_dim = len(np.array(x).shape)
    if n_dim not in (1, 2):
        raise ValueError('Feature imput dimension greater than 2.')
    single_model = data_func.is_iterable(models)==False

    if n_dim == 1:  # single observation input
        # estimators expect 2-d input, so pass the observation as one row
        x_row = np.asarray(x).reshape(1, -1)
        if single_model:
            y_pred = models.predict(x_row)
        else:
            y_pred = np.zeros(len(models))
            for m, mo in enumerate(models):
                # predict returns a length-1 array; store its single value
                y_pred[m] = np.ravel(mo.predict(x_row))[0]
    else:
        if single_model:
            y_pred = models.predict(x)
        else:
            y_pred = np.zeros((len(x), len(models)))
            for m, mo in enumerate(models):
                y_pred[:, m] = mo.predict(x)

    # no reference given: return raw model output
    # (identity test, because `array == None` compares element-wise)
    if y_ref is None:
        return y_pred

    # error evaluation
    pred, ref = np.asarray(y_pred), np.asarray(y_ref)
    if pred.ndim == 2 and ref.ndim == 1 and len(ref) == pred.shape[0]:
        # one reference value per observation (row): compare it against
        # every model column instead of broadcasting along the model axis
        ref = ref.reshape(-1, 1)
    if is_class:  # classification problem
        y_err = np.mean(pred != ref)
    else:  # regression problem
        y_err = np.mean(np.abs(pred - ref))
    if verbose:
        print('\nMean model error: {0}.'.format(np.round(y_err,2)))
    return [y_pred, y_err]
def get_feat_importance(W, max_norm=100.):
    """Max-normed feature importance and its variation across models.

    Parameters
    ----------
    W : numpy.array
        input data; statistics are taken along axis 0, ignoring NaN entries

    max_norm : float, (Default value = 100)
        value the largest mean importance is scaled to

    Returns
    -------
    numpy.array holding [impo, error], where

    impo : number or array
        max-normed mean values (axis=0)

    error : number or array
        max-normed sample standard deviation (axis=0, ddof=1)

    """

    # mean variable importance and its standard deviation as measure of variation
    mean_imp = np.nanmean(W, 0)
    imp_sd = np.nanstd(W, 0, ddof=1)
    # np.nanmax instead of the builtin max: the result does not depend on
    # where a NaN mean (all-NaN column) sits, and a scalar mean (1-d W) works
    imp_norm = np.nanmax(mean_imp)

    impo = max_norm * mean_imp / imp_norm
    error = max_norm * imp_sd / imp_norm

    return np.array([impo, error])
示例#3
0
elif type(config.data_trafos) == dict:
    trafos = [
        config.data_trafos[name] for name in [config.target] + config.features
    ]
elif data_func.is_iterable(config.data_trafos) == True:
    trafos = [
        config.data_trafos[i]
        for i in range(len([config.target] + config.features))
    ]
else:
    raise ValueError('Invalid data transformation type.')

# 1-hot-encoding of categorical variables: every categorical feature is
# replaced by its indicator columns, and its transformation entry is moved to
# the end of `trafos`, repeated once per indicator column.
for cat in config.categorical:
    # data trafos
    # NOTE(review): the position is looked up in config.features, while the
    # dict/iterable branches above build `trafos` over
    # [config.target] + config.features -- confirm the leading target entry
    # does not shift this index by one.
    # int(...) on the np.where result relies on exactly one match.
    i_cat_trafo = int(np.where(np.array(config.features) == cat)[0])
    cat_trafo = trafos[i_cat_trafo]
    del trafos[i_cat_trafo]
    # get indicator frames
    # .iloc[:, 1:] drops the first design-matrix column (presumably the
    # intercept term -- TODO confirm against the `pat` library in use)
    cat_rawData = pat.dmatrix(cat, raw_data, return_type='dataframe').iloc[:,
                                                                           1:]
    # append columns: each indicator becomes a feature of its own and
    # inherits the transformation of the categorical variable
    for col in cat_rawData.columns:
        raw_data[col] = cat_rawData[col]
        config.features.append(col)
        trafos.append(cat_trafo)
    # the original categorical column is no longer a model feature
    config.features.remove(cat)

#%% get transformed data
data_shifted = data_func.data_framer(data=raw_data.copy(),target=config.target,features=config.features,\
                                     index=config.time_var,start_i=config.start_time,end_i=config.end_time,\
def ML_train_tester(df,target,features,method,m_test=1,n_boot=500,is_class=False,is_zero_one=False,\
                   to_norm=None,CV_name=None,CV_value=None,counter_fact=False,\
                   horizon=None,save_out=False,save_models=True,file_name='',verbose=False):
    """Machine learning wrapper for bootstrapped training and testing.

    Parameters
    ----------
    df : pandas.DataFrame (input data)

    target : str
        LHS variable

    features : list of str
        RHS variable(s)

    method : str
        model

    m_test : int or index mask, optional (Default value = 1, "jackknife")
        size of test data or index mask of test set. If mask, n_boot is set
        to 1; if 1, n_boot is set to the number of observations

    n_boot : int, optional (Default value = 500)
        number of bootstraps

    is_class : bool, optional (Default value = False)
        if True, maps to integer output

    is_zero_one : bool, optional (Default value = False)
        if True, maps to Boolean output (only used if is_class is True)

    to_norm : list, optional (Default value = None)
        variables to norm (z-scores)

    CV_name : str, optional (Default value = None)
        name of cross-validation parameter. For method 'VAR' it is passed as
        a keyword argument to the model's fit call.

    CV_value : float, optional (Default value = None)
        value for cross-validation parameter

    counter_fact : bool, optional (Default value = False)
        if True, variable importance by leaving one feature out at a time

    horizon : int, optional (Default value = None)
        lead-lag size for projection model (only used for VAR)

    save_out : bool, optional (Default value = False)
        if True save output to file

    save_models : bool, optional (Default value = True)
        if True, include  models in output file (could use lots of space)

    file_name : str, optional (Default value = '')
        name of output file

    verbose : bool, optional (Default value = False)
        if True, print basic fit results to screen

    Returns
    -------
    dict, keyed by
        ID : str
            identifier built from target, method, m_test and n_boot

        mean_train_err : float
            mean in-sample error over all bootstraps

        mean_test_err : float
            mean out-of-sample error over all bootstraps

        train_pred_Y, test_pred_Y : numpy.array
            in-sample / out-of-sample predictions over all bootstraps

        train_ref_Y, test_ref_Y : numpy.array
            training / test target values over all bootstraps

        feat_weights : numpy.array
            feature importances, one row per bootstrap

        test_ind : numpy.array
            index mask of test samples for each bootstrap

        models : numpy.array
            fitted models (always part of the returned dictionary; left out
            of the saved file if save_models is False)

    Raises
    ------
    ValueError
        if a variable listed in to_norm is not a column of df

    """

    # definitions and initialisations
    m, n_col = len(df), len(features)+1
    if data_func.is_iterable(m_test)==True:
        n_boot = 1
    elif m_test==1:
        n_boot = m # one fit for each observation
    is_var = method=='VAR' # VAR part of statsmodels library (treated differently)
    if is_var:
        n_boot = m_test = 1
    # empty fields for bootstrapped model output
    test_ref_Y,   test_pred_Y  = np.array([]), np.array([]) # test target values and out-of-sample predictions
    train_ref_Y,  train_pred_Y = np.array([]), np.array([]) # training target values and in-sample predictions
    boot_errors,  models       = np.array([]), np.array([]) # mean bootstrap error and bootstrap models
    feat_weights, test_indices = np.zeros((n_boot,n_col-1)), np.zeros((n_boot,m)) # weights for feature importance, test_index over bootstraps

    # input data
    inputs = df.copy()
    if to_norm is not None: # normalise data (z-scores)
        for var in to_norm:
            if var not in inputs.columns:
                raise ValueError("Norm error: Variable '{0}' not in dataframe.".format(var))
            vals        = inputs[var].values
            inputs[var] = (vals-vals.mean(0))/vals.std(0,ddof=1)

    # loop over bootstrapped samples
    for t in range(n_boot):
        # get training and testing data
        if data_func.is_iterable(m_test)==True:
            df_train, df_test = inputs[~m_test], inputs[m_test]
            test_indices[t,:] = m_test
        else:
            df_train, df_test, is_train = train_test_split(inputs,m_test=m_test,t=t) # random split
            test_indices[t,:]           = ~is_train
        # get values
        x_train, y_train = df_train[features].values, df_train[target].values
        x_test,  y_test  = df_test[features].values,  df_test[target].values

        # set learning method, fit model and get train/test predictions
        if is_var: # can only be used with m_test==1
            input_data = inputs[[target]+features].values
            ML         = model_selection(method,input_data)
            y_train    = y_test = input_data[:,0]
            if CV_name is None:
                model = ML.fit(maxlags=1) # defaults to VAR with one lag
            else:
                # keyword expansion instead of exec(): exec cannot bind a
                # function local on Python 3 and would evaluate string
                # values as names
                model = ML.fit(**{CV_name: CV_value})
            # forecast from each observation; the first `horizon` entries
            # have no forecast and stay NaN
            in_pred = np.zeros(m)*np.nan
            for r in range(m):
                start_values = input_data[r,:]
                fcast        = model.forecast(start_values.reshape((1,len(features)+1)),horizon)[-1,0]
                if r+horizon<m:
                    in_pred[r+horizon] = fcast
            out_pred = in_pred
        else:
            ML = model_selection(method,n_HN=n_col-1,CV_name=CV_name,CV_value=CV_value) # n_HN only used for neural network
                                                                            # (nNeurons=nFeatures in each layer)
            model_clone = skl_base.clone(ML) # unfitted copy for counter-factual fits
            model       = ML.fit(x_train,y_train) # model fit
            out_pred    = model.predict(x_test)
            in_pred     = model.predict(x_train)

        # get discrete class output & get bootstrap error
        if is_class: # target should be an integer
            if is_zero_one: # map to Boolean
                in_pred  = data_func.to_zero_one(in_pred).astype(bool)
                out_pred = data_func.to_zero_one(out_pred).astype(bool)
            else: # map to integer
                in_pred  = np.round(in_pred).astype(int)
                out_pred = np.round(out_pred).astype(int)
            boot_err = np.mean(out_pred!=y_test)
        elif is_var:
            boot_err = np.nanmean(np.abs(out_pred-y_test)) # skip NaN-padded start
        else:
            boot_err = np.mean(np.abs(out_pred-y_test))
        boot_errors = np.hstack((boot_errors,boot_err))
        models      = np.hstack((models,model)) # store model

        # feature importance
        if not counter_fact:
            if method in ['Tree-rgr','Tree-clf','Forest-rgr','Forest-clf']:
                feat_weights[t] = model.feature_importances_
        # feature importance through "counter_factual" analysis (leave one variable out and compare)
        else: # may slow things down
            # NOTE(review): model_clone only exists for non-VAR methods
            for f,feat in enumerate(features):
                model_clone_II = skl_base.clone(model_clone)
                temp_features  = list(features)
                temp_features.remove(feat)
                # get training and testing data
                x_train, x_test = df_train[temp_features].values, df_test[temp_features].values
                temp_model      = model_clone_II.fit(x_train,y_train)
                temp_pred       = temp_model.predict(x_test)
                if is_class:
                    feat_weights[t,f] = np.mean(temp_pred!=y_test)
                else:
                    feat_weights[t,f] = np.mean(np.abs(temp_pred-y_test))
        # train Ys
        train_pred_Y = np.hstack((train_pred_Y, in_pred))
        train_ref_Y  = np.hstack((train_ref_Y,  y_train))
        # test Ys
        test_pred_Y  = np.hstack((test_pred_Y,  out_pred))
        test_ref_Y   = np.hstack((test_ref_Y,   y_test))

    # get errors
    if is_class:
        train_error = np.mean(train_pred_Y!=train_ref_Y)
        test_error  = np.mean(test_pred_Y!=test_ref_Y)
    else:
        # VAR predictions are NaN-padded by construction, so a plain mean
        # would always be NaN; match the per-bootstrap error and skip NaNs
        mean_func   = np.nanmean if is_var else np.mean
        train_error = mean_func(np.abs(train_pred_Y-train_ref_Y))
        test_error  = mean_func(np.abs(test_pred_Y-test_ref_Y))

    # verbose
    ID = target+'-'+method+'-'+str(m_test)+'-'+str(n_boot)
    if verbose:
        boot_sd = np.std(boot_errors,ddof=1)
        print('\nTraining Summary')
        print('ID: {0}'.format(ID))
        print('\tin-sample error: {0}'.format(round(train_error,3)))
        print('\tout-of-sample error: {0}'.format(round(test_error,3)))
        print('\terror variance: {0}'.format(round(boot_sd,3)))
        print('\terror signal-to-noise: {0}'.format(round(test_error/boot_sd,3)))

    # package output
    out_dict = {'ID' : ID,\
                'mean_train_err' : train_error,  'mean_test_err' : test_error,\
                'train_pred_Y'   : train_pred_Y, 'test_pred_Y'   : test_pred_Y,\
                'train_ref_Y'    : train_ref_Y,  'test_ref_Y'    : test_ref_Y,\
                'feat_weights'   : feat_weights, 'test_ind'      : test_indices}
    if save_out:
        # the saved copy only carries the models if requested
        to_file = dict(out_dict)
        if save_models:
            to_file['models'] = np.array(models)
        with open(file_name,'wb') as out_file: # closes the file handle
            pk.dump(to_file,out_file)
    # the returned (full) output always keeps the models
    out_dict['models'] = np.array(models)

    # return output dictionary
    return out_dict
示例#5
0
def plot_feat_importance(weights,variance=None,corrs=None,features=None,last=False,\
                         y_label=None,x_mark=None,x_mark_label='',title='',color_dict=None,\
                         y_mark=None,y_lim=None,color_map='rainbow',\
                         save=False,save_name='feature_importance.png'):
    """Plot feature importance: time series or last.

    Parameters
    ----------
    weights : pandas.DataFrame
        feature importance scores (if last is True, an array of scores may
        be passed instead, together with features)

    variance : array, optional (Default value = None)
        error bands of feature importance scores (only used if last is
        True); no error bars are drawn if None

    corrs : array, optional (Default value = None)
        correlation between features and target, used to colour the bars
        (only used if last is True)

    features : list of str, optional (Default value = None)
        names of features, defaults to the columns of weights

    last : bool, optional (Default value = False)
        if True, horizontal bar-chart of feature importance, else time series

    y_label : str, optional (Default value = None)
        y-axis label

    x_mark : value, optional (Default value = None)
        index value for x-axis reference value (vertical line)

    x_mark_label : str, optional (Default value = '')
        label of x-axes reference

    title : str, optional (Default value = '')
        plot title

    color_dict : dict, optional (Default value = None)
        dictionary keyed by features and values providing color (if last==
        False)

    y_mark : values, optional (Default value = None)
         y-axis reference value (horizontal line)

    y_lim : [min_value,max_value], optional (Default value = None)
        y-boundaries of plot

    color_map : str, optional (Default value = 'rainbow')
        colormap, see also https://matplotlib.org/examples/color/colormaps_reference.html

    save : bool, optional (Default value = False)
        if True, save plot

    save_name : str, optional (Default value = 'feature_importance.png')
        file name under which to save plot (incl directory)

    Note: plot can be further adjusted by modifying code below.

    """

    fsize = 15  # reference fontsize
    # identity test: `features == None` is element-wise for arrays/indexes
    if features is None:
        features = weights.columns

    if not last:  # plot time series
        plot_args = dict(figsize=(8.5, 6), lw=2, rot=30)
        if color_dict is not None:
            plot_args['color'] = [color_dict[f] for f in features]
        fig = weights[features].plot(**plot_args)
        if x_mark is not None:
            # the line is drawn at the position of x_mark within the index
            x_pos = list(weights.index).index(x_mark)
            plt.axvline(x_pos, ls='--', lw=2, c='k', label=x_mark_label)
        if y_mark is not None:
            # a y-axis reference is a horizontal line
            plt.axhline(y_mark, ls='-', lw=1, c='k')
        lgd = fig.legend(bbox_to_anchor=(1.4, 1.02), prop={'size': fsize - 1})
        fig.tick_params(axis='x', labelsize=fsize - 2)
        fig.tick_params(axis='y', labelsize=fsize - 2)
        if y_lim is not None:
            plt.gca().set_ylim(y_lim)
        if y_label is None:
            plt.ylabel('max-normed feature importance', fontsize=fsize)
        else:
            plt.ylabel(y_label, fontsize=fsize)
        plt.xlabel('date', fontsize=fsize)
        plt.title(title)
        if save:
            # saved once, keeping the legend placed outside the axes
            plt.savefig(save_name,
                        dpi=200,
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')
    else:
        # latest feature importance and its error band
        if isinstance(weights, pd.DataFrame):
            impo = weights.values[-1, :]
        else:
            impo = np.asarray(weights)
        if isinstance(variance, pd.DataFrame):
            error = variance.values[-1, :]
        else:
            error = variance  # may be None: bars are drawn without errors
        # rank of each feature by importance; bar i is drawn at height
        # ranks[i], so impo, error, colours and labels all stay in the
        # original feature order
        ranks = impo.argsort().argsort()
        fig, ax = plt.subplots(figsize=(8.5, 6))
        if not np.array(corrs).shape == ():
            # colour bars by target-feature correlation
            cNorm = colors.Normalize(vmin=-1, vmax=1)
            scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=color_map)
            bar_colors = scalarMap.to_rgba(corrs)
            ax.barh(ranks, impo, xerr=error, color=bar_colors, align='center')
            scalarMap.set_array([-1, 1])
            cb = fig.colorbar(scalarMap, ax=ax, ticks=np.arange(-1, 1.1, .5))
            cb.set_label('target-feature correlation',
                         rotation=270,
                         fontsize=fsize - 2)
        else:
            ax.barh(ranks,
                    impo,
                    xerr=error,
                    color='r',
                    align='center',
                    alpha=0.4)
            if ax.get_xlim()[1] > 97:
                ax.set_xlim([0, 110])  # leave room right of the 100-line
        # axes & ticks
        plt.yticks(ranks, features, fontsize=fsize - 2)
        plt.axvline(100, ls='--', lw=0.5, color='k')
        plt.xlabel('max-normed feature importance', fontsize=fsize)
        axes = plt.gca()
        axes.set_xlim(left=0)
        axes.set_ylim([-1, len(features)])
        plt.title(title, fontsize=fsize)
        if save:
            plt.savefig(save_name, dpi=200, bbox_inches='tight')
    plt.draw()
示例#6
0
def ML_heatmap(f1,f2,df,features,target,models=None,model_outputs=None,condition='median',\
               N=30,ranges=None,to_norm=None,color_norms=None,title='',\
               color_map='rainbow',save=False,save_name='ml_heatmap.png'):
    """Heatmap of conditional 2-D model prediction.

    Parameters
    ----------
    f1 : str
        name of first variable feature

    f2 : str
        name of second variable feature

    df : pandas.DataFrame
        input data

    features : list of str
        names of model features (RHS)

    target : str
        name of target variables (LHS)

    models : list-like, optional (Default value = None)
        models to be evaluated. If None, needs pre-computed model_outputs

    model_outputs : 2-d numpy.array (NxN), optional (Default value = None)
        pre-computed model_outputs for f1-f2 feature ranges and condition

    condition : str, int or values, optional (Default value = 'median')
        condition for non-variable features, options: 'median', 'mean',
        'last', an integer row position, or custom values given as
        [target value, feature value 1, ..., feature value n]

    N : int, optional (Default value = 30)
        raster density within ranges

    ranges : [f1_min,f1_max,f2_min,f2_max], optional (Default value = None)
        ranges of variable features, defaults to their range in the data

    to_norm : list of str, optional (Default value = None)
        variable names to be normalised (z-scores)

    color_norms : [vmin,vmax], optional (Default value = None)
        range to norm color scale, defaults to the range of the heatmap

    title : str, optional (Default value = '')
        plot title

    color_map : str, optional (Default value = 'rainbow')
        colormap, see also https://matplotlib.org/examples/color/colormaps_reference.html

    save : bool, optional (Default value = False)
        if True, save plot

    save_name : str, optional (Default value = 'ml_heatmap.png')
        file name under which to save plot (incl directory)


    Note: plot can be further adjusted by modifying code below.

    Returns
    -------
    output : 2-d numpy.array (NxN)
        heatmap values (mean over models, or model_outputs if given)

    Raises
    ------
    ValueError
        for an invalid condition, ranges or color_norms, or if neither
        models nor model_outputs is given

    """

    data = df.copy()
    # normalise input data
    if to_norm is not None:
        for var in to_norm:
            vals = data[var].values
            data[var] = (vals - vals.mean(0)) / vals.std(0, ddof=1)

    # reference point: values of all features and of the target
    if np.ndim(condition) == 0:
        # scalar conditions only; comparing an array to a string would be
        # element-wise and ambiguous inside `if`
        if condition == 'median':
            ref_inputs, z = data[features].median().values, data[target].median()
        elif condition == 'mean':
            ref_inputs, z = data[features].mean().values, data[target].mean()
        elif condition == 'last':
            ref_inputs, z = data[features].values[-1, :], data[target].values[-1]
        elif isinstance(condition, (int, np.integer)) and not isinstance(condition, bool):
            ref_inputs = data[features].values[condition, :]
            z = data[target].values[condition]
        else:
            raise ValueError('No valid modelling condition given.')
    elif len(condition) == len(features) + 1:
        # custom values: target first, then one value per feature
        ref_inputs, z = condition[1:], condition[0]
    else:
        raise ValueError('No valid modelling condition given.')
    # float copy: grid values written below must not be truncated to
    # integers, and the data frame must not be modified through a view
    inputs = np.array(ref_inputs, dtype=float).reshape(1, -1)

    if ranges is None:
        ranges = [min(data[f1]), max(data[f1]), min(data[f2]), max(data[f2])]
    elif not len(ranges) == 4:
        raise ValueError('Invalid feature ranges.')

    # model prediction for models and feature ranges
    i1, i2 = features.index(f1), features.index(f2)
    y0, x0 = inputs[0][i1], inputs[0][i2]  # reference point, read before the grid overwrites it
    range1 = np.linspace(ranges[0], ranges[1], N)
    range2 = np.linspace(ranges[2], ranges[3], N)
    # identity test: `model_outputs == None` is element-wise for arrays
    if model_outputs is None:
        if models is None:
            raise ValueError('Either models or model_outputs must be given.')
        output = np.zeros((len(models), N, N))
        for m, model in enumerate(models):
            for i, val1 in enumerate(range1):
                inputs[0, i1] = val1
                for j, val2 in enumerate(range2):
                    inputs[0, i2] = val2
                    # predict returns a length-1 array; store its value
                    output[m, i, j] = np.ravel(model.predict(inputs))[0]
        output = np.mean(output, 0)  # model mean
    else:
        output = np.asarray(model_outputs)

    # figure parameters
    if color_norms is None:
        vmin, vmax = np.nanmin(output), np.nanmax(output)
    elif len(color_norms) == 2:
        vmin, vmax = color_norms
    else:
        raise ValueError('Invalid color norm.')

    # plot
    fig, ax = plt.subplots(figsize=(8, 6))
    # color map
    cNorm = colors.Normalize(vmin=vmin, vmax=vmax)
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=plt.get_cmap(color_map))
    im = ax.imshow(output,
                   origin='lower',
                   cmap=color_map,
                   vmin=vmin,
                   vmax=vmax,
                   interpolation='hermite')
    ax.autoscale(False)

    # conditioning reference point in pixel coordinates (f2 on the x-axis,
    # f1 on the y-axis): white disc, then the target value in map colour
    x1 = (x0 - ranges[2]) * N / (ranges[3] - ranges[2]) - .5
    y1 = (y0 - ranges[0]) * N / (ranges[1] - ranges[0]) - .5
    ax.plot(x1, y1, 'wo', ms=20)
    ax.plot(x1, y1, 'o', c=scalarMap.to_rgba(z), ms=20, markeredgecolor='w', mew=3)

    fsize = 15  # figure base fontsize
    plt.title(title, fontsize=fsize)
    plt.xlabel(f2, fontsize=fsize)
    plt.ylabel(f1, fontsize=fsize)
    # five ticks: both ends, the middle and the quarter points of the raster
    tix = [0, int((N - 1) / 4), int((N - 1) / 2), int(3 * (N - 1) / 4), N - 1]
    plt.xticks(tix, np.round(range2[tix], 1), fontsize=fsize - 2)
    plt.yticks(tix, np.round(range1[tix], 1), fontsize=fsize - 2)
    cbar = plt.colorbar(im)
    cbar.set_label(target, fontsize=fsize)
    if save:
        plt.savefig(save_name, dpi=200, bbox_inches='tight')
    plt.draw()

    return output
示例#7
0
def cond_fan_chart(df_X,df_Y,models,ref_time,cond=True,idx=None,h_ref_line=None,data_return=False,\
                   two_class=False,legend_loc='best',y_lim=None,y_label=None,x_label=None,title='',\
                   save=False,save_name='cond_fan_chart.png'):
    """Percentile-based fan chart, optionally conditioned on Y-reference at reference time.

    Parameters
    ----------
    df_X : pandas.DataFrame
        input data for models

    df_Y : pandas.DataFrame
        reference data; its first column is plotted and provides the
        reference value at ref_time (needs the same index as df_X)

    models : list-like,
        fitted models

    ref_time : value
        index value of reference time

    cond : bool, optional (Default value = True)
        if True, force model mean on reference point

    idx : str, optional (Default value = None)
        name of index if not set. The index is set on copies; the frames
        passed in are left unchanged.

    h_ref_line : (y-value, label), optional (Default value = None)
        y-value and legend label for horizontal reference line

    data_return : bool, optional (Default value = False)
        if True, return plot input data

    two_class : bool, optional (Default value = False)
        if True, two-class classification is assumed

    legend_loc : str or int, optional (Default value = 'best')
        matplotlib legend location

    y_lim : [min_value,max_value], optional (Default value = None)
        y-boundaries of plot

    y_label : str, optional (Default value = None)
        y-axis label

    x_label : str, optional (Default value = None)
        x-axis label

    title : str, optional (Default value = '')
        plot title

    save : bool, optional (Default value = False)
        if True, save plot

    save_name : str, optional (Default value = 'cond_fan_chart.png')
        file name under which to save plot (incl directory)

    Note: plot can be further adjusted by modifying code below.

    Returns
    -------
    df : pandas.DataFrame
        internally generated data used for plot (only if data_return is
        True, otherwise None)

    """

    # set index (df_X & df_Y need to have the same index); not done in
    # place, so the caller's frames keep the idx column and a second call
    # with the same arguments still works
    if idx is not None:
        df_X = df_X.set_index(idx)
        df_Y = df_Y.set_index(idx)

    # model predictions: one row per model, one column per observation
    preds = np.zeros((len(models), len(df_X)))
    for i, model in enumerate(models):
        preds[i, :] = model.predict(df_X)

    # mean and median model and their offset from the reference point
    ref_name = df_Y.columns[0]
    refY = df_Y.loc[ref_time][ref_name]
    df = df_X.copy()
    df['mean model'] = np.mean(preds, axis=0)
    df['median model'] = np.percentile(preds, 50, axis=0)
    mean_off = df.loc[ref_time]['mean model'] - refY
    median_off = df.loc[ref_time]['median model'] - refY
    if cond:
        # conditioned on reference point: shift models onto it
        df['mean model'] = df['mean model'] - mean_off
        df['median model'] = df['median model'] - median_off
        band_shift = median_off
    else:
        band_shift = 0.

    # percentile bands: 50% (p25-p75), 90% (p5-p95) and 99% (p0.5-p99.5)
    bands = [('p25', 25), ('p75', 75), ('p5', 5), ('p95', 95),
             ('p0.5', 0.5), ('p99.5', 99.5)]
    for band_name, quant in bands:
        df[band_name] = np.percentile(preds, quant, axis=0) - band_shift

    # merge df and df_Y
    df = pd.concat([df_Y, df], axis=1)

    # plotting
    p=df[[ref_name,'mean model','median model']].plot(figsize=(9,6),linewidth=3,\
          style=['bo-','gs-','rd-'],ms=5,rot=0,alpha=.7)

    # reference
    ref_T = list(df.index.values).index(ref_time)
    p.axvline(ref_T, ls='--', c='k', lw=2)
    p.plot([ref_T], [refY],
           'o',
           markersize=15,
           color='k',
           alpha=.5,
           label='ref.: ' + str(ref_time))

    # bands are drawn on top of each other with equal alpha, so the inner
    # ones appear darker; the legend patches mimic the stacked shading
    x_pos = range(len(df))
    band_patches = []
    for low, high, patch_alpha in [('p25', 'p75', .6),
                                   ('p5', 'p95', .4),
                                   ('p0.5', 'p99.5', .2)]:
        p.fill_between(x_pos,
                       df[low].values,
                       df[high].values,
                       color='r',
                       alpha=.2)
        band_patches.append(patch.Patch(color='r', alpha=patch_alpha))

    # add boundaries for two-class classification
    if two_class:
        p.axhline(0, ls='-', c='k', lw=.4)
        p.axhline(1, ls='-', c='k', lw=.4)
        if y_lim is not None:
            p.set_ylim(y_lim)
        else:
            p.set_ylim([-.25, 1.5])
        p.set_yticks([0, 1])

    # add reference line and adjust legend ordering
    if h_ref_line is not None:
        p.axhline(h_ref_line[0],
                  ls='-',
                  c='k',
                  lw=3,
                  alpha=.3,
                  label=h_ref_line[1])
        new_index = [0, 5, 3, 1, 6, 4, 2, 7]  # for legend ordering
    else:
        new_index = [0, 4, 3, 1, 5, 2, 6]

    # legend
    fsize = 15
    handles, labels = p.get_legend_handles_labels()
    handles = list(handles) + band_patches
    labels = list(labels) + ['p-50', 'p-90', 'p-99']
    p.legend([handles[k] for k in new_index],
             [labels[k] for k in new_index],
             loc=legend_loc,
             ncol=3,
             prop={'size': fsize - 2},
             numpoints=1)

    # axes & labels
    if y_lim is not None:
        p.set_ylim(y_lim)
    if y_label is not None:
        p.set_ylabel(y_label, fontsize=fsize)
    if x_label is not None:
        p.set_xlabel(x_label, fontsize=fsize)
    p.set_title(title, fontsize=fsize)
    p.tick_params(axis='x', labelsize=fsize - 2)
    p.tick_params(axis='y', labelsize=fsize - 2)

    # save figure
    if save:
        plt.savefig(save_name, dpi=200, bbox_inches='tight')
    plt.draw()

    # return underlying data
    if data_return:
        return df