def w_integral(s,W_ax,Y_ax):
    """
        The integral part of the Bellman-equation.
    """
    function = lambda theta,eps,s: max(interp(s*(theta+eps),W_ax,Y_ax), \
        interp(s*(theta+eps)-COST,W_ax,v_func))*aggPDF(theta)*idiPDF(eps)
    return dblquad(function,MIN_VAL_E,MAX_VAL_E,lambda x: MIN_VAL_AG, lambda x: MAX_VAL_AG, args=(s,))
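
Note that w_integral returns the raw output of scipy.integrate.dblquad, a (value, abserr) tuple, and that the integrand's first argument is the inner variable. A minimal, self-contained check of that calling convention with a toy integrand (not the model's PDFs):

from scipy.integrate import dblquad

# toy integrand s*(theta + eps); theta is the inner variable, eps the outer
value, abserr = dblquad(lambda theta, eps, s: s * (theta + eps),
                        0, 2,                          # outer limits for eps
                        lambda eps: 0, lambda eps: 1,  # inner limits for theta
                        args=(0.5,))
print(value)  # analytic result: 1.5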
	def __call__(self, x):
		if isinstance(x, list):
			y = scipy.interp(x, self['x'], self['y'])
		else:
			y = scipy.interp([x], self['x'], self['y'])
			y = y[0]
		return y
    def e_r_read(self, waves):
        p = open("MaterialsDataFiles/" + self.material + ".txt", "r")
        string = p.read()
        p.close()
        e_r = []
        w = []
        n = []
        k = []
        linecounter = 0
        for thisline in string.split("\n"):
            for x in thisline.split():
                if linecounter == 0 and len(x) > 0:
                    w.append(float(x))
                if linecounter == 1 and len(x) > 0:
                    n.append(float(x))
                if linecounter == 2 and len(x) > 0:
                    k.append(float(x))
            linecounter += 1

        # interpolate n&k values
        n_new = sp.interp(waves, w, n)
        k_new = sp.interp(waves, w, k)
        e_r_new = []
        # calculate epsilon from n&k for every wavelength
        for i in range(len(waves)):
            e_r_new.append(
                n_new[i] ** 2 - k_new[i] ** 2 + 2j * n_new[i] * k_new[i]
            )  # calculate the complex epsilon from n&k

        # permittivity is stored as a pair: the wavelength grid and the complex epsilon values
        e_r.append(waves)
        e_r.append(e_r_new)

        self.permittivity = e_r
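
The expression above is the expansion of the complex refractive index: epsilon = (n + ik)^2 = n^2 - k^2 + 2ink. A one-line sanity check with made-up n, k values (not from any material file):

import numpy as np

n, k = 1.5, 0.2  # illustrative values
assert np.isclose(n**2 - k**2 + 2j*n*k, (n + 1j*k)**2)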
def plot_allkfolds_ROC(timestamp, cv, fpr_arr, tpr_arr):

    sns.set(style="white", palette="muted", color_codes=True)

    mean_tpr = 0.0
    all_roc_auc = []
    bins_roc = np.linspace(0, 1, 300)
    with plt.style.context('seaborn-muted'):
        fig, ax = plt.subplots(figsize=(10, 8))
        for i, (train, test) in enumerate(cv):
            mean_tpr += interp(bins_roc, fpr_arr[i], tpr_arr[i])
            mean_tpr[0] = 0.0
            roc_auc = metrics.auc(fpr_arr[i], tpr_arr[i])
            all_roc_auc.append(roc_auc)
            ax.plot(fpr_arr[i], tpr_arr[i], lw=1, label='KFold %d (AUC = %0.2f)' % (i, roc_auc))
        ax.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Random')

        mean_tpr /= len(cv)
        mean_tpr[-1] = 1.0
        mean_auc = np.mean(all_roc_auc)
        ax.plot(bins_roc, mean_tpr, 'k--',
             label='Mean ROC (AUC = %0.2f)' % mean_auc, lw=2)

        ax.set_xlim([-0.05, 1.05])
        ax.set_ylim([-0.05, 1.05])
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title('Receiver Operating Characteristic')
        ax.legend(loc="lower right")
        plt.savefig('{}_roc.png'.format(timestamp))
    plt.close('all') 
    return mean_auc
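
The mean-ROC pattern used here (and in several cross-validation examples below) resamples every fold's TPR onto one shared FPR grid before averaging. A minimal sketch with two hand-made folds standing in for real classifier output:

import numpy as np

bins = np.linspace(0, 1, 5)
folds = [(np.array([0.0, 0.2, 1.0]), np.array([0.0, 0.7, 1.0])),   # (fpr, tpr)
         (np.array([0.0, 0.5, 1.0]), np.array([0.0, 0.6, 1.0]))]

mean_tpr = np.zeros_like(bins)
for fpr, tpr in folds:
    mean_tpr += np.interp(bins, fpr, tpr)  # resample onto the common grid
mean_tpr /= len(folds)
mean_tpr[0], mean_tpr[-1] = 0.0, 1.0       # pin the curve's endpoints
print(mean_tpr)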
Example #6
    def calc_mean(self, intensity):
        """
        Calculate mean loss ratio and sigma based on the specified points on
        the curve:
                        |
                        |                                +
                        |                           +
        Mean loss ratio |                     +
                        |               +
                        |            +
                        |          +
                        |         +
                        |        +
                        |       +
                        |    +
                        | +
                        +-----------------------------------
                               Intensity measure level

        For a given intensity, the mean loss and sigma are determined by
        linearly interpolating the points on the curve.

        Note that sigma is calculated as cv * mean loss, since
        cv = sigma / mean loss.
        """
        mean_loss = interp(intensity,
                           self.intensity_measure_level,
                           self.mean_loss)
        cv = interp(intensity,
                    self.intensity_measure_level,
                    self.coefficient_of_variation)
        # cv = sigma / mean
        sigma = cv * mean_loss

        return (mean_loss, sigma)
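
A worked instance using the curve points and cv = 0.3 that the test below also uses: at intensity 9, between (8.99, 0.13) and (9.05, 0.15),

import numpy as np

mean_loss = np.interp(9, [8.99, 9.05], [0.13, 0.15])  # 0.13 + (1/6)*0.02 ~ 0.1333
sigma = 0.3 * mean_loss                               # sigma = cv * mean loss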
Example #7
    def test_calc_mean(self):
        vulnerability_set = self.from_xml()

        intensity_level = 9

        expected_w1timbermetal_mean = interp(intensity_level, [8.99, 9.05], [0.13, 0.15])
        expected_w1timbermetal_sigma = expected_w1timbermetal_mean * 0.3
        (w1timbermetal_mean, w1timbermetal_sigma) = vulnerability_set.calc_mean("W1TIMBERMETAL", intensity_level)
        self.assertEqual(
            expected_w1timbermetal_mean,
            w1timbermetal_mean,
            self.msg % (expected_w1timbermetal_mean, w1timbermetal_mean),
        )
        self.assertEqual(
            expected_w1timbermetal_sigma,
            w1timbermetal_sigma,
            self.msg % (expected_w1timbermetal_sigma, w1timbermetal_sigma),
        )

        expected_w1bvmetal_mean = interp(intensity_level, [8.99, 9.05], [0.84, 0.85])
        expected_w1bvmetal_sigma = expected_w1bvmetal_mean * 0.3
        (w1bvmetal_mean, w1bvmetal_sigma) = vulnerability_set.calc_mean("W1BVMETAL", intensity_level)
        self.assertEqual(expected_w1bvmetal_mean, w1bvmetal_mean, self.msg % (expected_w1bvmetal_mean, w1bvmetal_mean))
        self.assertEqual(
            expected_w1bvmetal_sigma, w1bvmetal_sigma, self.msg % (expected_w1bvmetal_sigma, w1bvmetal_sigma)
        )
Example #8
 def __call__(self, z):
     """Parameters: z is a number, sequence or array.
     This method makes an instance f of LinInterp callable,
     so f(z) returns the interpolation value(s) at z.
     """
     if isinstance(z, (int, float)):
         return interp([z], self.X, self.Y)[0]
     else:
         return interp(z, self.X, self.Y)
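
A usage sketch, wrapping the method in a minimal LinInterp with the X and Y attributes the docstring implies (hypothetical scaffolding, not the original class):

from numpy import interp

class LinInterp:
    def __init__(self, X, Y):
        self.X, self.Y = X, Y

    def __call__(self, z):
        if isinstance(z, (int, float)):
            return interp([z], self.X, self.Y)[0]
        return interp(z, self.X, self.Y)

f = LinInterp([0.0, 1.0, 2.0], [0.0, 10.0, 20.0])
print(f(0.5))         # scalar in, scalar out: 5.0
print(f([0.5, 1.5]))  # sequence in, array out: [ 5. 15.]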
Example #9
def interpolate_data(data, interpolation_timestep_ms):
	'''
		Interpolates all axes of the data array such that samples occur at equidistant timestamps.
	'''
	samples_count = len(data[:,TIME_AXIS_INDEX])
	timestamps = np.arange(0.0, data[samples_count-1,TIME_AXIS_INDEX], interpolation_timestep_ms, dtype=np.float64)
	interx = scipy.interp(timestamps, data[:,TIME_AXIS_INDEX], data[:,X_AXIS_INDEX])
	intery = scipy.interp(timestamps, data[:,TIME_AXIS_INDEX], data[:,Y_AXIS_INDEX])
	interz = scipy.interp(timestamps, data[:,TIME_AXIS_INDEX], data[:,Z_AXIS_INDEX])
	return np.array([timestamps, interx, intery, interz]).transpose()
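
The resampling idea in isolation, on made-up unevenly spaced samples (column 0 holds the timestamps, i.e. TIME_AXIS_INDEX == 0):

import numpy as np

data = np.array([[0.0, 1.0],   # (t, x) rows with uneven time steps
                 [3.0, 4.0],
                 [4.0, 5.0]])
t_new = np.arange(0.0, data[-1, 0], 1.0)          # equidistant timestamps
x_new = np.interp(t_new, data[:, 0], data[:, 1])  # x resampled at t_new
print(np.c_[t_new, x_new])                        # rows: (0,1), (1,2), (2,3), (3,4)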
def get_water_level(costs_all,link,proba,climat):
	'''extracts water levels. The 'proba' argument is the factor by which flood frequency is increased because of climate change or El Niño.'''
	water = DataFrame(columns=['return_period','water_level','proba'])
	for RP in [5,10,25,50,100,250,500,1000]:
		col = "{}_RP{} (dm)".format(climat,RP)
		water.loc[len(water),:]=[RP/proba,costs_all.loc[(costs_all.scenarioID==str(link))&(costs_all.partial_or_full=="full")&(costs_all.improved_2nd==0),col].values[0],proba]
	inter = water.copy()
	#s = InterpolatedUnivariateSpline(water['return_period'], water['water_level'], k=1)
	water.loc[len(water),:] = [500,interp([500],inter['return_period'].astype(float), inter['water_level'].astype(float))[0],proba]
	water.loc[len(water),:] = [1000,interp([1000],inter['return_period'].astype(float), inter['water_level'].astype(float))[0],proba]
		
	return water
Example #11
def ROC(scores):
    # Generate an ROC curve for each fold, ordered by increasing threshold
    roc = scores.groupby('user').apply(lambda x: pd.DataFrame(np.c_[roc_curve(x['genuine'], x['score'])][::-1],
                                                              columns=['far', 'frr', 'threshold']))

    # interpolate to get the same threshold values in each fold
    thresholds = np.sort(roc['threshold'].unique())
    roc = roc.groupby(level='user').apply(lambda x: pd.DataFrame(np.c_[thresholds,
                                                                       interp(thresholds, x['threshold'], x['far']),
                                                                       interp(thresholds, x['threshold'], x['frr'])],
                                                                 columns=['threshold', 'far', 'frr']))
    roc = roc.reset_index(level=1, drop=True).reset_index()
    return roc
Example #12
    def coordinate(self):
        """
        Predicted position of the target at current_time.

        @rtype: SkyCoord
        """
        ra = scipy.interp(self.current_time.jd,
                          self.ephemeris['Time'].jd,
                          self.ephemeris['R.A._(ICRF/J2000.0)']) * units.degree
        dec = scipy.interp(self.current_time.jd,
                           self.ephemeris['Time'].jd,
                           self.ephemeris['DEC_(ICRF/J2000.0)']) * units.degree
        return SkyCoord(ra, dec)
Example #13
def tophatfold(lam, flux, FWHM=0.035):
    lammin=min(lam)
    lammax=max(lam)
    dlambda=FWHM/17.
    interlam=np.arange(lammin,lammax,dlambda)
    interflux=interp(interlam,lam,flux)

    #convolve the flux array with a top-hat (boxcar) kernel via a uniform filter
    fold=sp.ndimage.filters.uniform_filter(interflux,size=17)

    #interpolate back to original grid
    fluxfold=interp(lam,interlam,fold)

    return fluxfold
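
uniform_filter with size=17 is a centered 17-point moving average (a top-hat kernel), and dlambda = FWHM/17 makes that window span exactly one FWHM. A quick check on random data that the 1-D variant matches a plain boxcar average away from the edges:

import numpy as np
from scipy.ndimage import uniform_filter1d

x = np.random.rand(100)
smoothed = uniform_filter1d(x, size=17)            # centered 17-point mean
boxcar = np.convolve(x, np.ones(17) / 17, 'valid')
assert np.allclose(smoothed[8:-8], boxcar)         # interior points agree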
Example #14
    def execute(self):

        #print 'in CPCT_Interpolate'

        wind_speed_ax = np.cos(self.yaw*np.pi/180.0)**(self.pP/3.0)*self.wind_speed_hub
        # use interpolation on precalculated CP-CT curve
        wind_speed_ax = np.maximum(wind_speed_ax, self.windSpeedToCPCT.wind_speed[0])
        wind_speed_ax = np.minimum(wind_speed_ax, self.windSpeedToCPCT.wind_speed[-1])
        self.CP = interp(wind_speed_ax, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP)
        self.CT = interp(wind_speed_ax, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT)

        # normalize on incoming wind speed to correct coefficients for yaw
        self.CP = self.CP * np.cos(self.yaw*np.pi/180.0)**self.pP
        self.CT = self.CT * np.cos(self.yaw*np.pi/180.0)**2
def populate_wing_sections(avl_wing,suave_wing):
	symm     = avl_wing.symmetric
	sweep    = avl_wing.sweep
	dihedral = avl_wing.dihedral
	span     = suave_wing.spans.projected
	semispan = suave_wing.spans.projected * 0.5 * (2 - symm)
	origin   = suave_wing.origin
	root_section = Section()
	root_section.tag    = 'root_section'
	root_section.origin = origin
	root_section.chord  = suave_wing.chords.root
	root_section.twist  = suave_wing.twists.root
	
	tip_section = Section()
	tip_section.tag  = 'tip_section'
	tip_section.chord = suave_wing.chords.tip
	tip_section.twist = suave_wing.twists.tip
	tip_section.origin = [origin[0]+semispan*np.tan(sweep),origin[1]+semispan,origin[2]+semispan*np.tan(dihedral)]
	
	if avl_wing.vertical:
		tip_section.origin[1], tip_section.origin[2] = tip_section.origin[2], tip_section.origin[1]
	
	avl_wing.append_section(root_section)
	avl_wing.append_section(tip_section)
	
	if suave_wing.control_surfaces:
		for ctrl in suave_wing.control_surfaces:
			num = 1
			for section in ctrl.sections:
				semispan_fraction = (span/semispan) * section.origins.span_fraction
				s = Section()
				s.chord  = scipy.interp(semispan_fraction,[0.,1.],[root_section.chord,tip_section.chord])
				s.tag    = '{0}_section{1}'.format(ctrl.tag,num)
				s.origin = section.origins.dimensional
				s.origin[0] = s.origin[0] - s.chord*section.origins.chord_fraction
				s.twist  = scipy.interp(semispan_fraction,[0.,1.],[root_section.twist,tip_section.twist])
				c = Control_Surface()
				c.tag     = ctrl.tag
				c.x_hinge = 1. - section.chord_fraction
				c.sign_duplicate = ctrl.deflection_symmetry
				
				s.append_control_surface(c)
				avl_wing.append_section(s)
				num += 1
	
	return avl_wing
Example #16
    def provideJ(self):

        # print 'in CPCT_Interpolate - provideJ'

        # standard central differencing
        # set step size for finite differencing
        h = 1e-6

        # calculate upper and lower function values
        wind_speed_ax_high_yaw = np.cos((self.yaw+h)*np.pi/180.0)**(self.pP/3.0)*self.wind_speed_hub
        wind_speed_ax_low_yaw = np.cos((self.yaw-h)*np.pi/180.0)**(self.pP/3.0)*self.wind_speed_hub
        wind_speed_ax_high_wind = np.cos(self.yaw*np.pi/180.0)**(self.pP/3.0)*(self.wind_speed_hub+h)
        wind_speed_ax_low_wind = np.cos(self.yaw*np.pi/180.0)**(self.pP/3.0)*(self.wind_speed_hub-h)

        # use interpolation on precalculated CP-CT curve
        wind_speed_ax_high_yaw = np.maximum(wind_speed_ax_high_yaw, self.windSpeedToCPCT.wind_speed[0])
        wind_speed_ax_low_yaw = np.maximum(wind_speed_ax_low_yaw, self.windSpeedToCPCT.wind_speed[0])
        wind_speed_ax_high_wind = np.maximum(wind_speed_ax_high_wind, self.windSpeedToCPCT.wind_speed[0])
        wind_speed_ax_low_wind = np.maximum(wind_speed_ax_low_wind, self.windSpeedToCPCT.wind_speed[0])

        wind_speed_ax_high_yaw = np.minimum(wind_speed_ax_high_yaw, self.windSpeedToCPCT.wind_speed[-1])
        wind_speed_ax_low_yaw = np.minimum(wind_speed_ax_low_yaw, self.windSpeedToCPCT.wind_speed[-1])
        wind_speed_ax_high_wind = np.minimum(wind_speed_ax_high_wind, self.windSpeedToCPCT.wind_speed[-1])
        wind_speed_ax_low_wind = np.minimum(wind_speed_ax_low_wind, self.windSpeedToCPCT.wind_speed[-1])

        CP_high_yaw = interp(wind_speed_ax_high_yaw, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP)
        CP_low_yaw = interp(wind_speed_ax_low_yaw, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP)
        CP_high_wind = interp(wind_speed_ax_high_wind, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP)
        CP_low_wind = interp(wind_speed_ax_low_wind, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP)

        CT_high_yaw = interp(wind_speed_ax_high_yaw, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT)
        CT_low_yaw = interp(wind_speed_ax_low_yaw, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT)
        CT_high_wind = interp(wind_speed_ax_high_wind, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT)
        CT_low_wind = interp(wind_speed_ax_low_wind, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT)

        # normalize on incoming wind speed to correct coefficients for yaw
        CP_high_yaw = CP_high_yaw * np.cos((self.yaw+h)*np.pi/180.0)**self.pP
        CP_low_yaw = CP_low_yaw * np.cos((self.yaw-h)*np.pi/180.0)**self.pP
        CP_high_wind = CP_high_wind * np.cos((self.yaw)*np.pi/180.0)**self.pP
        CP_low_wind = CP_low_wind * np.cos((self.yaw)*np.pi/180.0)**self.pP

        CT_high_yaw = CT_high_yaw * np.cos((self.yaw+h)*np.pi/180.0)**2
        CT_low_yaw = CT_low_yaw * np.cos((self.yaw-h)*np.pi/180.0)**2
        CT_high_wind = CT_high_wind * np.cos((self.yaw)*np.pi/180.0)**2
        CT_low_wind = CT_low_wind * np.cos((self.yaw)*np.pi/180.0)**2

        # compute derivative via central differencing and arrange in sub-matrices of the Jacobian
        dCP_dyaw = np.eye(self.nTurbines)*(CP_high_yaw-CP_low_yaw)/(2.0*h)
        dCP_dwind = np.eye(self.nTurbines)*(CP_high_wind-CP_low_wind)/(2.0*h)
        dCT_dyaw = np.eye(self.nTurbines)*(CT_high_yaw-CT_low_yaw)/(2.0*h)
        dCT_dwind = np.eye(self.nTurbines)*(CT_high_wind-CT_low_wind)/(2.0*h)

        # compile full Jacobian from sub-matrices
        dCP = np.hstack((dCP_dyaw, dCP_dwind))
        dCT = np.hstack((dCT_dyaw, dCT_dwind))
        J = np.vstack((dCP, dCT))

        return J
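
The Jacobian above is assembled from the standard central-difference rule f'(x) ≈ (f(x+h) - f(x-h)) / (2h); the same recipe in miniature, on a function with a known derivative:

import numpy as np

def central_diff(f, x, h=1e-6):
    return (f(x + h) - f(x - h)) / (2.0 * h)

print(central_diff(np.sin, 0.3))  # ~ cos(0.3) = 0.95533...
print(np.cos(0.3))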
Example #17
    def _read_iop_from_file(self, file_name):
        """
        Generic IOP reader that interpolates the iop to the common wavelengths defined in the constructor

        returns: interpolated iop
        """
        lg.info('Reading :: ' + file_name + ' :: and interpolating to ' + str(self.wavelengths))

        if os.path.isfile(file_name):
            iop_reader = csv.reader(open(file_name), delimiter=',', quotechar='"')
            wave = scipy.float32(next(iop_reader))
            iop = scipy.zeros_like(wave)
            for row in iop_reader:
                iop = scipy.vstack((iop, row))

            iop = scipy.float32(iop[1:, :])  # drop the first row of zeros
        else:
            lg.exception('Problem reading file :: ' + file_name)
            raise IOError

        try:
            int_iop = scipy.zeros((iop.shape[0], self.wavelengths.shape[1]))
            for i_iter in range(0, iop.shape[0]):
                # r = scipy.interp(self.wavelengths[0, :], wave, iop[i_iter, :])
                int_iop[i_iter, :] = scipy.interp(self.wavelengths, wave, iop[i_iter, :])
            return int_iop
        except IOError:
            lg.exception('Error interpolating IOP to common wavelength')
            return -1
Example #18
    def bellman_operator(self, w, compute_policy=False):
        """
        The approximate Bellman operator, which computes and returns the 
        updated value function Tw on the grid points.

        Parameters
        ==========
            w : a flat NumPy array with len(w) = len(grid)

        The vector w represents the value of the input function on the grid
        points.

        """
        # === Apply linear interpolation to w === #
        Aw = lambda x: interp(x, self.grid, w)  

        if compute_policy:
            sigma = np.empty(len(w))

        # === set Tw[i] equal to max_c { u(c) + beta w(f(k_i) - c)} === #
        Tw = np.empty(len(w))
        for i, k in enumerate(self.grid):
            objective = lambda c:  - self.u(c) - self.beta * Aw(self.f(k) - c)
            c_star = fminbound(objective, 1e-6, self.f(k))
            if compute_policy:
                # sigma[i] = argmax_c { u(c) + beta w(f(k_i) - c)} 
                sigma[i] = c_star
            Tw[i] = - objective(c_star)

        if compute_policy:
            return Tw, sigma
        else:
            return Tw
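
In practice the operator is applied repeatedly until the value function stops changing. A self-contained sketch of that loop on a toy log-utility growth model (illustrative parameters, not the class this method belongs to):

import numpy as np
from numpy import interp
from scipy.optimize import fminbound

grid = np.linspace(1e-2, 2.0, 50)
beta, alpha = 0.95, 0.3
u = np.log                      # utility
f = lambda k: k**alpha          # production

def T(w):
    Aw = lambda x: interp(x, grid, w)
    Tw = np.empty_like(w)
    for i, k in enumerate(grid):
        objective = lambda c: -u(c) - beta * Aw(f(k) - c)
        c_star = fminbound(objective, 1e-6, f(k))
        Tw[i] = -objective(c_star)
    return Tw

w = np.zeros(len(grid))
while True:                     # fixed-point iteration
    w_new = T(w)
    if np.max(np.abs(w_new - w)) < 1e-4:
        break
    w = w_new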
Example #19
def crossval_roc(X, y):
    cv = StratifiedKFold(y, n_folds=10)
    clf = RandomForestClassifier()

    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []

    for i, (train, test) in enumerate(cv):
        fitted = clf.fit(X[train], y[train])
        probas_ = fitted.predict_proba(X[test])
        # Compute ROC curve and area the curve
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = roc_auc_score(y[test], probas_[:, 1])
        #plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))


    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    return plt.plot(mean_fpr, mean_tpr, 
                    label='Mean ROC (area = %0.2f)' % mean_auc, lw=1)
Example #20
def plot_roc_cv(classifier, X, y, cv):
    '''
    cv = KFold(len(y),n_folds=5)
    '''
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []

    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        # Compute ROC curve and area the curve
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--',
             label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()
Example #21
def plot_roc_estimator(estimator, x, y):
    kf = KFold(len(y), n_folds=10, shuffle=True)
    y_prob = np.zeros((len(y), 2))
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    for i, (train_index, test_index) in enumerate(kf):
        x_train, x_test = x[train_index], x[test_index]
        y_train = y[train_index]

        estimator.fit(x_train, y_train)
        y_prob[test_index] = estimator.predict_proba(x_test)
        fpr, tpr, thresholds = roc_curve(y[test_index], y_prob[test_index, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
    mean_tpr /= len(kf)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Random')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.show()
Example #22
def plot_roc_class(x, y, fit_class, **kwargs):
    kf = KFold(len(y), n_folds=10, shuffle=True)
    y_prob = np.zeros((len(y), 2))
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    for i, (train_index, test_index) in enumerate(kf):
        x_train, x_test = x[train_index], x[test_index]
        y_train = y[train_index]
        clf = fit_class(**kwargs)
        clf.fit(x_train, y_train)
        # Predict probabilities, not classes
        y_prob[test_index] = clf.predict_proba(x_test)
        fpr, tpr, thresholds = roc_curve(y[test_index], y_prob[test_index, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
    mean_tpr /= len(kf)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Random')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.show()
Example #23
def draw(X, y, classifier):
    cv = StratifiedKFold(y, n_folds=6)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []

    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label="ROC fold %d (area = %0.2f)" % (i, roc_auc))

    plt.plot([0, 1], [0, 1], "--", color=(0.6, 0.6, 0.6), label="Luck")

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, "k--", label="Mean ROC (area = %0.2f)" % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Receiver operating characteristic example")
    plt.legend(loc="lower right")
    plt.show()
Example #24
def classify_only(X, Y, model):
    cv = cross_validation.StratifiedKFold(Y, n_folds=K_FOLDS)
    # print len(Y)
    mean_tpr = 0.0
    mean_fpr = numpy.linspace(0, 1, 100)
    all_tpr = []

    for i, (train, test) in enumerate(cv):
        probas_ = model.fit(X.values[train], Y.values[train]).predict_proba(X.values[test])
        # Compute ROC curve and area the curve
        fpr, tpr, thresholds = roc_curve(Y.values[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example: '+model.__class__.__name__)
    plt.legend(loc="lower right")
    plt.show()
    print "Plot done"
Example #25
    def bellman_operator(self, w, compute_policy=False):
        """
        The approximate Bellman operator, which computes and returns the
        updated value function Tw on the grid points.

        Parameters
        ----------
        w : array_like(float, ndim=1)
            The value of the input function on different grid points
        compute_policy : Boolean, optional(default=False)
            Whether or not to compute policy function

        """
        # === Apply linear interpolation to w === #
        Aw = lambda x: interp(x, self.grid, w)

        if compute_policy:
            sigma = np.empty(len(w))

        # == set Tw[i] equal to max_c { u(c) + beta w(f(k_i) - c)} == #
        Tw = np.empty(len(w))
        for i, k in enumerate(self.grid):
            objective = lambda c: - self.u(c) - self.beta * Aw(self.f(k) - c)
            c_star = fminbound(objective, 1e-6, self.f(k))
            if compute_policy:
                # sigma[i] = argmax_c { u(c) + beta w(f(k_i) - c)}
                sigma[i] = c_star
            Tw[i] = - objective(c_star)

        if compute_policy:
            return Tw, sigma
        else:
            return Tw
Example #26
def draw_roc_curve(classifier, cv, X, y):
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)

    colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange'])
    lw = 2

    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        # Compute ROC curve and area the curve
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k',
             label='Luck')

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--',
             label='Mean ROC (area = %0.2f)' % mean_auc, lw=lw)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.show()
def calculate_roc(truth, predictions):
    lb_truth = label_binarize(truth.iloc[:, -1].astype(int), classes=np.arange(n_classes))
    lb_prediction = label_binarize(predictions.iloc[:, -1].astype(int), classes=np.arange(n_classes))

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(lb_truth[:, i], lb_prediction[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(lb_truth.ravel(), lb_prediction.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    
    return fpr, tpr, roc_auc
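
The macro-averaging recipe above in isolation: take the union of every class's FPR values, resample each class's TPR onto it, and average (two hand-made class curves for illustration):

import numpy as np

fpr = {0: np.array([0.0, 0.1, 1.0]), 1: np.array([0.0, 0.4, 1.0])}
tpr = {0: np.array([0.0, 0.8, 1.0]), 1: np.array([0.0, 0.5, 1.0])}

all_fpr = np.unique(np.concatenate([fpr[i] for i in range(2)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(2):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= 2
print(all_fpr, mean_tpr)  # grid [0, 0.1, 0.4, 1] and the averaged TPR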
def LogisticRegressionROC(Data,Label,TrueLabel, Color,legend, marker):
    Majority_Logistic = LogisticRegression()
    Majority_Logistic.verbose=0
    N_folds=5
    kf = cv.KFold(Data.shape[0], n_folds=N_folds)

    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)

    for i, (train, test) in enumerate(kf):
        probas_ = Majority_Logistic.fit(Data[train], Label[train]).predict_proba(Data[test])
        # Compute ROC curve and area the curve
        fpr, tpr, thresholds = roc_curve(TrueLabel[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        #roc_auc = auc(fpr, tpr)
        #plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
    
    mean_tpr /= len(kf)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    
    plt.plot(mean_fpr, mean_tpr, marker ,color=Color,label='%s AUC:%0.2f' % (legend,mean_auc), lw=2)
    
    plt.xlim([-0.05,1.05])
    plt.ylim([-0.05,1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('R.O.C. Curves for iono data')
    plt.legend(loc="lower right")
Example #29
def predictForTarget(target):
    modTargets = numpy.array([(-1.0, 1.0)[x == target] for x in targets])

    mean_tpr = 0.0
    mean_fpr = numpy.linspace(0, 1, 100)

    for trainIndices, testIndices in cv:
        if algorithm.startswith("svm"):
            clf = sklearn.svm.SVC(C=1.0, kernel=algorithm.replace("svm", ""), gamma=0.0, shrinking=True, probability=True, tol=0.001, cache_size=200, class_weight='auto', verbose=False, max_iter=-1, random_state=0)

        trainData = data[trainIndices,]
        trainTargets = modTargets[trainIndices]
        testData = data[testIndices,]
        testTargets = modTargets[testIndices]

        model = clf.fit(trainData, trainTargets)
        probs = model.predict_proba(testData)[:,1]

        fpr, tpr, thresholds = sklearn.metrics.roc_curve(testTargets, probs)
        mean_tpr += scipy.interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = sklearn.metrics.auc(fpr, tpr)

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0

    return sklearn.metrics.auc(mean_fpr, mean_tpr)
Example #30
    def find_fingers3(self, img, prev_img):
        if img is None or prev_img is None:
            return []

        crop_spec = [0, 0, img.width, img.height]
        scale_factor = 2
        r1 = img.grayscale().crop(*crop_spec)
        r2 = prev_img.grayscale().crop(*crop_spec)

        # modified
        diff = (r2 - r1).binarize(40)
        edge_mask = diff.erode(5).dilate(5) - diff.erode(5)
        edge_mask = edge_mask.dilate(5)
        scaled = (diff.edges() & edge_mask).resize(r1.width / scale_factor)

        points = []
        for x in range(scaled.width):
            points.append(scaled.edgeIntersections((x, 0), (x, scaled.height))[0])
        points = [xy for xy in points if xy is not None]
        if not points:
            return []

        xs = range(scaled.width)
        ys = scipy.interp(range(scaled.width), [a[0] for a in points], [a[1] for a in points])
        peaks = scipy.signal.find_peaks_cwt(-ys, np.arange(7, 11))
        if len(peaks) == 0:
            return []

        positions = np.array(zip(peaks, np.array(ys)[peaks])) * scale_factor + np.array(crop_spec[:2])

        return positions
Example #31
def runClassification_CV(data_folder, cfg, classifier):
    print "Gather dataset"
    train_x, train_y = gatherAllData(data_folder, cfg)

    model = classifier[0]
    clf_name = classifier[1]

    #Report Cross-Validation Accuracy
    scores = cross_val_score(model,
                             np.asarray(train_x),
                             np.asarray(train_y),
                             cv=10)
    print clf_name
    print "Avg. Accuracy: " + str(sum(scores) / float(len(scores)))

    cv = KFold(n_splits=10)
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    #Split the data in k-folds, perform classification, and report ROC
    i = 0
    for train, test in cv.split(train_x, train_y):
        probas_ = model.fit(
            np.asarray(train_x)[train],
            np.asarray(train_y)[train]).predict_proba(
                np.asarray(train_x)[test])

        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(
            np.asarray(train_y)[test], probas_[:, 1])
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        #plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))
        i += 1

    plt.plot([0, 1], [0, 1],
             linestyle='--',
             lw=2,
             color='r',
             label='Random Guess',
             alpha=.8)

    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)

    unblock70 = True
    unblock80 = True
    unblock90 = True
    unblock95 = True
    for n, i in enumerate(mean_tpr):
        if (i >= 0.7 and unblock70):
            print 'FPR at 70% TPR = ' + str(mean_fpr[n])
            unblock70 = False
        if (i >= 0.8 and unblock80):
            print 'FPR at 80% TPR = ' + str(mean_fpr[n])
            unblock80 = False
        if (i >= 0.9 and unblock90):
            print 'FPR at 90% TPR = ' + str(mean_fpr[n])
            unblock90 = False
        if (i >= 0.95 and unblock95):
            print 'FPR at 95% TPR = ' + str(mean_fpr[n])
            unblock95 = False

    #Figure properties
    fig = plt.figure()
    ax1 = fig.add_subplot(111)

    std_auc = np.std(aucs)
    plt.plot(mean_fpr,
             mean_tpr,
             color='b',
             label=r'Mean ROC (AUC = %0.2f $\pm$ %0.3f)' % (mean_auc, std_auc),
             lw=2,
             alpha=.8)

    #Compute Standard Deviation between folds
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    plt.fill_between(mean_fpr,
                     tprs_lower,
                     tprs_upper,
                     color='grey',
                     alpha=.3,
                     label=r'$\pm$ ROC Std. Dev.')

    ax1.plot([0, 1], [0, 1], 'k--', lw=2, color='orange', label='Random Guess')
    ax1.grid(color='black', linestyle='dotted')

    plt.title('Receiver Operating Characteristic (ROC)')
    plt.xlabel('False Positive Rate', fontsize='x-large')
    plt.ylabel('True Positive Rate', fontsize='x-large')
    plt.legend(loc='lower right', fontsize='large')

    plt.setp(ax1.get_xticklabels(), fontsize=14)
    plt.setp(ax1.get_yticklabels(), fontsize=14)

    fig.savefig('xgBoost/' + "ROC_" + clf_name + "_" + cfg[1] +
                ".pdf")  # save the figure to file
    plt.close(fig)
Example #32
def model_evaluation(model, fig_prefix=''):
    """
    Evaluate the trained model: multiclass classification.
    :param model:
    :param fig_prefix:
    :return:
    """
    model.load_weights(model_store_path)
    pre = model.predict(x_test)

    # compute the ROC curve and ROC AUC for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(CLASS_NUMBER):
        fpr[i], tpr[i], thresholds_ = roc_curve(y_test[:, i], pre[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr['micro'], tpr['micro'], _ = roc_curve(y_test.ravel(), pre.ravel())
    roc_auc['micro'] = auc(fpr['micro'], tpr['micro'])

    # Compute macro-average ROC curve and ROC area
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(CLASS_NUMBER)]))
    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(CLASS_NUMBER):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])
    # Finally average it and compute AUC
    mean_tpr /= CLASS_NUMBER
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Plot all ROC curves
    lw = 2
    fig = plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro (area={0:0.2f})'
                   ''.format(roc_auc["micro"]),
             color='deeppink', linestyle=':', linewidth=lw)
    plt.plot(fpr["macro"], tpr["macro"],
             label='macro (area={0:0.2f})'
                   ''.format(roc_auc["macro"]),
             color='blue', linestyle=':', linewidth=lw)
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'pink', 'chocolate',
                    'seagreen', 'mediumslateblue', 'orangered', 'slategray'])
    for i, color in zip(range(CLASS_NUMBER), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                 label='{0} (area={1:0.2f})'
                       ''.format(label_strs[i], roc_auc[i]))
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve of compiler family identification')
    plt.legend(loc="lower right")
    # plt.show()
    fig.savefig(fig_prefix + '#ROC-curve.eps')
    plt.close(fig)

    for i in range(len(pre)):
        max_value = max(pre[i])
        for j in range(len(pre[i])):
            if max_value == pre[i][j]:
                pre[i][j] = 1
            else:
                pre[i][j] = 0
    # generate the classification report
    report_str = str(classification_report(y_test, pre, digits=4, target_names=label_strs))
    with open(fig_prefix + '#classification_report.txt', 'w') as f:
        f.write(report_str)
    print(report_str)
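
The nested loop near the end of the function above, which snaps each probability row to a one-hot vector, can be written vectorized. Note the loop marks every tied maximum, while the argmax version below keeps only the first (a sketch with toy probabilities):

import numpy as np

pre = np.array([[0.1, 0.7, 0.2],
                [0.5, 0.3, 0.2]])
one_hot = np.eye(pre.shape[1])[pre.argmax(axis=1)]
print(one_hot)  # [[0. 1. 0.], [1. 0. 0.]]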
def train_cross_validate(n_folds, data_dir, categories, image_size,
                         num_epochs):
    # initialize stratifying k fold
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=SEED)

    # open pickle files
    # features = pickle.load(open("features.pickle","rb")) #already reshaped as numpy array
    # features = features/255.0
    # labels = pickle.load(open("labels.pickle","rb"))
    # labels = np.array(labels)
    # img_names = pickle.load(open("img_names.pickle","rb"))

    # data frame to save values of loss and validation after each fold
    df = pd.DataFrame()
    #obtain images
    data = import_images(data_dir, categories, image_size)
    # features = data[0]
    # labels = data[1]
    print("Stored features and labels")
    # for roc plotting
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # for index, (train_indices, val_indices) in enumerate(skf.split(features, labels)):
    # 	print("Training on fold " + str(index + 1) + "/" + str(n_folds))
    # 	train_features = features[train_indices]
    # 	train_labels = labels[train_indices]
    # 	print("Training data obtained")
    # 	val_features = features[val_indices]
    # 	val_labels = labels[val_indices]
    # 	print("Validation data obtained")
    # train_labels, val_labels = labels[train_indices], labels[val_indices]
    for index in range(n_folds):
        boot = resample(data,
                        replace=True,
                        n_samples=len(data),
                        random_state=index)
        booted_imgs = set([])
        train_features = []
        train_labels = []
        for entry in boot:
            train_features.append(entry[0])
            train_labels.append(entry[1])
            booted_imgs.add(entry[2])

        val_features = []
        val_labels = []

        for entry in data:
            if entry[2] not in booted_imgs:
                val_features.append(entry[0])
                val_labels.append(entry[1])

        #reshape into numpy array
        train_features = np.array(train_features).reshape(
            -1, image_size, image_size, 3)  #3 bc three channels for RGB values
        train_labels = np.array(train_labels)
        val_features = np.array(val_features).reshape(
            -1, image_size, image_size, 3)  #3 bc three channels for RGB values
        val_labels = np.array(val_labels)
        # Create new model each time
        print("Training on fold " + str(index + 1) + "/" + str(n_folds))
        model = None
        model = build_model(image_size)
        print("Training model")
        es_callback = EarlyStopping(monitor='val_loss',
                                    patience=4,
                                    restore_best_weights=True)
        history = model.fit(train_features,
                            train_labels,
                            batch_size=64,
                            epochs=num_epochs,
                            callbacks=[es_callback],
                            validation_data=(val_features, val_labels))
        # save values of loss and accuracy into df
        len_history = len(history.history['loss'])
        df = df.append([[
            index + 1, history.history['loss'][len_history - 1],
            history.history['acc'][len_history - 1],
            history.history['val_loss'][len_history - 1],
            history.history['val_acc'][len_history - 1]
        ]])
        # model_json = model.to_json()
        # with open("model.json", "w") as json_file :
        # 	json_file.write(model_json)

        # model.save_weights("model.h5")
        # print("Saved model to disk")

        model.save('saved_models/CNN_' + str(index + 1) + '.model')

        # Printing a graph showing the accuracy changes during the training phase
        print(history.history.keys())
        plt.figure(1)
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.savefig('graphs/val_accuracy_' + str(index + 1) + '.png')
        # plt.show()
        plt.clf()

        plt.figure(2)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.savefig('graphs/val_loss_' + str(index + 1) + '.png')
        # plt.show()
        plt.clf()

        # roc curve stuff
        probas_ = model.predict_proba(val_features)
        # Compute ROC curve and area the curve
        fpr, tpr, thresholds = roc_curve(val_labels, probas_[:, 1])
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        # Plots ROC for each individual fold:
        # plt.plot(fpr, tpr, lw=1, alpha=0.3,label='ROC fold %d (AUC = %0.2f)' % (index + 1, roc_auc))
        # use the mean statistics to compare each model (that we train/test using 10-fold cv)
        mean_tpr = np.mean(tprs, axis=0)
        mean_tpr[-1] = 1.0
        mean_auc = auc(mean_fpr, mean_tpr)
        std_auc = np.std(aucs)

        # plot the mean ROC curve and display AUC (mean/st dev)
        plot_ROC_for_Kfold(mean_fpr, mean_tpr, mean_auc, std_auc)
    df = df.rename({0: 'Fold Number',\
        1: 'Training Loss',\
        2: 'Training Accuracy',\
        3: 'Validation Loss', \
        4: 'Validation Accuracy'}, axis='columns')
    df.to_csv(os.path.join('graphs', 'final_acc_loss.csv'),
              encoding='utf-8',
              index=False)
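
Each fold above trains on a bootstrap sample and validates on the images that were never drawn (out-of-bag). On average a fraction 1/e ≈ 36.8% of the data ends up out-of-bag, which a quick simulation confirms:

import numpy as np

rng = np.random.default_rng(0)
n = 10_000
drawn = rng.integers(0, n, size=n)    # bootstrap: draw n with replacement
oob = 1 - len(np.unique(drawn)) / n   # fraction never drawn
print(oob)                            # ~ 0.368 ~ 1/e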
Example #34
def eval_class_model(xtest,
                     ytest,
                     model,
                     labels: str = "labels",
                     pred_params: dict = {}):
    """generate predictions and validation stats

    pred_params are non-default, scikit-learn api prediction-function parameters.
    For example, a tree-type of model may have a tree depth limit for its prediction
    function.

    :param xtest:        features array, type Union(DataItem, DataFrame, np.array)
    :param ytest:        ground-truth labels, type Union(DataItem, DataFrame, Series, np.array, List)
    :param model:        estimated model
    :param labels:       ('labels') column name of the labels when ytest is a pd.DataFrame or Series
    :param pred_params:  (None) dict of predict-function parameters
    """
    if isinstance(ytest, (pd.DataFrame, pd.Series)):
        unique_labels = ytest[labels].unique()
        ytest = ytest.values
    elif isinstance(ytest, np.ndarray):
        unique_labels = np.unique(ytest)
    elif isinstance(ytest, list):
        unique_labels = set(ytest)

    n_classes = len(unique_labels)
    is_multiclass = n_classes > 2

    # PROBS
    ypred = model.predict(xtest, **pred_params)
    if hasattr(model, "predict_proba"):
        yprob = model.predict_proba(xtest, **pred_params)
    else:
        # todo if decision fn...
        raise Exception("not implemented for this classifier")

    # todo - calibrate
    # outputs are some stats and some plots
    # calibration should be optional: some classifiers don't need it, some do it already, many don't

    model_metrics = {
        "plots": [],  # placeholder for plots
        "accuracy": float(metrics.accuracy_score(ytest, ypred)),
        "test-error-rate": np.sum(ytest != ypred) / ytest.shape[0],
    }

    # CONFUSION MATRIX
    gcf_clear(plt)
    cmd = metrics.plot_confusion_matrix(model,
                                        xtest,
                                        ytest,
                                        normalize="all",
                                        cmap=plt.cm.Blues)
    model_metrics["plots"].append(
        PlotArtifact("confusion-matrix", body=cmd.figure_))

    if is_multiclass:
        # PRECISION-RECALL CURVES MICRO AVGED
        # binarize/hot-encode here since we look at each class
        lb = LabelBinarizer()
        ytest_b = lb.fit_transform(ytest)

        precision = dict()
        recall = dict()
        avg_prec = dict()
        for i in range(n_classes):
            precision[i], recall[i], _ = metrics.precision_recall_curve(
                ytest_b[:, i], yprob[:, i])
            avg_prec[i] = metrics.average_precision_score(
                ytest_b[:, i], yprob[:, i])
        precision["micro"], recall[
            "micro"], _ = metrics.precision_recall_curve(
                ytest_b.ravel(), yprob.ravel())
        avg_prec["micro"] = metrics.average_precision_score(ytest_b,
                                                            yprob,
                                                            average="micro")
        ap_micro = avg_prec["micro"]
        model_metrics.update({"precision-micro-avg-classes": ap_micro})

        gcf_clear(plt)
        colors = cycle(
            ["navy", "turquoise", "darkorange", "cornflowerblue", "teal"])
        plt.figure(figsize=(7, 8))
        f_scores = np.linspace(0.2, 0.8, num=4)
        lines = []
        labels = []
        for f_score in f_scores:
            x = np.linspace(0.01, 1)
            y = f_score * x / (2 * x - f_score)
            (l, ) = plt.plot(x[y >= 0], y[y >= 0], color="gray", alpha=0.2)
            plt.annotate("f1={0:0.1f}".format(f_score), xy=(0.9, y[45] + 0.02))

        lines.append(l)
        labels.append("iso-f1 curves")
        (l, ) = plt.plot(recall["micro"],
                         precision["micro"],
                         color="gold",
                         lw=10)
        lines.append(l)
        labels.append(
            f"micro-average precision-recall (area = {ap_micro:0.2f})")

        for i, color in zip(range(n_classes), colors):
            (l, ) = plt.plot(recall[i], precision[i], color=color, lw=2)
            lines.append(l)
            labels.append(
                f"precision-recall for class {i} (area = {avg_prec[i]:0.2f})")

        fig = plt.gcf()
        fig.subplots_adjust(bottom=0.25)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel("recall")
        plt.ylabel("precision")
        plt.title("precision recall - multiclass")
        plt.legend(lines, labels, loc=(0, -0.38), prop=dict(size=10))
        model_metrics["plots"].append(
            PlotArtifact("precision-recall-multiclass", body=plt.gcf()))

        # ROC CURVES
        # Compute ROC curve and ROC area for each class
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = metrics.roc_curve(ytest_b[:, i], yprob[:, i])
            roc_auc[i] = metrics.auc(fpr[i], tpr[i])

        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = metrics.roc_curve(
            ytest_b.ravel(), yprob.ravel())
        roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"])

        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

        # Then interpolate all ROC curves at these points
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(n_classes):
            mean_tpr += interp(all_fpr, fpr[i], tpr[i])

        # Finally average it and compute AUC
        mean_tpr /= n_classes

        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"])

        # Plot all ROC curves
        gcf_clear(plt)
        plt.figure()
        plt.plot(
            fpr["micro"],
            tpr["micro"],
            label="micro-average ROC curve (area = {0:0.2f})"
            "".format(roc_auc["micro"]),
            color="deeppink",
            linestyle=":",
            linewidth=4,
        )

        plt.plot(
            fpr["macro"],
            tpr["macro"],
            label="macro-average ROC curve (area = {0:0.2f})"
            "".format(roc_auc["macro"]),
            color="navy",
            linestyle=":",
            linewidth=4,
        )

        colors = cycle(["aqua", "darkorange", "cornflowerblue"])
        for i, color in zip(range(n_classes), colors):
            plt.plot(
                fpr[i],
                tpr[i],
                color=color,
                lw=2,
                label="ROC curve of class {0} (area = {1:0.2f})"
                "".format(i, roc_auc[i]),
            )

        plt.plot([0, 1], [0, 1], "k--", lw=2)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.title("receiver operating characteristic - multiclass")
        plt.legend(loc="lower right")
        model_metrics["plots"].append(
            PlotArtifact("roc-multiclass", body=plt.gcf()))
        # AUC multiclass
        model_metrics.update({
            "auc-macro":
            metrics.roc_auc_score(ytest_b,
                                  yprob,
                                  multi_class="ovo",
                                  average="macro"),
            "auc-weighted":
            metrics.roc_auc_score(ytest_b,
                                  yprob,
                                  multi_class="ovo",
                                  average="weighted"),
        })

        # others (todo - macro, micro...)
        model_metrics.update({
            "f1-score":
            metrics.f1_score(ytest, ypred, average="macro"),
            "recall_score":
            metrics.recall_score(ytest, ypred, average="macro"),
        })
    else:
        # binary
        yprob_pos = yprob[:, 1]

        model_metrics.update({
            "rocauc":
            metrics.roc_auc_score(ytest, yprob_pos),
            "brier_score":
            metrics.brier_score_loss(ytest, yprob_pos, pos_label=ytest.max()),
        })

        # precision-recall

        # ROC plot

    return model_metrics
def evaluation_report(test_data_dir,
                      checkpoints_list,
                      report_file,
                      batch_size,
                      input_shape,
                      extract_features=False,
                      feature_file=None,
                      roc_file=None):
    pred, true_labels, classes = predict(test_data_dir, checkpoints_list[0],
                                         input_shape, batch_size)
    predictions = np.zeros((len(pred), len(checkpoints_list)))
    predictions[:, 0] = pred
    for j, c in enumerate(checkpoints_list[1:]):
        pred, true_labels, classes = predict(test_data_dir, c, input_shape,
                                             batch_size)
        predictions[:, j + 1] = pred

    print(predictions.shape)
    y_pred = stats.mode(predictions, axis=1)
    y_pred = y_pred[0]
    print(y_pred.shape)
    y_pred = y_pred.ravel()

    y_pred = y_pred.astype(int)
    true_labels = true_labels.astype(int)

    acc = accuracy_score(true_labels, y_pred)
    kappa_score = cohen_kappa_score(true_labels, y_pred, weights="quadratic")
    cnf = confusion_matrix(true_labels, y_pred)
    plt.figure()
    plot_confusion_matrix(cnf,
                          classes=classes,
                          normalize=True,
                          title='Normalized confusion matrix')
    plt.show()

    y_pred = label_binarize(y_pred, classes=classes)
    true_labels = label_binarize(true_labels, classes=classes)

    print(true_labels.shape)
    print(y_pred.shape)

    num_classes = len(classes)
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(true_labels[:, i], y_pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    fpr["micro"], tpr["micro"], _ = roc_curve(true_labels.ravel(),
                                              y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(num_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(num_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= num_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    #np.savetxt('fpr_stage2.txt', fpr)
    #np.savetxt('tpr_stage2.txt', tpr)

    report = {}
    report['Accuracy'] = acc
    report['AUC1'] = roc_auc[0]
    report['AUC2'] = roc_auc[1]
    report['AUC3'] = roc_auc[2]
    report['AUC4'] = roc_auc[3]
    report['AUC_Macro'] = roc_auc["macro"]
    report['AUC_Micro'] = roc_auc["micro"]
    report['kappa'] = kappa_score
    #report['fpr'] = fpr
    #report['tpr'] = tpr
    with open(report_file, 'w') as fp:
        json.dump(report, fp)

    print('Model report. Report saved to: %s' % (report_file))
    print('Accuracy: %.2f' % report['Accuracy'])
    print('AUC1: %.2f' % report['AUC1'])
    print('AUC2: %.2f' % report['AUC2'])
    print('AUC3: %.2f' % report['AUC3'])
    print('AUC4: %.2f' % report['AUC4'])
    print('AUC (micro): %.2f' % report['AUC_Micro'])
    print('AUC (macro): %.2f' % report['AUC_Macro'])
    print('Kappa score: %.2f' % report['kappa'])

    if extract_features:
        features_model = Model(model.inputs, model.layers[-2].output)
        features = features_model.predict_generator(test_generator,
                                                    steps=test_steps)

        assert feature_file is not None
        np.savetxt(feature_file, features)

    if roc_file is not None:
        roc_title = roc_file.split('.')
        roc_title = roc_title[0]
        plt.figure()
        lw = 2
        plt.figure()
        plt.plot(fpr["micro"],
                 tpr["micro"],
                 label='micro-average ROC curve (area = {0:0.2f})'
                 ''.format(roc_auc["micro"]),
                 color='deeppink',
                 linestyle=':',
                 linewidth=4)

        plt.plot(fpr["macro"],
                 tpr["macro"],
                 label='macro-average ROC curve (area = {0:0.2f})'
                 ''.format(roc_auc["macro"]),
                 color='navy',
                 linestyle=':',
                 linewidth=4)
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic \n (' + roc_title + ')')
        plt.legend(loc="lower right")
        #plt.show()
        plt.savefig(roc_file)
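The mode-vote step near the top of this example collapses per-checkpoint class predictions into a single label per sample. A minimal standalone sketch of that step, with toy data:

# Hedged sketch of the majority vote over checkpoint predictions (toy data).
import numpy as np
from scipy import stats

predictions = np.array([[0, 0, 1],    # rows: samples, columns: checkpoints
                        [1, 1, 1],
                        [2, 0, 2]])
y_pred = stats.mode(predictions, axis=1)[0].ravel().astype(int)
print(y_pred)  # [0 1 2]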
target = y_test
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(target[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

fpr["micro"], tpr["micro"], _ = roc_curve(target.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += interp(all_fpr, fpr[i], tpr[i])

mean_tpr /= n_classes

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves
plt.figure()
plt.plot(fpr["micro"],
         tpr["micro"],
         label='micro-average ROC curve (area = {0:0.2f})'
         ''.format(roc_auc["micro"]),
         color='deeppink',
         linestyle=':',
         linewidth=4)
Example #37
def ROC_PRC(outtl, pdx, path, name, fdict, dm, accur, pmd):
    if pmd == 'immune':
        rdd = 4
    else:
        rdd = 2
    if rdd > 2:
        # Compute ROC and PRC curve and ROC and PRC area for each class
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        # PRC
        # For each class
        precision = dict()
        recall = dict()
        average_precision = dict()
        microy = []
        microscore = []
        for i in range(rdd):
            fpr[i], tpr[i], _ = sklearn.metrics.roc_curve(
                np.asarray((outtl.iloc[:,
                                       0].values == int(i)).astype('uint8')),
                np.asarray(pdx[:, i]).ravel())
            try:
                roc_auc[i] = sklearn.metrics.roc_auc_score(
                    np.asarray(
                        (outtl.iloc[:, 0].values == int(i)).astype('uint8')),
                    np.asarray(pdx[:, i]).ravel())
            except ValueError:
                roc_auc[i] = np.nan

            microy.extend(
                np.asarray((outtl.iloc[:,
                                       0].values == int(i)).astype('uint8')))
            microscore.extend(np.asarray(pdx[:, i]).ravel())

            precision[i], recall[i], _ = \
                sklearn.metrics.precision_recall_curve(np.asarray((outtl.iloc[:, 0].values == int(i)).astype('uint8')),
                                                   np.asarray(pdx[:, i]).ravel())
            try:
                average_precision[i] = \
                    sklearn.metrics.average_precision_score(np.asarray((outtl.iloc[:, 0].values == int(i)).astype('uint8')),
                                                        np.asarray(pdx[:, i]).ravel())
            except ValueError:
                average_precision[i] = np.nan

        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = sklearn.metrics.roc_curve(
            np.asarray(microy).ravel(),
            np.asarray(microscore).ravel())
        roc_auc["micro"] = sklearn.metrics.auc(fpr["micro"], tpr["micro"])

        # A "micro-average": quantifying score on all classes jointly
        precision["micro"], recall[
            "micro"], _ = sklearn.metrics.precision_recall_curve(
                np.asarray(microy).ravel(),
                np.asarray(microscore).ravel())
        average_precision["micro"] = sklearn.metrics.average_precision_score(
            np.asarray(microy).ravel(),
            np.asarray(microscore).ravel(),
            average="micro")

        # Compute macro-average ROC curve and ROC area

        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(rdd)]))

        # Then interpolate all ROC curves at these points
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(rdd):
            mean_tpr += interp(all_fpr, fpr[i], tpr[i])

        # Finally average it and compute AUC
        mean_tpr /= rdd

        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = sklearn.metrics.auc(fpr["macro"], tpr["macro"])

        # Plot all ROC curves
        plt.figure()
        plt.plot(fpr["micro"],
                 tpr["micro"],
                 label='micro-average ROC curve (area = {0:0.5f})'
                 ''.format(roc_auc["micro"]),
                 color='deeppink',
                 linestyle=':',
                 linewidth=4)

        plt.plot(fpr["macro"],
                 tpr["macro"],
                 label='macro-average ROC curve (area = {0:0.5f})'
                 ''.format(roc_auc["macro"]),
                 color='navy',
                 linestyle=':',
                 linewidth=4)

        colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'red', 'blue'])
        for i, color in zip(range(rdd), colors):
            plt.plot(fpr[i],
                     tpr[i],
                     color=color,
                     lw=2,
                     label='ROC curve of {0} (area = {1:0.5f})'.format(
                         fdict[i], roc_auc[i]))
            print('{0} {1} AUC of {2} = {3:0.5f}'.format(
                name, dm, fdict[i], roc_auc[i]))

        plt.plot([0, 1], [0, 1], 'k--', lw=2)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC of {}'.format(name))
        plt.legend(loc="lower right")
        plt.savefig("../Results/{}/out/{}_{}_ROC.png".format(path, name, dm))

        print(
            '{0} Average precision score, micro-averaged over all classes: {1:0.5f}'
            .format(name, average_precision["micro"]))
        # Plot all PRC curves
        colors = cycle([
            'navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal', 'red',
            'blue'
        ])
        plt.figure(figsize=(7, 9))
        f_scores = np.linspace(0.2, 0.8, num=4)
        lines = []
        labels = []
        for f_score in f_scores:
            x = np.linspace(0.01, 1)
            y = f_score * x / (2 * x - f_score)
            l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2)
            plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))
        lines.append(l)
        labels.append('iso-f1 curves')

        l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
        lines.append(l)
        labels.append('micro-average Precision-recall (area = {0:0.5f})'
                      ''.format(average_precision["micro"]))

        for i, color in zip(range(rdd), colors):
            l, = plt.plot(recall[i], precision[i], color=color, lw=2)
            lines.append(l)
            labels.append('Precision-recall for {0} (area = {1:0.5f})'.format(
                fdict[i], average_precision[i]))
            print('{0} {1} Average Precision of {2} = {3:0.5f}'.format(
                name, dm, fdict[i], average_precision[i]))

        fig = plt.gcf()
        fig.subplots_adjust(bottom=0.25)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('{} Precision-Recall curve: Average Accu={}'.format(
            name, accur))
        plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=12))
        plt.savefig("../Results/{}/out/{}_{}_PRC.png".format(path, name, dm))

    else:
        tl = outtl.values[:, 0].ravel()
        y_score = np.asarray(pdx[:, 1]).ravel()
        auc = sklearn.metrics.roc_auc_score(tl, y_score)
        auc = round(auc, 5)
        print('{0} {1} AUC = {2:0.5f}'.format(name, dm, auc))
        fpr, tpr, _ = sklearn.metrics.roc_curve(tl, y_score)
        plt.figure()
        lw = 2
        plt.plot(fpr,
                 tpr,
                 color='darkorange',
                 lw=lw,
                 label='ROC curve (area = %0.5f)' % auc)
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('{} ROC of {}'.format(name, pmd))
        plt.legend(loc="lower right")
        plt.savefig("../Results/{}/out/{}_{}_ROC.png".format(path, name, dm))

        average_precision = sklearn.metrics.average_precision_score(
            tl, y_score)
        print('{0} Average precision-recall score: {1:0.5f}'.format(
            name, average_precision))
        plt.figure()
        f_scores = np.linspace(0.2, 0.8, num=4)
        for f_score in f_scores:
            x = np.linspace(0.01, 1)
            y = f_score * x / (2 * x - f_score)
            l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2)
            plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))
        precision, recall, _ = sklearn.metrics.precision_recall_curve(
            tl, y_score)
        plt.step(recall, precision, color='b', alpha=0.2, where='post')
        plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.title('{} {} PRC: AP={:0.5f}; Accu={}'.format(
            pmd, name, average_precision, accur))
        plt.savefig("../Results/{}/out/{}_{}_PRC.png".format(path, name, dm))
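The iso-f1 guide lines drawn in both branches above come from solving F1 = 2PR/(P+R) for precision at a fixed F1. A small sketch of that algebra (the helper name is illustrative):

import numpy as np

def iso_f1_precision(recall, f1):
    # From f1 = 2*p*r / (p + r): p = f1*r / (2*r - f1), valid where 2*r > f1.
    recall = np.asarray(recall, dtype=float)
    return f1 * recall / (2 * recall - f1)

print(iso_f1_precision([0.5, 0.8], 0.5))  # precision needed to hold F1 = 0.5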
Example #38
def runANNSearch(data_folder,cfg):
    epochs = 100
    #Gather the dataset
    #train_x, train_y are just regular samples
    train_x_t, train_y_t, test_x_t, test_y_t, num_input = gatherDataset_10times(data_folder, cfg, 0.9)

    #std_scale = preprocessing.StandardScaler().fit(train_x)
    #train_x = std_scale.transform(train_x)
    #test_x = std_scale.transform(test_x)

    max_auc = 0
    max_batch_size = 0
    max_hidden = 0
    max_repr_size = 0

    auc_report = []
    n_hidden_report = []
    repr_size_report = []
    batch_sizes_report = []

    best_config = []
    max_auc = 0

    learning_rates = [0.001]  # [0.01, 0.001] # default is 0.001
    batch_sizes = [32]#[8, 16, 32, 64, 128, 256]
    n_hiddens = [8, 16, 32, 64, 128, 256]#np.logspace(2, 10, base=2, num=12)
    #drop_inputs = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
    #drop_hiddens = [0.5, 0.6, 0.7, 0.8, 0.9, 1]
    repr_sizes = [4, 8, 16, 32, 64, 128, 256] #np.logspace(2, 10, base=2, num=12) #num 20

    for learning_rate in learning_rates:
        for batch_size in batch_sizes:
            for n_hidden in n_hiddens:
                for repr_size in repr_sizes:
                    if(repr_size <= n_hidden):
                        #start = time.time()
                        np.random.seed(1)
                        graph_level_seed = 1
                        operation_level_seed = 1
                        tf.set_random_seed(graph_level_seed)
                        random.seed(1)

                        step_auc = []
                        mean_fpr = np.linspace(0, 1, 100)
                        tprs = []
                        for n in range(0,10):
                            #n_features, batch_size, n_hidden, drop_input, drop_hidden, repr_size
                            ae = Autoencoder(num_input, batch_size, int(n_hidden), 1, 1, int(repr_size), learning_rate)

                            train_x = train_x_t[n]
                            train_y = train_y_t[n]
                            test_x = test_x_t[n]
                            test_y = test_y_t[n]

                            for i in range(epochs):
                                ae.run_epoch(train_x)

                            #Reconstruct samples
                            anomaly_errors = ae.reconstruction_errors(test_x[len(test_x) // 2:])
                            normal_val_errors = ae.reconstruction_errors(test_x[:len(test_x) // 2])

                            roc_y = [1 for _ in range(len(anomaly_errors))] + [0 for _ in range(len(normal_val_errors))]
                            roc_score = np.concatenate([anomaly_errors, normal_val_errors])


                            # Compute ROC curve and ROC area for each class
                            fpr, tpr, thresholds = roc_curve(roc_y, roc_score, drop_intermediate=True)
                            tprs.append(interp(mean_fpr, fpr, tpr))
                            tprs[-1][0] = 0.0
                            roc_auc = auc(fpr, tpr)
                            #print "Fold %i auc: %f" % (n, roc_auc)
                            step_auc.append(roc_auc)

                        avg_auc = sum(step_auc)/float(len(step_auc))

                        auc_report.append(avg_auc)
                        """
                        n_hidden_report.append(int(n_hidden))
                        repr_size_report.append(int(repr_size))
                        batch_sizes_report.append(batch_size)
                        """
                        mean_tpr = np.mean(tprs, axis=0)
                        mean_tpr[-1] = 1.0
                        mean_auc = auc(mean_fpr, mean_tpr)

                        if(mean_auc > max_auc):
                            max_auc = mean_auc
                            best_config = [mean_fpr, mean_tpr, n_hidden, repr_size]

                        #end = time.time()
                        #print(end - start)
                        print ("%f - Batch Size:%i, Learning Rate:%f, n_hidden:%i, repr_size:%i" % (avg_auc, batch_size, learning_rate, int(n_hidden), int(repr_size)))


    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.xlabel('False Positive Rate', fontsize=26)
    plt.ylabel('True Positive Rate', fontsize=26)

    ax1.plot([0, 1], [0, 1], 'k--', lw=2, color='orange', label = 'Random Guess')
    ax1.grid(color='black', linestyle='dotted')
    plt.setp(ax1.get_xticklabels(), fontsize=16)
    plt.setp(ax1.get_yticklabels(), fontsize=16)
    plt.plot(best_config[0], best_config[1], color='b', label=r'ROC (AUC = %0.2f)' % (max_auc), lw=2, alpha=.8)
    plt.legend(loc='lower right', fontsize='x-large')

    fig.savefig('Autoencoder/' + "Facet_Autoencoder_" + cfg[1] + ".pdf")   # save the figure to file
    plt.close(fig)

    print "################\n# Summary"
    print "Max. AUC: %f, N_hidden: %i, Repr_Size: %i" % (max_auc, best_config[2],best_config[3])
    print "Avg. AUC %f: " % (np.mean(auc_report,axis=0))
    """
Example #39
            mdl_names.append(model_name)

            print('\nPerforming %s followed by %s for dataset %s\n' % (kernel, model_name, dataset))

            # To count number of folds
            i = 0

            for train, test in cv.split(X_kpca, y):

                probas_ = model.fit(X_kpca[train], y[train]).predict_proba(X_kpca[test])

                # Compute ROC curve and area the curve

                fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
                tprs.append(interp(mean_fpr, fpr, tpr))
                tprs[-1][0] = 0.0
                roc_auc = auc(fpr, tpr)
                aucs.append(roc_auc)
                plt.plot(fpr, tpr, lw=1, alpha=0.3,
                         label='ROC fold %d (AUC = %0.2f)' % (i+1, roc_auc))

                i += 1

            plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
                     label='Luck', alpha=.8)

            mean_tpr = np.mean(tprs, axis=0)
            mean_tpr[-1] = 1.0
            mean_auc = auc(mean_fpr, mean_tpr)
            std_auc = np.std(aucs)
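A hedged continuation of the fold loop above, reusing the tprs/aucs/mean_fpr it builds; it adds the mean curve and a ±1 std band in the style of the cvROC example further down:

            plt.plot(mean_fpr, mean_tpr, color='b', lw=2, alpha=.8,
                     label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc))
            std_tpr = np.std(tprs, axis=0)
            plt.fill_between(mean_fpr,
                             np.maximum(mean_tpr - std_tpr, 0),
                             np.minimum(mean_tpr + std_tpr, 1),
                             color='grey', alpha=.2, label=r'$\pm$ 1 std. dev.')
            plt.legend(loc='lower right')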
def plot_ROC_curve(y_true, y_pred, labels, roc_path):
    """
    Plots the ROC curve from prediction scores
    y_true.shape  = [n_samples, n_classes]
    y_pred.shape  = [n_samples, n_classes]
    labels.shape  = [n_classes]
    """
    n_classes = len(labels)
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for y, pred, label in zip(y_true.transpose(), y_pred.transpose(), labels):
        fpr[label], tpr[label], _ = roc_curve(y, pred)
        roc_auc[label] = auc(fpr[label], tpr[label])

    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[label] for label in labels]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for label in labels:
        mean_tpr += interp(all_fpr, fpr[label], tpr[label])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Plot all ROC curves
    plt.figure()
    lw = 2
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='micro-average ROC curve (area = {0:0.3f})'
             ''.format(roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=2)

    plt.plot(fpr["macro"],
             tpr["macro"],
             label='macro-average ROC curve (area = {0:0.3f})'
             ''.format(roc_auc["macro"]),
             color='navy',
             linestyle=':',
             linewidth=2)

    if len(labels) == 4:
        colors = ['green', 'cornflowerblue', 'darkorange', 'darkred']
    else:
        colors = ['green', 'cornflowerblue', 'darkred']
    for label, color in zip(labels, cycle(colors)):
        plt.plot(fpr[label],
                 tpr[label],
                 color=color,
                 lw=lw,
                 label='ROC curve of {0} (area = {1:0.3f})'
                 ''.format(label, roc_auc[label]))

    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve')
    plt.legend(loc="lower right")
    matplotlib.rcParams.update({'font.size': 14})
    plt.savefig('%s.png' % roc_path, pad_inches=0, bbox_inches='tight')
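A hedged usage sketch for plot_ROC_curve with hypothetical one-hot arrays; it assumes the numpy/matplotlib/sklearn imports and the itertools cycle the function body relies on:

import numpy as np

y_true = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]])   # one-hot truths
y_pred = np.array([[0.8, 0.1, 0.1], [0.2, 0.6, 0.2],
                   [0.1, 0.2, 0.7], [0.6, 0.3, 0.1]])             # class scores
plot_ROC_curve(y_true, y_pred, labels=['a', 'b', 'c'], roc_path='roc_demo')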
    def cross_val(self,
                  X_train,
                  y_train,
                  cv=10,
                  plot_auc=False,
                  figsize=(16, 9)):
        """Computes stratified K-Fold cross-validation of the metrics provided 
        to the class and returns a dictionnary with the results for each fold ; 
        if the `plot_auc` argument is set to `True`, then cross-validation will only 
        be run with AUC as the metric, and returns a plot with ROC curves for each 
        fold
        
        Parameters
        ------
        X_train : numpy.ndarray or pandas.DataFrame
            Data to train the cross-validation on
        y_train : numpy.ndarray or pandas.Series
            Labels for `X_train`
        cv : int
            Number of folds for cross-validation
        plot_auc : bool
            Indication to plot ROC curves for each fold
        figsize : int tuple
            Plot size
        """
        if plot_auc:
            cv = StratifiedKFold(n_splits=cv)
            tprs = []
            aucs = []
            mean_fpr = np.linspace(0, 1, 100)

            i = 0
            plt.figure(figsize=figsize)
            for train, test in cv.split(X_train, y_train):
                probas_ = self.model.fit(X_train[train],
                                         y_train[train]).predict_proba(
                                             X_train[test])
                fpr, tpr, thresholds = roc_curve(y_train[test], probas_[:, 1])
                tprs.append(interp(mean_fpr, fpr, tpr))
                tprs[-1][0] = 0.0
                roc_auc = auc(fpr, tpr)
                aucs.append(roc_auc)
                plt.plot(fpr,
                         tpr,
                         lw=1,
                         alpha=0.3,
                         label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))
                i += 1

            plt.plot([0, 1], [0, 1],
                     linestyle='--',
                     lw=2,
                     color='r',
                     label='Chance',
                     alpha=.8)

            mean_tpr = np.mean(tprs, axis=0)
            mean_tpr[-1] = 1.0
            mean_auc = auc(mean_fpr, mean_tpr)
            std_auc = np.std(aucs)
            plt.plot(mean_fpr,
                     mean_tpr,
                     color='b',
                     label=r'Mean ROC (AUC = %0.3f $\pm$ %0.3f)' %
                     (mean_auc, std_auc),
                     lw=2,
                     alpha=.8)
            plt.xlim([-0.05, 1.05])
            plt.ylim([-0.05, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('ROC curve')
            plt.legend(loc="lower right")
        else:
            self.cv_scores_ = {}
            for metric in self.metrics_:
                if metric == "precision":
                    self.cv_scores_[metric] = cross_val_score(
                        self.model,
                        X_train,
                        y_train,
                        cv=cv,
                        scoring=self.custom_precision_score)
                elif metric == "recall":
                    self.cv_scores_[metric] = cross_val_score(
                        self.model,
                        X_train,
                        y_train,
                        cv=cv,
                        scoring=self.custom_recall_score)
                elif metric == "accuracy":
                    self.cv_scores_[metric] = cross_val_score(
                        self.model,
                        X_train,
                        y_train,
                        cv=cv,
                        scoring=self.custom_accuracy_score)
                elif metric == "f1_score":
                    self.cv_scores_[metric] = cross_val_score(
                        self.model,
                        X_train,
                        y_train,
                        cv=cv,
                        scoring=self.custom_f1_score)
                else:
                    self.cv_scores_[metric] = cross_val_score(self.model,
                                                              X_train,
                                                              y_train,
                                                              cv=cv,
                                                              scoring=metric)
            return self.cv_scores_
Example #42
def cvROC(dataset):
    X = dataset.data
    y = dataset.target
    X, y = X[y != 2], y[y != 2]
    n_samples, n_features = X.shape

    # Add noisy features
    random_state = numpy.random.RandomState(0)
    X = numpy.c_[X, random_state.randn(n_samples, 200 * n_features)]
    cv = StratifiedKFold(n_splits=10)
    classifier = svm.SVC(kernel='linear',
                         probability=True,
                         random_state=random_state)

    tprs = []
    aucs = []
    mean_fpr = numpy.linspace(0, 1, 100)

    i = 0
    for train, test in cv.split(X, y):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr,
                 tpr,
                 lw=1,
                 alpha=0.3,
                 label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))

        i += 1
    plt.plot([0, 1], [0, 1],
             linestyle='--',
             lw=2,
             color='r',
             label='Chance',
             alpha=.8)

    mean_tpr = numpy.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = numpy.std(aucs)
    plt.plot(mean_fpr,
             mean_tpr,
             color='b',
             label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
             lw=2,
             alpha=.8)

    std_tpr = numpy.std(tprs, axis=0)
    tprs_upper = numpy.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = numpy.maximum(mean_tpr - std_tpr, 0)
    plt.fill_between(mean_fpr,
                     tprs_lower,
                     tprs_upper,
                     color='grey',
                     alpha=.2,
                     label=r'$\pm$ 1 std. dev.')

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()
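Hedged usage: the body mirrors scikit-learn's cross-validated ROC demo on iris, so the iris dataset is the natural input (assumes the numpy/matplotlib/sklearn imports the snippet expects):

from sklearn import datasets

cvROC(datasets.load_iris())  # binary task: class 2 is dropped inside the function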
Example #43
def draw_roc( Y, Y_score):
    
    # Make binarized multi classes labels
    #Y = label_binarize(Y, classes =  [0,1,2])
    #print(Y.shape)
    n_classes = Y.shape[1]
    
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(Y[:, i], Y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(Y.ravel(), Y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    
    # Compute macro-average ROC curve and ROC area

    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    
    
    lw = 4
    plt.rcParams.update({'font.size': 15})
    plt.figure(figsize=(10,7))
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["micro"]),
                   color='deeppink', linestyle=':', linewidth=lw)

    plt.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["macro"]),
                   color='navy', linestyle=':', linewidth=lw)
    
    colors = itertools.cycle(['aqua', 'darkorange', 'cornflowerblue', 'darkred'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(i, roc_auc[i]))
    
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()
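The commented-out label_binarize hint at the top of draw_roc suggests the intended call pattern; a hedged sketch with toy data:

import numpy as np
from sklearn.preprocessing import label_binarize

Y = label_binarize([0, 1, 2, 1, 0], classes=[0, 1, 2])        # one-hot truths
Y_score = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1],
                    [0.2, 0.2, 0.6], [0.3, 0.5, 0.2],
                    [0.6, 0.3, 0.1]])
draw_roc(Y, Y_score)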
Example #44
def five_fold_roc(spliting, all_slices, subtype, random_flag=True, clf=GB()):
    '''Randomized five-fold grouped cross-validation'''
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    # all_tpr = []
    residual = []
    performance = [0] * 8

    m = 5
    for i, split in enumerate(spliting):
        if random_flag:  # random split
            tr_paths, te_paths = get_five_data(split, all_slices)
        else:  # group by fixed names
            tr_paths, te_paths = data_by_name(all_slices,
                                              split[0]), data_by_name(
                                                  all_slices, split[1])

        tr_slice, tr_data, tr_label, tr_area = read_data(tr_paths, num_=116)
        te_slice, te_data, te_label, te_area = read_data(te_paths, num_=116)

        #     return trainacc, testacc, tr_acc, te_acc, tr_recall, te_recall, tr_speci, te_speci,prb_2, l2
        result = \
            classify(tr_data, te_data, tr_label, te_label, tr_slice, te_slice, tr_area, te_area, clf=GB())

        fpr, tpr, thresholds = roc_curve(result[-1], result[-2])
        residual.append(comput_residual(result[-1], result[-2]))

        mean_tpr += interp(mean_fpr, fpr, tpr)  # interpolate this fold's TPR onto the mean_fpr grid
        mean_tpr[0] = 0.0  # force the curve to start at 0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr,
                 tpr,
                 lw=1,
                 label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

        performance[0] += result[0]
        performance[1] += result[1]
        performance[2] += result[2]
        performance[3] += result[3]
        performance[4] += result[4]
        performance[5] += result[5]
        performance[6] += result[6]
        performance[7] += result[7]

    print('train/slice', performance[0] / m)
    print('test/slice', performance[1] / m)
    print('train/person', performance[2] / m)
    print('test/person', performance[3] / m)
    print('train/recall', performance[4] / m)
    print('test/recall', performance[5] / m)
    print('train/speci', performance[6] / m)
    print('test/speci', performance[7] / m)
    print(np.mean(residual, axis=0))

    mean_tpr /= 5  # average the interpolated TPRs over the five folds at the 100 mean_fpr points
    mean_tpr[-1] = 1.0  # force the last point to (1, 1)
    mean_auc = auc(mean_fpr, mean_tpr)  # mean AUC

    # return mean_fpr, mean_tpr, mean_auc

    # plot the chance diagonal
    # plt.subplot(subplt)
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    plt.plot(mean_fpr,
             mean_tpr,
             'k--',
             label='Mean ROC (area = %0.2f)' % mean_auc,
             lw=2)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('{} - Receiver operating characteristic'.format(subtype))
    plt.legend(loc="lower right")
    plt.show()
def graphs_ROC():  #data_random,resultado_r,valid_set,valid_results):
    a = opt.getcwd()  # gives the current working directory
    data = a + "/Archivos_training/Data.txt"
    resultado = a + "/Archivos_training/resultados.txt"
    clasificador = a + "/Archivos_training/clf_SVM.pkl"

    data_random = D.loadtxt(data)
    print("\tData cargada")
    resultado_r = D.loadtxt(resultado)
    print("\tResultados cargados")
    pkl_file = open(clasificador, 'rb')
    clf = pick.load(pkl_file)
    print("\tClasificador cargado")
    print("\tEn proceso ...")
    resultado_r = label_binarize(
        resultado_r,
        classes=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130])

    n_classes = resultado_r.shape[1]

    X_train, X_test, y_train, y_test = train_test_split(data_random,
                                                        resultado_r,
                                                        test_size=0.4,
                                                        random_state=0)
    classifier = OneVsRestClassifier(clf)

    y_score = classifier.fit(X_train, y_train).decision_function(X_test)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    lw = 2

    # Compute macro-average ROC curve and ROC area
    # First aggregate all false positive rates
    all_fpr = D.unique(D.concatenate([fpr[i] for i in range(n_classes)]))
    # Then interpolate all ROC curves at these points
    mean_tpr = D.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])
    # Finally average it and compute AUC
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    #--------------------------------------------------------------------------
    # Plot all ROC curves
    plt.figure()
    plt.rc('font', size=8)
    for k in range(3):

        plt.subplot(2, 2, k + 1)

        colors = cycle(['r', 'darkorange', 'b', 'y'])
        for i, color in zip(range(3), colors):
            plt.plot(fpr[i + k * 3],
                     tpr[i + k * 3],
                     color=color,
                     lw=lw,
                     label='ROC curve of class {0} (area = {1:0.2f})'
                     ''.format(i + k * 3 + 1, roc_auc[i + k * 3]))
        plt.plot([0, 1], [0, 1], 'k--', lw=lw)
        plt.xlim([-0.05, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        titulo = "ROC curves from class " + str(
            k * 3 + 1) + " to class " + str(k * 3 + 3)
        plt.title(titulo)
        plt.legend(loc="lower right")
    #-------------------------------------------------------------------------

    plt.subplot(2, 2, 4)

    colors = cycle(['r', 'darkorange', 'b', 'y'])
    for i, color in zip(range(4), colors):

        plt.plot(fpr[i + 9],
                 tpr[i + 9],
                 color=color,
                 lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(i + 10, roc_auc[i + 9]))
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([-0.05, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    titulo = "ROC curves from class " + str(10) + " to class " + str(13)
    plt.title(titulo)
    plt.legend(loc="lower right")
    #----------------------------------------------------------------------
    plt.figure()
    plt.rc('font', size=8)
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["micro"]),
             color='deeppink',
             linestyle='-',
             linewidth=4)
    plt.plot(fpr["macro"],
             tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["macro"]),
             color='navy',
             linestyle='-',
             linewidth=4)
    plt.xlim([-0.05, 1.0])
    plt.ylim([0.0, 1.05])
    titulo = "Graficas ROC Promedio, minima y maxima"
    plt.title(titulo)
    plt.legend(loc="lower right")
    plt.show()
Example #46
    def evaluate(self, y_true, y_pred_prob):
        """
        evaluate model performance by calculate ks, auc, confusion matrix and find the best segmentation
        通过计算ks值,auc值以及混淆矩阵,来评估模型效果,并且寻找最佳切割点
        :param y_true: true target
        :param y_pred_prob: predict value
        :return:
        """
        mean_tpr = 0.0
        mean_fpr = np.linspace(0, 1, 100)
        fpr, tpr, theadhold = roc_curve(y_true, y_pred_prob)
        # fpr, tpr, theadhold = roc_curve(y_true, y_pred_prob)
        size = len(tpr)
        max_value = 0
        index = 0
        for i in range(0, size):
            v = tpr[i] - fpr[i]
            if v > max_value:
                max_value = v
                index = i
        mean_tpr += interp(mean_fpr, fpr,
                           tpr)  # interpolate the TPR onto the mean_fpr grid via scipy's interp()
        # mean_tpr[0] = 0.0  # force the curve to start at 0
        roc_auc = auc(fpr, tpr)
        # plotting only needs plt.plot(fpr, tpr); roc_auc just stores the AUC from auc()
        plt.plot(fpr, tpr, lw=1, label='ROC fold (area = %0.2f)' % roc_auc)
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate (Sensitivity)')
        plt.title('ROC curve')
        plt.grid()
        plt.legend()
        # plt.show()

        print('-------------- ks, auc --------------')
        print('ks: ' + str(max_value))
        print('auc: ' + str(auc(fpr, tpr)))
        print('threshold: ' + str(theadhold[index]))
        print('-------------------------------------')

        best_threshold = round(theadhold[index], 2)
        thres_pr_dict = {}
        threshold_recommended = 0.30
        best_precision = 0
        best_recall = 0
        best_pr = 0
        print('-------------- result details --------------')
        prob_thres = 0.01
        while prob_thres <= 1:
            # print('prob_thres: ' + str(prob_thres))
            print('prob_thres: ' + str(prob_thres))
            test_predict_new = []
            for prob in y_pred_prob:
                # if prob[1] > prob_thres:
                if prob > prob_thres:
                    test_predict_new.append(1)
                else:
                    test_predict_new.append(0)

            y_predict = np.array(test_predict_new)

            accuracy = accuracy_score(y_true, y_predict)
            precision, recall, f1, support = precision_recall_fscore_support(
                y_true, y_predict)
            matrix = confusion_matrix(y_true, y_predict)
            good_pass, bad_pass, good_deny, bad_deny = matrix[1][1], matrix[0][
                1], matrix[1][0], matrix[0][0]
            pass_ratio = float(good_pass + bad_pass) / (good_pass + bad_pass +
                                                        good_deny + bad_deny)
            print('pass_ratio: ' + str(pass_ratio))
            print('accuracy: ' + str(accuracy))
            print('precision: ' + str(precision))
            print('recall: ' + str(recall))
            print('f1: ' + str(f1))
            print('confusion_matrix:')
            print(matrix)
            print(" ")

            thres_pr_dict[prob_thres] = pass_ratio
            if float('%.2f' % prob_thres) == float('%.2f' % best_threshold):
                best_precision = str(precision)
                best_recall = str(recall)
                best_pr = str(pass_ratio)
            prob_thres += 0.01
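The KS search loop above has a one-line vectorized equivalent; a hedged sketch, assuming the same fpr/tpr/theadhold arrays:

import numpy as np

ks = np.max(tpr - fpr)                             # KS statistic
best_threshold = theadhold[np.argmax(tpr - fpr)]   # threshold at the KS point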
Example #47
    regr.fit(X, y)
    X1 = test[:, 1:65]
    y1 = test[:, 65]
    predict_y1 = regr.predict_proba(X1)[:, 1]
    false_positive_rate, true_positive_rate, thresholds = roc_curve(
        y1, predict_y1)
    fpr_load.append(false_positive_rate)
    tpr_load.append(true_positive_rate)

n_folds = len(fpr_load)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)
plt.figure(figsize=(10, 8))
for i in range(n_folds):
    tprs.append(interp(mean_fpr, fpr_load[i], tpr_load[i]))
    tprs[-1][0] = 0.0
    roc_auc = auc(fpr_load[i], tpr_load[i])
    aucs.append(roc_auc)
    plt.plot(fpr_load[i],
             tpr_load[i],
             lw=1,
             alpha=0.3,
             label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))

plt.plot([0, 1], [0, 1],
         linestyle='--',
         lw=2,
         color='r',
         label='Random prediction',
         alpha=.8)
Example #48
def class_report(y_true, y_pred, y_score=None, average='micro'):
    if y_true.shape != y_pred.shape:
        print("Error! y_true %s is not the same shape as y_pred %s" % (
              y_true.shape,
              y_pred.shape)
        )
        return

    lb = LabelBinarizer()

    if len(y_true.shape) == 1:
        lb.fit(y_true)

    #Value counts of predictions
    labels, cnt = np.unique(
        y_pred,
        return_counts=True)
    n_classes = len(labels)
    pred_cnt = pd.Series(cnt, index=labels)

    metrics_summary = precision_recall_fscore_support(
            y_true=y_true,
            y_pred=y_pred,
            labels=labels)

    avg = list(precision_recall_fscore_support(
            y_true=y_true, 
            y_pred=y_pred,
            average='weighted'))

    metrics_sum_index = ['precision', 'recall', 'f1-score', 'support']
    class_report_df = pd.DataFrame(
        list(metrics_summary),
        index=metrics_sum_index,
        columns=labels)

    support = class_report_df.loc['support']
    total = support.sum() 
    class_report_df['avg / total'] = avg[:-1] + [total]

    class_report_df = class_report_df.T
    class_report_df['pred'] = pred_cnt
    class_report_df['pred'].iloc[-1] = total

    if not (y_score is None):
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for label_it, label in enumerate(labels):
            fpr[label], tpr[label], _ = roc_curve(
                (y_true == label).astype(int), 
                y_score[:, label_it])

            roc_auc[label] = auc(fpr[label], tpr[label])

        if average == 'micro':
            if n_classes <= 2:
                fpr["avg / total"], tpr["avg / total"], _ = roc_curve(
                    lb.transform(y_true).ravel(), 
                    y_score[:, 1].ravel())
            else:
                fpr["avg / total"], tpr["avg / total"], _ = roc_curve(
                        lb.transform(y_true).ravel(), 
                        y_score.ravel())

            roc_auc["avg / total"] = auc(
                fpr["avg / total"], 
                tpr["avg / total"])

        elif average == 'macro':
            # First aggregate all false positive rates
            all_fpr = np.unique(np.concatenate([
                fpr[i] for i in labels]
            ))

            # Then interpolate all ROC curves at these points
            mean_tpr = np.zeros_like(all_fpr)
            for i in labels:
                mean_tpr += interp(all_fpr, fpr[i], tpr[i])

            # Finally average it and compute AUC
            mean_tpr /= n_classes

            fpr["macro"] = all_fpr
            tpr["macro"] = mean_tpr

            roc_auc["avg / total"] = auc(fpr["macro"], tpr["macro"])

        class_report_df['AUC'] = pd.Series(roc_auc)

    return class_report_df
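A hedged usage sketch for class_report on synthetic data; it assumes the pandas/numpy/sklearn imports the function body relies on:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_classes=3, n_informative=4,
                           random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)
print(class_report(y, clf.predict(X), y_score=clf.predict_proba(X),
                   average='macro'))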
Example #49
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []

for i, (train, test) in enumerate(cv):
    # fit a linear-kernel SVM on the training fold and score the test fold with predict_proba
    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
    #    print(set(y[train]))                   # set([0, 1]): the label has two classes
    #    print(len(X[train]), len(X[test]))     # 84 training samples, 16 test samples
    #    print("++", probas_)                   # predict_proba() returns per-class confidences;
    #    # the sample is assigned to the class with the higher confidence
    # Compute the ROC curve and the area under it
    # roc_curve() returns the fpr, tpr and thresholds
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    mean_tpr += interp(mean_fpr, fpr,
                       tpr)  # interpolate this fold's TPR onto the mean_fpr grid via scipy's interp()
    mean_tpr[0] = 0.0  # force the curve to start at 0
    roc_auc = auc(fpr, tpr)
    # plotting only needs plt.plot(fpr, tpr); roc_auc just stores the AUC from auc()
    plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

# plot the chance diagonal
plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

mean_tpr /= len(cv)  # average the interpolated TPRs over the folds at the 100 mean_fpr points
mean_tpr[-1] = 1.0  # force the last point to (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)  # mean AUC
# plot the mean ROC curve
plt.plot(mean_fpr,
         mean_tpr,
         'k--',
         label='Mean ROC (area = %0.2f)' % mean_auc,
         lw=2)
Example #50
    def do_error(self, line):
        """Override the error calculation for TTS

        The error is calculated as the vertical distance between theory points, in the current view,\
        calculated over all possible pairs of theory tables, when the theories overlap in the horizontal direction and\
        they correspond to files with the same Mw (if the parameters Mw2 and phi exist, their values are also
        used to classify the error). 1/2 of the error is added to each file.
        Report the error of the current theory on all the files.\n\
        File error is calculated as the mean square of the residual, averaged over all calculated points in the shifted tables.\n\
        Total error is the mean square of the residual, averaged over all points considered in all files.

        """
        total_error = 0
        npoints = 0
        view = self.parent_dataset.parent_application.current_view
        nfiles = len(self.parent_dataset.files)
        file_error = np.zeros(nfiles)
        file_points = np.zeros(nfiles, dtype=int)
        xth = []
        yth = []
        xmin = np.zeros((nfiles, view.n))
        xmax = np.zeros((nfiles, view.n))
        for i in range(nfiles):
            Filei = self.parent_dataset.files[i]
            xthi, ythi, success = view.view_proc(
                self.tables[Filei.file_name_short], Filei.file_parameters
            )
            # We need to sort arrays
            for k in range(view.n):
                x = xthi[:, k]
                p = x.argsort()
                xthi[:, k] = xthi[p, k]
                ythi[:, k] = ythi[p, k]
            xth.append(xthi)
            yth.append(ythi)

            xmin[i, :] = np.amin(xthi, 0)
            xmax[i, :] = np.amax(xthi, 0)

        # Mwset, Mw, Tdict = self.get_cases()
        MwUnique = {}
        for o in self.Mwset:
            MwUnique[o] = [0.0, 0]

        for i in range(nfiles):
            for j in range(i + 1, nfiles):
                if self.Mw[i] != self.Mw[j]:
                    continue
                for k in range(view.n):
                    condition = (xth[j][:, k] > xmin[i, k]) * (
                        xth[j][:, k] < xmax[i, k]
                    )
                    x = np.extract(condition, xth[j][:, k])
                    y = np.extract(condition, yth[j][:, k])
                    yinterp = interp(x, xth[i][:, k], yth[i][:, k])
                    error = np.sum((yinterp - y) ** 2)
                    npt = len(y)
                    total_error += error
                    npoints += npt
                    MwUnique[self.Mw[i]][0] += error
                    MwUnique[self.Mw[i]][1] += npt

        if line == "":
            # table='''<table border="1" width="100%">'''
            # table+='''<tr><th>Mw</th><th>Mw2</th><th>phi</th><th>phi2</th><th>Error</th><th># Pts.</th></tr>'''
            table = [
                [
                    "%-12s" % "Mw",
                    "%-12s" % "Mw2",
                    "%-12s" % "phi",
                    "%-12s" % "phi2",
                    "%-12s" % "Error",
                    "%-12s" % "# Pts.",
                ],
            ]
            p = list(MwUnique.keys())
            p.sort()
            for o in p:
                if MwUnique[o][1] > 0:
                    # table+='''<tr><td>%4g</td><td>%4g</td><td>%4g</td><td>%4g</td><td>%8.3g</td><td>(%5d)</td></tr>'''%(o[0], o[1], o[2], o[3], MwUnique[o][0] / MwUnique[o][1], MwUnique[o][1])
                    table.append(
                        [
                            "%-12.4g" % o[0],
                            "%-12.4g" % o[1],
                            "%-12.4g" % o[2],
                            "%-12.4g" % o[3],
                            "%-12.3g" % (MwUnique[o][0] / MwUnique[o][1]),
                            "%-12d" % MwUnique[o][1],
                        ]
                    )
                else:
                    # table+='''<tr><td>%4g</td><td>%4g</td><td>%4g</td><td>%4g</td><td>%s</td><td>(%5d)</td></tr>'''%(o[0], o[1], o[2], o[3], "-", MwUnique[o][1])
                    table.append(
                        [
                            "%-12.4g" % o[0],
                            "%-12.4g" % o[1],
                            "%-12.4g" % o[2],
                            "%-12.4g" % o[3],
                            "%-12s" % "-",
                            "%-12d" % MwUnique[o][1],
                        ]
                    )
            # table+='''</table><br>'''
            self.Qprint(table)
        if npoints > 0:
            total_error /= npoints
        else:
            total_error = 1e10
        if line == "":
            self.Qprint("<b>TOTAL ERROR</b>: %12.5g (%6d)<br>" % (total_error, npoints))
        return total_error
Example #51
def draw_roc_auc(ax,clf,X,y,title):    
    # split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.5,
                                                        random_state=0)
    
    n_classes = y.shape[1]
    
    # one-vs-rest classifier
    oneVsRestclassifier = OneVsRestClassifier(clf)
    y_score = oneVsRestclassifier.fit(X_train, y_train).predict_proba(X_test)
    
    # compute per-class ROC curves and areas
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = ms.roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = ms.auc(fpr[i], tpr[i])
    # compute the micro-average ROC curve and area
    fpr["micro"], tpr["micro"], _ = ms.roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = ms.auc(fpr["micro"], tpr["micro"])
    
    lw = 2
    
    # compute the macro-average ROC curve and area: per-class metrics, then averaged
    # (this method does not account for class imbalance)
    # first aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    # interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])
    
    # average and compute the AUC
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = ms.auc(fpr["macro"], tpr["macro"])
    
    # plot the ROC curves
    ax.plot(fpr["micro"], tpr["micro"],
        label='micro-average ROC curve (area = {0:0.2f})'
        ''.format(roc_auc["micro"]),
        color='deeppink', linestyle=':', linewidth=4)
    ax.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["macro"]),
             color='navy', linestyle=':', linewidth=4)
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(i, roc_auc[i]))
    ax.plot([0, 1], [0, 1], 'k--', lw=lw)
    
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(title,fontproperties=myfont)
    ax.legend(loc="best",prop=myfont)
    plt.show()
    return
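A hedged usage sketch for draw_roc_auc with random data; `myfont` is a stand-in for the FontProperties object the original module defines, and the function's other imports (numpy, matplotlib, sklearn, interp, cycle) are assumed to be in scope:

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties
from sklearn.preprocessing import label_binarize
from sklearn.svm import SVC

myfont = FontProperties()  # stand-in; the original module supplies its own font
rng = np.random.RandomState(0)
X = rng.randn(120, 5)
y = label_binarize(rng.randint(0, 3, 120), classes=[0, 1, 2])
fig, ax = plt.subplots()
draw_roc_auc(ax, SVC(probability=True), X, y, title='demo')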
Example #52
    def do_fit(self, line):
        """Minimize the error"""
        self.fitting = True
        start_time = time.time()
        # view = self.parent_dataset.parent_application.current_view
        self.Qprint("""<hr><h2>Parameter Fitting</h2>""")
        self.Mwset, self.Mw, self.Tdict = self.get_cases()
        # Case by case, T by T, we optimize the overlap of all files with the
        # corresponding cases at the selected temperature
        Tdesired = self.parameters["T"].value
        # print (self.Tdict)
        self.aT_vs_T = {}
        for case in self.Tdict.keys():
            self.Qprint(
                "<h3>Mw=%g Mw2=%g phi=%g phi2=%g</h3>"
                % (case[0], case[1], case[2], case[3])
            )
            Temps0 = [x[0] for x in self.Tdict[case]]
            Temps = np.abs(np.array([x[0] for x in self.Tdict[case]]) - Tdesired)
            Filenames = [x[2] for x in self.Tdict[case]]
            Files = [x[3] for x in self.Tdict[case]]
            indices = np.argsort(Temps)

            # first master curve is built from first file in indices list
            fname = Filenames[indices[0]]

            self.current_master_curve = np.array(
                Files[indices[0]].data_table.data, copy=True
            )
            self.current_master_curve.view("i8,i8,i8").sort(order=["f1"], axis=0)
            self.shiftParameters[fname] = (0.0, 0.0)

            # table='''<table border="1" width="100%">'''
            # table+='''<tr><th>T</th><th>log(Hshift)</th><th>log(Vshift)</th></tr>'''
            table = [
                ["%-12s" % "T", "%-12s" % "log(Hshift)", "%-12s" % "log(Vshift)"],
            ]
            # self.Qprint('%6s %11s %11s' % ('T', 'log(Hshift)', 'log(Vshift)'))
            indices = np.delete(indices, 0, None)

            for i in indices:
                XSHIFT = 0.0
                YSHIFT = 0.0
                if Temps[i] == 0:
                    # Add to current_master_curve
                    fname = Filenames[i]

                    tt = np.array(Files[i].data_table.data, copy=True)
                    self.current_master_curve = np.concatenate(
                        (self.current_master_curve, tt), axis=0
                    )
                    self.current_master_curve = self.current_master_curve[
                        self.current_master_curve[:, 0].argsort()
                    ]
                    self.shiftParameters[fname] = (XSHIFT, YSHIFT)

                else:
                    fname = Filenames[i]
                    tt = np.array(Files[i].data_table.data, copy=True)
                    # Calculate preliminary shift factors (horizontal and vertical)
                    if any(Files[i].isshifted):
                        initial_guess = [Files[i].xshift[0], Files[i].yshift[0]]
                    else:
                        # Calculate mid-point of tt
                        indmiddle = int(len(tt[:, 0]) / 2)
                        xmid = tt[indmiddle, 0]
                        ymid = tt[indmiddle, 1]
                        xmidinterp = interp(
                            ymid,
                            self.current_master_curve[:, 1],
                            self.current_master_curve[:, 0],
                        )
                        xshift = np.log10(xmidinterp / xmid)

                        # minimize shift factors so the overlap is maximum
                        initial_guess = [xshift]
                        if self.parameters["vert"].value:
                            initial_guess.append(0)

                    self.current_table = tt
                    self.current_file_min = fname
                    res = minimize(
                        self.func_fitTTS_one, initial_guess, method="Nelder-Mead"
                    )
                    if not res["success"]:
                        self.Qprint("Solution not found: %s" % res["message"])
                        return
                    XSHIFT = res.x[0]
                    if self.parameters["vert"].value:
                        YSHIFT = res.x[1]
                    else:
                        YSHIFT = 0.0

                    # Add to current_master_curve
                    # Set the theory file for that particular file
                    ttcopy = np.array(tt, copy=True)
                    ttcopy[:, 0] = ttcopy[:, 0] * np.power(10.0, XSHIFT)
                    ttcopy[:, 1] = ttcopy[:, 1] * np.power(10.0, YSHIFT)
                    ttcopy[:, 2] = ttcopy[:, 2] * np.power(10.0, YSHIFT)
                    self.current_master_curve = np.concatenate(
                        (self.current_master_curve, ttcopy), axis=0
                    )
                    self.current_master_curve = self.current_master_curve[
                        self.current_master_curve[:, 0].argsort()
                    ]
                    self.shiftParameters[fname] = (XSHIFT, YSHIFT)

            # Print final table of T and shift factors
            indTsorted = sorted(range(len(Temps0)), key=lambda k: Temps0[k])
            self.aT_vs_T[case[0]] = []  # for Arrhenius activation energy
            for i in indTsorted:
                fname = Filenames[i]
                sparam = self.shiftParameters[fname]
                # table+='''<tr><td>%6.3g</td><td>%11.3g</td><td>%11.3g</td></tr>'''%(Temps0[i], sparam[0], sparam[1])
                table.append(
                    [
                        "%-12.3g" % Temps0[i],
                        "%-12.3g" % sparam[0],
                        "%-12.3g" % sparam[1],
                    ]
                )
                # self.Qprint('%6.3g %11.3g %11.3g' % (Temps0[i], sparam[0], sparam[1]))
                self.aT_vs_T[case[0]].append((sparam[0], Temps0[i]))
            self.Qprint(table)
        self.fitting = False
        self.do_calculate(line, timing=False)
        self.Qprint(
            """<i>---Fitted in %.3g seconds---</i><br>""" % (time.time() - start_time)
        )
Example #53
b.insert(1, 1)
b.insert(2, 2)
b.insert(3, 3)
b.insert(4, 4)

df1 = pd.DataFrame(df1.iloc[:, b])

df1 = df1.interpolate()  #To interpolate

#Demonstrate working of extrapolation
x = [101, 102, 103, 104, 105, 106, 107, 109, 111, 113]
y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
f = interpolate.interp1d(x, y, fill_value='extrapolate')
xnew = [95, 96, 97, 98, 99, 100, 101, 114, 115, 117]
ynew = f(xnew)  # use interpolation function returned by `interp1d`
interp([114, 100, 110, 112, 108], x, y)
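# Note: scipy.interp (an alias of numpy.interp, deprecated in recent SciPy
# releases) does not extrapolate: queries outside [x[0], x[-1]] are clamped
# to the endpoint values, unlike interp1d(..., fill_value='extrapolate')
# above, so 114 -> 1.0 and 100 -> 0.0 here.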
#Application of extrapolation
p = np.size(df1.iloc[0, :])
for i in range(5, p):
    n = df1.iloc[:, i].isnull().sum()
    if (n > 0 and n < 5642):
        check2 = interpolate.interp1d(df1.iloc[n:, 4],
                                      df1.iloc[n:, i],
                                      fill_value='extrapolate')
        check3 = check2(df1.iloc[:n, 4])
        df1.iloc[:n, i] = check3

#Visualize to detect relationship
df1.hist()  #shows frequency of the values of every feature
plt.show()
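A quick contrast of the two behaviours used above: interp clamps outside the sample range, while interp1d with fill_value='extrapolate' extends the boundary segments. A minimal sketch with made-up data:

import numpy as np
from scipy import interpolate

xs = [0, 1, 2]
ys = [0, 1, 4]

print(np.interp([-1, 3], xs, ys))  # [0. 4.]  -- clamped to the endpoint values
f = interpolate.interp1d(xs, ys, fill_value='extrapolate')
print(f([-1, 3]))                  # [-1. 7.] -- boundary segments extended linearly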
def plotSepta(ax):
    from scipy import interp
    #    positions = [511.95, 902.95, 1293.95, 1684.95 , 2075.95]
    #    Xshift = math.sin(LindaAngle) *312/2
    #    Yshift = math.cos(LindaAngle)*312/2

    #ZSs
    ZSangle = math.degrees(1.414490E-3)
    r1 = patches.Rectangle((355.96, 6.79),
                           313,
                           0.020,
                           angle=-ZSangle,
                           color=septaColor)  #, label = 'septa'
    r2 = patches.Rectangle((746.95, 6.241),
                           313,
                           0.020,
                           angle=-ZSangle,
                           color=septaColor)
    r3 = patches.Rectangle((1137.95, 5.688),
                           313,
                           0.020,
                           angle=-ZSangle,
                           color=septaColor)
    r4 = patches.Rectangle((1528.95, 5.1349),
                           313,
                           0.020,
                           angle=-ZSangle,
                           color=septaColor)
    r5 = patches.Rectangle((1919.95, 4.582),
                           313,
                           0.020,
                           angle=-ZSangle,
                           color=septaColor)
    ax.add_patch(r1)
    ax.add_patch(r2)
    ax.add_patch(r3)
    ax.add_patch(r4)
    ax.add_patch(r5)

    #TPST
    #TPST = [4792.27, 2.77]
    TPSTangle = math.degrees(0.533948362E-03)
    #    MADXoffset = 1665.4231
    length = 215  #214
    width1 = 0.46
    width2 = 0.52
    Zcorner1 = 4684.768
    Xcorner1 = 3.760495
    Zcorner2 = 4771.96852
    Xcorner2 = 3.77706
    #    r6 = patches.Rectangle((4685.26,3.94), 214 , 0.46, angle= TPSTangle, color = septaColor, alpha = 0.75)
    #    ax.add_patch(r6)

    r1 = patches.Rectangle((Zcorner1, Xcorner1),
                           length,
                           width1,
                           angle=TPSTangle,
                           label='septa',
                           color=septaColor)
    ax.add_patch(r1)
    r2 = patches.Rectangle((Zcorner2, Xcorner2),
                           length - (Zcorner2 - Zcorner1),
                           width2,
                           angle=TPSTangle,
                           color=septaColor)
    ax.add_patch(r2)

    #MST
    MADXoffset = 1665.4231
    Zs = [5093.02, 5416.42, 5739.82]
    width = 0.414
    length = 240

    point1 = []
    point2 = []

    #Defines the baselines
    point1.append([100 * (1715.1633 - MADXoffset), 0.1 * 39.41577])
    point2.append([100 * (1717.5433 - MADXoffset), 0.1 * 40.96990])

    point1.append([100 * (1718.3973 - MADXoffset), 0.1 * 41.52756])
    point2.append([100 * (1720.7773 - MADXoffset), 0.1 * 43.08169])

    point1.append([100 * (1721.6313 - MADXoffset), 0.1 * 43.63934])
    point2.append([100 * (1724.0113 - MADXoffset), 0.1 * 45.19347])

    MSTangle = math.degrees(0.65299570E-03)
    for i in range(len(Zs)):

        z = np.zeros(2)
        x = np.zeros(2)
        z[0] = point1[i][0]
        z[1] = point2[i][0]
        x[0] = point1[i][1]
        x[1] = point2[i][1]
        #        f = interpolate.interp1d(z, x)
        newZ = Zs[i] - math.cos(math.radians(MSTangle)) * length / 2  # MSTangle is stored in degrees
        newX = interp(newZ, z, x)
        r7 = patches.Rectangle((newZ, newX),
                               length,
                               width,
                               angle=MSTangle,
                               color=septaColor,
                               alpha=0.75)
        ax.add_patch(r7)

#    "MSTangle = math.degrees(0.65299570E-03)
#    r7 = patches.Rectangle((4973.016812,4.079), length,width, angle= MSTangle, color = septaColor,alpha = 0.75)
#    r8 = patches.Rectangle((5296.416982,4.240), length,width, angle= MSTangle, color = septaColor,alpha = 0.75)
#    r9 = patches.Rectangle((5619.816971,4.401), length,width, angle= MSTangle, color = septaColor,alpha = 0.75)
#    ax.add_patch(r7)
#    ax.add_patch(r8)
#    ax.add_patch(r9)

#MSE
    Zs = [6838.29, 7161.69, 7485.09, 7808.49, 8131.89]
    length = 241.32
    width = 1.72

    point1 = []
    point2 = []

    #Defines the baselines
    point1.append([100 * (1732.616 - MADXoffset), 0.1 * 48.26604])
    point2.append([100 * (1734.996 - MADXoffset), 0.1 * 47.46876])

    point1.append([100 * (1735.85 - MADXoffset), 0.1 * 47.89989])
    point2.append([100 * (1738.23 - MADXoffset), 0.1 * 50.14024])

    point1.append([100 * (1739.084 - MADXoffset), 0.1 * 51.30396])
    point2.append([100 * (1741.464 - MADXoffset), 0.1 * 56.57150])

    point1.append([100 * (1742.318 - MADXoffset), 0.1 * 59.18803])
    point2.append([100 * (1744.698 - MADXoffset), 0.1 * 67.48276])

    point1.append([100 * (1745.552 - MADXoffset), 0.1 * 70.83187])
    point2.append([100 * (1747.932 - MADXoffset), 0.1 * 82.16424])
    for i in range(len(Zs)):

        z = np.zeros(2)
        x = np.zeros(2)
        z[0] = point1[i][0]
        z[1] = point2[i][0]
        x[0] = point1[i][1]
        x[1] = point2[i][1]
        MSEangle = math.atan((x[1] - x[0]) / (z[1] - z[0]))  # radians, from this septum's baseline
        newZ = Zs[i] - math.cos(MSEangle) * length / 2
        newX = interp(newZ, z, x)
        r7 = patches.Rectangle((newZ, newX),
                               length,
                               width,
                               angle=math.degrees(MSEangle),
                               color=septaColor,
                               alpha=0.75)
        ax.add_patch(r7)
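A minimal driver for plotSepta, assuming the module-level names it relies on (math, np, patches and septaColor); the colour, axis limits and units are illustrative guesses:

import math
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

septaColor = 'tab:red'  # assumed value for the module-level colour the function expects

fig, ax = plt.subplots(figsize=(12, 4))
plotSepta(ax)
ax.set_xlim(0, 8500)  # spans the ZS..MSE Z positions used above
ax.set_ylim(0, 10)
ax.set_xlabel('Z [cm]')
ax.set_ylabel('X [cm]')
plt.show()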
Example #55
    def train_model(self):
        """
        train and estimate model
        draw ROC curves of train and test
        :return: train_y, train_pred_y, test_y, test_pred_y
        """
        train_sample_X, train_sample_y, test_X, test_y = self.get_data()  # build the formatted train and test splits
        cv_data = self.cv.split(
            train_sample_X,
            train_sample_y)  # split train data to train and validation data

        tprs = []  # list for saving TP rates in each cv
        aucs = []  # list for saving aucs in each cv
        mean_fpr = np.linspace(0, 1, 100)  # mean FP rates
        fig, ax = plt.subplots()  # initialize plt
        for i, (train, valid) in enumerate(cv_data):  # 5-fold CV training of the classifier
            self.clf.fit(train_sample_X[train],
                         train_sample_y[train])  # fit model using train data
            # plot ROC
            viz = metrics.plot_roc_curve(self.clf,
                                         train_sample_X[valid],
                                         train_sample_y[valid],
                                         name='ROC fold {}'.format(i),
                                         alpha=0.3,
                                         lw=1,
                                         ax=ax)
            interp_tpr = interp(mean_fpr, viz.fpr,
                                viz.tpr)  # get TP rates and do interp
            interp_tpr[0] = 0.0
            tprs.append(interp_tpr)  # add new interp_tpr to trprs list
            aucs.append(viz.roc_auc)  # add viz.roc_auc to aucs list
        # plot ROC of test data
        metrics.plot_roc_curve(self.clf,
                               test_X,
                               test_y,
                               name='ROC test',
                               alpha=0.8,
                               lw=1,
                               color='green',
                               ax=ax)
        ax.plot([0, 1], [0, 1],
                linestyle='--',
                lw=2,
                color='r',
                label='Chance',
                alpha=.8)
        # draw mean auc of 5 cv train
        mean_tpr = np.mean(tprs, axis=0)
        mean_tpr[-1] = 1.0
        mean_auc = metrics.auc(mean_fpr, mean_tpr)
        std_auc = np.std(aucs)
        ax.plot(mean_fpr,
                mean_tpr,
                color='b',
                label=r'Mean ROC of Train (AUC = %0.2f $\pm$ %0.2f)' %
                (mean_auc, std_auc),
                lw=2,
                alpha=.8)
        # draw confident interval
        std_tpr = np.std(tprs, axis=0)
        tprs_upper = np.minimum(mean_tpr + std_tpr, 1)  # get upper bound
        tprs_lower = np.maximum(mean_tpr - std_tpr, 0)  # get lower bound
        ax.fill_between(mean_fpr,
                        tprs_lower,
                        tprs_upper,
                        color='grey',
                        alpha=.2,
                        label=r'$\pm$ 1 std. dev.')
        ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title="ROC Curve")
        ax.legend(loc="lower right")
        plt.savefig('res_fig_knn/' + self.model_name + '.png')
        print(
            r'5 Cross Validation Mean AUC: %0.2f, Standard Deviation is %0.2f'
            % (mean_auc, std_auc))
        # train with all train data and compute the train and test accuracy respectively
        start = datetime.datetime.now()  # record start time
        self.clf.fit(train_sample_X, train_sample_y)  # fit all train data
        test_pred_y = self.clf.predict(test_X)  # predict test data
        end = datetime.datetime.now()  # record end time
        print('Fit Time:')  # calculate time cost
        print(end - start)
        dump(self.clf,
             'model_knn/' + self.model_name + '.joblib')  # save trained model
        train_acc = self.clf.score(self.train_X,
                                   self.train_y)  # calculate train accuracy
        test_acc = self.clf.score(test_X, test_y)  # calculate test accuracy
        print('Train Accuracy is %0.2f, Test Accuracy is %0.2f' %
              (train_acc, test_acc))
        train_pred_y = self.clf.predict(self.train_X)  # train data prediction
        return self.train_y, train_pred_y, test_y, test_pred_y
        __print(model_name +
                'all_check_test: {}'.format(test_negative_samples +
                                            test_positive_samples))

        trained_model = get_model(model_i)

        trained_model.fit(X_train, y_train)
        """ ROC """
        viz = plot_roc_curve(trained_model,
                             X_test,
                             y_test,
                             name='ROC fold {}'.format(k_iteration),
                             alpha=0.3,
                             lw=1,
                             ax=ax)
        interp_tpr = interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

        y_pred_train = trained_model.predict(X_train)
        y_pred_test = trained_model.predict(X_test)
        accuracy_train = accuracy_score(y_train, y_pred_train)
        accuracy_test = accuracy_score(y_test, y_pred_test)
        TN, FP, FN, TP = confusion_matrix(y_test, y_pred_test).ravel()

        __print(model_name + 'train_acc: {}'.format(accuracy_train))
        __print(model_name + 'test_acc: {}'.format(accuracy_test))
        __print(model_name + 'TN:{}'.format(TN))
        __print(model_name + 'FP:{}'.format(FP))
        __print(model_name + 'FN:{}'.format(FN))
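The TN/FP/FN/TP unpacking above relies on sklearn's binary confusion_matrix layout [[TN, FP], [FN, TP]]; a quick sanity check with toy labels:

from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 1]
y_pred = [0, 1, 1, 1, 0]
TN, FP, FN, TP = confusion_matrix(y_true, y_pred).ravel()
print(TN, FP, FN, TP)  # 1 1 1 2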
Example #57
def plot_roc_curve(testY, y_prob, label_to_class):
    lw = 2
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(label_to_class):
        fpr[i], tpr[i], _ = roc_curve(testY[:, i], y_prob[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(testY.ravel(), y_prob.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    all_fpr = np.unique(np.concatenate([fpr[i]
                                        for i in range(label_to_class)]))

    # Then interpolate all ROC curves at this points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(label_to_class):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= label_to_class

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=4)

    # plt.plot(fpr["macro"], tpr["macro"],
    #          label='macro-average ROC curve (area = {0:0.2f})'
    #          ''.format(roc_auc["macro"]),
    #          color='navy', linestyle=':', linewidth=4)

    colors = cycle([
        'yellowgreen', 'darkorange', 'cornflowerblue', 'green', 'red', 'brown',
        'gold'
    ])
    labels = mlb.classes_  # mlb: a module-level MultiLabelBinarizer this snippet assumes
    for (i, color, label) in zip(range(label_to_class), colors, labels):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(label, roc_auc[i]))

    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    #plt.title('Roc Curve for each class')
    plt.legend(loc="lower right")
    plt.grid()
    plt.savefig('roc_smallvgg_2k.png')
    return plt
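A minimal call sketch for plot_roc_curve; the MultiLabelBinarizer must exist at module scope (the body reads mlb.classes_ directly), and the probabilities here are made up:

import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()  # module-level, as the function body expects
testY = mlb.fit_transform([('bird',), ('cat',), ('dog',), ('cat',)])
y_prob = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.8, 0.1],
                   [0.2, 0.2, 0.6],
                   [0.3, 0.5, 0.2]])
plot_roc_curve(testY, y_prob, label_to_class=testY.shape[1])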
Example #58
def AUCplot(spec_model, over_model, spec_xtest, over_xtest, yspec, yover, spec_rocs, over_rocs, spec_cals, over_cals, k, K):

	# Gather Probabilities from Models
	spec_model.eval() # This turns off BatchNorm and Dropout, as will be done at deployment
	over_model.eval() # This turns off BatchNorm and Dropout, as will be done at deployment
	probspec, _ = spec_model(spec_xtest)
	probover, _ = over_model(over_xtest)
	spec_model.train() #This turns BatchNorm and Dropout back on
	over_model.train() #This turns BatchNorm and Dropout back on

	# Generate ROC Curves
	fpr_spec, tpr_spec, thresholds_spec = roc_curve(yspec.data.cpu().numpy(), probspec.data.cpu().numpy())
	fpr_over, tpr_over, thresholds_over = roc_curve(yover.data.cpu().numpy(), probover.data.cpu().numpy())
	spec_rocs.append([fpr_spec, tpr_spec])
	over_rocs.append([fpr_over, tpr_over])

	# Generate Calibration Curves
	N = 5
	strategy = 'uniform'
	# These 4 lines are required to create the K calibration curves and find the mean w/ the +/-1 std region
	#spec_cal  = calibration_curve(yspec.data.cpu().numpy(), probspec.data.cpu().numpy(), n_bins=N, strategy=strategy)
	#over_cal  = calibration_curve(yover.data.cpu().numpy(), probover.data.cpu().numpy(), n_bins=N, strategy=strategy)
	#spec_cals.append(spec_cal)
	#over_cals.append(over_cal)
	# These 2 lines are required to create the single calibration curve comprised from all datapoints
	spec_cals.append([yspec.data.cpu().numpy(), probspec.data.cpu().numpy()])
	over_cals.append([yover.data.cpu().numpy(), probover.data.cpu().numpy()])

	# Compute AUC Scores
	AUCspec = roc_auc_score(yspec.data.cpu().numpy(), probspec.data.cpu().numpy())
	AUCover = roc_auc_score(yover.data.cpu().numpy(), probover.data.cpu().numpy())


	# Plot k-th Model Performance (ROC)
	plt.figure(0)
	plt.clf()
	plt.plot(fpr_spec, tpr_spec, label = f'Specific = {AUCspec:.2f}')#, alpha = 0.2)
	plt.plot(fpr_over, tpr_over, label =  f'Overall = {AUCover:.2f}')#, alpha = 0.2)
	if k == 0:
		plt.plot([0,1],[0,1],'k-', alpha = 0.2)
	plt.grid(alpha=0.2)
	plt.xlim(0,1)
	plt.ylim(0,1)
	plt.xlabel('False Positive Rate')
	plt.ylabel('True Positive Rate')
	plt.legend(title='AUC')
	plt.savefig(f'ROCs/ROC_{k}.png')


	if (k+1) == K:
		# Plot Mean Model Performance (ROC)
		mean_fpr = np.linspace(0,1,10000)

		mean_spec_roc = []

		for ROC in spec_rocs:
			 mean_spec_tpr = interp(mean_fpr, ROC[0], ROC[1])
			 mean_spec_tpr[0] = 0.0
			 mean_spec_roc.append(mean_spec_tpr)

		mean_spec_roc = np.array(mean_spec_roc)

		std_tpr = np.std(mean_spec_roc, axis=0)
		mean_spec_roc = np.mean(mean_spec_roc, axis=0)
		mean_spec_roc[-1] = 1.0
		spec_top = np.minimum(mean_spec_roc + std_tpr, 1)
		spec_bot = np.maximum(mean_spec_roc - std_tpr, 0)
		
		spec_mean_auc = trapz(mean_spec_roc, mean_fpr)
		aucs = [trapz(ROC[1], ROC[0]) for ROC in spec_rocs]
		spec_std_auc = np.std(aucs)

		spec_fig, spec_ax = plt.subplots()
		

		spec_ax.plot(mean_fpr, mean_spec_roc, color='C0', label=rf'Mean ROC (AUC = {spec_mean_auc:.2f} $\pm$ {spec_std_auc:.2f})')
		for ROC in spec_rocs:
			spec_ax.plot(ROC[0], ROC[1], alpha=0.2)
		spec_ax.fill_between(mean_fpr, spec_bot, spec_top, color='C0', alpha=.1, label=r'$\pm$ 1 std')
		spec_ax.grid(alpha=0.2)
		spec_ax.legend()
		spec_ax.set_xlabel('False Positive Rate')
		spec_ax.set_ylabel('True Positive Rate')
		spec_ax.title.set_text("Disease Specific Survival Model")
		plt.savefig('DSS_Mean_ROC.png', dpi = 230)



		mean_over_roc = []

		for ROC in over_rocs:
			 mean_over_tpr = interp(mean_fpr, ROC[0], ROC[1])
			 mean_over_tpr[0] = 0.0
			 mean_over_roc.append(mean_over_tpr)

		mean_over_roc = np.array(mean_over_roc)
		std_tpr = np.std(mean_over_roc, axis=0)
		mean_over_roc = np.mean(mean_over_roc, axis=0)
		mean_over_roc[-1] = 1.0
		over_top = np.minimum(mean_over_roc + std_tpr, 1) 
		over_bot = np.maximum(mean_over_roc - std_tpr, 0) 

		
		over_mean_auc = trapz(mean_over_roc, mean_fpr)
		aucs = [trapz(ROC[1], ROC[0]) for ROC in over_rocs]  # spread of the overall-survival fold AUCs
		over_std_auc = np.std(aucs)

		over_fig, over_ax = plt.subplots()
		over_ax.plot(mean_fpr, mean_over_roc, color='C0', label=rf'Mean ROC (AUC = {over_mean_auc:.2f} $\pm$ {over_std_auc:.2f})')
		for ROC in over_rocs:
			over_ax.plot(ROC[0], ROC[1], alpha=0.2)
		over_ax.fill_between(mean_fpr, over_bot, over_top, color='C0', alpha=.1, label=r'$\pm$ 1 std')
		over_ax.grid(alpha=0.2)
		over_ax.legend()
		over_ax.set_xlabel('False Positive Rate')
		over_ax.set_ylabel('True Positive Rate')
		over_ax.title.set_text("Overall Survival Model")
		plt.savefig('Overall_Mean_ROC.png', dpi = 230)

		both_fig, both_ax = plt.subplots()
		both_ax.plot(mean_fpr, mean_spec_roc, color='C0', label=rf'DSS AUC = {spec_mean_auc:.2f} $\pm$ {spec_std_auc:.2f}')
		both_ax.fill_between(mean_fpr, spec_bot, spec_top, color='C0', alpha=.1)#, label=f'$\pm$ 1 DSS std')
		both_ax.plot(mean_fpr, mean_over_roc, color='C1', label=rf'OS AUC = {over_mean_auc:.2f} $\pm$ {over_std_auc:.2f}')
		both_ax.fill_between(mean_fpr, over_bot, over_top, color='C1', alpha=.1)# label=f'$\pm$ 1 OVR std')
		both_ax.grid(alpha=0.2)
		both_ax.legend(loc=4)
		both_ax.set_xlabel('False Positive Rate')
		both_ax.set_ylabel('True Positive Rate')
		both_ax.title.set_text("Receiver Operating Characteristic Curve")
		plt.savefig('Both_Mean_ROC.png', dpi = 230)
		# Calibration Plot
		both_calfig, both_calax = plt.subplots()

		'''
		# This creates K calibration plots, and computes the mean (interpolated) with a +/- 1 std region

		both_calfig, both_calax = plt.subplots()
		mean_spec_cal = []
		mean_over_cal = []
		for CAL in spec_cals:
			 interp_spec_cal = interp(mean_fpr, CAL[1], CAL[0])
			 mean_spec_cal.append(interp_spec_cal)

		std_spec_cal = np.std(np.array(mean_spec_cal), axis=0)
		mean_spec_cal = np.mean(np.array(mean_spec_cal), axis=0)
		spec_top = np.minimum(mean_spec_cal + std_spec_cal, 1)
		spec_bot = np.maximum(mean_spec_cal - std_spec_cal, 0)
		spec_mbs = [np.polyfit(CAL[1], CAL[0], 1) for CAL in spec_cals]
		spec_ms = [m[0] for m in spec_mbs]
		spec_bs = [b[1] for b in spec_mbs]
		spec_m_std = np.std(spec_ms)
		spec_b_std = np.std(spec_bs)
		spec_m_mean = np.mean(spec_ms)
		spec_b_mean = np.mean(spec_bs)

		both_calax.plot(mean_fpr, mean_spec_cal, label = 'Specific', color = 'C0')
		both_calax.fill_between(mean_fpr, spec_bot, spec_top, color='C0', alpha=.1)#, label=f'$\pm$ 1 DSS std')

		for CAL in over_cals:
			 interp_over_cal = interp(mean_fpr, CAL[1], CAL[0])
			 mean_over_cal.append(interp_over_cal)

		std_over_cal = np.std(np.array(mean_over_cal), axis=0)
		mean_over_cal = np.mean(np.array(mean_over_cal), axis=0)
		over_top = np.minimum(mean_over_cal + std_over_cal, 1)
		over_bot = np.maximum(mean_over_cal - std_over_cal, 0)
		over_mbs = [np.polyfit(CAL[1], CAL[0], 1) for CAL in over_cals]
		over_ms = [m[0] for m in over_mbs]
		over_bs = [b[1] for b in over_mbs]
		over_m_std = np.std(over_ms)
		over_b_std = np.std(over_bs)
		over_m_mean = np.mean(over_ms)
		over_b_mean = np.mean(over_bs)

		print(np.abs(mean_over_cal-mean_spec_cal).mean())

		both_calax.plot(mean_fpr, mean_over_cal, label = 'Overall', color = 'C1')
		both_calax.fill_between(mean_fpr, over_bot, over_top, color='C1', alpha=.1)#, label=f'$\pm$ 1 DSS std')
		both_calax.plot([0, 1], [0, 1], 'r--')
		both_calax.grid(alpha=0.2)
		both_calax.legend(loc=4)
		both_calax.set_xlabel('Predicted Probability')
		both_calax.set_ylabel('Observed Population')
		both_calax.title.set_text("Calibration Curve")
		plt.savefig("BothCalibration.png", dpi=230)
		'''

		y_spec = np.concatenate([cal[0] for cal in spec_cals])
		p_spec = np.concatenate([cal[1] for cal in spec_cals])
		y_over = np.concatenate([cal[0] for cal in over_cals])
		p_over = np.concatenate([cal[1] for cal in over_cals])

		
		spec = calibration_curve(y_spec, p_spec, n_bins=N, strategy=strategy)
		over = calibration_curve(y_over, p_over, n_bins=N, strategy=strategy)
		spec_mb = np.polyfit(spec[1], spec[0], 1)
		over_mb = np.polyfit(over[1], over[0], 1)

		both_calax.plot(spec[1], spec[0], label=f'DSS, m = {spec_mb[0]:.2f}, b = {spec_mb[1]:.2f}', color='C0')
		both_calax.plot(over[1], over[0], label=f'OS, m = {over_mb[0]:.2f}, b = {over_mb[1]:.2f}', color='C1')
		both_calax.plot([0, 1], [0, 1], 'r--')
		both_calax.grid(alpha=0.2)
		both_calax.legend(loc=4)
		both_calax.set_xlabel('Predicted Probability')
		both_calax.set_ylabel('Observed Population')
		both_calax.title.set_text("Calibration Curve")
		plt.savefig("BothCalibration.png", dpi=230)
	print(f'\n\nGenerated Specific vs Overall ROC plot for test set #{k} using latest models in ./ROCs/\n\n')

	return spec_rocs, over_rocs, spec_cals, over_cals
pipelines = [[
    '{}-{}'.format(samplers[0][0], classifier[0]),
    make_pipeline(samplers[0][1], classifier[1])
] for classifier in classifiers]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

for name, pipeline in pipelines:
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    for train, test in cv.split(X, y):
        probas_ = pipeline.fit(X[train], y[train]).predict_proba(X[test])
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)  # per-fold AUC; computed but not used below

    mean_tpr /= cv.get_n_splits(X, y)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr,
             mean_tpr,
             linestyle='--',
             label='{} (area = {:.2f})'.format(name, mean_auc),
             lw=LW)

plt.plot([0, 1], [0, 1], linestyle='--', lw=LW, color='k', label='Luck')

# make nice plotting
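The snippet breaks off at the comment above; a plausible completion, purely an assumption about the styling that followed:

ax.set_xlim([-0.05, 1.05])
ax.set_ylim([-0.05, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend(loc='lower right')
plt.show()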
Example #60
def roc_curve_multiclass(y_true, y_pred, n_classes):
    from scipy import interp
    import matplotlib.pyplot as plt
    from itertools import cycle
    from sklearn.metrics import roc_curve, auc

    # Plot linewidth
    lw = 2
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Compute macro-average ROC curve and ROC area
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at this points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Plot all ROC curves
    plt.figure(1)
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=4)

    plt.plot(fpr["macro"],
             tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["macro"]),
             color='navy',
             linestyle=':',
             linewidth=4)

    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(i, roc_auc[i]))

    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(
        'Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()
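A self-contained smoke test for roc_curve_multiclass with made-up one-hot labels and scores (np is assumed at module scope, as the function body uses it):

import numpy as np

rng = np.random.RandomState(0)
n_samples, n_classes = 60, 3
labels = rng.randint(0, n_classes, size=n_samples)
y_true = np.eye(n_classes)[labels]                           # one-hot ground truth
y_pred = rng.dirichlet(np.ones(n_classes), size=n_samples)   # fake class probabilities
roc_curve_multiclass(y_true, y_pred, n_classes)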