def w_integral(s, W_ax, Y_ax):
    """The integral part of the Bellman equation."""
    function = lambda theta, eps, s: max(interp(s * (theta + eps), W_ax, Y_ax),
                                         interp(s * (theta + eps) - COST, W_ax, v_func)) \
        * aggPDF(theta) * idiPDF(eps)
    return dblquad(function, MIN_VAL_E, MAX_VAL_E,
                   lambda x: MIN_VAL_AG, lambda x: MAX_VAL_AG, args=(s,))
def __call__(self, x):
    if isinstance(x, list):
        y = scipy.interp(x, self['x'], self['y'])
    else:
        y = scipy.interp([x], self['x'], self['y'])
        y = y[0]
    return y
def e_r_read(self, waves):
    p = open('MaterialsDataFiles/' + self.material + '.txt', 'r')
    string = p.read()
    p.close()
    e_r = []
    w = []
    n = []
    k = []
    linecounter = 0
    for thisline in string.split('\n'):
        for x in thisline.split():
            if linecounter == 0 and len(x) > 0:
                w.append(float(x))
            if linecounter == 1 and len(x) > 0:
                n.append(float(x))
            if linecounter == 2 and len(x) > 0:
                k.append(float(x))
        linecounter += 1
    # interpolate n & k values
    n_new = sp.interp(waves, w, n)
    k_new = sp.interp(waves, w, k)
    e_r_new = []
    # calculate the complex epsilon from n & k for every wavelength
    for i in range(len(waves)):
        e_r_new.append(n_new[i]**2 - k_new[i]**2 + 2j*n_new[i]*k_new[i])
    e_r.append(waves)
    e_r.append(e_r_new)
    self.permittivity = e_r
def plot_allkfolds_ROC(timestamp, cv, fpr_arr, tpr_arr):
    sns.set(style="white", palette="muted", color_codes=True)
    mean_tpr = 0.0
    all_roc_auc = []
    bins_roc = np.linspace(0, 1, 300)
    with plt.style.context(('seaborn-muted')):
        fig, ax = plt.subplots(figsize=(10, 8))
        for i, (train, test) in enumerate(cv):
            # accumulate the per-fold TPR interpolated onto the common FPR bins
            mean_tpr += interp(bins_roc, fpr_arr[i], tpr_arr[i])
            mean_tpr[0] = 0.0
            roc_auc = metrics.auc(fpr_arr[i], tpr_arr[i])
            all_roc_auc.append(roc_auc)
            ax.plot(fpr_arr[i], tpr_arr[i], lw=1,
                    label='KFold %d (AUC = %0.2f)' % (i, roc_auc))
        ax.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Random')
        mean_tpr /= len(cv)
        mean_tpr[-1] = 1.0
        mean_auc = np.mean(all_roc_auc)
        ax.plot(bins_roc, mean_tpr, 'k--',
                label='Mean ROC (AUC = %0.2f)' % mean_auc, lw=2)
        ax.set_xlim([-0.05, 1.05])
        ax.set_ylim([-0.05, 1.05])
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title('Receiver Operating Characteristic')
        ax.legend(loc="lower right")
    plt.savefig('{}_roc.png'.format(timestamp))
    plt.close('all')
    return mean_auc
def calc_mean(self, intensity):
    """
    Calculate mean loss ratio and sigma based on the specified points on the
    curve (mean loss ratio plotted against intensity measure level).

    For a given intensity, mean loss and sigma are determined by linearly
    interpolating the points on the curve.  Note that sigma is calculated
    as cv * mean loss, since cv = sigma / mean loss.
    """
    mean_loss = interp(intensity, self.intensity_measure_level,
                       self.mean_loss)
    cv = interp(intensity, self.intensity_measure_level,
                self.coefficient_of_variation)
    # cv = sigma / mean
    sigma = cv * mean_loss
    return (mean_loss, sigma)
def test_calc_mean(self):
    vulnerability_set = self.from_xml()
    intensity_level = 9

    expected_w1timbermetal_mean = interp(intensity_level,
                                         [8.99, 9.05], [0.13, 0.15])
    expected_w1timbermetal_sigma = expected_w1timbermetal_mean * 0.3
    (w1timbermetal_mean,
     w1timbermetal_sigma) = vulnerability_set.calc_mean("W1TIMBERMETAL",
                                                        intensity_level)
    self.assertEqual(
        expected_w1timbermetal_mean,
        w1timbermetal_mean,
        self.msg % (expected_w1timbermetal_mean, w1timbermetal_mean),
    )
    self.assertEqual(
        expected_w1timbermetal_sigma,
        w1timbermetal_sigma,
        self.msg % (expected_w1timbermetal_sigma, w1timbermetal_sigma),
    )

    expected_w1bvmetal_mean = interp(intensity_level,
                                     [8.99, 9.05], [0.84, 0.85])
    expected_w1bvmetal_sigma = expected_w1bvmetal_mean * 0.3
    (w1bvmetal_mean,
     w1bvmetal_sigma) = vulnerability_set.calc_mean("W1BVMETAL",
                                                    intensity_level)
    self.assertEqual(expected_w1bvmetal_mean, w1bvmetal_mean,
                     self.msg % (expected_w1bvmetal_mean, w1bvmetal_mean))
    self.assertEqual(expected_w1bvmetal_sigma, w1bvmetal_sigma,
                     self.msg % (expected_w1bvmetal_sigma, w1bvmetal_sigma))
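# A minimal, self-contained sketch (not part of the original suite) of the linear
# interpolation that calc_mean relies on: the expected values in the test above are
# simply np.interp evaluated between two neighbouring curve points.
import numpy as np

intensity_level = 9.0
mean_loss = np.interp(intensity_level, [8.99, 9.05], [0.13, 0.15])  # ~0.1333
cv = 0.3                                                            # assumed constant cv
sigma = cv * mean_loss                                              # sigma = cv * mean loss
print(mean_loss, sigma)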
def __call__(self, z):
    """Parameters: z is a number, sequence or array.

    This method makes an instance f of LinInterp callable,
    so f(z) returns the interpolation value(s) at z.
    """
    if isinstance(z, (int, float)):
        return interp([z], self.X, self.Y)[0]
    else:
        return interp(z, self.X, self.Y)
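# Hypothetical usage sketch for the callable interpolator above.  The LinInterp
# class shown here (constructor and X/Y attributes) is an assumed stand-in, since
# only its __call__ method appears in this listing.
import numpy as np

class LinInterp(object):
    """Minimal stand-in: piecewise-linear interpolation on stored knots."""
    def __init__(self, X, Y):
        self.X, self.Y = X, Y

    def __call__(self, z):
        if isinstance(z, (int, float)):
            return np.interp([z], self.X, self.Y)[0]
        return np.interp(z, self.X, self.Y)

f = LinInterp([0.0, 1.0, 2.0], [0.0, 10.0, 0.0])
print(f(0.5))                   # scalar in -> scalar out (5.0)
print(f(np.array([0.5, 1.5])))  # array in -> array out ([5., 5.])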
def interpolate_data(data, interpolation_timestep_ms):
    '''Interpolates all axes of the data array such that samples occur at
    equidistant timestamps.'''
    samples_count = len(data[:, TIME_AXIS_INDEX])
    timestamps = np.arange(0.0, data[samples_count - 1, TIME_AXIS_INDEX],
                           interpolation_timestep_ms, dtype=np.float64)
    interx = scipy.interp(timestamps, data[:, 0], data[:, X_AXIS_INDEX])
    intery = scipy.interp(timestamps, data[:, 0], data[:, Y_AXIS_INDEX])
    interz = scipy.interp(timestamps, data[:, 0], data[:, Z_AXIS_INDEX])
    return np.array([timestamps, interx, intery, interz]).transpose()
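# Hypothetical usage sketch for interpolate_data above: resample an irregularly
# sampled (t, x, y, z) trace onto a 10 ms grid.  The axis-index constants are
# assumed to be module-level globals in the original code.
import numpy as np

TIME_AXIS_INDEX, X_AXIS_INDEX, Y_AXIS_INDEX, Z_AXIS_INDEX = 0, 1, 2, 3

t = np.array([0.0, 12.0, 19.0, 33.0, 47.0])   # irregular timestamps (ms)
xyz = np.random.rand(len(t), 3)               # fake sensor readings
data = np.column_stack([t, xyz])

resampled = interpolate_data(data, interpolation_timestep_ms=10.0)
print(resampled.shape)   # one row per 10 ms step; columns: t, x, y, z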
def get_water_level(costs_all, link, proba, climat):
    '''Extracts water levels. The 'proba' argument is how much the frequency is
    increased because of climate change or El Nino.'''
    water = DataFrame(columns=['return_period', 'water_level', 'proba'])
    for RP in [5, 10, 25, 50, 100, 250, 500, 1000]:
        col = "{}_RP{} (dm)".format(climat, RP)
        water.loc[len(water), :] = [RP / proba,
                                    costs_all.ix[(costs_all.scenarioID == str(link)) &
                                                 (costs_all.partial_or_full == "full") &
                                                 (costs_all.improved_2nd == 0), col].values[0],
                                    proba]
    inter = water.copy()
    # s = InterpolatedUnivariateSpline(water['return_period'], water['water_level'], k=1)
    water.loc[len(water), :] = [500, interp([500], inter['return_period'].astype(float),
                                            inter['water_level'].astype(float))[0], proba]
    water.loc[len(water), :] = [1000, interp([1000], inter['return_period'].astype(float),
                                             inter['water_level'].astype(float))[0], proba]
    return water
def ROC(scores):
    # Generate an ROC curve for each fold, ordered by increasing threshold
    roc = scores.groupby('user').apply(
        lambda x: pd.DataFrame(np.c_[roc_curve(x['genuine'], x['score'])][::-1],
                               columns=['far', 'frr', 'threshold']))

    # Interpolate to get the same threshold values in each fold
    thresholds = np.sort(roc['threshold'].unique())
    roc = roc.groupby(level='user').apply(
        lambda x: pd.DataFrame(np.c_[thresholds,
                                     interp(thresholds, x['threshold'], x['far']),
                                     interp(thresholds, x['threshold'], x['frr'])],
                               columns=['threshold', 'far', 'frr']))

    roc = roc.reset_index(level=1, drop=True).reset_index()
    return roc
def coordinate(self):
    """
    Predicted position of the target at current_time.

    @rtype: SkyCoord
    """
    ra = scipy.interp(self.current_time.jd, self.ephemeris['Time'].jd,
                      self.ephemeris['R.A._(ICRF/J2000.0)']) * units.degree
    dec = scipy.interp(self.current_time.jd, self.ephemeris['Time'].jd,
                       self.ephemeris['DEC_(ICRF/J2000.0)']) * units.degree
    return SkyCoord(ra, dec)
def tophatfold(lam, flux, FWHM=0.035):
    lammin = min(lam)
    lammax = max(lam)
    dlambda = FWHM / 17.
    interlam = np.arange(lammin, lammax, dlambda)
    interflux = interp(interlam, lam, flux)
    # convolve flux array with a top-hat (uniform) filter of width FWHM
    fold = sp.ndimage.filters.uniform_filter(interflux, size=17)
    # interpolate back to original grid
    fluxfold = interp(lam, interlam, fold)
    return fluxfold
def execute(self):
    # print 'in CPCT_Interpolate'
    wind_speed_ax = np.cos(self.yaw*np.pi/180.0)**(self.pP/3.0)*self.wind_speed_hub

    # use interpolation on precalculated CP-CT curve
    wind_speed_ax = np.maximum(wind_speed_ax, self.windSpeedToCPCT.wind_speed[0])
    wind_speed_ax = np.minimum(wind_speed_ax, self.windSpeedToCPCT.wind_speed[-1])
    self.CP = interp(wind_speed_ax, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP)
    self.CT = interp(wind_speed_ax, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT)

    # normalize on incoming wind speed to correct coefficients for yaw
    self.CP = self.CP * np.cos(self.yaw*np.pi/180.0)**self.pP
    self.CT = self.CT * np.cos(self.yaw*np.pi/180.0)**2
def populate_wing_sections(avl_wing,suave_wing): symm = avl_wing.symmetric sweep = avl_wing.sweep dihedral = avl_wing.dihedral span = suave_wing.spans.projected semispan = suave_wing.spans.projected * 0.5 * (2 - symm) origin = suave_wing.origin root_section = Section() root_section.tag = 'root_section' root_section.origin = origin root_section.chord = suave_wing.chords.root root_section.twist = suave_wing.twists.root tip_section = Section() tip_section.tag = 'tip_section' tip_section.chord = suave_wing.chords.tip tip_section.twist = suave_wing.twists.tip tip_section.origin = [origin[0]+semispan*np.tan(sweep),origin[1]+semispan,origin[2]+semispan*np.tan(dihedral)] if avl_wing.vertical: temp = tip_section.origin[2] tip_section.origin[2] = tip_section.origin[1] tip_section.origin[1] = temp avl_wing.append_section(root_section) avl_wing.append_section(tip_section) if suave_wing.control_surfaces: for ctrl in suave_wing.control_surfaces: num = 1 for section in ctrl.sections: semispan_fraction = (span/semispan) * section.origins.span_fraction s = Section() s.chord = scipy.interp(semispan_fraction,[0.,1.],[root_section.chord,tip_section.chord]) s.tag = '{0}_section{1}'.format(ctrl.tag,num) s.origin = section.origins.dimensional s.origin[0] = s.origin[0] - s.chord*section.origins.chord_fraction s.twist = scipy.interp(semispan_fraction,[0.,1.],[root_section.twist,tip_section.twist]) c = Control_Surface() c.tag = ctrl.tag c.x_hinge = 1. - section.chord_fraction c.sign_duplicate = ctrl.deflection_symmetry s.append_control_surface(c) avl_wing.append_section(s) num += 1 return avl_wing
def provideJ(self): #print 'in CPCT_Interpolate - provideJ' # standard central differencing # set step size for finite differencing h = 1e-6 # calculate upper and lower function values wind_speed_ax_high_yaw = np.cos((self.yaw+h)*np.pi/180.0)**(self.pP/3.0)*self.wind_speed_hub wind_speed_ax_low_yaw = np.cos((self.yaw-h)*np.pi/180.0)**(self.pP/3.0)*self.wind_speed_hub wind_speed_ax_high_wind = np.cos(self.yaw*np.pi/180.0)**(self.pP/3.0)*(self.wind_speed_hub+h) wind_speed_ax_low_wind = np.cos(self.yaw*np.pi/180.0)**(self.pP/3.0)*(self.wind_speed_hub-h) # use interpolation on precalculated CP-CT curve wind_speed_ax_high_yaw = np.maximum(wind_speed_ax_high_yaw, self.windSpeedToCPCT.wind_speed[0]) wind_speed_ax_low_yaw = np.maximum(wind_speed_ax_low_yaw, self.windSpeedToCPCT.wind_speed[0]) wind_speed_ax_high_wind = np.maximum(wind_speed_ax_high_wind, self.windSpeedToCPCT.wind_speed[0]) wind_speed_ax_low_wind = np.maximum(wind_speed_ax_low_wind, self.windSpeedToCPCT.wind_speed[0]) wind_speed_ax_high_yaw = np.minimum(wind_speed_ax_high_yaw, self.windSpeedToCPCT.wind_speed[-1]) wind_speed_ax_low_yaw = np.minimum(wind_speed_ax_low_yaw, self.windSpeedToCPCT.wind_speed[-1]) wind_speed_ax_high_wind = np.minimum(wind_speed_ax_high_wind, self.windSpeedToCPCT.wind_speed[-1]) wind_speed_ax_low_wind = np.minimum(wind_speed_ax_low_wind, self.windSpeedToCPCT.wind_speed[-1]) CP_high_yaw = interp(wind_speed_ax_high_yaw, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP) CP_low_yaw = interp(wind_speed_ax_low_yaw, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP) CP_high_wind = interp(wind_speed_ax_high_wind, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP) CP_low_wind = interp(wind_speed_ax_low_wind, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CP) CT_high_yaw = interp(wind_speed_ax_high_yaw, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT) CT_low_yaw = interp(wind_speed_ax_low_yaw, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT) CT_high_wind = interp(wind_speed_ax_high_wind, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT) CT_low_wind = interp(wind_speed_ax_low_wind, self.windSpeedToCPCT.wind_speed, self.windSpeedToCPCT.CT) # normalize on incoming wind speed to correct coefficients for yaw CP_high_yaw = CP_high_yaw * np.cos((self.yaw+h)*np.pi/180.0)**self.pP CP_low_yaw = CP_low_yaw * np.cos((self.yaw-h)*np.pi/180.0)**self.pP CP_high_wind = CP_high_wind * np.cos((self.yaw)*np.pi/180.0)**self.pP CP_low_wind = CP_low_wind * np.cos((self.yaw)*np.pi/180.0)**self.pP CT_high_yaw = CT_high_yaw * np.cos((self.yaw+h)*np.pi/180.0)**2 CT_low_yaw = CT_low_yaw * np.cos((self.yaw-h)*np.pi/180.0)**2 CT_high_wind = CT_high_wind * np.cos((self.yaw)*np.pi/180.0)**2 CT_low_wind = CT_low_wind * np.cos((self.yaw)*np.pi/180.0)**2 # compute derivative via central differencing and arrange in sub-matrices of the Jacobian dCP_dyaw = np.eye(self.nTurbines)*(CP_high_yaw-CP_low_yaw)/(2.0*h) dCP_dwind = np.eye(self.nTurbines)*(CP_high_wind-CP_low_wind)/(2.0*h) dCT_dyaw = np.eye(self.nTurbines)*(CT_high_yaw-CT_low_yaw)/(2.0*h) dCT_dwind = np.eye(self.nTurbines)*(CT_high_wind-CT_low_wind)/(2.0*h) # compile full Jacobian from sub-matrices dCP = np.hstack((dCP_dyaw, dCP_dwind)) dCT = np.hstack((dCT_dyaw, dCT_dwind)) J = np.vstack((dCP, dCT)) return J
def _read_iop_from_file(self, file_name): """ Generic IOP reader that interpolates the iop to the common wavelengths defined in the constructor returns: interpolated iop """ lg.info('Reading :: ' + file_name + ' :: and interpolating to ' + str(self.wavelengths)) if os.path.isfile(file_name): iop_reader = csv.reader(open(file_name), delimiter=',', quotechar='"') wave = scipy.float32(iop_reader.next()) iop = scipy.zeros_like(wave) for row in iop_reader: iop = scipy.vstack((iop, row)) iop = scipy.float32(iop[1:, :]) # drop the first row of zeros else: lg.exception('Problem reading file :: ' + file_name) raise IOError try: int_iop = scipy.zeros((iop.shape[0], self.wavelengths.shape[1])) for i_iter in range(0, iop.shape[0]): # r = scipy.interp(self.wavelengths[0, :], wave, iop[i_iter, :]) int_iop[i_iter, :] = scipy.interp(self.wavelengths, wave, iop[i_iter, :]) return int_iop except IOError: lg.exception('Error interpolating IOP to common wavelength') return -1
def bellman_operator(self, w, compute_policy=False):
    """
    The approximate Bellman operator, which computes and returns the
    updated value function Tw on the grid points.

    Parameters
    ==========
    w : a flat NumPy array with len(w) = len(grid)
        The vector w represents the value of the input function on the
        grid points.
    """
    # === Apply linear interpolation to w === #
    Aw = lambda x: interp(x, self.grid, w)

    if compute_policy:
        sigma = np.empty(len(w))

    # === set Tw[i] equal to max_c { u(c) + beta w(f(k_i) - c) } === #
    Tw = np.empty(len(w))
    for i, k in enumerate(self.grid):
        objective = lambda c: - self.u(c) - self.beta * Aw(self.f(k) - c)
        c_star = fminbound(objective, 1e-6, self.f(k))
        if compute_policy:
            # sigma[i] = argmax_c { u(c) + beta w(f(k_i) - c) }
            sigma[i] = c_star
        Tw[i] = - objective(c_star)

    if compute_policy:
        return Tw, sigma
    else:
        return Tw
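# A minimal, self-contained sketch (hypothetical, not from the original source) of
# how an interp-based Bellman operator like the one above is typically used: iterate
# T on a value function defined on grid points until it stops changing.  The toy
# GrowthModel below (log utility, Cobb-Douglas production) is an assumed example.
import numpy as np
from scipy.optimize import fminbound

class GrowthModel(object):
    """Toy optimal-growth model used only to exercise the operator."""
    def __init__(self, alpha=0.65, beta=0.95, grid_max=2.0, grid_size=120):
        self.alpha, self.beta = alpha, beta
        self.u = np.log
        self.f = lambda k: k ** alpha
        self.grid = np.linspace(1e-6, grid_max, grid_size)

    def bellman_operator(self, w):
        Aw = lambda x: np.interp(x, self.grid, w)
        Tw = np.empty_like(w)
        for i, k in enumerate(self.grid):
            objective = lambda c: - self.u(c) - self.beta * Aw(self.f(k) - c)
            c_star = fminbound(objective, 1e-6, self.f(k))
            Tw[i] = - objective(c_star)
        return Tw

gm = GrowthModel()
w = np.zeros(len(gm.grid))      # initial guess for the value function
for _ in range(50):             # fixed iteration cap for brevity
    w_new = gm.bellman_operator(w)
    if np.max(np.abs(w_new - w)) < 1e-4:
        break
    w = w_new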
def crossval_roc(X, y):
    cv = StratifiedKFold(y, n_folds=10)
    clf = RandomForestClassifier()

    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []
    for i, (train, test) in enumerate(cv):
        fitted = clf.fit(X[train], y[train])
        probas_ = fitted.predict_proba(X[test])
        scored_ = fitted.predict(X[test])
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        # roc_auc = auc(fpr, tpr)
        # note: roc_auc_score expects (y_true, y_score)
        roc_auc = roc_auc_score(y[test], scored_, average="micro")
        # plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    return plt.plot(mean_fpr, mean_tpr,
                    label='Mean ROC (area = %0.2f)' % mean_auc, lw=1)
def plot_roc_cv(classifier, X, y, cv): ''' cv = KFold(len(y),n_folds=5) ''' mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) all_tpr = [] for i, (train, test) in enumerate(cv): probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test]) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1]) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc)) plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck') mean_tpr /= len(cv) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic example') plt.legend(loc="lower right") plt.show()
def plot_roc_estimator(estimator, x, y): kf = KFold(len(y), n_folds=10, shuffle=True) y_prob = np.zeros((len(y), 2)) mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) for i, (train_index, test_index) in enumerate(kf): x_train, x_test = x[train_index], x[test_index] y_train = y[train_index] estimator.fit(x_train, y_train) y_prob[test_index] = estimator.predict_proba(x_test) fpr, tpr, thresholds = roc_curve(y[test_index], y_prob[test_index, 1]) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc)) mean_tpr /= len(kf) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2) plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Random') plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic') plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) plt.show()
def plot_roc_class(x, y, fit_class, **kwargs): kf = KFold(len(y), n_folds=10, shuffle=True) y_prob = np.zeros((len(y), 2)) mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) for i, (train_index, test_index) in enumerate(kf): x_train, x_test = x[train_index], x[test_index] y_train = y[train_index] clf = fit_class(**kwargs) clf.fit(x_train, y_train) # Predict probabilities, not classes y_prob[test_index] = clf.predict_proba(x_test) fpr, tpr, thresholds = roc_curve(y[test_index], y_prob[test_index, 1]) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc)) mean_tpr /= len(kf) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2) plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Random') plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic') plt.legend(loc="lower right") plt.show()
def draw(X, y, classifier):
    cv = StratifiedKFold(y, n_folds=6)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []

    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1,
                 label="ROC fold %d (area = %0.2f)" % (i, roc_auc))

    plt.plot([0, 1], [0, 1], "--", color=(0.6, 0.6, 0.6), label="Luck")

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, "k--",
             label="Mean ROC (area = %0.2f)" % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Receiver operating characteristic example")
    plt.legend(loc="lower right")
    plt.show()
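# Hypothetical, self-contained sketch of the fold-averaging pattern shared by several
# snippets above: per-fold TPRs are interpolated onto a common FPR grid with np.interp
# so they can be averaged point-wise.  Uses synthetic data and the current
# scikit-learn cross-validation API.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc

X, y = make_classification(n_samples=400, n_features=10, random_state=0)
clf = RandomForestClassifier(n_estimators=50, random_state=0)

mean_fpr = np.linspace(0, 1, 100)
tprs = []
for train, test in StratifiedKFold(n_splits=5).split(X, y):
    probas = clf.fit(X[train], y[train]).predict_proba(X[test])
    fpr, tpr, _ = roc_curve(y[test], probas[:, 1])
    tpr_interp = np.interp(mean_fpr, fpr, tpr)  # resample onto the common grid
    tpr_interp[0] = 0.0
    tprs.append(tpr_interp)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
print("mean AUC over folds: %.3f" % auc(mean_fpr, mean_tpr))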
def classify_only(X, Y, model): cv = cross_validation.StratifiedKFold(Y, n_folds=K_FOLDS) # print len(Y) mean_tpr = 0.0 mean_fpr = numpy.linspace(0, 1, 100) all_tpr = [] for i, (train, test) in enumerate(cv): probas_ = model.fit(X.values[train], Y.values[train]).predict_proba(X.values[test]) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(Y.values[test], probas_[:, 1]) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc)) plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck') mean_tpr /= len(cv) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic example: '+model.__class__.__name__) plt.legend(loc="lower right") plt.show() print "Plot done"
def bellman_operator(self, w, compute_policy=False): """ The approximate Bellman operator, which computes and returns the updated value function Tw on the grid points. Parameters ---------- w : array_like(float, ndim=1) The value of the input function on different grid points compute_policy : Boolean, optional(default=False) Whether or not to compute policy function """ # === Apply linear interpolation to w === # Aw = lambda x: interp(x, self.grid, w) if compute_policy: sigma = np.empty(len(w)) # == set Tw[i] equal to max_c { u(c) + beta w(f(k_i) - c)} == # Tw = np.empty(len(w)) for i, k in enumerate(self.grid): objective = lambda c: - self.u(c) - self.beta * Aw(self.f(k) - c) c_star = fminbound(objective, 1e-6, self.f(k)) if compute_policy: # sigma[i] = argmax_c { u(c) + beta w(f(k_i) - c)} sigma[i] = c_star Tw[i] = - objective(c_star) if compute_policy: return Tw, sigma else: return Tw
def draw_roc_curve(classifier, cv, X, y): mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) colors = cycle(['cyan', 'indigo', 'seagreen', 'yellow', 'blue', 'darkorange']) lw = 2 for i, (train, test) in enumerate(cv): probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test]) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1]) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc)) plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k', label='Luck') mean_tpr /= len(cv) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=lw) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic') plt.legend(loc="lower right") plt.show()
def calculate_roc(truth, predictions): lb_truth = label_binarize(truth.iloc[:, -1].astype(int), np.arange(n_classes)) lb_prediction = label_binarize(predictions.iloc[:, -1].astype(int), np.arange(n_classes)) # Compute ROC curve and ROC area for each class fpr = dict() tpr = dict() roc_auc = dict() for i in range(len(letter_set)): fpr[i], tpr[i], _ = roc_curve(lb_truth[:, i], lb_prediction[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = roc_curve(lb_truth.ravel(), lb_prediction.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in range(n_classes): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= n_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) return fpr, tpr, roc_auc
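# Minimal numeric sketch (hypothetical) of the macro-averaging step used above: ROC
# curves of individual classes are interpolated onto the union of their FPR points
# with np.interp, then averaged point-wise before computing the AUC.
import numpy as np
from sklearn.metrics import auc

fpr = {0: np.array([0.0, 0.2, 1.0]), 1: np.array([0.0, 0.5, 1.0])}
tpr = {0: np.array([0.0, 0.8, 1.0]), 1: np.array([0.0, 0.6, 1.0])}

all_fpr = np.unique(np.concatenate([fpr[i] for i in range(2)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(2):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= 2

print("macro-average AUC: %.3f" % auc(all_fpr, mean_tpr))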
def LogisticRegressionROC(Data,Label,TrueLabel, Color,legend, marker): Majority_Logistic = LogisticRegression() Majority_Logistic.verbose=0 N_folds=5 kf= cv.KFold(X.shape[0],n_folds=N_folds) mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) for i, (train, test) in enumerate(kf): probas_ = Majority_Logistic.fit(Data[train], Label[train]).predict_proba(Data[test]) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(TrueLabel[test], probas_[:, 1]) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 #roc_auc = auc(fpr, tpr) #plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc)) mean_tpr /= len(kf) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, marker ,color=Color,label='%s AUC:%0.2f' % (legend,mean_auc), lw=2) plt.xlim([-0.05,1.05]) plt.ylim([-0.05,1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('R.O.C. Curves for iono data') plt.legend(loc="lower right")
def predictForTarget(target): modTargets = numpy.array([(-1.0, 1.0)[x == target] for x in targets]) mean_tpr = 0.0 mean_fpr = numpy.linspace(0, 1, 100) for trainIndices, testIndices in cv: if algorithm.startswith("svm"): clf = sklearn.svm.SVC(C=1.0, kernel=algorithm.replace("svm", ""), gamma=0.0, shrinking=True, probability=True, tol=0.001, cache_size=200, class_weight='auto', verbose=False, max_iter=-1, random_state=0) trainData = data[trainIndices,] trainTargets = modTargets[trainIndices] testData = data[testIndices,] testTargets = modTargets[testIndices] model = clf.fit(trainData, trainTargets) probs = model.predict_proba(testData)[:,1] fpr, tpr, thresholds = sklearn.metrics.roc_curve(testTargets, probs) mean_tpr += scipy.interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = sklearn.metrics.auc(fpr, tpr) mean_tpr /= len(cv) mean_tpr[-1] = 1.0 return sklearn.metrics.auc(mean_fpr, mean_tpr)
def find_fingers3(self, img, prev_img): if img is None or prev_img is None: return [] crop_spec = [0, 0, img.width, img.height] scale_factor = 2 r1 = img.grayscale().crop(*crop_spec) r2 = prev_img.grayscale().crop(*crop_spec) # modified diff = (r2 - r1).binarize(40) edge_mask = diff.erode(5).dilate(5) - diff.erode(5) edge_mask = edge_mask.dilate(5) scaled = (diff.edges() & edge_mask).resize(r1.width / scale_factor) points = [] for x in range(scaled.width): points.append(scaled.edgeIntersections((x, 0), (x, scaled.height))[0]) points = [xy for xy in points if xy is not None] if not points: return [] xs = range(scaled.width) ys = scipy.interp(range(scaled.width), [a[0] for a in points], [a[1] for a in points]) peaks = scipy.signal.find_peaks_cwt(-ys, np.arange(7, 11)) if len(peaks) == 0: return [] positions = np.array(zip(peaks, np.array(ys)[peaks])) * scale_factor + np.array(crop_spec[:2]) return positions
def runClassification_CV(data_folder, cfg, classifier): print "Gather dataset" train_x, train_y = gatherAllData(data_folder, cfg) model = classifier[0] clf_name = classifier[1] #Report Cross-Validation Accuracy scores = cross_val_score(model, np.asarray(train_x), np.asarray(train_y), cv=10) print clf_name print "Avg. Accuracy: " + str(sum(scores) / float(len(scores))) cv = KFold(n_splits=10) tprs = [] aucs = [] mean_fpr = np.linspace(0, 1, 100) #Split the data in k-folds, perform classification, and report ROC i = 0 for train, test in cv.split(train_x, train_y): probas_ = model.fit( np.asarray(train_x)[train], np.asarray(train_y)[train]).predict_proba( np.asarray(train_x)[test]) # Compute ROC curve and area under the curve fpr, tpr, thresholds = roc_curve( np.asarray(train_y)[test], probas_[:, 1]) tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) aucs.append(roc_auc) #plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc)) i += 1 plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Random Guess', alpha=.8) mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) unblock70 = True unblock80 = True unblock90 = True unblock95 = True for n, i in enumerate(mean_tpr): if (i >= 0.7 and unblock70): print '70% TPR = ' + str(mean_fpr[n]) unblock70 = False if (i >= 0.8 and unblock80): print '80% TPR = ' + str(mean_fpr[n]) unblock80 = False if (i >= 0.9 and unblock90): print '90% TPR = ' + str(mean_fpr[n]) unblock90 = False if (i >= 0.95 and unblock95): print '95% TPR = ' + str(mean_fpr[n]) unblock95 = False #Figure properties fig = plt.figure() ax1 = fig.add_subplot(111) std_auc = np.std(aucs) plt.plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.2f $\pm$ %0.3f)' % (mean_auc, std_auc), lw=2, alpha=.8) #Compute Standard Deviation between folds std_tpr = np.std(tprs, axis=0) tprs_upper = np.minimum(mean_tpr + std_tpr, 1) tprs_lower = np.maximum(mean_tpr - std_tpr, 0) plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.3, label=r'$\pm$ ROC Std. Dev.') ax1.plot([0, 1], [0, 1], 'k--', lw=2, color='orange', label='Random Guess') ax1.grid(color='black', linestyle='dotted') plt.title('Receiver Operating Characteristic (ROC)') plt.xlabel('False Positive Rate', fontsize='x-large') plt.ylabel('True Positive Rate', fontsize='x-large') plt.legend(loc='lower right', fontsize='large') plt.setp(ax1.get_xticklabels(), fontsize=14) plt.setp(ax1.get_yticklabels(), fontsize=14) fig.savefig('xgBoost/' + "ROC_" + clf_name + "_" + cfg[1] + ".pdf") # save the figure to file plt.close(fig)
def model_evaluaiton(model, fig_prefix=''): """ 预测模型的好坏:多分类 :param model: :param fig_prefix: :return: """ model.load_weights(model_store_path) pre = model.predict(x_test) # 计算每一类的ROC Curve和AUC-ROC fpr = dict() tpr = dict() roc_auc = dict() for i in range(CLASS_NUMBER): fpr[i], tpr[i], thresholds_ = roc_curve(y_test[:, i], pre[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr['micro'], tpr['micro'], _ = roc_curve(y_test.ravel(), pre.ravel()) roc_auc['micro'] = auc(fpr['micro'], tpr['micro']) # Compute macro-average ROC curve and ROC area # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([fpr[i] for i in range(CLASS_NUMBER)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in range(CLASS_NUMBER): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= CLASS_NUMBER fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) # Plot all ROC curves lw = 2 fig = plt.figure() plt.plot(fpr["micro"], tpr["micro"], label='micro (area={0:0.2f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=lw) plt.plot(fpr["macro"], tpr["macro"], label='macro (area={0:0.2f})' ''.format(roc_auc["macro"]), color='blue', linestyle=':', linewidth=lw) colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'pink', 'chocolate', 'seagreen', 'mediumslateblue', 'orangered', 'slategray']) for i, color in zip(range(CLASS_NUMBER), colors): plt.plot(fpr[i], tpr[i], color=color, lw=lw, label='{0} (area={1:0.2f})' ''.format(label_strs[i], roc_auc[i])) plt.plot([0, 1], [0, 1], 'k--', lw=lw) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC curve of compiler family identification') plt.legend(loc="lower right") # plt.show() fig.savefig(fig_prefix + '#ROC-curve.eps') plt.close(fig) for i in range(len(pre)): max_value = max(pre[i]) for j in range(len(pre[i])): if max_value == pre[i][j]: pre[i][j] = 1 else: pre[i][j] = 0 # 生成分类评估报告 report_str = str(classification_report(y_test, pre, digits=4, target_names=label_strs)) with open(fig_prefix + '#classification_report.txt', 'w') as f: f.write(report_str) f.close() print(report_str)
def train_cross_validate(n_folds, data_dir, categories, image_size, num_epochs): # initialize stratifying k fold skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=SEED) # open pickle files # features = pickle.load(open("features.pickle","rb")) #already reshaped as numpy array # features = features/255.0 # labels = pickle.load(open("labels.pickle","rb")) # labels = np.array(labels) # img_names = pickle.load(open("img_names.pickle","rb")) # data frame to save values of loss and validation after each fold df = pd.DataFrame() #obtain images data = import_images(data_dir, categories, image_size) # features = data[0] # labels = data[1] print("Stored features and labels") # for roc plotting tprs = [] aucs = [] mean_fpr = np.linspace(0, 1, 100) # for index, (train_indices, val_indices) in enumerate(skf.split(features, labels)): # print("Training on fold " + str(index + 1) + "/" + str(n_folds)) # train_features = features[train_indices] # train_labels = labels[train_indices] # print("Training data obtained") # val_features = features[val_indices] # val_labels = labels[val_indices] # print("Validation data obtained") # train_labels, val_labels = labels[train_indices], labels[val_indices] for index in range(n_folds): boot = resample(data, replace=True, n_samples=len(data), random_state=index) booted_imgs = set([]) train_features = [] train_labels = [] for entry in boot: train_features.append(entry[0]) train_labels.append(entry[1]) booted_imgs.add(entry[2]) val_features = [] val_labels = [] for entry in data: if entry[2] not in booted_imgs: val_features.append(entry[0]) val_labels.append(entry[1]) #reshape into numpy array train_features = np.array(train_features).reshape( -1, image_size, image_size, 3) #3 bc three channels for RGB values train_labels = np.array(train_labels) val_features = np.array(val_features).reshape( -1, image_size, image_size, 3) #3 bc three channels for RGB values val_labels = np.array(val_labels) # Create new model each time print("Training on fold " + str(index + 1) + "/" + str(n_folds)) model = None model = build_model(image_size) print("Training model") es_callback = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True) history = model.fit(train_features, train_labels, batch_size=64, epochs=num_epochs, callbacks=[es_callback], validation_data=(val_features, val_labels)) # save values of loss and accuracy into df len_history = len(history.history['loss']) df = df.append([[ index + 1, history.history['loss'][len_history - 1], history.history['acc'][len_history - 1], history.history['val_loss'][len_history - 1], history.history['val_acc'][len_history - 1] ]]) # model_json = model.to_json() # with open("model.json", "w") as json_file : # json_file.write(model_json) # model.save_weights("model.h5") # print("Saved model to disk") model.save('saved_models/CNN_' + str(index + 1) + '.model') # Printing a graph showing the accuracy changes during the training phase print(history.history.keys()) plt.figure(1) plt.plot(history.history['acc']) plt.plot(history.history['val_acc']) plt.title('model accuracy') plt.ylabel('accuracy') plt.xlabel('epoch') plt.legend(['train', 'validation'], loc='upper left') plt.savefig('graphs/val_accuracy_' + str(index + 1) + '.png') # plt.show() plt.clf() plt.figure(2) plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.title('model loss') plt.ylabel('loss') plt.xlabel('epoch') plt.legend(['train', 'validation'], loc='upper left') plt.savefig('graphs/val_loss_' + str(index + 1) + '.png') # 
plt.show() plt.clf() # roc curve stuff probas_ = model.predict_proba(val_features) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(val_labels, probas_[:, 1]) tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) aucs.append(roc_auc) # Plots ROC for each individual fold: # plt.plot(fpr, tpr, lw=1, alpha=0.3,label='ROC fold %d (AUC = %0.2f)' % (index + 1, roc_auc)) # use the mean statistics to compare each model (that we train/test using 10-fold cv) mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) std_auc = np.std(aucs) # plot the mean ROC curve and display AUC (mean/st dev) plot_ROC_for_Kfold(mean_fpr, mean_tpr, mean_auc, std_auc) df = df.rename({0: 'Fold Number',\ 1: 'Training Loss',\ 2: 'Training Accuracy',\ 3: 'Validation Loss', \ 4: 'Validation Accuracy'}, axis='columns') df.to_csv(os.path.join('graphs', 'final_acc_loss.csv'), encoding='utf-8', index=False)
def eval_class_model(xtest, ytest, model, labels: str = "labels", pred_params: dict = {}): """generate predictions and validation stats pred_params are non-default, scikit-learn api prediction-function parameters. For example, a tree-type of model may have a tree depth limit for its prediction function. :param xtest: features array type Union(DataItem, DataFrame, np. Array) :param ytest: ground-truth labels Union(DataItem, DataFrame, Series, np. Array, List) :param model: estimated model :param labels: ('labels') labels in ytest is a pd.DataFrame or Series :param pred_params: (None) dict of predict function parameters """ if isinstance(ytest, (pd.DataFrame, pd.Series)): unique_labels = ytest[labels].unique() ytest = ytest.values elif isinstance(ytest, np.ndarray): unique_labels = np.unique(ytest) elif isinstance(ytest, list): unique_labels = set(ytest) n_classes = len(unique_labels) is_multiclass = True if n_classes > 2 else False # PROBS ypred = model.predict(xtest, **pred_params) if hasattr(model, "predict_proba"): yprob = model.predict_proba(xtest, **pred_params) else: # todo if decision fn... raise Exception("not implemented for this classifier") # todo - calibrate # outputs are some stats and some plots and... # should be option, some classifiers don't need, some do it already, many don't model_metrics = { "plots": [], # placeholder for plots "accuracy": float(metrics.accuracy_score(ytest, ypred)), "test-error-rate": np.sum(ytest != ypred) / ytest.shape[0], } # CONFUSION MATRIX gcf_clear(plt) cmd = metrics.plot_confusion_matrix(model, xtest, ytest, normalize="all", cmap=plt.cm.Blues) model_metrics["plots"].append( PlotArtifact("confusion-matrix", body=cmd.figure_)) if is_multiclass: # PRECISION-RECALL CURVES MICRO AVGED # binarize/hot-encode here since we look at each class lb = LabelBinarizer() ytest_b = lb.fit_transform(ytest) precision = dict() recall = dict() avg_prec = dict() for i in range(n_classes): precision[i], recall[i], _ = metrics.precision_recall_curve( ytest_b[:, i], yprob[:, i]) avg_prec[i] = metrics.average_precision_score( ytest_b[:, i], yprob[:, i]) precision["micro"], recall[ "micro"], _ = metrics.precision_recall_curve( ytest_b.ravel(), yprob.ravel()) avg_prec["micro"] = metrics.average_precision_score(ytest_b, yprob, average="micro") ap_micro = avg_prec["micro"] model_metrics.update({"precision-micro-avg-classes": ap_micro}) gcf_clear(plt) colors = cycle( ["navy", "turquoise", "darkorange", "cornflowerblue", "teal"]) plt.figure(figsize=(7, 8)) f_scores = np.linspace(0.2, 0.8, num=4) lines = [] labels = [] for f_score in f_scores: x = np.linspace(0.01, 1) y = f_score * x / (2 * x - f_score) (l, ) = plt.plot(x[y >= 0], y[y >= 0], color="gray", alpha=0.2) plt.annotate("f1={0:0.1f}".format(f_score), xy=(0.9, y[45] + 0.02)) lines.append(l) labels.append("iso-f1 curves") (l, ) = plt.plot(recall["micro"], precision["micro"], color="gold", lw=10) lines.append(l) labels.append( f"micro-average precision-recall (area = {ap_micro:0.2f})") for i, color in zip(range(n_classes), colors): (l, ) = plt.plot(recall[i], precision[i], color=color, lw=2) lines.append(l) labels.append( f"precision-recall for class {i} (area = {avg_prec[i]:0.2f})") fig = plt.gcf() fig.subplots_adjust(bottom=0.25) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel("recall") plt.ylabel("precision") plt.title("precision recall - multiclass") plt.legend(lines, labels, loc=(0, -0.38), prop=dict(size=10)) model_metrics["plots"].append( PlotArtifact("precision-recall-multiclass", body=plt.gcf())) # ROC 
CURVES # Compute ROC curve and ROC area for each class fpr = dict() tpr = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], _ = metrics.roc_curve(ytest_b[:, i], yprob[:, i]) roc_auc[i] = metrics.auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = metrics.roc_curve( ytest_b.ravel(), yprob.ravel()) roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"]) # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in range(n_classes): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= n_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"]) # Plot all ROC curves gcf_clear(plt) plt.figure() plt.plot( fpr["micro"], tpr["micro"], label="micro-average ROC curve (area = {0:0.2f})" "".format(roc_auc["micro"]), color="deeppink", linestyle=":", linewidth=4, ) plt.plot( fpr["macro"], tpr["macro"], label="macro-average ROC curve (area = {0:0.2f})" "".format(roc_auc["macro"]), color="navy", linestyle=":", linewidth=4, ) colors = cycle(["aqua", "darkorange", "cornflowerblue"]) for i, color in zip(range(n_classes), colors): plt.plot( fpr[i], tpr[i], color=color, lw=2, label="ROC curve of class {0} (area = {1:0.2f})" "".format(i, roc_auc[i]), ) plt.plot([0, 1], [0, 1], "k--", lw=2) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.title("receiver operating characteristic - multiclass") plt.legend(loc="lower right") model_metrics["plots"].append( PlotArtifact("roc-multiclass", body=plt.gcf())) # AUC multiclass model_metrics.update({ "auc-macro": metrics.roc_auc_score(ytest_b, yprob, multi_class="ovo", average="macro"), "auc-weighted": metrics.roc_auc_score(ytest_b, yprob, multi_class="ovo", average="weighted"), }) # others (todo - macro, micro...) model_metrics.update({ "f1-score": metrics.f1_score(ytest, ypred, average="macro"), "recall_score": metrics.recall_score(ytest, ypred, average="macro"), }) else: # binary yprob_pos = yprob[:, 1] model_metrics.update({ "rocauc": metrics.roc_auc_score(ytest, yprob_pos), "brier_score": metrics.brier_score_loss(ytest, yprob_pos, pos_label=ytest.max()), }) # precision-recall # ROC plot return model_metrics
def evaluation_report(test_data_dir, checkpoints_list, report_file, batch_size, input_shape, extract_features=False, feature_file=None, roc_file=None): pred, true_labels, classes = predict(test_data_dir, checkpoints_list[0], input_shape, batch_size) predictions = np.zeros((len(pred), len(checkpoints_list))) predictions[:, 0] = pred for j, c in enumerate(checkpoints_list[1:]): pred, true_labels, classes = predict(test_data_dir, c, input_shape, batch_size) predictions[:, j + 1] = pred print(predictions.shape) y_pred = stats.mode(predictions, axis=1) y_pred = y_pred[0] print(y_pred.shape) y_pred = y_pred.ravel() y_pred = y_pred.astype(int) true_labels = true_labels.astype(int) acc = accuracy_score(true_labels, y_pred) kappa_score = cohen_kappa_score(true_labels, y_pred, weights="quadratic") cnf = confusion_matrix(true_labels, y_pred) plt.figure() plot_confusion_matrix(cnf, classes=classes, normalize=True, title='Normalized confusion matrix') plt.show() y_pred = label_binarize(y_pred, classes=classes) true_labels = label_binarize(true_labels, classes=classes) print(true_labels.shape) print(y_pred.shape) num_classes = len(classes) fpr = dict() tpr = dict() roc_auc = dict() for i in range(num_classes): fpr[i], tpr[i], _ = roc_curve(true_labels[:, i], y_pred[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) fpr["micro"], tpr["micro"], _ = roc_curve(true_labels.ravel(), y_pred.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) all_fpr = np.unique(np.concatenate([fpr[i] for i in range(num_classes)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in range(num_classes): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= num_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) #np.savetxt('fpr_stage2.txt', fpr) #np.savetxt('tpr_stage2.txt', tpr) report = {} report['Accuracy'] = acc report['AUC1'] = roc_auc[0] report['AUC2'] = roc_auc[1] report['AUC3'] = roc_auc[2] report['AUC4'] = roc_auc[3] report['AUC_Macro'] = roc_auc["macro"] report['AUC_Micro'] = roc_auc["micro"] report['kappa'] = kappa_score #report['fpr'] = fpr #report['tpr'] = tpr with open(report_file, 'w') as fp: json.dump(report, fp) print('Model report. 
Report saved to: %s' % (report_file)) print('Accuracy: %.2f' % report['Accuracy']) print('AUC1: %.2f' % report['AUC1']) print('AUC2: %.2f' % report['AUC2']) print('AUC3: %.2f' % report['AUC3']) print('AUC4: %.2f' % report['AUC4']) print('AUC (micro): %.2f' % report['AUC_Micro']) print('AUC (macro): %.2f' % report['AUC_Macro']) print('Kappa score: %.2f' % report['kappa']) if extract_features: features_model = Model(model.inputs, model.layers[-2].output) features = features_model.predict_generator(test_generator, steps=test_steps) assert feature_file is not None np.savetxt(feature_file, features) if roc_file is not None: roc_title = roc_file.split('.') roc_title = roc_title[0] plt.figure() lw = 2 plt.figure() plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4) plt.plot(fpr["macro"], tpr["macro"], label='macro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["macro"]), color='navy', linestyle=':', linewidth=4) plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic \n (' + roc_title + ')') plt.legend(loc="lower right") #plt.show() plt.savefig(roc_file)
target = y_test fpr = dict() tpr = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], _ = roc_curve(target[:, i], y_score[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) fpr["micro"], tpr["micro"], _ = roc_curve(target.ravel(), y_score.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)])) mean_tpr = np.zeros_like(all_fpr) for i in range(n_classes): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) mean_tpr /= n_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) # Plot all ROC curves plt.figure() plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle=':',
def ROC_PRC(outtl, pdx, path, name, fdict, dm, accur, pmd): if pmd == 'immune': rdd = 4 else: rdd = 2 if rdd > 2: # Compute ROC and PRC curve and ROC and PRC area for each class fpr = dict() tpr = dict() roc_auc = dict() # PRC # For each class precision = dict() recall = dict() average_precision = dict() microy = [] microscore = [] for i in range(rdd): fpr[i], tpr[i], _ = sklearn.metrics.roc_curve( np.asarray((outtl.iloc[:, 0].values == int(i)).astype('uint8')), np.asarray(pdx[:, i]).ravel()) try: roc_auc[i] = sklearn.metrics.roc_auc_score( np.asarray( (outtl.iloc[:, 0].values == int(i)).astype('uint8')), np.asarray(pdx[:, i]).ravel()) except ValueError: roc_auc[i] = np.nan microy.extend( np.asarray((outtl.iloc[:, 0].values == int(i)).astype('uint8'))) microscore.extend(np.asarray(pdx[:, i]).ravel()) precision[i], recall[i], _ = \ sklearn.metrics.precision_recall_curve(np.asarray((outtl.iloc[:, 0].values == int(i)).astype('uint8')), np.asarray(pdx[:, i]).ravel()) try: average_precision[i] = \ sklearn.metrics.average_precision_score(np.asarray((outtl.iloc[:, 0].values == int(i)).astype('uint8')), np.asarray(pdx[:, i]).ravel()) except ValueError: average_precision[i] = np.nan # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = sklearn.metrics.roc_curve( np.asarray(microy).ravel(), np.asarray(microscore).ravel()) roc_auc["micro"] = sklearn.metrics.auc(fpr["micro"], tpr["micro"]) # A "micro-average": quantifying score on all classes jointly precision["micro"], recall[ "micro"], _ = sklearn.metrics.precision_recall_curve( np.asarray(microy).ravel(), np.asarray(microscore).ravel()) average_precision["micro"] = sklearn.metrics.average_precision_score( np.asarray(microy).ravel(), np.asarray(microscore).ravel(), average="micro") # Compute macro-average ROC curve and ROC area # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([fpr[i] for i in range(rdd)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in range(rdd): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= rdd fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = sklearn.metrics.auc(fpr["macro"], tpr["macro"]) # Plot all ROC curves plt.figure() plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.5f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4) plt.plot(fpr["macro"], tpr["macro"], label='macro-average ROC curve (area = {0:0.5f})' ''.format(roc_auc["macro"]), color='navy', linestyle=':', linewidth=4) colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'red', 'blue']) for i, color in zip(range(rdd), colors): plt.plot(fpr[i], tpr[i], color=color, lw=2, label='ROC curve of {0} (area = {1:0.5f})'.format( fdict[i], roc_auc[i])) print('{0} {1} AUC of {2} = {3:0.5f}'.format( name, dm, fdict[i], roc_auc[i])) plt.plot([0, 1], [0, 1], 'k--', lw=2) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC of {}'.format(name)) plt.legend(loc="lower right") plt.savefig("../Results/{}/out/{}_{}_ROC.png".format(path, name, dm)) print( '{0} Average precision score, micro-averaged over all classes: {1:0.5f}' .format(name, average_precision["micro"])) # Plot all PRC curves colors = cycle([ 'navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal', 'red', 'blue' ]) plt.figure(figsize=(7, 9)) f_scores = np.linspace(0.2, 0.8, num=4) lines = [] labels = [] for f_score in f_scores: x 
= np.linspace(0.01, 1) y = f_score * x / (2 * x - f_score) l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2) plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02)) lines.append(l) labels.append('iso-f1 curves') l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2) lines.append(l) labels.append('micro-average Precision-recall (area = {0:0.5f})' ''.format(average_precision["micro"])) for i, color in zip(range(rdd), colors): l, = plt.plot(recall[i], precision[i], color=color, lw=2) lines.append(l) labels.append('Precision-recall for {0} (area = {1:0.5f})'.format( fdict[i], average_precision[i])) print('{0} {1} Average Precision of {2} = {3:0.5f}'.format( name, dm, fdict[i], average_precision[i])) fig = plt.gcf() fig.subplots_adjust(bottom=0.25) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('{} Precision-Recall curve: Average Accu={}'.format( name, accur)) plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=12)) plt.savefig("../Results/{}/out/{}_{}_PRC.png".format(path, name, dm)) else: tl = outtl.values[:, 0].ravel() y_score = np.asarray(pdx[:, 1]).ravel() auc = sklearn.metrics.roc_auc_score(tl, y_score) auc = round(auc, 5) print('{0} {1} AUC = {2:0.5f}'.format(name, dm, auc)) fpr, tpr, _ = sklearn.metrics.roc_curve(tl, y_score) plt.figure() lw = 2 plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.5f)' % auc) plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('{} ROC of {}'.format(name, pmd)) plt.legend(loc="lower right") plt.savefig("../Results/{}/out/{}_{}_ROC.png".format(path, name, dm)) average_precision = sklearn.metrics.average_precision_score( tl, y_score) print('{0} Average precision-recall score: {1:0.5f}'.format( name, average_precision)) plt.figure() f_scores = np.linspace(0.2, 0.8, num=4) for f_score in f_scores: x = np.linspace(0.01, 1) y = f_score * x / (2 * x - f_score) l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2) plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02)) precision, recall, _ = sklearn.metrics.precision_recall_curve( tl, y_score) plt.step(recall, precision, color='b', alpha=0.2, where='post') plt.fill_between(recall, precision, step='post', alpha=0.2, color='b') plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.title('{} {} PRC: AP={:0.5f}; Accu={}'.format( pmd, name, average_precision, accur)) plt.savefig("../Results/{}/out/{}_{}_PRC.png".format(path, name, dm))
def runANNSearch(data_folder,cfg): epochs = 100 #Gather the dataset #train_x, train_y are just regular samples train_x_t, train_y_t, test_x_t, test_y_t, num_input = gatherDataset_10times(data_folder, cfg, 0.9) #std_scale = preprocessing.StandardScaler().fit(train_x) #train_x = std_scale.transform(train_x) #test_x = std_scale.transform(test_x) max_auc = 0 max_batch_size = 0 max_hidden = 0 max_repr_size = 0 auc_report = [] n_hidden_report = [] repr_size_report = [] batch_sizes_report = [] best_config = [] max_auc = 0 learning_rates = [0.001] # [0.01, 0.001] # default is 0.001 batch_sizes = [32]#[8, 16, 32, 64, 128, 256] n_hiddens = [8, 16, 32, 64, 128, 256]#np.logspace(2, 10, base=2, num=12) #drop_inputs = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1] #drop_hiddens = [0.5, 0.6, 0.7, 0.8, 0.9, 1] repr_sizes = [4, 8, 16, 32, 64, 128, 256] #np.logspace(2, 10, base=2, num=12) #num 20 for learning_rate in learning_rates: for batch_size in batch_sizes: for n_hidden in n_hiddens: for repr_size in repr_sizes: if(repr_size <= n_hidden): #start = time.time() np.random.seed(1) graph_level_seed = 1 operation_level_seed = 1 tf.set_random_seed(graph_level_seed) random.seed(1) step_auc = [] mean_fpr = np.linspace(0, 1, 100) tprs = [] for n in range(0,10): #n_features, batch_size, n_hidden, drop_input, drop_hidden, repr_size ae = Autoencoder(num_input, batch_size, int(n_hidden), 1, 1, int(repr_size), learning_rate) train_x = train_x_t[n] train_y = train_y_t[n] test_x = test_x_t[n] test_y = test_y_t[n] for i in range(epochs): ae.run_epoch(train_x) #Reconstruct samples anomaly_errors = ae.reconstruction_errors(test_x[len(test_x)/2:]) normal_val_errors = ae.reconstruction_errors(test_x[:len(test_x)/2]) roc_y = [1 for _ in range(len(anomaly_errors))] + [0 for _ in range(len(normal_val_errors))] roc_score = np.concatenate([anomaly_errors, normal_val_errors]) # Compute ROC curve and ROC area for each class fpr, tpr, thresholds = roc_curve(roc_y, roc_score, drop_intermediate=True) tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) #print "Fold %i auc: %f" % (n, roc_auc) step_auc.append(roc_auc) avg_auc = sum(step_auc)/float(len(step_auc)) auc_report.append(avg_auc) """ n_hidden_report.append(int(n_hidden)) repr_size_report.append(int(repr_size)) batch_sizes_report.append(batch_size) """ mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) if(mean_auc > max_auc): max_auc = mean_auc best_config = [mean_fpr, mean_tpr, n_hidden, repr_size] #end = time.time() #print(end - start) print ("%f - Batch Size:%i, Learning Rate:%f, n_hidden:%i, repr_size:%i" % (avg_auc, batch_size, learning_rate, int(n_hidden), int(repr_size))) fig = plt.figure() ax1 = fig.add_subplot(111) plt.xlim([0, 1]) plt.ylim([0, 1]) plt.xlabel('False Positive Rate', fontsize=26) plt.ylabel('True Positive Rate', fontsize=26) ax1.plot([0, 1], [0, 1], 'k--', lw=2, color='orange', label = 'Random Guess') ax1.grid(color='black', linestyle='dotted') plt.setp(ax1.get_xticklabels(), fontsize=16) plt.setp(ax1.get_yticklabels(), fontsize=16) plt.plot(best_config[0], best_config[1], color='b', label=r'ROC (AUC = %0.2f)' % (max_auc), lw=2, alpha=.8) plt.legend(loc='lower right', fontsize='x-large') fig.savefig('Autoencoder/' + "Facet_Autoencoder_" + cfg[1] + ".pdf") # save the figure to file plt.close(fig) print "################\n# Summary" print "Max. AUC: %f, N_hidden: %i, Repr_Size: %i" % (max_auc, best_config[2],best_config[3]) print "Avg. AUC %f: " % (np.mean(auc_report,axis=0)) """
mdl_names.append(model_name) print('\nPerforming %s followed by %s for dataset %s\n' % (kernel, model_name, dataset)) # To count number of folds i = 0 for train, test in cv.split(X_kpca, y): probas_ = model.fit(X_kpca[train], y[train]).predict_proba(X_kpca[test]) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1]) tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) aucs.append(roc_auc) plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (i+1, roc_auc)) i += 1 plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Luck', alpha=.8) mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) std_auc = np.std(aucs)
def plot_ROC_curve(y_true, y_pred, labels, roc_path): """ Plots the ROC curve from prediction scores y_true.shape = [n_samples, n_classes] y_preds.shape = [n_samples, n_classes] labels.shape = [n_classes] """ n_classes = len(labels) # Compute ROC curve and ROC area for each class fpr = dict() tpr = dict() roc_auc = dict() for y, pred, label in zip(y_true.transpose(), y_pred.transpose(), labels): fpr[label], tpr[label], _ = roc_curve(y, pred) roc_auc[label] = auc(fpr[label], tpr[label]) # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([fpr[label] for label in labels])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for label in labels: mean_tpr += interp(all_fpr, fpr[label], tpr[label]) # Finally average it and compute AUC mean_tpr /= n_classes # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_pred.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) # Plot all ROC curves plt.figure() lw = 2 plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.3f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=2) plt.plot(fpr["macro"], tpr["macro"], label='macro-average ROC curve (area = {0:0.3f})' ''.format(roc_auc["macro"]), color='navy', linestyle=':', linewidth=2) if len(labels) == 4: colors = ['green', 'cornflowerblue', 'darkorange', 'darkred'] else: colors = ['green', 'cornflowerblue', 'darkred'] for label, color in zip(labels, cycle(colors)): plt.plot(fpr[label], tpr[label], color=color, lw=lw, label='ROC curve of {0} (area = {1:0.3f})' ''.format(label, roc_auc[label])) plt.plot([0, 1], [0, 1], 'k--', lw=lw) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC curve') plt.legend(loc="lower right") matplotlib.rcParams.update({'font.size': 14}) plt.savefig('%s.png' % roc_path, pad_inches=0, bbox_inches='tight')
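# Hedged usage sketch for plot_ROC_curve above: it expects score and label
# matrices of shape [n_samples, n_classes] plus a list of class names. The
# class names, toy labels, random stand-in scores and output path below are
# illustrative only.
import numpy as np
from sklearn.preprocessing import label_binarize

class_names = ['classA', 'classB', 'classC']              # hypothetical names
y_int = np.array([0, 2, 1, 1, 0, 2, 0, 1, 2, 0])          # toy integer labels
y_true_bin = label_binarize(y_int, classes=[0, 1, 2])
y_scores = np.random.RandomState(0).rand(len(y_int), 3)   # stand-in scores
plot_ROC_curve(y_true_bin, y_scores, class_names, roc_path='roc_example')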
def cross_val(self, X_train, y_train, cv=10, plot_auc=False, figsize=(16, 9)): """Computes stratified K-Fold cross-validation of the metrics provided to the class and returns a dictionnary with the results for each fold ; if the `plot_auc` argument is set to `True`, then cross-validation will only be run with AUC as the metric, and returns a plot with ROC curves for each fold Parameters ------ X_train : numpy.ndarray or pandas.DataFrame Data to train the cross-validation on y_train : numpy.ndarray or pandas.Series Labels for `X_train` cv : int Number of folds for cross-validation plot_auc : bool Indication to plot ROC curves for each fold figsize : int tuple Plot size """ if plot_auc == True: cv = StratifiedKFold(n_splits=cv) tprs = [] aucs = [] mean_fpr = np.linspace(0, 1, 100) i = 0 plt.figure(figsize=figsize) for train, test in cv.split(X_train, y_train): probas_ = self.model.fit(X_train[train], y_train[train]).predict_proba( X_train[test]) fpr, tpr, thresholds = roc_curve(y_train[test], probas_[:, 1]) tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) aucs.append(roc_auc) plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc)) i += 1 plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8) mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) std_auc = np.std(aucs) plt.plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.3f $\pm$ %0.3f)' % (mean_auc, std_auc), lw=2, alpha=.8) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC curve') plt.legend(loc="lower right") else: self.cv_scores_ = {} for metric in self.metrics_: if metric == "precision": self.cv_scores_[metric] = cross_val_score( self.model, X_train, y_train, cv=cv, scoring=self.custom_precision_score) elif metric == "recall": self.cv_scores_[metric] = cross_val_score( self.model, X_train, y_train, cv=cv, scoring=self.custom_recall_score) elif metric == "accuracy": self.cv_scores_[metric] = cross_val_score( self.model, X_train, y_train, cv=cv, scoring=self.custom_accuracy_score) elif metric == "f1_score": self.cv_scores_[metric] = cross_val_score( self.model, X_train, y_train, cv=cv, scoring=self.custom_f1_score) else: self.cv_scores_[metric] = cross_val_score(self.model, X_train, y_train, cv=cv, scoring=metric) return self.cv_scores_
def cvROC(dataset): X = dataset.data y = dataset.target X, y = X[y != 2], y[y != 2] n_samples, n_features = X.shape # Add noisy features random_state = numpy.random.RandomState(0) X = numpy.c_[X, random_state.randn(n_samples, 200 * n_features)] cv = StratifiedKFold(n_splits=10) classifier = svm.SVC(kernel='linear', probability=True, random_state=random_state) tprs = [] aucs = [] mean_fpr = numpy.linspace(0, 1, 100) i = 0 for train, test in cv.split(X, y): probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test]) fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1]) tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) aucs.append(roc_auc) plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc)) i += 1 plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8) mean_tpr = numpy.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) std_auc = numpy.std(aucs) plt.plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc), lw=2, alpha=.8) std_tpr = numpy.std(tprs, axis=0) tprs_upper = numpy.minimum(mean_tpr + std_tpr, 1) tprs_lower = numpy.maximum(mean_tpr - std_tpr, 0) plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2, label=r'$\pm$ 1 std. dev.') plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic example') plt.legend(loc="lower right") plt.show()
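# cvROC above mirrors the scikit-learn cross-validated ROC example: it keeps
# only two of the target classes (y != 2) and pads the features with noise.
# A minimal call, assuming the names used inside the function (numpy, svm,
# StratifiedKFold, roc_curve, auc, plt) are already imported at module level:
from sklearn import datasets

cvROC(datasets.load_iris())  # iris has 3 classes; class 2 is filtered out inside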
def draw_roc( Y, Y_score): # Make binarized multi classes labels #Y = label_binarize(Y, classes = [0,1,2]) #print(Y.shape) n_classes = Y.shape[1] # Compute ROC curve and ROC area for each class fpr = dict() tpr = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], _ = roc_curve(Y[:, i], Y_score[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = roc_curve(Y.ravel(), Y_score.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) # Compute macro-average ROC curve and ROC area # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in range(n_classes): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= n_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) lw = 4 plt.rcParams.update({'font.size': 15}) plt.figure(figsize=(10,7)) plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=lw) plt.plot(fpr["macro"], tpr["macro"], label='macro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["macro"]), color='navy', linestyle=':', linewidth=lw) colors = itertools.cycle(['aqua', 'darkorange', 'cornflowerblue', 'darkred']) for i, color in zip(range(n_classes), colors): plt.plot(fpr[i], tpr[i], color=color, lw=lw, label='ROC curve of class {0} (area = {1:0.2f})' ''.format(i, roc_auc[i])) plt.plot([0, 1], [0, 1], 'k--', lw=lw) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Some extension of Receiver operating characteristic to multi-class') plt.legend(loc="lower right") plt.show()
def five_fold_roc(spliting, all_slices, subtype, random_flag=True, clf=GB()):
    '''Randomised five-fold grouped cross-validation.'''
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    # all_tpr = []
    residual = []
    performance = [0] * 8
    m = 5
    for i, split in enumerate(spliting):
        if random_flag:
            # Random assignment of samples to the folds
            tr_paths, te_paths = get_five_data(split, all_slices)
        else:
            # Group the folds by fixed subject names
            tr_paths, te_paths = data_by_name(all_slices, split[0]), data_by_name(
                all_slices, split[1])
        tr_slice, tr_data, tr_label, tr_area = read_data(tr_paths, num_=116)
        te_slice, te_data, te_label, te_area = read_data(te_paths, num_=116)
        # classify() returns: trainacc, testacc, tr_acc, te_acc, tr_recall,
        # te_recall, tr_speci, te_speci, prb_2, l2
        result = \
            classify(tr_data, te_data, tr_label, te_label, tr_slice, te_slice,
                     tr_area, te_area, clf=clf)  # use the classifier passed as an argument
        fpr, tpr, thresholds = roc_curve(result[-1], result[-2])
        residual.append(comput_residual(result[-1], result[-2]))
        mean_tpr += interp(mean_fpr, fpr, tpr)  # interpolate this fold's tpr onto the mean_fpr grid
        mean_tpr[0] = 0.0  # force the curve to start at 0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1,
                 label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
        performance[0] += result[0]
        performance[1] += result[1]
        performance[2] += result[2]
        performance[3] += result[3]
        performance[4] += result[4]
        performance[5] += result[5]
        performance[6] += result[6]
        performance[7] += result[7]
    print('train/slice', performance[0] / m)
    print('test/slice', performance[1] / m)
    print('train/person', performance[2] / m)
    print('test/person', performance[3] / m)
    print('train/recall', performance[4] / m)
    print('test/recall', performance[5] / m)
    print('train/speci', performance[6] / m)
    print('test/speci', performance[7] / m)
    print(np.mean(residual, axis=0))
    mean_tpr /= 5  # average the tpr values accumulated at the 100 mean_fpr grid points
    mean_tpr[-1] = 1.0  # the last point of the curve is (1, 1)
    mean_auc = auc(mean_fpr, mean_tpr)  # mean AUC over the folds
    # return mean_fpr, mean_tpr, mean_auc
    # Diagonal chance line
    # plt.subplot(subplt)
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    plt.plot(mean_fpr, mean_tpr, 'k--',
             label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('{} - Receiver operating characteristic'.format(subtype))
    plt.legend(loc="lower right")
    plt.show()
def graphs_ROC(): #data_random,resultado_r,valid_set,valid_results): a = opt.getcwd() #me da directorio actual data = a + "/Archivos_training/Data.txt" resultado = a + "/Archivos_training/resultados.txt" clasificador = a + "/Archivos_training/clf_SVM.pkl" data_random = D.loadtxt(data) print("\tData cargada") resultado_r = D.loadtxt(resultado) print("\tResultados cargados") pkl_file = open(clasificador, 'rb') clf = pick.load(pkl_file) print("\tClasificador cargado") print("\tEn proceso ...") resultado_r = label_binarize( resultado_r, classes=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130]) n_classes = resultado_r.shape[1] X_train, X_test, y_train, y_test = train_test_split(data_random, resultado_r, test_size=0.4, random_state=0) classifier = OneVsRestClassifier(clf) #,random_state=random_state)) y_score = classifier.fit(X_train, y_train).decision_function(X_test) fpr = dict() tpr = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) lw = 2 # Compute macro-average ROC curve and ROC area # First aggregate all false positive rates all_fpr = D.unique(D.concatenate([fpr[i] for i in range(n_classes)])) # Then interpolate all ROC curves at this points mean_tpr = D.zeros_like(all_fpr) for i in range(n_classes): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= n_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) #-------------------------------------------------------------------------- # Plot all ROC curves plt.figure() plt.rc('font', size=8) for k in range(3): plt.subplot(2, 2, k + 1) colors = cycle(['r', 'darkorange', 'b', 'y']) for i, color in zip(range(3), colors): plt.plot(fpr[i + k * 3], tpr[i + k * 3], color=color, lw=lw, label='Curva ROC clase {0} (area = {1:0.2f})' ''.format(i + k * 3 + 1, roc_auc[i + k * 3])) plt.plot([0, 1], [0, 1], 'k--', lw=lw) plt.xlim([-0.05, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Rata de Falsos Positivos') plt.ylabel('Rata de Aciertos Positivos') titulo = "Graficas ROC de la clase " + str( k * 3 + 1) + " a la clase " + str(k * 3 + 3) plt.title(titulo) plt.legend(loc="lower right") #------------------------------------------------------------------------- plt.subplot(2, 2, 4) colors = cycle(['r', 'darkorange', 'b', 'y']) for i, color in zip(range(4), colors): plt.plot(fpr[i + 9], tpr[i + 9], color=color, lw=lw, label='Curva ROC clase {0} (area = {1:0.2f})' ''.format(i + 10, roc_auc[i + 9])) plt.plot([0, 1], [0, 1], 'k--', lw=lw) plt.xlim([-0.05, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Rata de Falsos Positivos') plt.ylabel('Rata de Aciertos Positivos') titulo = "Graficas ROC de la clase " + str(10) + " a la clase " + str(13) plt.title(titulo) plt.legend(loc="lower right") #---------------------------------------------------------------------- plt.figure() plt.rc('font', size=8) plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle='-', linewidth=4) plt.plot(fpr["macro"], tpr["macro"], label='macro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["macro"]), color='navy', linestyle='-', linewidth=4) plt.xlim([-0.05, 1.0]) plt.ylim([0.0, 1.05]) titulo = "Graficas ROC Promedio, minima y maxima" plt.title(titulo) plt.legend(loc="lower right") plt.show()
def evaluate(self, y_true, y_pred_prob):
    """
    Evaluate model performance by computing the KS statistic, the AUC and the
    confusion matrix, and search for the best cut-off threshold.
    :param y_true: true target
    :param y_pred_prob: predicted probability of the positive class
    :return:
    """
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    fpr, tpr, thresholds = roc_curve(y_true, y_pred_prob)
    # KS statistic: the largest gap between TPR and FPR along the ROC curve
    size = len(tpr)
    max_value = 0
    index = 0
    for i in range(0, size):
        v = tpr[i] - fpr[i]
        if v > max_value:
            max_value = v
            index = i
    # interpolate tpr onto the mean_fpr grid (scipy interp); not used further here
    mean_tpr += interp(mean_fpr, fpr, tpr)
    # mean_tpr[0] = 0.0  # force the curve to start at 0
    roc_auc = auc(fpr, tpr)
    # plotting only needs plt.plot(fpr, tpr); roc_auc just records the AUC value
    plt.plot(fpr, tpr, lw=1, label='ROC fold (area = %0.2f)' % roc_auc)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate (sensitivity)')
    plt.title('ROC curve')
    plt.grid()
    plt.legend()
    # plt.show()
    print('-------------- ks, auc --------------')
    print('ks: ' + str(max_value))
    print('auc: ' + str(auc(fpr, tpr)))
    print('threshold: ' + str(thresholds[index]))
    print('-------------------------------------')
    best_threshold = round(thresholds[index], 2)
    thres_pr_dict = {}
    threshold_recommended = 0.30
    best_precision = 0
    best_recall = 0
    best_pr = 0
    print('-------------- result details --------------')
    prob_thres = 0.01
    while prob_thres <= 1:
        print('prob_thres: ' + str(prob_thres))
        test_predict_new = []
        for prob in y_pred_prob:
            if prob > prob_thres:
                test_predict_new.append(1)
            else:
                test_predict_new.append(0)
        y_predict = np.array(test_predict_new)
        accuracy = accuracy_score(y_true, y_predict)
        precision, recall, f1, support = precision_recall_fscore_support(
            y_true, y_predict)
        matrix = confusion_matrix(y_true, y_predict)
        good_pass, bad_pass, good_deny, bad_deny = matrix[1][1], matrix[0][
            1], matrix[1][0], matrix[0][0]
        pass_ratio = float(good_pass + bad_pass) / (good_pass + bad_pass +
                                                    good_deny + bad_deny)
        print('pass_ratio: ' + str(pass_ratio))
        print('accuracy: ' + str(accuracy))
        print('precision: ' + str(precision))
        print('recall: ' + str(recall))
        print('f1: ' + str(f1))
        print('confusion_matrix:')
        print(matrix)
        print(" ")
        thres_pr_dict[prob_thres] = pass_ratio
        if float('%.2f' % prob_thres) == float('%.2f' % best_threshold):
            best_precision = str(precision)
            best_recall = str(recall)
            best_pr = str(pass_ratio)
        prob_thres += 0.01
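# The threshold search in evaluate() above is the Kolmogorov-Smirnov (KS)
# statistic: the largest gap between TPR and FPR along the ROC curve. A
# compact, vectorised sketch of the same computation (a hypothetical helper,
# not a method of the original class):
import numpy as np
from sklearn.metrics import roc_curve

def ks_statistic(y_true, y_score):
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    gaps = tpr - fpr
    best = int(np.argmax(gaps))
    # return the KS value and the score threshold at which it is reached
    return gaps[best], thresholds[best]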
regr.fit(X, y) X1 = test[:, 1:65] y1 = test[:, 65] predict_y1 = regr.predict_proba(X1)[:, 1] false_positive_rate, true_positive_rate, thresholds = roc_curve( y1, predict_y1) fpr_load.append(false_positive_rate) tpr_load.append(true_positive_rate) n_folds = len(fpr_load) tprs = [] aucs = [] mean_fpr = np.linspace(0, 1, 100) plt.figure(figsize=(10, 8)) for i in range(n_folds): tprs.append(interp(mean_fpr, fpr_load[i], tpr_load[i])) tprs[-1][0] = 0.0 roc_auc = auc(fpr_load[i], tpr_load[i]) aucs.append(roc_auc) plt.plot(fpr_load[i], tpr_load[i], lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc)) plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Random prediction', alpha=.8)
def class_report(y_true, y_pred, y_score=None, average='micro'): if y_true.shape != y_pred.shape: print("Error! y_true %s is not the same shape as y_pred %s" % ( y_true.shape, y_pred.shape) ) return lb = LabelBinarizer() if len(y_true.shape) == 1: lb.fit(y_true) #Value counts of predictions labels, cnt = np.unique( y_pred, return_counts=True) n_classes = len(labels) pred_cnt = pd.Series(cnt, index=labels) metrics_summary = precision_recall_fscore_support( y_true=y_true, y_pred=y_pred, labels=labels) avg = list(precision_recall_fscore_support( y_true=y_true, y_pred=y_pred, average='weighted')) metrics_sum_index = ['precision', 'recall', 'f1-score', 'support'] class_report_df = pd.DataFrame( list(metrics_summary), index=metrics_sum_index, columns=labels) support = class_report_df.loc['support'] total = support.sum() class_report_df['avg / total'] = avg[:-1] + [total] class_report_df = class_report_df.T class_report_df['pred'] = pred_cnt class_report_df['pred'].iloc[-1] = total if not (y_score is None): fpr = dict() tpr = dict() roc_auc = dict() for label_it, label in enumerate(labels): fpr[label], tpr[label], _ = roc_curve( (y_true == label).astype(int), y_score[:, label_it]) roc_auc[label] = auc(fpr[label], tpr[label]) if average == 'micro': if n_classes <= 2: fpr["avg / total"], tpr["avg / total"], _ = roc_curve( lb.transform(y_true).ravel(), y_score[:, 1].ravel()) else: fpr["avg / total"], tpr["avg / total"], _ = roc_curve( lb.transform(y_true).ravel(), y_score.ravel()) roc_auc["avg / total"] = auc( fpr["avg / total"], tpr["avg / total"]) elif average == 'macro': # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([ fpr[i] for i in labels] )) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in labels: mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= n_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["avg / total"] = auc(fpr["macro"], tpr["macro"]) class_report_df['AUC'] = pd.Series(roc_auc) return class_report_df
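# Hedged usage sketch for class_report above: y_score should be the
# predict_proba output, column-aligned with the sorted class labels. The
# dataset and model below are illustrative, and the function's module-level
# dependencies (numpy, pandas, LabelBinarizer, roc_curve, auc, interp,
# precision_recall_fscore_support) are assumed to be imported already.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0, stratify=y)
clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
report = class_report(y_true=y_te, y_pred=clf.predict(X_te),
                      y_score=clf.predict_proba(X_te), average='macro')
print(report)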
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []
for i, (train, test) in enumerate(cv):
    # Fit an SVM with a linear kernel on the training fold and score the test fold
    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
    # print(set(y[train]))                  # set([0, 1]): the labels have two classes
    # print(len(X[train]), len(X[test]))    # e.g. 84 training samples, 16 test samples
    # predict_proba() returns the confidence of each test sample for each class;
    # the class with the higher confidence is the predicted label
    # Compute ROC curve and area under the curve:
    # roc_curve() returns the fpr, tpr and the decision thresholds
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    mean_tpr += interp(mean_fpr, fpr, tpr)  # interpolate this fold's tpr onto the mean_fpr grid (scipy interp)
    mean_tpr[0] = 0.0                       # force the curve to start at 0
    roc_auc = auc(fpr, tpr)                 # roc_auc only records the AUC; plotting just needs plt.plot(fpr, tpr)
    plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

# Diagonal chance line
plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

mean_tpr /= len(cv)   # average the tpr values accumulated at the 100 mean_fpr grid points
mean_tpr[-1] = 1.0    # the last point of the curve is (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)  # mean AUC over the folds
# Plot the mean ROC curve
# print(mean_fpr, len(mean_fpr))
# print(mean_tpr)
plt.plot(mean_fpr, mean_tpr, 'k--',
         label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)
def do_error(self, line): """Override the error calculation for TTS The error is calculated as the vertical distance between theory points, in the current view,\ calculated over all possible pairs of theory tables, when the theories overlap in the horizontal direction and\ they correspond to files with the same Mw (if the parameters Mw2 and phi exist, their values are also used to classify the error). 1/2 of the error is added to each file. Report the error of the current theory on all the files.\n\ File error is calculated as the mean square of the residual, averaged over all calculated points in the shifted tables.\n\ Total error is the mean square of the residual, averaged over all points considered in all files. """ total_error = 0 npoints = 0 view = self.parent_dataset.parent_application.current_view nfiles = len(self.parent_dataset.files) file_error = np.zeros(nfiles) file_points = np.zeros(nfiles, dtype=np.int) xth = [] yth = [] xmin = np.zeros((nfiles, view.n)) xmax = np.zeros((nfiles, view.n)) for i in range(nfiles): Filei = self.parent_dataset.files[i] xthi, ythi, success = view.view_proc( self.tables[Filei.file_name_short], Filei.file_parameters ) # We need to sort arrays for k in range(view.n): x = xthi[:, k] p = x.argsort() xthi[:, k] = xthi[p, k] ythi[:, k] = ythi[p, k] xth.append(xthi) yth.append(ythi) xmin[i, :] = np.amin(xthi, 0) xmax[i, :] = np.amax(xthi, 0) # Mwset, Mw, Tdict = self.get_cases() MwUnique = {} for o in self.Mwset: MwUnique[o] = [0.0, 0] for i in range(nfiles): for j in range(i + 1, nfiles): if self.Mw[i] != self.Mw[j]: continue for k in range(view.n): condition = (xth[j][:, k] > xmin[i, k]) * ( xth[j][:, k] < xmax[i, k] ) x = np.extract(condition, xth[j][:, k]) y = np.extract(condition, yth[j][:, k]) yinterp = interp(x, xth[i][:, k], yth[i][:, k]) error = np.sum((yinterp - y) ** 2) npt = len(y) total_error += error npoints += npt MwUnique[self.Mw[i]][0] += error MwUnique[self.Mw[i]][1] += npt if line == "": # table='''<table border="1" width="100%">''' # table+='''<tr><th>Mw</th><th>Mw2</th><th>phi</th><th>phi2</th><th>Error</th><th># Pts.</th></tr>''' table = [ [ "%-12s" % "Mw", "%-12s" % "Mw2", "%-12s" % "phi", "%-12s" % "phi2", "%-12s" % "Error", "%-12s" % "# Pts.", ], ] p = list(MwUnique.keys()) p.sort() for o in p: if MwUnique[o][1] > 0: # table+='''<tr><td>%4g</td><td>%4g</td><td>%4g</td><td>%4g</td><td>%8.3g</td><td>(%5d)</td></tr>'''%(o[0], o[1], o[2], o[3], MwUnique[o][0] / MwUnique[o][1], MwUnique[o][1]) table.append( [ "%-12.4g" % o[0], "%-12.4g" % o[1], "%-12.4g" % o[2], "%-12.4g" % o[3], "%-12.3g" % (MwUnique[o][0] / MwUnique[o][1]), "%-12d" % MwUnique[o][1], ] ) else: # table+='''<tr><td>%4g</td><td>%4g</td><td>%4g</td><td>%4g</td><td>%s</td><td>(%5d)</td></tr>'''%(o[0], o[1], o[2], o[3], "-", MwUnique[o][1]) table.append( [ "%-12.4g" % o[0], "%-12.4g" % o[1], "%-12.4g" % o[2], "%-12.4g" % o[3], "%-12s" % "-", "%-12d" % MwUnique[o][1], ] ) # table+='''</table><br>''' self.Qprint(table) if npoints > 0: total_error /= npoints else: total_error = 1e10 if line == "": self.Qprint("<b>TOTAL ERROR</b>: %12.5g (%6d)<br>" % (total_error, npoints)) return total_error
def draw_roc_auc(ax, clf, X, y, title):
    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,
                                                        random_state=0)
    n_classes = y.shape[1]
    # One-vs-rest classifier
    oneVsRestclassifier = OneVsRestClassifier(clf)
    y_score = oneVsRestclassifier.fit(X_train, y_train).predict_proba(X_test)
    # Compute the ROC curve and area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = ms.roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = ms.auc(fpr[i], tpr[i])
    # Compute the micro-average ROC curve and area
    fpr["micro"], tpr["micro"], _ = ms.roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = ms.auc(fpr["micro"], tpr["micro"])
    lw = 2
    # Compute the macro-average ROC curve and area: the metric is computed
    # class by class and then averaged, which ignores class imbalance.
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])
    # Finally average and compute the AUC
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = ms.auc(fpr["macro"], tpr["macro"])
    # Plot the ROC curves
    ax.plot(fpr["micro"], tpr["micro"],
            label='micro-average ROC curve (area = {0:0.2f})'
                  ''.format(roc_auc["micro"]),
            color='deeppink', linestyle=':', linewidth=4)
    ax.plot(fpr["macro"], tpr["macro"],
            label='macro-average ROC curve (area = {0:0.2f})'
                  ''.format(roc_auc["macro"]),
            color='navy', linestyle=':', linewidth=4)
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        ax.plot(fpr[i], tpr[i], color=color, lw=lw,
                label='ROC curve of class {0} (area = {1:0.2f})'
                      ''.format(i, roc_auc[i]))
    ax.plot([0, 1], [0, 1], 'k--', lw=lw)
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(title, fontproperties=myfont)
    ax.legend(loc="best", prop=myfont)
    plt.show()
    return
def do_fit(self, line): """Minimize the error""" self.fitting = True start_time = time.time() # view = self.parent_dataset.parent_application.current_view self.Qprint("""<hr><h2>Parameter Fitting</h2>""") self.Mwset, self.Mw, self.Tdict = self.get_cases() # Case by case, T by T, we optimize the overlap of all files with the # corresponding cases at the selected temperature Tdesired = self.parameters["T"].value # print (self.Tdict) self.aT_vs_T = {} for case in self.Tdict.keys(): self.Qprint( "<h3>Mw=%g Mw2=%g phi=%g phi2=%g</h3>" % (case[0], case[1], case[2], case[3]) ) Temps0 = [x[0] for x in self.Tdict[case]] Temps = np.abs(np.array([x[0] for x in self.Tdict[case]]) - Tdesired) Filenames = [x[2] for x in self.Tdict[case]] Files = [x[3] for x in self.Tdict[case]] indices = np.argsort(Temps) # first master curve is built from first file in indices list fname = Filenames[indices[0]] self.parent_dataset self.current_master_curve = np.array( Files[indices[0]].data_table.data, copy=True ) self.current_master_curve.view("i8,i8,i8").sort(order=["f1"], axis=0) self.shiftParameters[fname] = (0.0, 0.0) # table='''<table border="1" width="100%">''' # table+='''<tr><th>T</th><th>log(Hshift)</th><th>log(Vshift)</th></tr>''' table = [ ["%-12s" % "T", "%-12s" % "log(Hshift)", "%-12s" % "log(Vshift)"], ] # self.Qprint('%6s %11s %11s' % ('T', 'log(Hshift)', 'log(Vshift)')) indices = np.delete(indices, 0, None) for i in indices: XSHIFT = 0.0 YSHIFT = 0.0 if Temps[i] == 0: # Add to current_master_curve fname = Filenames[i] tt = np.array(Files[i].data_table.data, copy=True) self.current_master_curve = np.concatenate( (self.current_master_curve, tt), axis=0 ) self.current_master_curve = self.current_master_curve[ self.current_master_curve[:, 0].argsort() ] self.shiftParameters[fname] = (XSHIFT, YSHIFT) else: fname = Filenames[i] tt = np.array(Files[i].data_table.data, copy=True) # Calculate preliminary shift factors (horizontal and vertical) if any(Files[i].isshifted): initial_guess = [Files[i].xshift[0], Files[i].yshift[0]] else: # Calculate mid-point of tt indmiddle = int(len(tt[:, 0]) / 2) xmid = tt[indmiddle, 0] ymid = tt[indmiddle, 1] xmidinterp = interp( ymid, self.current_master_curve[:, 1], self.current_master_curve[:, 0], ) xshift = np.log10(xmidinterp / xmid) # minimize shift factors so the overlap is maximum initial_guess = [xshift] if self.parameters["vert"].value: initial_guess.append(0) self.current_table = tt self.current_file_min = fname res = minimize( self.func_fitTTS_one, initial_guess, method="Nelder-Mead" ) if not res["success"]: self.Qprint("Solution not found: %s" % res["message"]) return XSHIFT = res.x[0] if self.parameters["vert"].value: YSHIFT = res.x[1] else: YSHIFT = 0.0 # Add to current_master_curve # Set the theory file for that particular file ttcopy = np.array(tt, copy=True) ttcopy[:, 0] = ttcopy[:, 0] * np.power(10.0, XSHIFT) ttcopy[:, 1] = ttcopy[:, 1] * np.power(10.0, YSHIFT) ttcopy[:, 2] = ttcopy[:, 2] * np.power(10.0, YSHIFT) self.current_master_curve = np.concatenate( (self.current_master_curve, ttcopy), axis=0 ) self.current_master_curve = self.current_master_curve[ self.current_master_curve[:, 0].argsort() ] self.shiftParameters[fname] = (XSHIFT, YSHIFT) # Print final table of T and shift factors indTsorted = sorted(range(len(Temps0)), key=lambda k: Temps0[k]) self.aT_vs_T[case[0]] = [] # for Arrhenius activaiton Energy for i in indTsorted: fname = Filenames[i] sparam = self.shiftParameters[fname] # 
# table += '''<tr><td>%6.3g</td><td>%11.3g</td><td>%11.3g</td></tr>''' % (Temps0[i], sparam[0], sparam[1])
table.append(
    [
        "%-12.3g" % Temps0[i],
        "%-12.3g" % sparam[0],
        "%-12.3g" % sparam[1],
    ]
)
# self.Qprint('%6.3g %11.3g %11.3g' % (Temps0[i], sparam[0], sparam[1]))
self.aT_vs_T[case[0]].append((sparam[0], Temps0[i]))
self.Qprint(table)
self.fitting = False
self.do_calculate(line, timing=False)
self.Qprint(
    """<i>---Fitted in %.3g seconds---</i><br>""" % (time.time() - start_time)
)
b.insert(1, 1) b.insert(2, 2) b.insert(3, 3) b.insert(4, 4) df1 = pd.DataFrame(df1.iloc[:, b]) df1 = df1.interpolate() #To interpolate #Demonstrate working of extrapolation x = [101, 102, 103, 104, 105, 106, 107, 109, 111, 113] y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1] f = interpolate.interp1d(x, y, fill_value='extrapolate') xnew = [95, 96, 97, 98, 99, 100, 101, 114, 115, 117] ynew = f(xnew) # use interpolation function returned by `interp1d` interp([114, 100, 110, 112, 108], x, y) #Application of extrapolation p = np.size(df1.iloc[0, :]) for i in range(5, p): n = df1.iloc[:, i].isnull().sum() if (n > 0 and n < 5642): check2 = interpolate.interp1d(df1.iloc[n:, 4], df1.iloc[n:, i], fill_value='extrapolate') check3 = check2(df1.iloc[:n, 4]) df1.iloc[:n, i] = check3 #Visualize to detect relationship df1.hist() #shows frequency of the values of every feature plt.show()
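# Worth keeping apart in the block above: numpy/scipy `interp` clamps queries
# outside the sample range to the boundary values, while
# interp1d(..., fill_value='extrapolate') extends the end segments linearly.
# A small self-contained comparison with illustrative data:
import numpy as np
from scipy import interpolate

xs = [1.0, 2.0, 3.0, 4.0]
ys = [10.0, 20.0, 30.0, 40.0]
queries = [0.0, 2.5, 5.0]

clamped = np.interp(queries, xs, ys)                       # [10., 25., 40.] - ends clamped
f = interpolate.interp1d(xs, ys, fill_value='extrapolate')
extrapolated = f(queries)                                  # [ 0., 25., 50.] - ends extended
print(clamped, extrapolated)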
def plotSepta(ax): from scipy import interp # positions = [511.95, 902.95, 1293.95, 1684.95 , 2075.95] # Xshift = math.sin(LindaAngle) *312/2 # Yshift = math.cos(LindaAngle)*312/2 #ZSs ZSangle = math.degrees(1.414490E-3) r1 = patches.Rectangle((355.96, 6.79), 313, 0.020, angle=-ZSangle, color=septaColor) #, label = 'septa' r2 = patches.Rectangle((746.95, 6.241), 313, 0.020, angle=-ZSangle, color=septaColor) r3 = patches.Rectangle((1137.95, 5.688), 313, 0.020, angle=-ZSangle, color=septaColor) r4 = patches.Rectangle((1528.95, 5.1349), 313, 0.020, angle=-ZSangle, color=septaColor) r5 = patches.Rectangle((1919.95, 4.582), 313, 0.020, angle=-ZSangle, color=septaColor) ax.add_patch(r1) ax.add_patch(r2) ax.add_patch(r3) ax.add_patch(r4) ax.add_patch(r5) #TPST #TPST = [4792.27, 2.77] TPSTangle = math.degrees(0.533948362E-03) # MADXoffset = 1665.4231 length = 215 #214 width1 = 0.46 width2 = 0.52 Zcorner1 = 4684.768 Xcorner1 = 3.760495 Zcorner2 = 4771.96852 Xcorner2 = 3.77706 # r6 = patches.Rectangle((4685.26,3.94), 214 , 0.46, angle= TPSTangle, color = septaColor, alpha = 0.75) # ax.add_patch(r6) r1 = patches.Rectangle((Zcorner1, Xcorner1), length, width1, angle=TPSTangle, label='septa', color=septaColor) ax.add_patch(r1) r2 = patches.Rectangle((Zcorner2, Xcorner2), length - (Zcorner2 - Zcorner1), width2, angle=TPSTangle, color=septaColor) ax.add_patch(r2) #MST MADXoffset = 1665.4231 Zs = [5093.02, 5416.42, 5739.82] width = 0.414 length = 240 point1 = [] point2 = [] #Defines the baselines point1.append([100 * (1715.1633 - MADXoffset), 0.1 * 39.41577]) point2.append([100 * (1717.5433 - MADXoffset), 0.1 * 40.96990]) point1.append([100 * (1718.3973 - MADXoffset), 0.1 * 41.52756]) point2.append([100 * (1720.7773 - MADXoffset), 0.1 * 43.08169]) point1.append([100 * (1721.6313 - MADXoffset), 0.1 * 43.63934]) point2.append([100 * (1724.0113 - MADXoffset), 0.1 * 45.19347]) MSTangle = math.degrees(0.65299570E-03) for i in range(len(Zs)): z = np.zeros(2) x = np.zeros(2) z[0] = point1[i][0] z[1] = point2[i][0] x[0] = point1[i][1] x[1] = point2[i][1] # f = interpolate.interp1d(z, x) newZ = Zs[i] - math.cos(MSTangle) * length / 2 newX = interp(newZ, z, x) r7 = patches.Rectangle((newZ, newX), length, width, angle=MSTangle, color=septaColor, alpha=0.75) ax.add_patch(r7) # "MSTangle = math.degrees(0.65299570E-03) # r7 = patches.Rectangle((4973.016812,4.079), length,width, angle= MSTangle, color = septaColor,alpha = 0.75) # r8 = patches.Rectangle((5296.416982,4.240), length,width, angle= MSTangle, color = septaColor,alpha = 0.75) # r9 = patches.Rectangle((5619.816971,4.401), length,width, angle= MSTangle, color = septaColor,alpha = 0.75) # ax.add_patch(r7) # ax.add_patch(r8) # ax.add_patch(r9) #MSE Zs = [6838.29, 7161.69, 7485.09, 7808.49, 8131.89] length = 241.32 width = 1.72 point1 = [] point2 = [] #Defines the baselines point1.append([100 * (1732.616 - MADXoffset), 0.1 * 48.26604]) point2.append([100 * (1734.996 - MADXoffset), 0.1 * 47.46876]) point1.append([100 * (1735.85 - MADXoffset), 0.1 * 47.89989]) point2.append([100 * (1738.23 - MADXoffset), 0.1 * 50.14024]) point1.append([100 * (1739.084 - MADXoffset), 0.1 * 51.30396]) point2.append([100 * (1741.464 - MADXoffset), 0.1 * 56.57150]) point1.append([100 * (1742.318 - MADXoffset), 0.1 * 59.18803]) point2.append([100 * (1744.698 - MADXoffset), 0.1 * 67.48276]) point1.append([100 * (1745.552 - MADXoffset), 0.1 * 70.83187]) point2.append([100 * (1747.932 - MADXoffset), 0.1 * 82.16424]) for i in range(len(Zs)): z = np.zeros(2) x = np.zeros(2) z[0] = point1[i][0] 
z[1] = point2[i][0]
x[0] = point1[i][1]
x[1] = point2[i][1]
# MSEangle must be defined here: it is used for the Rectangle angle below
MSEangle = math.atan((x[1] - x[0]) / (z[1] - z[0]))
# print(MSEangle)
newZ = Zs[i] - math.cos(MSTangle) * length / 2
newX = interp(newZ, z, x)
r7 = patches.Rectangle((newZ, newX), length, width,
                       angle=math.degrees(MSEangle),
                       color=septaColor, alpha=0.75)
ax.add_patch(r7)
def train_model(self): """ train and estimate model draw ROC curves of train and test :return: train_y, train_pred_y, test_y, test_pred_y """ train_sample_X, train_sample_y, test_X, test_y = self.get_data( ) # generate formed train and test data cv_data = self.cv.split( train_sample_X, train_sample_y) # split train data to train and validation data tprs = [] # list for saving TP rates in each cv aucs = [] # list for saving aucs in each cv mean_fpr = np.linspace(0, 1, 100) # mean FP rates fig, ax = plt.subplots() # initialize plt for i, (train, valid) in enumerate(cv_data): # 5 fold training of model_lr self.clf.fit(train_sample_X[train], train_sample_y[train]) # fit model using train data # plot ROC viz = metrics.plot_roc_curve(self.clf, train_sample_X[valid], train_sample_y[valid], name='ROC fold {}'.format(i), alpha=0.3, lw=1, ax=ax) interp_tpr = interp(mean_fpr, viz.fpr, viz.tpr) # get TP rates and do interp interp_tpr[0] = 0.0 tprs.append(interp_tpr) # add new interp_tpr to trprs list aucs.append(viz.roc_auc) # add viz.roc_auc to aucs list # plot ROC of test data metrics.plot_roc_curve(self.clf, test_X, test_y, name='ROC test', alpha=0.8, lw=1, color='green', ax=ax) ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8) # draw mean auc of 5 cv train mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = metrics.auc(mean_fpr, mean_tpr) std_auc = np.std(aucs) ax.plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC of Train (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc), lw=2, alpha=.8) # draw confident interval std_tpr = np.std(tprs, axis=0) tprs_upper = np.minimum(mean_tpr + std_tpr, 1) # get upper bound tprs_lower = np.maximum(mean_tpr - std_tpr, 0) # get lower bound ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2, label=r'$\pm$ 1 std. dev.') ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title="ROC Curve") ax.legend(loc="lower right") plt.savefig('res_fig_knn/' + self.model_name + '.png') print( r'5 Cross Validation Mean AUC: %0.2f, Standard Deviation is %0.2f' % (mean_auc, std_auc)) # train with all train data and compute the train and test accuracy respectively start = datetime.datetime.now() # record start time self.clf.fit(train_sample_X, train_sample_y) # fit all train data test_pred_y = self.clf.predict(test_X) # predict test data end = datetime.datetime.now() # record end time print('Fit Time:') # calculate time cost print(end - start) dump(self.clf, 'model_knn/' + self.model_name + '.joblib') # save trained model train_acc = self.clf.score(self.train_X, self.train_y) # calculate train accuracy test_acc = self.clf.score(test_X, test_y) # calculate test accuracy print('Train Accuracy is %0.2f, Test Accuracy is %0.2f' % (train_acc, test_acc)) train_pred_y = self.clf.predict(self.train_X) # train data prediction return self.train_y, train_pred_y, test_y, test_pred_y
__print(model_name + 'all_check_test: {}'.format(test_negative_samples + test_positive_samples)) trained_model = get_model(model_i) trained_model.fit(X_train, y_train) """ ROC """ viz = plot_roc_curve(trained_model, X_test, y_test, name='ROC fold {}'.format(k_iteration), alpha=0.3, lw=1, ax=ax) interp_tpr = interp(mean_fpr, viz.fpr, viz.tpr) interp_tpr[0] = 0.0 tprs.append(interp_tpr) aucs.append(viz.roc_auc) y_pred_train = trained_model.predict(X_train) y_pred_test = trained_model.predict(X_test) accuracy_train = accuracy_score(y_train, y_pred_train) accuracy_test = accuracy_score(y_test, y_pred_test) TN, FP, FN, TP = confusion_matrix(y_test, y_pred_test).ravel() __print(model_name + 'train_acc: {}'.format(accuracy_train)) __print(model_name + 'test_acc: {}'.format(accuracy_test)) __print(model_name + 'TN:{}'.format(TN)) __print(model_name + 'FP:{}'.format(FP)) __print(model_name + 'FN:{}'.format(FN))
def plot_roc_curve(testY, y_prob, label_to_class): lw = 2 fpr = dict() tpr = dict() roc_auc = dict() for i in range(label_to_class): fpr[i], tpr[i], _ = roc_curve(testY[:, i], y_prob[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = roc_curve(testY.ravel(), y_prob.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) all_fpr = np.unique(np.concatenate([fpr[i] for i in range(label_to_class)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in range(label_to_class): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= label_to_class fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) # Plot all ROC curves plt.figure() plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4) # plt.plot(fpr["macro"], tpr["macro"], #label='macro-average ROC curve (area = {0:0.2f})' # ''.format(roc_auc["macro"]), #color='navy', linestyle=':', linewidth=4) colors = cycle([ 'yellowgreen', 'darkorange', 'cornflowerblue', 'green', 'red', 'brown', 'gold' ]) labels = mlb.classes_ for (i, color, label) in zip(range(label_to_class), colors, labels): plt.plot(fpr[i], tpr[i], color=color, lw=lw, label='ROC curve of class {0} (area = {1:0.2f})' ''.format(label, roc_auc[i])) plt.plot([0, 1], [0, 1], 'k--', lw=lw) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') #plt.title('Roc Curve for each class') plt.legend(loc="lower right") plt.grid() plt.savefig('roc_smallvgg_2k.png') return plt
def AUCplot(spec_model, over_model, spec_xtest, over_xtest, yspec, yover, spec_rocs, over_rocs, spec_cals, over_cals, k, K): # Gather Probabilities from Models spec_model.eval() # This turns off BatchNorm and Dropout, as will be done at deployment over_model.eval() # This turns off BatchNorm and Dropout, as will be done at deployment probspec, _ = spec_model(spec_xtest) probover, _ = over_model(over_xtest) spec_model.train() #This turns BatchNorm and Dropout back on over_model.train() #This turns BatchNorm and Dropout back on # Generate ROC Curves fpr_spec, tpr_spec, thresholds_spec = roc_curve(yspec.data.cpu().numpy(), probspec.data.cpu().numpy()) fpr_over, tpr_over, thresholds_over = roc_curve(yover.data.cpu().numpy(), probover.data.cpu().numpy()) spec_rocs.append([fpr_spec, tpr_spec]) over_rocs.append([fpr_over, tpr_over]) # Generate Calibration Curves N = 5 strategy = 'uniform' # These 4 lines are required to create the K calibration curves and find the mean w/ the +/-1 std region #spec_cal = calibration_curve(yspec.data.cpu().numpy(), probspec.data.cpu().numpy(), n_bins=N, strategy=strategy) #over_cal = calibration_curve(yover.data.cpu().numpy(), probover.data.cpu().numpy(), n_bins=N, strategy=strategy) #spec_cals.append(spec_cal) #over_cals.append(over_cal) # These 2 lines are required to create the single calibration curve comprised from all datapoints spec_cals.append([yspec.data.cpu().numpy(), probspec.data.cpu().numpy()]) over_cals.append([yover.data.cpu().numpy(), probover.data.cpu().numpy()]) # Compute AUC Scores AUCspec = roc_auc_score(yspec.data.cpu().numpy(), probspec.data.cpu().numpy()) AUCover = roc_auc_score(yover.data.cpu().numpy(), probover.data.cpu().numpy()) # Plot k-th Model Performance (ROC) plt.figure(0) plt.clf() plt.plot(fpr_spec, tpr_spec, label = f'Specific = {AUCspec:.2f}')#, alpha = 0.2) plt.plot(fpr_over, tpr_over, label = f'Overall = {AUCover:.2f}')#, alpha = 0.2) if k == 0: plt.plot([0,1],[0,1],'k-', alpha = 0.2) plt.grid(alpha=0.2) plt.xlim(0,1) plt.ylim(0,1) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.legend(title='AUC') plt.savefig(f'ROCs/ROC_{k}.png') if (k+1) == K: # Plot Mean Model Performance (ROC) mean_fpr = np.linspace(0,1,10000) mean_spec_roc = [] for ROC in spec_rocs: mean_spec_tpr = interp(mean_fpr, ROC[0], ROC[1]) mean_spec_tpr[0] = 0.0 mean_spec_roc.append(mean_spec_tpr) mean_spec_roc = np.array(mean_spec_roc) std_tpr = np.std(mean_spec_roc, axis=0) mean_spec_roc = np.mean(mean_spec_roc, axis=0) mean_spec_roc[-1] = 1.0 spec_top = np.minimum(mean_spec_roc + std_tpr, 1) spec_bot = np.maximum(mean_spec_roc - std_tpr, 0) #pdb.set_trace() #pdb.set_trace() spec_mean_auc = trapz(mean_spec_roc, mean_fpr) aucs = [trapz(ROC[1], ROC[0]) for ROC in spec_rocs] spec_std_auc = np.std(aucs) spec_fig, spec_ax = plt.subplots() spec_ax.plot(mean_fpr, mean_spec_roc, color ='C0', label=f'Mean ROC, AUC = {spec_mean_auc:.2f} $\pm$ {spec_std_auc:.2f})') for ROC in spec_rocs: spec_ax.plot(ROC[0], ROC[1], alpha=0.2) spec_ax.fill_between(mean_fpr, spec_bot, spec_top, color='C0', alpha=.1, label=f'$\pm$ 1 std') spec_ax.grid(alpha=0.2) spec_ax.legend() spec_ax.set_xlabel('False Positive Rate') spec_ax.set_ylabel('True Positive Rate') spec_ax.title.set_text("Disease Specific Survival Model") plt.savefig('DSS_Mean_ROC.png', dpi = 230) mean_over_roc = [] for ROC in over_rocs: mean_over_tpr = interp(mean_fpr, ROC[0], ROC[1]) mean_over_tpr[0] = 0.0 mean_over_roc.append(mean_over_tpr) mean_over_roc = np.array(mean_over_roc) std_tpr = 
np.std(mean_over_roc, axis=0) mean_over_roc = np.mean(mean_over_roc, axis=0) mean_over_roc[-1] = 1.0 over_top = np.minimum(mean_over_roc + std_tpr, 1) over_bot = np.maximum(mean_over_roc - std_tpr, 0) over_mean_auc = trapz(mean_over_roc, mean_fpr) aucs = [trapz(ROC[1], ROC[0]) for ROC in spec_rocs] over_std_auc = np.std(aucs) over_fig, over_ax = plt.subplots() over_ax.plot(mean_fpr, mean_over_roc, color ='C0', label=f'Mean ROC, AUC = {over_mean_auc:.2f} $\pm$ {over_std_auc:.2f})') for ROC in over_rocs: over_ax.plot(ROC[0], ROC[1], alpha=0.2) over_ax.fill_between(mean_fpr, over_bot, over_top, color='C0', alpha=.1, label=f'$\pm$ 1 std') over_ax.grid(alpha=0.2) over_ax.legend() over_ax.set_xlabel('False Positive Rate') over_ax.set_ylabel('True Positive Rate') over_ax.title.set_text("Overall Survival Model") plt.savefig('Overall_Mean_ROC.png', dpi = 230) both_fig, both_ax = plt.subplots() both_ax.plot(mean_fpr, mean_spec_roc, color ='C0', label=f'DSS AUC = {spec_mean_auc:.2f} $\pm$ {spec_std_auc:.2f})') both_ax.fill_between(mean_fpr, spec_bot, spec_top, color='C0', alpha=.1)#, label=f'$\pm$ 1 DSS std') both_ax.plot(mean_fpr, mean_over_roc, color ='C1', label=f'OS AUC = {over_mean_auc:.2f} $\pm$ {over_std_auc:.2f})') both_ax.fill_between(mean_fpr, over_bot, over_top, color='C1', alpha=.1)# label=f'$\pm$ 1 OVR std') both_ax.grid(alpha=0.2) both_ax.legend(loc=4) both_ax.set_xlabel('False Positive Rate') both_ax.set_ylabel('True Positive Rate') both_ax.title.set_text("Receiver Operating Characteristic Curve") plt.savefig('Both_Mean_ROC.png', dpi = 230) # Calibration Plot both_calfig, both_calax = plt.subplots() ''' # This creates K calibration plots, and computes the mean (interpolated) with a +/- 1 std region both_calfig, both_calax = plt.subplots() mean_spec_cal = [] mean_over_cal = [] for CAL in spec_cals: interp_spec_cal = interp(mean_fpr, CAL[1], CAL[0]) mean_spec_cal.append(interp_spec_cal) std_spec_cal = np.std(np.array(mean_spec_cal), axis=0) mean_spec_cal = np.mean(np.array(mean_spec_cal), axis=0) spec_top = np.minimum(mean_spec_cal + std_spec_cal, 1) spec_bot = np.maximum(mean_spec_cal - std_spec_cal, 0) spec_mbs = [np.polyfit(CAL[1], CAL[0], 1) for CAL in spec_cals] spec_ms = [m[0] for m in spec_mbs] spec_bs = [b[1] for b in spec_mbs] spec_m_std = np.std(spec_ms) spec_b_std = np.std(spec_bs) spec_m_mean = np.mean(spec_ms) spec_b_mean = np.mean(spec_bs) both_calax.plot(mean_fpr, mean_spec_cal, label = 'Specific', color = 'C0') both_calax.fill_between(mean_fpr, spec_bot, spec_top, color='C0', alpha=.1)#, label=f'$\pm$ 1 DSS std') for CAL in over_cals: interp_over_cal = interp(mean_fpr, CAL[1], CAL[0]) mean_over_cal.append(interp_over_cal) std_over_cal = np.std(np.array(mean_over_cal), axis=0) mean_over_cal = np.mean(np.array(mean_over_cal), axis=0) over_top = np.minimum(mean_over_cal + std_over_cal, 1) over_bot = np.maximum(mean_over_cal - std_over_cal, 0) over_mbs = [np.polyfit(CAL[1], CAL[0], 1) for CAL in over_cals] over_ms = [m[0] for m in over_mbs] over_bs = [b[1] for b in over_mbs] over_m_std = np.std(over_ms) over_b_std = np.std(over_bs) over_m_mean = np.mean(over_ms) over_b_mean = np.mean(over_bs) print(np.abs(mean_over_cal-mean_spec_cal).mean()) both_calax.plot(mean_fpr, mean_over_cal, label = 'Overall', color = 'C1') both_calax.fill_between(mean_fpr, over_bot, over_top, color='C1', alpha=.1)#, label=f'$\pm$ 1 DSS std') both_calax.plot([0, 1], [0, 1], 'r--') both_calax.grid(alpha=0.2) both_calax.legend(loc=4) both_calax.set_xlabel('Predicted Probability') 
both_calax.set_ylabel('Observed Population') both_calax.title.set_text("Calibration Curve") plt.savefig("BothCalibration.png", dpi=230) ''' #pdb.set_trace() y_spec = np.concatenate([cal[0] for cal in spec_cals]) p_spec = np.concatenate([cal[1] for cal in spec_cals]) y_over = np.concatenate([cal[0] for cal in over_cals]) p_over = np.concatenate([cal[1] for cal in over_cals]) spec = calibration_curve(y_spec, p_spec, n_bins=N, strategy=strategy) over = calibration_curve(y_over, p_over, n_bins=N, strategy=strategy) spec_mb = np.polyfit(spec[1], spec[0], 1) over_mb = np.polyfit(over[1], over[0], 1) both_calax.plot(spec[1], spec[0], label=f'DSS, m = {spec_mb[0]:.2f}, b = {spec_mb[1]:.2f}', color='C0') both_calax.plot(over[1], over[0], label=f'OS, m = {over_mb[0]:.2f}, b = {over_mb[1]:.2f}', color='C1') both_calax.plot([0, 1], [0, 1], 'r--') both_calax.grid(alpha=0.2) both_calax.legend(loc=4) both_calax.set_xlabel('Predicted Probability') both_calax.set_ylabel('Observed Population') both_calax.title.set_text("Calibration Curve") plt.savefig("BothCalibration.png", dpi=230) print(f'\n\nGenerated Specific vs Overall ROC plot for test set #{k} using lastest models in ./ROCs/\n\n') return spec_rocs, over_rocs, spec_cals, over_cals
pipelines = [[ '{}-{}'.format(samplers[0][0], classifier[0]), make_pipeline(samplers[0][1], classifier[1]) ] for classifier in classifiers] fig = plt.figure() ax = fig.add_subplot(1, 1, 1) for name, pipeline in pipelines: mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) for train, test in cv.split(X, y): probas_ = pipeline.fit(X[train], y[train]).predict_proba(X[test]) fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1]) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) mean_tpr /= cv.get_n_splits(X, y) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, linestyle='--', label='{} (area = %0.2f)'.format(name) % mean_auc, lw=LW) plt.plot([0, 1], [0, 1], linestyle='--', lw=LW, color='k', label='Luck') # make nice plotting
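# The loop above assumes `samplers` and `classifiers` lists of (name, estimator)
# pairs, an imbalanced-learn style make_pipeline that accepts a resampler, a CV
# splitter `cv`, data `X`/`y` and a line width `LW`. A hedged sketch of those
# inputs (the estimators and data are illustrative, not taken from the original
# script):
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

LW = 2
X, y = make_classification(n_samples=500, weights=[0.9, 0.1], random_state=0)
cv = StratifiedKFold(n_splits=5)
samplers = [['SMOTE', SMOTE(random_state=0)]]
classifiers = [
    ['LogisticRegression', LogisticRegression(max_iter=1000)],
    ['RandomForest', RandomForestClassifier(n_estimators=100, random_state=0)],
]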
def roc_curve_multiclass(y_true, y_pred, n_classes): from scipy import interp import matplotlib.pyplot as plt from itertools import cycle from sklearn.metrics import roc_curve, auc # Plot linewidth lw = 2 # Compute ROC curve and ROC area for each class fpr = dict() tpr = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_pred[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_pred.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) # Compute macro-average ROC curve and ROC area # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) for i in range(n_classes): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) # Finally average it and compute AUC mean_tpr /= n_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) # Plot all ROC curves plt.figure(1) plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4) plt.plot(fpr["macro"], tpr["macro"], label='macro-average ROC curve (area = {0:0.2f})' ''.format(roc_auc["macro"]), color='navy', linestyle=':', linewidth=4) colors = cycle(['aqua', 'darkorange', 'cornflowerblue']) for i, color in zip(range(n_classes), colors): plt.plot(fpr[i], tpr[i], color=color, lw=lw, label='ROC curve of class {0} (area = {1:0.2f})' ''.format(i, roc_auc[i])) plt.plot([0, 1], [0, 1], 'k--', lw=lw) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title( 'Some extension of Receiver operating characteristic to multi-class') plt.legend(loc="lower right") plt.show()
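# Hedged usage sketch for roc_curve_multiclass above: both arguments must be
# [n_samples, n_classes] arrays, so integer labels are binarized first. The
# function imports most of what it needs internally but relies on a
# module-level numpy (`np`); the dataset and model here are illustrative.
import numpy as np
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize

X, y = load_wine(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0, stratify=y)
scores = RandomForestClassifier(random_state=0).fit(X_tr, y_tr).predict_proba(X_te)
roc_curve_multiclass(label_binarize(y_te, classes=[0, 1, 2]), scores, n_classes=3)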