def get_confidence_interval_bootstrap(self, alpha, repet=4000):
    """Estimate the error and bracket it with a bootstrap confidence interval.

    @param alpha: Confidence level, forwarded to get_confidence_limits
    @param repet: Number of bootstrap repetitions
    @return: tuple (estimated error, lower bound, upper bound); the lower
             bound is floored at 0 and the upper bound is capped at 1
    @todo: keep the bootstrap differences so they can be displayed later?
    """
    # Error measured on the complete set
    reference = self.get_error_estimation()

    # One delayed bootstrap_helper call per repetition, run through Parallel.
    # Presumably each call returns (bootstrap error - reference) -- confirm
    # against bootstrap_helper.
    jobs = (delayed(bootstrap_helper)(self, reference)
            for _ in range(repet))
    deviations = Parallel(n_jobs=-1)(jobs)

    # Percentile limits of the bootstrap deviations
    dev_low, dev_high = get_confidence_limits(deviations, alpha)

    # The upper deviation gives the lower bound and vice versa
    lower_bound = max(0, reference - dev_high)
    upper_bound = min(1, reference - dev_low)
    return reference, lower_bound, upper_bound
def confidence_interval_of_two_independant_eer(roc1, roc2, alpha, verbose=True):
    """Compute the confidence interval of the difference of two independent EERs.

    The EER confidence interval of each system must already have been
    computed, so that both objects carry `estimated_eer` and
    `bootstraped_eers` (original note: "page 206").

    @param roc1: First system
    @param roc2: Second system
    @param alpha: Confidence level, forwarded to get_confidence_limits
    @param verbose: When true, print a short textual report
    @return: tuple (estimated difference EER1-EER2, lower boundary,
             upper boundary, array of centred bootstrap differences)
    @raise AssertionError: if a system has no estimated EER or no
                           bootstrapped EERs
    """
    # Check for presence rather than truthiness: an estimated EER of exactly
    # 0.0 is a valid value, and an ndarray of bootstrapped EERs has no
    # unambiguous truth value.
    for label, roc in (("roc1", roc1), ("roc2", roc2)):
        estimated = getattr(roc, "estimated_eer", None)
        bootstraped = getattr(roc, "bootstraped_eers", None)
        is_ready = (estimated is not None
                    and bootstraped is not None
                    and len(bootstraped) > 0)
        assert is_ready, \
            "You must call get_confidence_interval before for %s" % label

    base = roc1.estimated_eer - roc2.estimated_eer

    # Bootstrap differences centred on the estimated difference
    e = np.array(roc1.bootstraped_eers) - np.array(roc2.bootstraped_eers) \
        - base

    e_L, e_U = get_confidence_limits(e, alpha)

    # The upper limit gives the lower boundary and vice versa
    lower = base - e_U
    upper = base - e_L

    if verbose:
        # Parenthesised single-argument prints are valid in Python 2 and 3
        print("Comparison of two independant EER")
        print("=================================")
        print("Estimated difference EER1-EER2: %0.6f" % base)
        print("Lower boundary: %0.6f" % lower)
        print("Upper boundary: %0.6f" % upper)

    return base, lower, upper, e
def compute_epc(self, nb_bootstrap=1000, confidence=0.05, alpha_min=0.00001, alpha_max=1, alpha_step=1000, nb_thresholds=1000):
    """Plot the EPC of the devel/test sets with a bootstrap confidence band.

    The reference curve is drawn in blue, every bootstrap curve in dotted
    gray and the two band limits in red, all on the current figure.

    @param nb_bootstrap: Number of bootstrap curves to compute
    @param confidence: Confidence level, forwarded to get_confidence_limits
    @param alpha_min: Forwarded to EPC.compute_epc and bootstrap_helper
    @param alpha_max: Forwarded to EPC.compute_epc and bootstrap_helper
    @param alpha_step: Forwarded to EPC.compute_epc and bootstrap_helper
    @param nb_thresholds: Forwarded to EPC.compute_epc and bootstrap_helper
    @return: None, the result is only plotted
    """
    # Reference curve: column 0 holds the alphas, column 1 the plotted value
    base_epc = EPC(devel=self.devel, test=self.test)
    base_epc.compute_epc(alpha_min, alpha_max, alpha_step, nb_thresholds)
    alphas = base_epc.curve[:, 0]
    base_curve = base_epc.curve[:, 1]

    plt.plot(alphas, base_curve, linewidth=2, color='blue')

    # One bootstrap curve per repetition
    bcurves = Parallel(n_jobs=-1, verbose=1)(
        delayed(bootstrap_helper)(self.devel, self.test, alpha_min,
                                  alpha_max, alpha_step, nb_thresholds)
        for _ in range(nb_bootstrap))
    bcurves = np.array(bcurves)

    for bcurve in bcurves:
        # Draw style and line style are passed separately: the combined
        # 'steps:' spelling is rejected by current matplotlib releases.
        plt.plot(alphas, bcurve, drawstyle='steps', linestyle=':',
                 color='gray')

    diff = bcurves - base_curve

    # TODO get real s
    # NOTE(review): a column with zero spread makes this division produce
    # inf/NaN residuals -- confirm whether a variance floor is wanted here.
    s = np.std(diff, axis=0)
    e = diff / s

    # Maximum absolute standard residual of each bootstrap curve, sign kept.
    # np.where always writes every row; the former np.empty buffer stayed
    # uninitialised for rows whose residuals were NaN.
    min_e = np.amin(e, axis=1).reshape(-1, 1)
    max_e = np.amax(e, axis=1).reshape(-1, 1)
    w = np.where(np.abs(min_e) < np.abs(max_e), max_e, min_e)

    # Confidence band around the reference curve
    delta_L, delta_U = get_confidence_limits(w, confidence)
    rU = base_curve - (delta_L * s)
    rL = base_curve - (delta_U * s)

    plt.plot(alphas, rU, linewidth=2, color='red')
    plt.plot(alphas, rL, linewidth=2, color='red')
    plt.xlabel('$\\alpha$')
    plt.ylabel('HTER')
def get_confidence_interval(self, alpha=0.05, repet=1000, angles=None):
    """Compute the bootstrap confidence region of the ROC curve and of its EER.

    The bootstrap curves, the region boundaries and the estimated curve are
    drawn on the current figure, and an EER summary is printed.
    The method also stores `estimated_eer`, `bootstraped_eers` and
    `_diff_eers` on the instance.

    @todo: split the method in several parts
    @param alpha: Confidence level, forwarded to get_confidence_limits
    @param repet: Number of bootstrap repetitions
    @param angles: Angles handed to shrink_angles on the estimated curve
    @return: tuple (estimated EER, (lower EER boundary, upper EER boundary))
    """
    #######################
    # Launch bootstrapping #
    #######################
    # Estimated ROC in polar form; its EER is read before the angles are
    # shrunk
    estimated_roc = self.get_roc().get_polar_representation()
    estimated_eer = estimated_roc.get_eer()
    self.estimated_eer = estimated_eer
    estimated_roc.shrink_angles(angles)

    # Launch the computation in parallel
    res = Parallel(n_jobs=-1, verbose=1)(
        delayed(bootstrap_helper)(self, estimated_roc)
        for _ in range(repet))

    # Each result is a pair: (bootstrapped curve, per-angle values).
    # Presumably the second item is the radius difference with the
    # estimated curve -- confirm against bootstrap_helper.
    tmp_bootstrap, tmp = zip(*res)
    del res  # release the raw results

    # Display every bootstrapped curve, then collect its EER
    for bootstraped_roc in tmp_bootstrap:
        bootstraped_roc.tmpplot()
    bootstraped_eers = [bootstraped_roc.get_eer()
                        for bootstraped_roc in tmp_bootstrap]
    self.bootstraped_eers = bootstraped_eers
    del tmp_bootstrap  # the curves are no longer needed

    ################################
    ##### Compute EER interval #####
    ################################
    diff_eers = N.asarray(bootstraped_eers) - estimated_eer
    del bootstraped_eers  # free memory
    lower, upper = get_confidence_limits(diff_eers, alpha)
    lower_eer = estimated_eer - upper
    upper_eer = estimated_eer - lower

    # Store diff_eers for offline interpretation
    self._diff_eers = diff_eers

    ################################
    ##### Extract information ######
    ################################
    tmp = N.asarray(tmp)

    # Adjusted standard deviation; theta is the same for all curves
    theta = estimated_roc.get_raw_theta()
    theta_min = N.min(theta)
    theta_max = N.max(theta)
    Nb = self.get_number_of_presentations()
    numerator = N.abs((theta - theta_min) * (theta_max - theta))
    denominator = ((theta_max - theta_min)**2)*(10**2)*Nb
    # Builtin float replaces the numpy.float alias, which newer NumPy
    # releases no longer provide; eps protects against a zero denominator.
    t = numerator/float(denominator + N.finfo(float).eps)
    s = N.sqrt(1/float(len(tmp)-1)*N.sum(tmp**2, axis=0) + t)
    assert s.shape[0] == len(theta)

    # Standardised residuals
    e = tmp / s
    assert e.shape[0] == len(tmp) and e.shape[1] == len(theta)

    # Remove boundaries (t is zero at both extreme angles)
    e = e[:, 1:-1]

    # Maximum absolute standard residual of each repetition, sign kept.
    # N.where always writes every row; the former N.empty buffer stayed
    # uninitialised for rows whose residuals were NaN.
    min_e = N.amin(e, axis=1)
    max_e = N.amax(e, axis=1)
    assert len(min_e) == len(max_e) == len(tmp)
    min_e = min_e.reshape(-1, 1)
    max_e = max_e.reshape(-1, 1)
    w = N.where(N.abs(min_e) < N.abs(max_e), max_e, min_e)

    # ROC confidence region
    delta_L, delta_U = get_confidence_limits(w, alpha)
    assert len(w) == len(tmp)

    R = estimated_roc.get_raw_r()
    rU = R - (delta_L * s)
    rL = R - (delta_U * s)

    # Plot boundaries
    PolarRocCurve.plot_boundary(estimated_roc.get_raw_theta(), rL)
    PolarRocCurve.plot_boundary(estimated_roc.get_raw_theta(), rU)
    estimated_roc.plot()  # displayed last to be in front

    # Parenthesised single-argument prints are valid in Python 2 and 3
    print("EER information")
    print("===============")
    print('Estimated EER: %0.6f' % estimated_eer)
    print('Lower boundary: %0.6f' % lower_eer)
    print('Upper boundary: %0.6f' % upper_eer)
    print("")

    return estimated_eer, (lower_eer, upper_eer)
def get_confidence_region(self, nb_iter=1000, alpha=0.05, angles=None):
    """Compute and plot the confidence region of the difference of two ROC curves.

    The bootstrap differences (gray), the region limits (red) and the
    estimated difference (blue) are drawn on the current figure.

    @param nb_iter: Number of bootstrap iterations
    @param alpha: Confidence level, forwarded to get_confidence_limits
    @param angles: Angles to use; when None, the raw theta values of the
                   first estimated curve are used
    @return: None, the result is only plotted
    """
    ##############
    # First step #
    ##############
    # Estimated ROC curves of the two systems, in polar form
    estimated_roc1 = self.roc1.get_roc().get_polar_representation()
    estimated_roc1.shrink_angles(angles)
    estimated_roc2 = self.roc2.get_roc().get_polar_representation()
    estimated_roc2.shrink_angles(angles)

    if angles is None:
        angles = estimated_roc1.get_raw_theta()

    # Difference of the two estimated curves for each angle
    original_diff = PolarRocCurve.substract_curves(
        estimated_roc1, estimated_roc2)

    #############################
    # Repeat for each bootstrap #
    #############################
    diffs = Parallel(n_jobs=-1, verbose=True)(
        delayed(bootstrap_helper)(self.roc1, self.roc2,
                                  estimated_roc1, estimated_roc2,
                                  angles)
        for _ in range(nb_iter))

    # Each result holds three per-angle series; the names suggest
    # (roc1 vs its bootstrap, roc2 vs its bootstrap, bootstrap1 vs
    # bootstrap2) -- confirm against bootstrap_helper.
    diffs = N.asarray(diffs)
    diffs1b1 = diffs[:, 0]
    diffs2b2 = diffs[:, 1]
    diffs1b2b = diffs[:, 2]
    del diffs

    ###################
    # Compute results #
    ###################
    # Array view for the arithmetic below, so that a plain list of angles
    # is accepted; the helpers above still receive the caller's object.
    theta = N.asarray(angles)

    # Get t
    min_angle = N.min(theta)
    max_angle = N.max(theta)
    N1 = self.roc1.get_number_of_presentations()
    N2 = self.roc2.get_number_of_presentations()
    t = N.abs((theta - min_angle)*(max_angle - theta)) / \
        ((max_angle - min_angle)*(max_angle - min_angle) * 100 * (N1 + N2))

    # Get s: sample variance of the bootstrap differences, plus t
    centred = diffs1b2b - N.mean(diffs1b2b, axis=0)
    variance = N.sum(N.square(centred), axis=0)/float(nb_iter-1)
    s = N.sqrt(variance + t)
    del centred, variance

    # Replicated normalised difference
    e = (diffs1b1 - diffs2b2)/s

    # Maximum absolute standard residual of each iteration, sign kept.
    # The extreme angles are skipped (t is zero there). N.where always
    # writes every row; the former N.empty buffer stayed uninitialised for
    # rows whose residuals were NaN.
    inner = e[:, 1:-1]
    min_e = N.amin(inner, axis=1).reshape(-1, 1)
    max_e = N.amax(inner, axis=1).reshape(-1, 1)
    w = N.where(N.abs(min_e) < N.abs(max_e), max_e, min_e)

    # Get percentiles
    delta_L, delta_U = get_confidence_limits(w, alpha)

    # Get confidence interval
    R = original_diff
    rU = R - (delta_L * s)
    rL = R - (delta_U * s)

    ##################
    # Display result #
    ##################
    plt.hlines([0], N.pi/2, N.pi)
    for diff in diffs1b2b:
        # Draw style and line style are passed separately: the combined
        # "steps:" spelling is rejected by current matplotlib releases.
        plt.plot(angles, diff, drawstyle="steps", linestyle=":",
                 color='gray')

    plt.plot(angles, rU, linewidth=3, color='red')
    plt.plot(angles, rL, linewidth=3, color='red')
    plt.plot(angles, original_diff, linewidth=2, color='blue')

    plt.ylim((-0.45,0.45))
    plt.yticks(N.linspace(-0.4,0.4,5))
    plt.xlim((N.pi/2,N.pi))
    plt.xticks(N.linspace(N.pi/2, N.pi, 5),
               [r'$\pi/2$', r'', r'$3\pi/4$', '', r'$\pi$'])
    plt.xlabel(r'$\theta$')  # todo set latex notation
    plt.ylabel(r'$\hat{r}_{\theta}^{(1)}-\hat{r}_{\theta}^{(2)}$')
    plt.gca().invert_xaxis()