def find_sigma_margin(best_data, worst_data, best_sim, worst_sim, detection_threshold):
    """Score a range of detection margins with precision, recall and F-score.

    For every margin ``sigma`` in 0, 1, ..., 99 the cut
    ``detection_threshold + sigma`` is applied to the best-case and the
    worst-case samples:

    * values of ``*_sim`` at or above the cut are true positives,
    * values of ``*_sim`` below the cut are false negatives,
    * values of ``*_data`` at or above the cut are false positives.

    Args:
        best_data: iterable of values counted as false positives when they
            reach the cut (best-case scenario).
        worst_data: same as ``best_data`` for the worst-case scenario.
        best_sim: iterable of values counted as true positives / false
            negatives (best-case scenario).
        worst_sim: same as ``best_sim`` for the worst-case scenario.
        detection_threshold: base threshold to which each margin is added.

    Returns:
        ``(best_plot_data, worst_plot_data)``; each is a list with one
        ``[sigma, precision, recall, f_score]`` row per margin.
    """

    def score(positives, negatives, cut):
        # Confusion counts for one scenario at one cut. True negatives are
        # not needed by precision/recall, so they are not counted.
        tp = sum(1 for a in positives if a >= cut)
        fn = sum(1 for a in positives if a < cut)
        fp = sum(1 for a in negatives if a >= cut)
        precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
        # The F-score is defined as 0 when either ingredient is 0; this also
        # avoids dividing by zero when both are 0.
        if precision == 0 or recall == 0:
            f_score = 0
        else:
            f_score = (2 * precision * recall) / (precision + recall)
        return [precision, recall, f_score]

    best_plot_data = []
    worst_plot_data = []
    for sigma in np.arange(0., 100., 1):
        cut = detection_threshold + sigma
        # Each scenario is guarded by its OWN precision and recall; the
        # worst-case row previously tested the best-case recall by mistake.
        best_plot_data.append([sigma] + score(best_sim, best_data, cut))
        worst_plot_data.append([sigma] + score(worst_sim, worst_data, cut))
    return best_plot_data, worst_plot_data
def tests(args):
    """Check the precision and recall reached for one pair of requested targets.

    Args:
        args: a single tuple
            ``(xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable,
            precis, recall)`` where

            * ``xi``, ``yi`` are 2-D grids of the two sigma values,
            * ``zi1``, ``zi2`` are the matching 2-D grids that are compared
              with ``precis`` and ``recall`` respectively,
            * ``data`` is a sequence of rows whose first two entries are the
              two parameters (converted with ``float``),
            * ``xvals``/``yvals`` are the paired parameter values of the
              sources counted as transients,
            * ``xstable``/``ystable`` are the paired values of the stable
              sources,
            * ``precis``/``recall`` are the requested precision and recall.

    Returns:
        ``[precis, recall, achieved_precision, achieved_recall]``.

    Raises:
        ValueError: if no grid point has ``zi1 >= precis``.
    """
    xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable, precis, recall = args

    # Keep only grid points whose precision meets the request, then pick the
    # one closest (Euclidean distance) to the requested (precision, recall).
    combinations = [[xi[a][b], yi[a][b], zi1[a][b], zi2[a][b]]
                    for a in range(len(zi1))
                    for b in range(len(zi1[0]))
                    if zi1[a][b] >= precis]
    if not combinations:
        # argmin of an empty array raises an opaque ValueError; say why.
        raise ValueError("no grid point reaches the requested precision %s" % precis)
    distances = np.array([(c[2] - precis)**2. + (c[3] - recall)**2. for c in combinations])
    above_thresh_sigma = combinations[distances.argmin()]

    # Turn the two chosen sigmas into thresholds on the observed data
    # (per the original comment, get_sigcut fits a Gaussian model).
    sigcutx, paramx, range_x = generic_tools.get_sigcut([float(x[0]) for x in data], above_thresh_sigma[0])
    sigcuty, paramy, range_y = generic_tools.get_sigcut([float(x[1]) for x in data], above_thresh_sigma[1])

    # A source is flagged only when it exceeds BOTH thresholds.
    fp = sum(1 for x, y in zip(xstable, ystable) if x > sigcutx and y > sigcuty)  # False Positive
    transients = list(zip(xvals, yvals))
    tp = sum(1 for x, y in transients if x > sigcutx and y > sigcuty)  # True Positive
    # Every transient that is not flagged is a miss. The previous strict "<"
    # test dropped transients lying exactly on a threshold from both counts,
    # which inflated the recall.
    fn = len(transients) - tp  # False Negative

    # If the test is successful, the outputs should meet or exceed the inputs.
    results1, results2 = generic_tools.precision_and_recall(tp, fp, fn)
    return [precis, recall, results1, results2]
def tests(args):
    """Check the precision and recall reached for one pair of requested targets.

    Args:
        args: a single tuple
            ``(xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable,
            precis, recall)`` where

            * ``xi``, ``yi`` are 2-D grids of the two sigma values,
            * ``zi1``, ``zi2`` are the matching 2-D grids that are compared
              with ``precis`` and ``recall`` respectively,
            * ``data`` is a sequence of rows whose first two entries are the
              two parameters (converted with ``float``),
            * ``xvals``/``yvals`` are the paired parameter values of the
              sources counted as transients,
            * ``xstable``/``ystable`` are the paired values of the stable
              sources,
            * ``precis``/``recall`` are the requested precision and recall.

    Returns:
        ``[precis, recall, achieved_precision, achieved_recall]``.

    Raises:
        ValueError: if no grid point has ``zi1 >= precis``.
    """
    xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable, precis, recall = args

    # Find the combination of x and y which is closest to the two thresholds,
    # considering only grid points whose precision meets the request.
    combinations = [
        [xi[a][b], yi[a][b], zi1[a][b], zi2[a][b]]
        for a in range(len(zi1))
        for b in range(len(zi1[0]))
        if zi1[a][b] >= precis
    ]
    if not combinations:
        # argmin of an empty array raises an opaque ValueError; say why.
        raise ValueError("no grid point reaches the requested precision %s" % precis)
    ID = np.array([
        (c[2] - precis)**2. + (c[3] - recall)**2. for c in combinations
    ]).argmin()
    above_thresh_sigma = combinations[ID]

    # Find the thresholds for these sigmas from the observed data
    # (per the original comment, get_sigcut fits a Gaussian model).
    sigcutx, paramx, range_x = generic_tools.get_sigcut(
        [float(x[0]) for x in data], above_thresh_sigma[0])
    sigcuty, paramy, range_y = generic_tools.get_sigcut(
        [float(x[1]) for x in data], above_thresh_sigma[1])

    # Count up tp, fp, fn: a source is flagged only above BOTH thresholds.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x > sigcutx and y > sigcuty)  # False Positive
    transients = list(zip(xvals, yvals))
    tp = sum(1 for x, y in transients
             if x > sigcutx and y > sigcuty)  # True Positive
    # Every transient that is not flagged is a miss. The previous strict "<"
    # test dropped transients lying exactly on a threshold from both counts,
    # which inflated the recall.
    fn = len(transients) - tp  # False Negative

    # Use these values to calculate the precision and recall obtained with the
    # trained threshold. If the test is successful, the outputs should meet or
    # exceed the input parameters.
    results1, results2 = generic_tools.precision_and_recall(tp, fp, fn)
    return [precis, recall, results1, results2]
def trial_data(args):
    """Find the precision and recall for a given pair of sigma thresholds.

    Args:
        args: a single tuple ``(data, sigma1, sigma2)``. Each row of ``data``
            holds the first parameter at index 0, the second parameter at
            index 1 and a label at index -1; a label equal to 0 marks a
            stable source, any other value marks a transient.

    Returns:
        ``[sigma1, sigma2, precision, recall]``.
    """
    data, sigma1, sigma2 = args

    # Sort data into transient and non-transient, converting each row once.
    xvals, yvals, xstable, ystable = [], [], [], []
    for row in data:
        x, y = float(row[0]), float(row[1])
        if float(row[-1]) != 0.:
            xvals.append(x)
            yvals.append(y)
        else:
            xstable.append(x)
            ystable.append(y)

    # Find the thresholds for the given sigmas from the stable sources only
    # (per the original comment, get_sigcut fits a Gaussian model). Copies are
    # passed so the x/y pairing below survives even if get_sigcut reorders
    # its input.
    sigcutx, paramx, range_x = generic_tools.get_sigcut(list(xstable), sigma1)
    sigcuty, paramy, range_y = generic_tools.get_sigcut(list(ystable), sigma2)

    # A source is flagged only when it exceeds BOTH thresholds.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x > sigcutx and y > sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x > sigcutx and y > sigcuty)  # True Positive
    # Every transient that is not flagged is a miss. The previous strict "<"
    # test dropped transients lying exactly on a threshold from both counts,
    # which inflated the recall.
    fn = len(xvals) - tp  # False Negative

    # Use these values to calculate the precision and recall values
    precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
    return [sigma1, sigma2, precision, recall]
def trial_data(args):
    """Find, print and return the precision and recall for a pair of sigmas.

    Args:
        args: a single tuple ``(data, sigma1, sigma2)``. Each row of ``data``
            holds the first parameter at index 0, the second parameter at
            index 1 and a label at index -1; a label equal to 0 marks a
            stable source, any other value marks a transient.

    Returns:
        ``[sigma1, sigma2, precision, recall]``. The same four values are
        also printed, space separated, on one line.
    """
    data, sigma1, sigma2 = args

    # Sort data into transient and non-transient, converting each row once.
    xvals, yvals, xstable, ystable = [], [], [], []
    for row in data:
        x, y = float(row[0]), float(row[1])
        if float(row[-1]) != 0.:
            xvals.append(x)
            yvals.append(y)
        else:
            xstable.append(x)
            ystable.append(y)

    # Find the thresholds for the given sigmas from the stable sources only
    # (per the original comment, get_sigcut fits a Gaussian model). Copies are
    # passed so the x/y pairing below survives even if get_sigcut reorders
    # its input.
    sigcutx, paramx, range_x = generic_tools.get_sigcut(list(xstable), sigma1)
    sigcuty, paramy, range_y = generic_tools.get_sigcut(list(ystable), sigma2)

    # A source is flagged only when it exceeds BOTH thresholds.
    fp = sum(1 for x, y in zip(xstable, ystable) if x > sigcutx and y > sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals) if x > sigcutx and y > sigcuty)  # True Positive
    # Every transient that is not flagged is a miss. The previous strict "<"
    # test dropped transients lying exactly on a threshold from both counts,
    # which inflated the recall.
    fn = len(xvals) - tp  # False Negative

    # Use these values to calculate the precision and recall values
    precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
    # Single pre-formatted string: same output as the old print statement,
    # and it parses under both Python 2 and Python 3.
    print("%s %s %s %s" % (sigma1, sigma2, precision, recall))
    return [sigma1, sigma2, precision, recall]
def find_sigma_margin(best_data, worst_data, detection_threshold):
    """Score detection margins and report the margin with the best F-score.

    For every margin ``sigma`` in 0, 1, ..., 99 the cut
    ``detection_threshold + sigma`` is applied to both data sets. Each row of
    a data set holds a value at index 0 and a label at index 1:

    * label 1 and value >= cut: true positive,
    * label 1 and value <  cut: false negative,
    * label 0 and value >= cut: false positive.

    Rows with any other label are ignored.

    Args:
        best_data: rows ``[value, label]`` for the best-case scenario
            (a 2-D array, or any sequence of indexable rows).
        worst_data: rows ``[value, label]`` for the worst-case scenario.
        detection_threshold: base threshold to which each margin is added.

    Returns:
        ``(best_plot_data, worst_plot_data, sigBest, sigWorst)``. The two
        plot lists hold one ``[sigma, precision, recall, f_score]`` row per
        margin; ``sigBest``/``sigWorst`` are the first margins reaching the
        maximum F-score of the respective list.
    """

    def margin_row(rows, sigma):
        # One pass over the data set collects all three counts. True
        # negatives are not needed by precision/recall and are not counted.
        cut = detection_threshold + sigma
        tp = fn = fp = 0
        for row in rows:
            value, label = row[0], row[1]
            if label == 1:
                if value >= cut:
                    tp += 1
                elif value < cut:
                    fn += 1
            elif label == 0 and value >= cut:
                fp += 1
        precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
        # The F-score is defined as 0 when either ingredient is 0; this also
        # avoids dividing by zero when both are 0.
        if precision == 0 or recall == 0:
            f_score = 0
        else:
            f_score = (2 * precision * recall) / (precision + recall)
        return [sigma, precision, recall, f_score]

    best_plot_data = []
    worst_plot_data = []
    for sigma in np.arange(0., 100., 1):
        # Each scenario is guarded by its OWN precision and recall; the
        # worst-case row previously tested the best-case recall by mistake.
        best_plot_data.append(margin_row(best_data, sigma))
        worst_plot_data.append(margin_row(worst_data, sigma))

    Fbest = max([x[3] for x in best_plot_data])
    Fworst = max([x[3] for x in worst_plot_data])
    # First (i.e. smallest) margin that achieves the maximum F-score.
    sigBest = [x[0] for x in best_plot_data if x[3] == Fbest][0]
    sigWorst = [x[0] for x in worst_plot_data if x[3] == Fworst][0]
    return best_plot_data, worst_plot_data, sigBest, sigWorst
# Build the array for the diagnostic plot. Each row is
# [first entry of the source row, eta_nu, V_nu, maxflx_nu, flxrat_nu, nu];
# rows whose eta_nu or V_nu is not strictly positive are left out.
data2 = []
for row in variables:
    if float(row[1]) > 0 and float(row[2]) > 0:
        data2.append([row[0], float(row[1]), float(row[2]),
                      float(row[3]), float(row[4]), row[5]])

# Create the diagnostic plot
plotting_tools.create_diagnostic(data2, 0, 0, frequencies, '')

# Setup data to make TP/FP/TN/FN plots.
# sigcutx/sigcuty are used as exponents of 10, while the raw values are
# compared in linear space.
x_limit = 10.**sigcutx
y_limit = 10.**sigcuty

# Log-space rows [id, log10(x), log10(y), label] for the scatter plot.
fp = []  # False Positive
tn = []  # True Negative
for z in stable:
    if float(z[1]) >= x_limit and float(z[2]) >= y_limit:
        fp.append([z[0], np.log10(float(z[1])), np.log10(float(z[2])), 'FP'])
    if float(z[1]) < x_limit or float(z[2]) < y_limit:
        tn.append([z[0], np.log10(float(z[1])), np.log10(float(z[2])), 'TN'])
tp = []  # True Positive
fn = []  # False Negative
for z in variable:
    if float(z[1]) >= x_limit and float(z[2]) >= y_limit:
        tp.append([z[0], np.log10(float(z[1])), np.log10(float(z[2])), 'TP'])
    if float(z[1]) < x_limit or float(z[2]) < y_limit:
        fn.append([z[0], np.log10(float(z[1])), np.log10(float(z[2])), 'FN'])
data3 = fp + tn + tp + fn

# Print out the actual precision and recall using the training data.
precision, recall = generic_tools.precision_and_recall(len(tp), len(fp), len(fn))
print("Precision: "+str(precision)+", Recall: "+str(recall))

# Get the different frequencies in the dataset
frequencies = generic_tools.get_frequencies(data3)

# Create the scatter_hist plot
plotting_tools.create_scatter_hist(data3, sigcutx, sigcuty, paramx, paramy,
                                   range_x, range_y, '_ADresults', frequencies)

# Same classification again, this time keeping the four linear-space values
# of every source: rows are [id, z[1], z[2], z[3], z[4], label].
fp = []  # False Positive
tn = []  # True Negative
for z in stable:
    if float(z[1]) >= x_limit and float(z[2]) >= y_limit:
        fp.append([z[0], float(z[1]), float(z[2]), float(z[3]), float(z[4]), 'FP'])
    if float(z[1]) < x_limit or float(z[2]) < y_limit:
        tn.append([z[0], float(z[1]), float(z[2]), float(z[3]), float(z[4]), 'TN'])
tp = []  # True Positive
fn = []  # False Negative
for z in variable:
    if float(z[1]) >= x_limit and float(z[2]) >= y_limit:
        tp.append([z[0], float(z[1]), float(z[2]), float(z[3]), float(z[4]), 'TP'])
    if float(z[1]) < x_limit or float(z[2]) < y_limit:
        fn.append([z[0], float(z[1]), float(z[2]), float(z[3]), float(z[4]), 'FN'])
data4 = fp + tn + tp + fn