def test_roc_points(self):
    """roc_points should turn a list of trials into ROC curve points

    Every trial is synthesised from the confusion-matrix counts it is
    meant to produce, and those counts are verified before roc_points
    is exercised.  A failure of the final assertion therefore points at
    roc_points itself rather than at the fixtures.
    """
    # (observed, expected) value of a single data point for each outcome,
    # listed in the order tp, fp, fn, tn
    outcome_values = ((1, 1), (1, 0), (0, 1), (0, 0))

    # Confusion-matrix counts (tp, fp, fn, tn) for points A, B and C
    counts_per_trial = [(63, 28, 37, 72),
                        (77, 77, 23, 23),
                        (24, 88, 76, 12)]

    trials = []
    for counts in counts_per_trial:
        obs = []
        exp = []
        for (obs_value, exp_value), count in zip(outcome_values, counts):
            obs.extend([obs_value] * count)
            exp.extend([exp_value] * count)
        # Check that the fixture really yields the intended matrix
        self.assertEqual(confusion_matrix_from_data(obs, exp), counts)
        trials.append((obs, exp))

    # Point C (the last trial) also has its derived rate checked
    obs_c, exp_c = trials[-1]
    stats_c = calculate_accuracy_stats_from_observations(obs_c, exp_c)
    self.assertFloatEqual(stats_c["false_positive_rate"], 0.88)

    # Finally the actual test
    obs_points = roc_points(trials)
    exp_points = [(0.28, 0.63), (0.77, 0.77), (0.88, 0.24)]
    self.assertFloatEqual(obs_points, exp_points)
def test_roc_points(self):
    """roc_points should calculate the points for a ROC curve

    The input trials are generated from the confusion-matrix values
    they are supposed to represent.  Each generated trial is checked
    against those values first, so any error in the final comparison
    should come from roc_points and not from the test data.
    """
    def make_trial(n_tp, n_fp, n_fn, n_tn):
        """Return (obs, exp) lists that produce the given confusion matrix"""
        # Observed is 1 for tp/fp and 0 for fn/tn;
        # expected is 1 for tp/fn and 0 for fp/tn.
        observed = [1] * n_tp + [1] * n_fp + [0] * n_fn + [0] * n_tn
        expected = [1] * n_tp + [0] * n_fp + [1] * n_fn + [0] * n_tn
        return observed, expected

    # point A
    matrix_a = (63, 28, 37, 72)
    trial_a = make_trial(*matrix_a)
    self.assertEqual(confusion_matrix_from_data(*trial_a), matrix_a)

    # point B
    matrix_b = (77, 77, 23, 23)
    trial_b = make_trial(*matrix_b)
    self.assertEqual(confusion_matrix_from_data(*trial_b), matrix_b)

    # point C, which also has its false positive rate verified
    matrix_c = (24, 88, 76, 12)
    trial_c = make_trial(*matrix_c)
    self.assertEqual(confusion_matrix_from_data(*trial_c), matrix_c)
    accuracy_c = calculate_accuracy_stats_from_observations(*trial_c)
    self.assertFloatEqual(accuracy_c["false_positive_rate"], 0.88)

    # Finally the actual test
    expected_points = [(0.28, 0.63), (0.77, 0.77), (0.88, 0.24)]
    observed_points = roc_points([trial_a, trial_b, trial_c])
    self.assertFloatEqual(observed_points, expected_points)
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) verbose=opts.verbose min_args = 1 if len(args) < min_args: option_parser.error('One or more predicted biom files must be provided.') observed_files=args make_output_dir_for_file(opts.output_fp) out_fh=open(opts.output_fp,'w') if verbose: print "Loading expected trait table file:",opts.exp_trait_table_fp exp_table =parse_biom_table(open(opts.exp_trait_table_fp,'U')) header_printed=False header_keys=[] delimiter="\t" for observed_file in observed_files: observed_file_name=basename(observed_file) if verbose: print "Loading predicted trait table file:",observed_file_name obs_table =parse_biom_table(open(observed_file,'U')) if opts.compare_observations: if verbose: print "Transposing tables to allow evaluation of observations (instead of samples)..." obs_table=transpose_biom(obs_table) exp_table=transpose_biom(exp_table) if verbose: print "Matching predicted and expected tables..." obs,exp=match_biom_tables(obs_table,exp_table,verbose=verbose,limit_to_expected_observations=opts.limit_to_expected_observations,limit_to_observed_observations=opts.limit_to_observed_observations,normalize=opts.normalize,shuffle_samples=opts.shuffle_samples) if verbose: print "Calculating accuracy stats for all observations..." #import pdb; pdb.set_trace() for i in obs: if verbose: print "Calculating stats for: ",i if opts.not_relative_abundance_scores: results=calculate_accuracy_stats_from_observations(obs[i],exp[i],success_criterion='binary') else: results=calculate_accuracy_stats_from_observations(obs[i],exp[i],success_criterion='ra_exact') #If first pass then print out header if not header_printed: header_printed=True header_keys=sorted(results.keys()) out_fh.write(delimiter.join(['file','label']+header_keys)+"\n") #print results using same order as header values=[observed_file_name,i]+['{0:.3g}'.format(results[x]) for x in header_keys] out_str=delimiter.join(map(str,values))+"\n" out_fh.write(out_str)
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) verbose = opts.verbose min_args = 1 if len(args) < min_args: option_parser.error( 'One or more predicted biom files must be provided.') observed_files = args make_output_dir_for_file(opts.output_fp) out_fh = open(opts.output_fp, 'w') if verbose: print "Loading expected trait table file:", opts.exp_trait_table_fp exp_table = load_table(opts.exp_trait_table_fp) header_printed = False header_keys = [] delimiter = "\t" for observed_file in observed_files: observed_file_name = basename(observed_file) if verbose: print "Loading predicted trait table file:", observed_file_name obs_table = load_table(observed_file) if opts.compare_observations: if verbose: print "Transposing tables to allow evaluation of observations (instead of samples)..." obs_table = obs_table.transpose() exp_table = exp_table.transpose() if verbose: print "Matching predicted and expected tables..." obs, exp = match_biom_tables( obs_table, exp_table, verbose=verbose, limit_to_expected_observations=opts.limit_to_expected_observations, limit_to_observed_observations=opts.limit_to_observed_observations, normalize=opts.normalize, shuffle_samples=opts.shuffle_samples) if verbose: print "Calculating accuracy stats for all observations..." 
#import pdb; pdb.set_trace() for i in obs: if verbose: print "Calculating stats for: ", i if opts.not_relative_abundance_scores: results = calculate_accuracy_stats_from_observations( obs[i], exp[i], success_criterion='binary') else: results = calculate_accuracy_stats_from_observations( obs[i], exp[i], success_criterion='ra_exact') #If first pass then print out header if not header_printed: header_printed = True header_keys = sorted(results.keys()) out_fh.write( delimiter.join(['file', 'label'] + header_keys) + "\n") #print results using same order as header values = [observed_file_name, i ] + ['{0:.3g}'.format(results[x]) for x in header_keys] out_str = delimiter.join(map(str, values)) + "\n" out_fh.write(out_str)