def test_roc_points(self):
        """roc_points should calculate the points for a Receiver Operating Characteristic curve
        """

        #The setup here is a bit elaborate since I generate the test datasets
        #based on the values we need in the confusion matrix.
        #I test the intermediate results though, so any errors should be due
        #to the actual function, not the test
        
        tn_obs = 0
        tn_exp = 0

        fp_obs = 1
        fp_exp = 0

        tp_obs = 1
        tp_exp = 1

        fn_obs = 0
        fn_exp = 1
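
        #With these encodings, each (observed, expected) pair selects one cell
        #of the confusion matrix: (1, 1) -> true positive, (1, 0) -> false
        #positive, (0, 1) -> false negative, (0, 0) -> true negative.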

        #point A
        obs = [tp_obs] * 63 + [fp_obs] * 28 + [fn_obs] * 37 + [tn_obs] * 72
        exp = [tp_exp] * 63 + [fp_exp] * 28 + [fn_exp] * 37 + [tn_exp] * 72
        trial_a_results = confusion_matrix_from_data(obs,exp)
        #Check that this is correct
        self.assertEqual(trial_a_results,(63,28,37,72))
        trial_a = (obs,exp)
        
       
        #point B
        obs = [tp_obs] * 77 + [fp_obs] * 77 + [fn_obs] * 23 + [tn_obs] * 23
        exp = [tp_exp] * 77 + [fp_exp] * 77 + [fn_exp] * 23 + [tn_exp] * 23
        trial_b_results = confusion_matrix_from_data(obs,exp)
        #Check that this is correct
        self.assertEqual(trial_b_results,(77,77,23,23))
        trial_b = (obs,exp)
        
        #point C
        obs = [tp_obs] * 24 + [fp_obs] * 88 + [fn_obs] * 76 + [tn_obs] * 12
        exp = [tp_exp] * 24 + [fp_exp] * 88 + [fn_exp] * 76 + [tn_exp] * 12
        trial_c_results = confusion_matrix_from_data(obs,exp)
        #Check that this is correct
        self.assertEqual(trial_c_results,(24,88,76,12))
        trial_c_stats = calculate_accuracy_stats_from_observations(obs,exp)
        #Check that the derived false positive rate is also correct
        self.assertFloatEqual(trial_c_stats["false_positive_rate"],0.88)

        trial_c = (obs,exp)

        trials = [trial_a, trial_b,trial_c]
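        #For reference, the expected ROC points follow from
        #FPR = FP/(FP+TN) and TPR = TP/(TP+FN):
        #point A: FPR = 28/(28+72) = 0.28, TPR = 63/(63+37) = 0.63
        #point B: FPR = 77/(77+23) = 0.77, TPR = 77/(77+23) = 0.77
        #point C: FPR = 88/(88+12) = 0.88, TPR = 24/(24+76) = 0.24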
        
        
        #Finally, the actual test

        obs_points = roc_points(trials)
        exp_points = [(0.28,0.63),(0.77,0.77),(0.88,0.24)]
        self.assertFloatEqual(obs_points,exp_points)
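
#For reference, a minimal sketch of how (false positive rate, true positive
#rate) points can be derived from per-trial confusion matrices. This is only an
#illustration of the arithmetic checked above; it is not the library's actual
#roc_points implementation, which works from the raw (obs, exp) trials.
def roc_points_from_confusion_matrices(matrices):
    """Return one (FPR, TPR) point per (tp, fp, fn, tn) tuple."""
    points = []
    for tp, fp, fn, tn in matrices:
        fpr = float(fp) / (fp + tn)
        tpr = float(tp) / (tp + fn)
        points.append((fpr, tpr))
    return points

#Example: the three trials above yield [(0.28, 0.63), (0.77, 0.77), (0.88, 0.24)]
#roc_points_from_confusion_matrices([(63, 28, 37, 72), (77, 77, 23, 23), (24, 88, 76, 12)])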
Example 3
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    verbose=opts.verbose

    min_args = 1
    if len(args) < min_args:
        option_parser.error('One or more predicted biom files must be provided.')
    observed_files = args
   

    make_output_dir_for_file(opts.output_fp)
    out_fh=open(opts.output_fp,'w')

    if verbose:
        print "Loading expected trait table file:",opts.exp_trait_table_fp

    exp_table = parse_biom_table(open(opts.exp_trait_table_fp,'U'))

    header_printed=False
    header_keys=[]
    delimiter="\t"


    for observed_file in observed_files:
        observed_file_name=basename(observed_file)

        if verbose:
            print "Loading predicted trait table file:",observed_file_name

        obs_table = parse_biom_table(open(observed_file,'U'))

        if opts.compare_observations:
            if verbose:
                print "Transposing tables to allow evaluation of observations (instead of samples)..."
            obs_table=transpose_biom(obs_table)
            exp_table=transpose_biom(exp_table)

        if verbose:
           print "Matching predicted and expected tables..."    

        obs, exp = match_biom_tables(obs_table, exp_table, verbose=verbose,
            limit_to_expected_observations=opts.limit_to_expected_observations,
            limit_to_observed_observations=opts.limit_to_observed_observations,
            normalize=opts.normalize,
            shuffle_samples=opts.shuffle_samples)

        if verbose:
            print "Calculating accuracy stats for all observations..."

        #import pdb; pdb.set_trace()
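        #obs and exp map each matched sample id (or observation id, if the
        #tables were transposed above) to its predicted and expected values,
        #so accuracy stats are calculated once per label.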
        for i in obs:
            if verbose:
                print "Calculating stats for: ",i
            if opts.not_relative_abundance_scores:
                results=calculate_accuracy_stats_from_observations(obs[i],exp[i],success_criterion='binary')
            else:
                results=calculate_accuracy_stats_from_observations(obs[i],exp[i],success_criterion='ra_exact')

            #If first pass then print out header
            if not header_printed:
                header_printed=True
                header_keys=sorted(results.keys())
                out_fh.write(delimiter.join(['file','label']+header_keys)+"\n")

            #print results using same order as header
            values=[observed_file_name,i]+['{0:.3g}'.format(results[x]) for x in header_keys]
            out_str=delimiter.join(map(str,values))+"\n"
            out_fh.write(out_str)
Example 4
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    min_args = 1
    if len(args) < min_args:
        option_parser.error(
            'One or more predicted biom files must be provided.')
    observed_files = args

    make_output_dir_for_file(opts.output_fp)
    out_fh = open(opts.output_fp, 'w')

    if verbose:
        print "Loading expected trait table file:", opts.exp_trait_table_fp

    exp_table = load_table(opts.exp_trait_table_fp)

    header_printed = False
    header_keys = []
    delimiter = "\t"

    for observed_file in observed_files:
        observed_file_name = basename(observed_file)

        if verbose:
            print "Loading predicted trait table file:", observed_file_name

        obs_table = load_table(observed_file)

        if opts.compare_observations:
            if verbose:
                print "Transposing tables to allow evaluation of observations (instead of samples)..."
            obs_table = obs_table.transpose()
            exp_table = exp_table.transpose()

        if verbose:
            print "Matching predicted and expected tables..."

        obs, exp = match_biom_tables(
            obs_table,
            exp_table,
            verbose=verbose,
            limit_to_expected_observations=opts.limit_to_expected_observations,
            limit_to_observed_observations=opts.limit_to_observed_observations,
            normalize=opts.normalize,
            shuffle_samples=opts.shuffle_samples)
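        #The limit_to_* / normalize / shuffle_samples options control how the
        #predicted and expected tables are matched before scoring; see
        #match_biom_tables for the exact semantics.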

        if verbose:
            print "Calculating accuracy stats for all observations..."

        #import pdb; pdb.set_trace()
        for i in obs:
            if verbose:
                print "Calculating stats for: ", i
            if opts.not_relative_abundance_scores:
                results = calculate_accuracy_stats_from_observations(
                    obs[i], exp[i], success_criterion='binary')
            else:
                results = calculate_accuracy_stats_from_observations(
                    obs[i], exp[i], success_criterion='ra_exact')

            #If first pass then print out header
            if not header_printed:
                header_printed = True
                header_keys = sorted(results.keys())
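                #Output columns: file, label, then one column per accuracy
                #statistic, in sorted key order.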
                out_fh.write(
                    delimiter.join(['file', 'label'] + header_keys) + "\n")

            #print results using same order as header
            values = ([observed_file_name, i] +
                      ['{0:.3g}'.format(results[x]) for x in header_keys])
            out_str = delimiter.join(map(str, values)) + "\n"
            out_fh.write(out_str)