示例#1
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    otu_table_fp = opts.otu_table_fp
    otu_include_fp = opts.otu_include_fp
    output_fp = opts.output_fp
    verbose = opts.verbose
    category_mapping_fp = opts.category_mapping_fp
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance
    filter = opts.filter
    test = opts.test
    category = opts.category
    threshold = opts.threshold
    collate_results = opts.collate_results

    # check mapping file
    category_mapping = open(category_mapping_fp, 'U')
    category_mapping = parse_mapping_file(category_mapping)
    if not category:
        if test != 'paired_T':
            option_parser.error(
                'a category in the category mapping file must be'
                ' specified with the -c option for this test')

    # set up threshold value for filtering, if any
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # if specifying a list of OTUs to look at specifically
    if otu_include_fp and otu_include_fp != 'None':
        otu_include = open(otu_include_fp)
    else:
        otu_include = None

    # if only passing in a single OTU table
    if isdir(otu_table_fp) is False:
        # raise error if collate option is being passed to single table
        if collate_results is True:
            option_parser.error(
                'Cannot collate the results of only one table.'
                ' Please rerun the command without passing the -w option')
        else:
            #open and parse the biom table fp
            otu_table = parse_biom_table(open(otu_table_fp, 'U'))

            # run the statistical test
            output = test_wrapper(
                test,
                otu_table,
                category_mapping,
                category,
                threshold,
                filter,
                otu_include,
                otu_table_relative_abundance=relative_abundance)

            # write output
            output_file = open(output_fp, 'w')
            output_file.write('\n'.join(output))
            output_file.close()

    # if the user has passed in a directory
    if isdir(otu_table_fp) is True:

        # negate_collate to return an results file on a per table basis
        if collate_results is False:

            # build list of otu tables
            otu_table_paths = glob('%s/*biom' % otu_table_fp)

            # if output dir doesn't exist, then make it
            if exists(output_fp):
                pass
            else:
                makedirs(output_fp)

            for otu_table_fp in otu_table_paths:
                #open and parse the biom table fp
                otu_table = parse_biom_table(open(otu_table_fp, 'U'))

                #synchronize the mapping file with the otu table
                category_mapping, removed_samples = \
                    sync_mapping_to_otu_table(otu_table, category_mapping)
                if removed_samples:
                    print "Warning, the following samples were in the category mapping file " +\
                                    "but not the OTU table and will be ignored: "
                    for i in removed_samples:
                        print i + '\n'

                # create naming convention for output file
                # will look like: otu_table_ANOVA_Treatment.txt
                output_basename = basename(otu_table_fp)
                output_basename = output_basename.replace(".biom", "")
                output_fp_sweep = "%s_%s_%s.txt" % \
                    (output_basename,test,category)

                # if the convert_otu_table_fp is passed, save the converted table
                if test == 'longitudinal_correlation' or test == 'paired_T':
                    converted_otu_table = longitudinal_otu_table_conversion_wrapper(
                        table, category_mapping, individual_column,
                        reference_sample_column)
                    if conv_output_fp:
                        of = open(conv_output_fp, 'w')
                        of.write(format_biom_table(converted_otu_table))
                        of.close()
                    if test == 'longitudinal_correlation':
                        #set the otu_include list to all of the OTUs, this effectively
                        #deactivates the filter for correlation, because the filtered OTU_list is
                        #rewritten with the otu_include list in the test_wrapper
                        if not otu_include:
                            otu_include = set(otu_table.ObservationIds)
                        output = test_wrapper('correlation',
                                              converted_otu_table,
                                              category_mapping, category,
                                              threshold, filter, otu_include,
                                              999999999.0, True)
                    elif test == 'paired_T':
                        output = test_wrapper('paired_T', converted_otu_table,
                                              category_mapping, category,
                                              threshold, filter, otu_include,
                                              999999999.0, True,
                                              individual_column,
                                              reference_sample_column)

                # run test single input table from the directory
                else:
                    output = test_wrapper(
                        test,
                        otu_table,
                        category_mapping,
                        category,
                        threshold,
                        filter,
                        otu_include,
                        otu_table_relative_abundance=relative_abundance)

                # write output file with new naming convention
                output_file = open(join(output_fp, output_fp_sweep), 'w')
                output_file.write('\n'.join(output))
                output_file.close()

        # Use when the input dir contains rarefied OTU tables, and you want
        # to collate the p-values & results into one results file
        if collate_results is True:
            if test != 'longitudinal_correlation' and test != 'paired_T':
                # get biom tables
                otu_table_paths = glob('%s/*biom' % otu_table_fp)

                #get aggregated tables
                parsed_otu_tables = []
                for otu_table_fp in otu_table_paths:
                    otu_table = open(otu_table_fp, 'U')
                    otu_table = parse_biom_table(otu_table)
                    parsed_otu_tables.append(otu_table)

                #synchronize the mapping file with the otu table
                #checks with just the first OTU table and assumes that all otu tables
                #have the same collection of samples
                category_mapping, removed_samples = \
                    sync_mapping_to_otu_table(parsed_otu_tables[0],category_mapping)
                if removed_samples:
                    print "Warning, the following samples were in the category mapping file " +\
                                "but not the OTU table and will be ignored: "
                    for i in removed_samples:
                        print i + '\n'

                # get output from statistical test
                output = test_wrapper_multiple(
                    test,
                    parsed_otu_tables,
                    category_mapping,
                    category,
                    threshold,
                    filter,
                    otu_include,
                    otu_table_relative_abundance=relative_abundance)

                #write out aggregated results
                output_file = open(output_fp, 'w')
                output_file.write('\n'.join(output))
                output_file.close()

            else:
                option_parser.error(
                    "You cannot collate the results obtained from "
                    "using the longitudinal_correlation and paired_T options.")
示例#2
0
def main():
    """Run an OTU category significance test and write the result lines.

    Options are read via ``parse_command_line_parameters(**script_info)``.
    When the input path is a single file it is parsed as a BIOM table, the
    mapping is synchronized with it, and ``test_wrapper`` is run (on a
    converted table for ``longitudinal_correlation`` / ``paired_T``).
    When the input path is a directory, every ``*biom`` file in it is
    parsed and ``test_wrapper_multiple`` is run on all of them.
    The result lines are written newline-joined to ``output_fp``.

    Raises:
        ValueError: if no category is given for a test other than
            ``paired_T``, if a longitudinal test is requested on a
            directory, or if the directory holds no ``*biom`` file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp

    # assumes parse_mapping_file reads the handle eagerly -- TODO confirm
    with open(opts.category_mapping_fp, 'U') as mapping_f:
        category_mapping = parse_mapping_file(mapping_f)
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance

    otu_filter = opts.filter
    test = opts.test
    category = opts.category
    longitudinal_tests = ('longitudinal_correlation', 'paired_T')

    if not category and test != 'paired_T':
        raise ValueError('a category in the category mapping file must be'
                         ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # The open handle is handed to the test wrappers as-is.
    otu_include_fp = opts.otu_include_fp
    if otu_include_fp and otu_include_fp != 'None':
        otu_include = open(otu_include_fp)
    else:
        otu_include = None

    def warn_removed_samples(removed_samples):
        # Report mapping-file samples that are absent from the OTU table.
        if removed_samples:
            print("Warning, the following samples were in the category "
                  "mapping file but not the OTU table and will be ignored: ")
            for sample_id in removed_samples:
                print(sample_id + '\n')

    otu_table_fp = opts.otu_table_fp
    if not isdir(otu_table_fp):
        # if single file, process normally
        with open(otu_table_fp, 'U') as biom_f:
            try:
                otu_table = parse_biom_table(biom_f)
            except AttributeError:
                otu_table = parse_biom_table_str(biom_f)

        # synchronize the mapping file with the otu table
        category_mapping, removed_samples = sync_mapping_to_otu_table(
            otu_table, category_mapping)
        warn_removed_samples(removed_samples)

        if test in longitudinal_tests:
            converted_otu_table = longitudinal_otu_table_conversion_wrapper(
                otu_table, category_mapping, individual_column,
                reference_sample_column)
            if conv_output_fp:
                with open(conv_output_fp, 'w') as conv_f:
                    conv_f.write(format_biom_table(converted_otu_table))
            if test == 'longitudinal_correlation':
                # set the otu_include list to all of the OTUs, this
                # effectively deactivates the filter for correlation,
                # because the filtered OTU_list is rewritten with the
                # otu_include list in the test_wrapper
                if not otu_include:
                    otu_include = set(otu_table.ObservationIds)
                output = test_wrapper(
                    'correlation', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True)
            else:
                output = test_wrapper(
                    'paired_T', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True, individual_column,
                    reference_sample_column)
        else:
            output = test_wrapper(
                test, otu_table, category_mapping, category, threshold,
                otu_filter, otu_include,
                otu_table_relative_abundance=relative_abundance)
    else:
        if test in longitudinal_tests:
            raise ValueError(
                "the longitudinal_correlation and paired_T options cannot be run on a directory"
            )

        # if directory, get aggregated results
        otu_table_paths = glob('%s/*biom' % otu_table_fp)
        if not otu_table_paths:
            # explicit error instead of an IndexError on the [0] lookup below
            raise ValueError('no *biom files were found in %s' % otu_table_fp)

        parsed_otu_tables = []
        for path in otu_table_paths:
            with open(path, 'U') as biom_f:
                parsed_otu_tables.append(parse_biom_table(biom_f))

        # synchronize the mapping file with the otu table; checks with just
        # the first OTU table and assumes that all otu tables have the same
        # collection of samples
        category_mapping, removed_samples = sync_mapping_to_otu_table(
            parsed_otu_tables[0], category_mapping)
        warn_removed_samples(removed_samples)

        output = test_wrapper_multiple(
            test, parsed_otu_tables, category_mapping, category, threshold,
            otu_filter, otu_include,
            otu_table_relative_abundance=relative_abundance)

    with open(output_fp, 'w') as out_f:
        out_f.write('\n'.join(output))
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    otu_table_fp = opts.otu_table_fp
    otu_include_fp = opts.otu_include_fp
    output_fp = opts.output_fp
    verbose = opts.verbose
    category_mapping_fp = opts.category_mapping_fp
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance
    filter = opts.filter
    test = opts.test
    category = opts.category
    threshold = opts.threshold
    collate_results = opts.collate_results

    # check mapping file
    category_mapping = open(category_mapping_fp,'U')
    category_mapping = parse_mapping_file(category_mapping)
    if not category:
        if test != 'paired_T':
            option_parser.error('a category in the category mapping file must be'
                ' specified with the -c option for this test')

    # set up threshold value for filtering, if any
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # if specifying a list of OTUs to look at specifically
    if otu_include_fp and otu_include_fp != 'None':
        otu_include = open(otu_include_fp)
    else:
        otu_include = None

    # if only passing in a single OTU table
    if isdir(otu_table_fp) is False:
        # raise error if collate option is being passed to single table
        if collate_results is True:
            option_parser.error('Cannot collate the results of only one table.'
                ' Please rerun the command without passing the -w option')
        else:
            #open and parse the biom table fp
            otu_table = parse_biom_table(open(otu_table_fp, 'U'))

            # run the statistical test
            output = test_wrapper(test, otu_table, category_mapping, 
                            category, threshold, filter, otu_include, 
                            otu_table_relative_abundance=relative_abundance)
            
            # write output
            output_file = open(output_fp, 'w')
            output_file.write('\n'.join(output))
            output_file.close()

    # if the user has passed in a directory
    if isdir(otu_table_fp) is True:

        # negate_collate to return an results file on a per table basis
        if collate_results is False:
      
            # build list of otu tables
            otu_table_paths = glob('%s/*biom' % otu_table_fp)

            # if output dir doesn't exist, then make it
            if exists(output_fp):
                pass
            else:
                makedirs(output_fp)

            for otu_table_fp in otu_table_paths:
                #open and parse the biom table fp
                otu_table = parse_biom_table(open(otu_table_fp, 'U'))

                #synchronize the mapping file with the otu table
                category_mapping, removed_samples = \
                    sync_mapping_to_otu_table(otu_table, category_mapping)
                if removed_samples:
                    print "Warning, the following samples were in the category mapping file " +\
                                    "but not the OTU table and will be ignored: "
                    for i in removed_samples:
                            print i + '\n'

                # create naming convention for output file
                # will look like: otu_table_ANOVA_Treatment.txt
                output_basename = basename(otu_table_fp)
                output_basename = output_basename.replace(".biom","")
                output_fp_sweep = "%s_%s_%s.txt" % \
                    (output_basename,test,category)

                # if the convert_otu_table_fp is passed, save the converted table
                if test == 'longitudinal_correlation' or test == 'paired_T':
                    converted_otu_table = longitudinal_otu_table_conversion_wrapper(table, 
                        category_mapping, individual_column, reference_sample_column)
                    if conv_output_fp:
                        of = open(conv_output_fp, 'w')
                        of.write(format_biom_table(converted_otu_table))
                        of.close()
                    if test == 'longitudinal_correlation':
                        #set the otu_include list to all of the OTUs, this effectively
                        #deactivates the filter for correlation, because the filtered OTU_list is
                        #rewritten with the otu_include list in the test_wrapper
                        if not otu_include:
                            otu_include = set(otu_table.ObservationIds)
                        output = test_wrapper('correlation', converted_otu_table, 
                            category_mapping, category, threshold, filter, otu_include, 
                            999999999.0, True)
                    elif test == 'paired_T':
                        output = test_wrapper('paired_T', converted_otu_table, 
                            category_mapping, category, threshold, 
                            filter, otu_include, 999999999.0, True, 
                            individual_column, reference_sample_column)

                # run test single input table from the directory  
                else:
                    output = test_wrapper(test, otu_table, category_mapping, 
                        category, threshold, filter, otu_include, 
                        otu_table_relative_abundance=relative_abundance)
                    
                # write output file with new naming convention
                output_file = open(join(output_fp,output_fp_sweep), 'w')
                output_file.write('\n'.join(output))
                output_file.close()


        # Use when the input dir contains rarefied OTU tables, and you want
        # to collate the p-values & results into one results file       
        if collate_results is True:
            if test != 'longitudinal_correlation' and test != 'paired_T':
                # get biom tables
                otu_table_paths = glob('%s/*biom' % otu_table_fp)

                #get aggregated tables
                parsed_otu_tables= []
                for otu_table_fp in otu_table_paths:
                    otu_table = open(otu_table_fp, 'U')
                    otu_table = parse_biom_table(otu_table)
                    parsed_otu_tables.append(otu_table)

                #synchronize the mapping file with the otu table
                #checks with just the first OTU table and assumes that all otu tables
                #have the same collection of samples
                category_mapping, removed_samples = \
                    sync_mapping_to_otu_table(parsed_otu_tables[0],category_mapping)
                if removed_samples:
                    print "Warning, the following samples were in the category mapping file " +\
                                "but not the OTU table and will be ignored: "
                    for i in removed_samples:
                            print i + '\n'

                # get output from statistical test            
                output = test_wrapper_multiple(test, parsed_otu_tables, 
                    category_mapping, category, threshold, filter, otu_include,
                    otu_table_relative_abundance=relative_abundance)

                #write out aggregated results
                output_file = open(output_fp, 'w')
                output_file.write('\n'.join(output))
                output_file.close()

            else:
                option_parser.error("You cannot collate the results obtained from "
                    "using the longitudinal_correlation and paired_T options.")
def main():
    """Command-line entry point for the OTU category significance tests.

    Reads the options via ``parse_command_line_parameters(**script_info)``
    and hands open file handles (mapping file, OTU table, optional OTU
    include list) to the test wrappers. A single OTU table file is run
    through ``test_wrapper``; a directory is listed and its entries are
    passed as paths to ``test_wrapper_multiple``. The resulting lines are
    written newline-joined to ``output_fp``.

    Raises:
        ValueError: if no category is given for a test other than
            ``paired_T``, or if ``longitudinal_correlation`` / ``paired_T``
            is requested for a directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    output_fp = opts.output_fp

    # The mapping file is kept as an open handle; it is not parsed here.
    category_mapping_fp = opts.category_mapping_fp
    category_mapping = open(category_mapping_fp, 'U')
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.conv_output_fp

    filter = opts.filter
    test = opts.test
    category = opts.category
    if test != 'paired_T' and not category:
        raise ValueError('a category in the category mapping file must be'
                         ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    otu_include = None
    otu_include_fp = opts.otu_include_fp
    if otu_include_fp and otu_include_fp != 'None':
        otu_include = open(otu_include_fp)

    otu_table_fp = opts.otu_table_fp
    is_longitudinal = test in ('longitudinal_correlation', 'paired_T')

    if isdir(opts.otu_table_fp):
        if is_longitudinal:
            raise ValueError("the longitudinal_correlation and paired_T options cannot be run on a directory")
        # if directory, get aggregated results
        otu_table_paths = []
        for entry in listdir(otu_table_fp):
            otu_table_paths.append(join(otu_table_fp, entry))
        output = test_wrapper_multiple(
            test, otu_table_paths, category_mapping, category, threshold,
            filter, otu_include)
    elif is_longitudinal:
        # single file, longitudinal test: convert the table first
        otu_table = open(otu_table_fp, 'U')
        converted_otu_table = longitudinal_otu_table_conversion_wrapper(
            otu_table, category_mapping, individual_column,
            reference_sample_column)
        if conv_output_fp:
            conv_file = open(conv_output_fp, 'w')
            conv_file.write(converted_otu_table)
            conv_file.close()
        converted_otu_table = converted_otu_table.split('\n')
        # A fresh mapping handle is opened for the test itself, after the
        # first one has been handed to the conversion wrapper.
        category_mapping = open(category_mapping_fp, 'U')
        if test == 'longitudinal_correlation':
            output = test_wrapper(
                'correlation', converted_otu_table, category_mapping,
                category, threshold, filter, otu_include, 999999999.0, True)
        else:
            output = test_wrapper(
                'paired_T', converted_otu_table, category_mapping, category,
                threshold, filter, otu_include, 999999999.0, True,
                individual_column, reference_sample_column)
    else:
        # single file, any other test: process normally
        otu_table = open(otu_table_fp, 'U')
        output = test_wrapper(
            test, otu_table, category_mapping, category, threshold, filter,
            otu_include)

    result_file = open(output_fp, 'w')
    result_file.write('\n'.join(output))
    result_file.close()
        otu_table_paths = [fp1,fp2]
        threshold = None
        _filter = 0
        otu_include = None
        otu_table1 = open(fp1,'U')
        otu_table2 = open(fp2,'U')
        category = 'cat1'

        # get expected ANOVA output from each file separately
        results1 = test_wrapper('ANOVA', otu_table1, category_mapping, category, threshold, \
                 _filter, otu_include=None)
        results2 = test_wrapper('ANOVA', otu_table2, category_mapping, category, threshold, \
                 _filter, otu_include=None)

        results = test_wrapper_multiple('ANOVA', otu_table_paths,
                                        category_mapping, category,
                                        threshold, _filter,
                                        otu_include)

        # multiple results should be combination of individual results
        results1 = [result.split('\t') for result in results1]        
        results1_dict = {}
        for line in results1[1:]:
            results1_dict[line[0]] = line[1:]
        results2 = [result.split('\t') for result in results2]
        results2_dict = {}
        for line in results2[1:]:
            results2_dict[line[0]] = line[1:]
        results = [result.split('\t') for result in results]
        results_dict = {}
        for line in results[1:]:
            results_dict[line[0]] = line[1:]
示例#6
0
def main():
    """Run an OTU category significance test and write the result lines.

    Options are read via ``parse_command_line_parameters(**script_info)``.
    When the input path is a single file it is parsed as a BIOM table, the
    mapping is synchronized with it, and ``test_wrapper`` is run (on a
    converted table for ``longitudinal_correlation`` / ``paired_T``).
    When the input path is a directory, every ``*biom`` file in it is
    parsed and ``test_wrapper_multiple`` is run on all of them.
    The result lines are written newline-joined to ``output_fp``.

    Raises:
        ValueError: if no category is given for a test other than
            ``paired_T``, if a longitudinal test is requested on a
            directory, or if the directory holds no ``*biom`` file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp

    # assumes parse_mapping_file reads the handle eagerly -- TODO confirm
    with open(opts.category_mapping_fp, 'U') as mapping_f:
        category_mapping = parse_mapping_file(mapping_f)
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance

    otu_filter = opts.filter
    test = opts.test
    category = opts.category
    longitudinal_tests = ('longitudinal_correlation', 'paired_T')

    if not category and test != 'paired_T':
        raise ValueError('a category in the category mapping file must be'
                         ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # The open handle is handed to the test wrappers as-is.
    otu_include_fp = opts.otu_include_fp
    if otu_include_fp and otu_include_fp != 'None':
        otu_include = open(otu_include_fp)
    else:
        otu_include = None

    def warn_removed_samples(removed_samples):
        # Report mapping-file samples that are absent from the OTU table.
        if removed_samples:
            print("Warning, the following samples were in the category "
                  "mapping file but not the OTU table and will be ignored: ")
            for sample_id in removed_samples:
                print(sample_id + '\n')

    otu_table_fp = opts.otu_table_fp
    if not isdir(otu_table_fp):
        # if single file, process normally
        with open(otu_table_fp, 'U') as biom_f:
            try:
                otu_table = parse_biom_table(biom_f)
            except AttributeError:
                otu_table = parse_biom_table_str(biom_f)

        # synchronize the mapping file with the otu table
        category_mapping, removed_samples = sync_mapping_to_otu_table(
            otu_table, category_mapping)
        warn_removed_samples(removed_samples)

        if test in longitudinal_tests:
            converted_otu_table = longitudinal_otu_table_conversion_wrapper(
                otu_table, category_mapping, individual_column,
                reference_sample_column)
            if conv_output_fp:
                with open(conv_output_fp, 'w') as conv_f:
                    conv_f.write(format_biom_table(converted_otu_table))
            if test == 'longitudinal_correlation':
                # set the otu_include list to all of the OTUs, this
                # effectively deactivates the filter for correlation,
                # because the filtered OTU_list is rewritten with the
                # otu_include list in the test_wrapper
                if not otu_include:
                    otu_include = set(otu_table.ObservationIds)
                output = test_wrapper(
                    'correlation', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True)
            else:
                output = test_wrapper(
                    'paired_T', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True, individual_column,
                    reference_sample_column)
        else:
            output = test_wrapper(
                test, otu_table, category_mapping, category, threshold,
                otu_filter, otu_include,
                otu_table_relative_abundance=relative_abundance)
    else:
        if test in longitudinal_tests:
            raise ValueError("the longitudinal_correlation and paired_T options cannot be run on a directory")

        # if directory, get aggregated results
        otu_table_paths = glob('%s/*biom' % otu_table_fp)
        if not otu_table_paths:
            # explicit error instead of an IndexError on the [0] lookup below
            raise ValueError('no *biom files were found in %s' % otu_table_fp)

        parsed_otu_tables = []
        for path in otu_table_paths:
            with open(path, 'U') as biom_f:
                parsed_otu_tables.append(parse_biom_table(biom_f))

        # synchronize the mapping file with the otu table; checks with just
        # the first OTU table and assumes that all otu tables have the same
        # collection of samples
        category_mapping, removed_samples = sync_mapping_to_otu_table(
            parsed_otu_tables[0], category_mapping)
        warn_removed_samples(removed_samples)

        output = test_wrapper_multiple(
            test, parsed_otu_tables, category_mapping, category, threshold,
            otu_filter, otu_include,
            otu_table_relative_abundance=relative_abundance)

    with open(output_fp, 'w') as out_f:
        out_f.write('\n'.join(output))