Exemplo n.º 1
0
def main():
    """Build an OTU table from an OTU map and write it with write_biom_table.

    Reads the script options and then, depending on which options are set:
      - parses the taxonomy file (opts.taxonomy_fname) with parse_taxonomy;
      - collects OTU ids to exclude from opts.exclude_otus_fp, read as a
        fasta file when the extension is .fasta or .fna and as a seq-id
        file otherwise;
      - parses the mapping file (opts.mapping_fp) into sample metadata.
    The results are passed to make_otu_table together with the OTU map
    (opts.otu_map_fp) and the table is written to opts.output_biom_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # Every input is read inside a ``with`` block so the handle is closed
    # as soon as parsing is done, even when the parser raises.
    otu_to_taxonomy = None
    if opts.taxonomy_fname:
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides which id reader is used.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        # NOTE(review): assumes the reader consumes the whole file before
        # returning (no lazy generator) -- confirm against its definition.
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)

        sample_metadata = mapping_file_to_dict(mapping_data, mapping_header)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata)

    write_biom_table(biom_otu_table, opts.output_biom_fp)
Exemplo n.º 2
0
def main():
    """Build an OTU table from an OTU map and write it to the output file.

    Reads the script options, optionally parses the taxonomy file
    (opts.taxonomy_fname) and the list of OTU ids to exclude
    (opts.exclude_otus_fp, read as fasta for .fasta/.fna and as a seq-id
    file otherwise), then writes the value returned by make_otu_table to
    opts.output_biom_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # The output file is still opened before any input is read, exactly as
    # before, but is now closed (and therefore flushed) on every exit path.
    with open(opts.output_biom_fp, 'w') as outfile:
        otu_to_taxonomy = None
        if opts.taxonomy_fname:
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        ids_to_exclude = []
        if exclude_otus_fp:
            # The file extension decides which id reader is used.
            if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
                read_ids = get_seq_ids_from_fasta_file
            else:
                read_ids = get_seq_ids_from_seq_id_file
            # NOTE(review): assumes the reader consumes the whole file
            # before returning -- confirm against its definition.
            with open(exclude_otus_fp, 'U') as exclude_f:
                ids_to_exclude = read_ids(exclude_f)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            biom_otu_table = make_otu_table(otu_map_f,
                                            otu_to_taxonomy,
                                            ids_to_exclude)
        outfile.write(biom_otu_table)
Exemplo n.º 3
0
def main():
    """Build an OTU table from an OTU map and write it with write_biom_table.

    Reads the script options and then, depending on which options are set:
      - parses the taxonomy file (opts.taxonomy_fname) with parse_taxonomy;
      - collects OTU ids to exclude from opts.exclude_otus_fp, read as a
        fasta file when the extension is .fasta or .fna and as a seq-id
        file otherwise;
      - parses the mapping file (opts.mapping_fp) and turns it into sample
        metadata with assemble_sample_metadata.
    The results are passed to make_otu_table together with the OTU map
    (opts.otu_map_fp) and the table is written to opts.output_biom_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # Every input is read inside a ``with`` block so the handle is closed
    # as soon as parsing is done, even when the parser raises.
    otu_to_taxonomy = None
    if opts.taxonomy_fname:
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides which id reader is used.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        # NOTE(review): assumes the reader consumes the whole file before
        # returning (no lazy generator) -- confirm against its definition.
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)
        sample_metadata = assemble_sample_metadata(mapping_data,
                                                   mapping_header,
                                                   mapping_comments)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata)

    write_biom_table(biom_otu_table, opts.output_biom_fp)
Exemplo n.º 4
0
    def _generate_biom_output(self, observation_map_fp, output_biom_fp, observation_metadata_fp):
        """Build a table from an observation map and write it out.

        observation_map_fp: path of the observation map handed to
         make_otu_table
        output_biom_fp: path passed to write_biom_table
        observation_metadata_fp: path of a file parsed with
         parse_taxonomy, or None to build the table without observation
         metadata
        """
        # Both inputs are read inside ``with`` blocks so their handles are
        # closed once parsing is done instead of being leaked.
        observation_metadata = None
        if observation_metadata_fp is not None:
            with open(observation_metadata_fp, "U") as metadata_f:
                observation_metadata = parse_taxonomy(metadata_f)

        with open(observation_map_fp, "U") as observation_map_f:
            biom_table = make_otu_table(observation_map_f, observation_metadata)
        write_biom_table(biom_table, output_biom_fp)
Exemplo n.º 5
0
    def _generate_biom_output(self, observation_map_fp, output_biom_fp, observation_metadata_fp):
        """Build a table from an observation map and write it to a file.

        observation_map_fp: path of the observation map handed to
         make_otu_table
        output_biom_fp: path of the file that receives the value returned
         by make_otu_table
        observation_metadata_fp: path of a file parsed with
         parse_taxonomy, or None to build the table without observation
         metadata
        """
        observation_metadata = None
        if observation_metadata_fp is not None:
            with open(observation_metadata_fp, "U") as metadata_f:
                observation_metadata = parse_taxonomy(metadata_f)

        # The output file is opened before the table is built, as before,
        # but is now closed even if make_otu_table raises.  Nested ``with``
        # blocks keep the code valid on Python 2.6.
        with open(output_biom_fp, "w") as biom_table_f:
            with open(observation_map_fp, "U") as observation_map_f:
                biom_table_f.write(
                    make_otu_table(observation_map_f, observation_metadata))
Exemplo n.º 6
0
    def _generate_biom_output(self, observation_map_fp, output_biom_fp,
                              observation_metadata_fp):
        """Build a table from an observation map and write it out.

        observation_map_fp: path of the observation map handed to
         make_otu_table
        output_biom_fp: path passed to write_biom_table
        observation_metadata_fp: path of a file parsed with
         parse_taxonomy, or None to build the table without observation
         metadata
        """
        # Both inputs are read inside ``with`` blocks so their handles are
        # closed once parsing is done instead of being leaked.
        observation_metadata = None
        if observation_metadata_fp is not None:
            with open(observation_metadata_fp, 'U') as metadata_f:
                observation_metadata = parse_taxonomy(metadata_f)

        with open(observation_map_fp, 'U') as observation_map_f:
            biom_table = make_otu_table(observation_map_f,
                                        observation_metadata)
        write_biom_table(biom_table, output_biom_fp)
Exemplo n.º 7
0
    def _generate_biom_output(self, observation_map_fp, output_biom_fp,
                              observation_metadata_fp):
        """Build a table from an observation map and write it to a file.

        observation_map_fp: path of the observation map handed to
         make_otu_table
        output_biom_fp: path of the file that receives the value returned
         by make_otu_table
        observation_metadata_fp: path of a file parsed with
         parse_taxonomy, or None to build the table without observation
         metadata
        """
        observation_metadata = None
        # Identity test: ``!= None`` would invoke a custom __ne__ if one
        # were defined on the argument.
        if observation_metadata_fp is not None:
            with open(observation_metadata_fp, 'U') as metadata_f:
                observation_metadata = parse_taxonomy(metadata_f)

        # The output file is opened before the table is built, as before,
        # but is now closed even if make_otu_table raises.  Nested ``with``
        # blocks keep the code valid on Python 2.6.
        with open(output_biom_fp, 'w') as biom_table_f:
            with open(observation_map_fp, 'U') as observation_map_f:
                biom_table_f.write(
                    make_otu_table(observation_map_f, observation_metadata))
Exemplo n.º 8
0
    def test_parse_taxonomy(self):
        """ should parse taxonomy example, keeping otu id only"""
        example_tax = \
"""412 PC.635_647	Root;Bacteria;Firmicutes;"Clostridia";Clostridiales	0.930
319 PC.355_281	Root;Bacteria;Bacteroidetes	0.970
353 PC.634_154	Root;Bacteria;Bacteroidetes	0.830
17 PC.607_302	Root;Bacteria;Bacteroidetes	0.960
13 PC.481_1214	Root;Bacteria;Firmicutes;"Clostridia";Clostridiales	0.870
338 PC.593_1314	Root;Bacteria	0.990	42556	Additional fields ignored"""
        res = parse_taxonomy(example_tax.split('\n'))
        self.assertEqual(res['412'],
         "Root;Bacteria;Firmicutes;\"Clostridia\";Clostridiales")
        self.assertEqual(res['338'],
         "Root;Bacteria")
Exemplo n.º 9
0
    def test_parse_taxonomy(self):
        """ should parse taxonomy example, keeping otu id only"""
        example_tax = \
"""412 PC.635_647	Root;Bacteria;Firmicutes;"Clostridia";Clostridiales	0.930
319 PC.355_281	Root;Bacteria;Bacteroidetes	0.970
353 PC.634_154	Root;Bacteria;Bacteroidetes	0.830
17 PC.607_302	Root;Bacteria;Bacteroidetes	0.960
13 PC.481_1214	Root;Bacteria;Firmicutes;"Clostridia";Clostridiales	0.870
338 PC.593_1314	Root;Bacteria	0.990	42556	Additional fields ignored"""
        res = parse_taxonomy(example_tax.split('\n'))
        self.assertEqual(
            res['412'],
            "Root;Bacteria;Firmicutes;\"Clostridia\";Clostridiales")
        self.assertEqual(res['338'], "Root;Bacteria")
Exemplo n.º 10
0
 def _generate_biom_output(self,
                           observation_map_fp,
                           output_biom_fp,
                           observation_metadata_fp):
     """Build a table from an observation map and write it to a file.

     observation_map_fp: path of the observation map handed to
      make_otu_table
     output_biom_fp: path of the file that receives the value returned
      by make_otu_table
     observation_metadata_fp: path of a file parsed with parse_taxonomy
      (called with parse_all_fields=True), or None to build the table
      without observation metadata
     """
     observation_metadata = None
     # Identity test: ``!= None`` would invoke a custom __ne__ if one
     # were defined on the argument.
     if observation_metadata_fp is not None:
         with open(observation_metadata_fp, 'U') as metadata_f:
             observation_metadata = parse_taxonomy(metadata_f,
                                                   parse_all_fields=True)

     # The output file is opened before the table is built, as before,
     # but is now closed even if make_otu_table raises.  Nested ``with``
     # blocks keep the code valid on Python 2.6.
     with open(output_biom_fp, 'w') as biom_table_f:
         with open(observation_map_fp, 'U') as observation_map_f:
             biom_table_f.write(make_otu_table(observation_map_f,
                                               observation_metadata))
Exemplo n.º 11
0
def main():
    """Build an OTU table from an OTU map and write it with write_biom_table.

    Reads the script options and then, depending on which options are set:
      - parses the taxonomy file (opts.taxonomy_fname) with parse_taxonomy;
      - reads opts.counts_fname into a dict, one whitespace-separated
        "key value" pair per line (values are kept as strings);
      - collects OTU ids to exclude from opts.exclude_otus_fp, read as a
        fasta file when the extension is .fasta or .fna and as a seq-id
        file otherwise;
      - parses the mapping file (opts.mapping_fp) into sample metadata.
    The results are passed to make_otu_table together with the OTU map
    (opts.otu_map_fp) and the table is written to opts.output_biom_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # Every input is read inside a ``with`` block so the handle is closed
    # as soon as parsing is done, even when the parser raises.
    otu_to_taxonomy = None
    if opts.taxonomy_fname:
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    seq_counts = None
    if opts.counts_fname:
        seq_counts = {}
        with open(opts.counts_fname, 'U') as counts_f:
            for line in counts_f:
                # A blank line (e.g. a trailing newline at the end of the
                # file) has nothing to unpack; skip it instead of failing.
                if not line.strip():
                    continue
                key, val = line.split()
                seq_counts[key] = val

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides which id reader is used.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        # NOTE(review): assumes the reader consumes the whole file before
        # returning (no lazy generator) -- confirm against its definition.
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)

        sample_metadata = mapping_file_to_dict(mapping_data,
                                               mapping_header)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata,
                                        seq_counts=seq_counts)

    write_biom_table(biom_otu_table, opts.output_biom_fp)
Exemplo n.º 12
0
def main():
    """Build an OTU table from an OTU map and write it out.

    The OTU map (opts.otu_map_fp) is loaded with fields_to_dict, OTUs
    listed in opts.exclude_otus_fp are removed with remove_otus when that
    option is set, and the value returned by make_otu_table is written to
    opts.output_fp, or to stdout when no output path is given.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # The output is still opened first, as before.
    if opts.output_fp:
        outfile = open(opts.output_fp, 'w')
    else:
        outfile = stdout

    try:
        otu_to_taxonomy = None
        if opts.taxonomy_fname:
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            otu_to_seqid = fields_to_dict(otu_map_f)

        if exclude_otus_fp:
            # NOTE(review): assumes remove_otus finishes reading the
            # exclusion file before it returns -- confirm.
            with open(exclude_otus_fp, 'U') as exclude_f:
                otu_to_seqid = remove_otus(otu_to_seqid, exclude_f)

        outfile.write(make_otu_table(otu_to_seqid, otu_to_taxonomy))
    finally:
        # Close only a file we opened ourselves; stdout is left alone.
        if outfile is not stdout:
            outfile.close()
Exemplo n.º 13
0
def main():
    """Build an OTU table from an OTU map and write it to the output file.

    Reads the script options, optionally parses the taxonomy file
    (opts.taxonomy_fname) and the list of OTU ids to exclude
    (opts.exclude_otus_fp, read as fasta for .fasta/.fna and as a seq-id
    file otherwise), then writes the value returned by make_otu_table to
    opts.output_biom_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # The output file is still opened before any input is read, exactly as
    # before, but is now closed (and therefore flushed) on every exit path.
    with open(opts.output_biom_fp, 'w') as outfile:
        otu_to_taxonomy = None
        if opts.taxonomy_fname:
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        ids_to_exclude = []
        if exclude_otus_fp:
            # The file extension decides which id reader is used.
            if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
                read_ids = get_seq_ids_from_fasta_file
            else:
                read_ids = get_seq_ids_from_seq_id_file
            # NOTE(review): assumes the reader consumes the whole file
            # before returning -- confirm against its definition.
            with open(exclude_otus_fp, 'U') as exclude_f:
                ids_to_exclude = read_ids(exclude_f)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            biom_otu_table = make_otu_table(otu_map_f,
                                            otu_to_taxonomy, ids_to_exclude)
        outfile.write(biom_otu_table)
Exemplo n.º 14
0
def test_cospeciation(potu_table_fp, cotu_table_fp, host_tree_fp, mapping_fp,
                      mapping_category, output_dir, significance_level, test,
                      permutations, taxonomy_fp, force):
    """Run a cospeciation test for every pOTU listed in a pOTU table.

    For each non-comment row of the pOTU table, the first tab-separated
    field is used as a base name to locate three files inside the
    cotu_table_fp directory: <name>_seqs_otu_table.txt,
    <name>_seqs_rep_set.tre and <name>_seqs_rep_set_aligned.fasta.  The
    selected test is run on them, the per-pOTU results are written with
    write_results, and one line per pOTU is appended to
    cospeciation_results_summary.txt in output_dir.

    potu_table_fp: path of the parent OTU table
    cotu_table_fp: path of the directory holding the per-pOTU cOTU files
    host_tree_fp: path of the host data passed to make_dists_and_tree
    mapping_fp, mapping_category: accepted but not used by this function
    output_dir: directory created for the results
    significance_level: p-values below this are counted as significant
    test: one of 'unifrac', 'hommola_recursive' or 'hommola'
    permutations: passed through to the selected test
    taxonomy_fp: path of the pOTU taxonomy file read with parse_taxonomy
    force: when true, an existing output_dir is reused instead of
     aborting

    Side effects: changes the working directory to cotu_table_fp; calls
    exit(1) when the host data or the pOTU table cannot be opened, or
    when output_dir cannot be created and force is false.  Raises
    ValueError when test is not one of the supported names; any error
    raised while testing a pOTU is printed and re-raised.
    """
    # Convert inputs to absolute paths
    output_dir = os.path.abspath(output_dir)
    host_tree_fp = os.path.abspath(host_tree_fp)
    mapping_fp = os.path.abspath(mapping_fp)
    potu_table_fp = os.path.abspath(potu_table_fp)
    cotu_table_fp = os.path.abspath(cotu_table_fp)

    # Check Host Tree
    try:
        with open(host_tree_fp):
            pass
    except IOError:
        print('Host Data could not be opened! Are you sure it is located at ' +
              host_tree_fp + '  ?')
        exit(1)

    # Check pOTU table
    try:
        with open(potu_table_fp):
            pass
    except IOError:
        print('parent OTU table could not be opened! Are you sure it is located at ' +
              potu_table_fp + '  ?')
        exit(1)

    try:
        os.makedirs(output_dir)
    except OSError:
        if not force:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print("Output directory already exists. Please choose " +
                  "a different directory, or force overwrite with -f.")
            exit(1)

    # get sample names present in potu table
    with open(potu_table_fp, 'Ur') as potu_table_f:
        sample_names, taxon_names, data, lineages = parse_otu_table(
            potu_table_f)

    # Process host input (tree/alignment/matrix) and take subtree of host
    # supertree
    host_tree, host_dist = make_dists_and_tree(sample_names, host_tree_fp)

    # At this point, the host tree and host dist matrix have the intersect of
    # the samples in the pOTU table and the input host tree/dm.

    summary_fp = output_dir + '/' + 'cospeciation_results_summary.txt'
    # ``with`` closes the summary file on every exit path, including the
    # re-raised test errors below.
    with open(summary_fp, 'w') as summary_file:
        summary_file.write("sig_nodes\tnum_nodes\tfile\n")

        # Load taxonomic assignments for the pOTUs
        with open(taxonomy_fp, 'Ur') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        # test that you have a directory, otherwise stop here.
        if not os.path.isdir(cotu_table_fp):
            print('Not a directory.')
            return

        os.chdir(cotu_table_fp)
        print(os.getcwd())

        # run test on cOTU tables in directory.
        # use pOTU table to choose which cOTUs to use.
        with open(potu_table_fp, 'r') as potu_rows:
            for line in potu_rows:
                # ignore comment lines
                if line.startswith('#'):
                    continue

                # first element in OTU table tab-delimited row
                cotu_basename = line.split('\t')[0]

                print("Analyzing pOTU # " + cotu_basename)

                cotu_file_fp = cotu_basename + '_seqs_otu_table.txt'

                basename = cotu_basename + "_" + test

                # Read in cOTU file.  A missing table used to fall through
                # to an undefined (or stale, already closed) handle; now
                # the pOTU is recorded as an error row and skipped.
                try:
                    cotu_file = open(cotu_file_fp, 'Ur')
                except IOError:
                    print("is this a real file?")
                    outline = "ERROR\t\t" + cotu_file_fp + "\n"
                    print(outline)
                    summary_file.write(outline)
                    continue

                # Reconcile hosts in host DM and cOTU table
                try:
                    filtered_cotu_file, host_dist_filtered = \
                        reconcile_hosts_symbionts(cotu_file, host_dist)
                finally:
                    cotu_file.close()

                # Read in reconciled cOTU table
                try:
                    sample_names, taxon_names, data, lineages = \
                        parse_otu_table(filtered_cotu_file)
                finally:
                    filtered_cotu_file.close()

                # skip this pOTU if less than three hosts or cOTUs
                if len(sample_names) < 3 or len(taxon_names) < 3:
                    print("Less than 3 hosts or cOTUs in cOTU table!")
                    continue

                # Import, filter, and root cOTU tree
                otu_tree_fp = cotu_basename + "_seqs_rep_set.tre"
                with open(otu_tree_fp, 'r') as otu_tree_file:
                    otu_tree_unrooted = DndParser(otu_tree_file, PhyloNode)
                otu_subtree_unrooted = otu_tree_unrooted.getSubTree(
                    taxon_names)
                # root at midpoint
                # Consider alternate step to go through and find closest DB
                # seq to root?
                otu_subtree = otu_subtree_unrooted.rootAtMidpoint()

                # filter host tree
                host_subtree = host_tree.getSubTree(sample_names)

                # Load up and filter cOTU sequences
                aligned_otu_seqs = LoadSeqs(
                    cotu_basename + '_seqs_rep_set_aligned.fasta',
                    moltype=DNA,
                    label_to_name=lambda x: x.split()[0])
                filtered_seqs = aligned_otu_seqs.takeSeqs(taxon_names)

                # Run the selected test on this pOTU:
                try:
                    if test == 'unifrac':
                        print('calling unifrac test')
                        results_dict, acc_dict = unifrac_recursive_test(
                            host_subtree, otu_subtree, sample_names,
                            taxon_names, data, permutations)
                    elif test == 'hommola_recursive':
                        # run recursive hommola test
                        results_dict, acc_dict = recursive_hommola(
                            filtered_seqs, host_subtree, host_dist_filtered,
                            otu_subtree, sample_names, taxon_names, data,
                            permutations, recurse=True)
                    elif test == 'hommola':
                        # run the hommola test without recursion
                        results_dict, acc_dict = recursive_hommola(
                            filtered_seqs, host_subtree, host_dist_filtered,
                            otu_subtree, sample_names, taxon_names, data,
                            permutations, recurse=False)
                    else:
                        # Previously surfaced as a NameError on the first
                        # use of results_dict.
                        raise ValueError(
                            "Unknown cospeciation test: %r" % (test,))

                    # Every supported test stores its p-values under
                    # this key.
                    pvals = 'p_vals'

                    # Count number of significant nodes
                    sig_nodes = 0
                    for pval in results_dict[pvals]:
                        if pval < significance_level:
                            sig_nodes += 1

                    num_nodes = write_results(
                        results_dict, acc_dict, output_dir, basename,
                        host_tree)

                except Exception as e:
                    print(e)
                    raise

                # Reaching this point means the test succeeded (failures
                # are re-raised above), so the result row is always written.
                outline = "{0}\t{1}\t{2}\t{3}".format(
                    sig_nodes, num_nodes, cotu_basename,
                    otu_to_taxonomy[cotu_basename]) + "\n"
                print(outline)
                summary_file.write(outline)
Exemplo n.º 15
0
def main():
    """Collect per-pOTU result files and write multiple-test-corrected output.

    Reads every file in opts.results_dir whose name ends in "results.txt",
    gathers the 'p_vals' value of each result row, passes the collected
    p-values to add_fdr_correction_to_results and
    add_bonferroni_to_results, and then, from inside opts.output_dir,
    calls print_corrected_results_files and print_sig_lists.

    Side effects: changes the working directory (first to results_dir,
    then to output_dir); calls exit(1) when cOTUs_dir or results_dir is
    not a directory, or when output_dir cannot be created and opts.force
    is false.  Raises IOError when the taxonomy file cannot be read.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    cOTUs_dir = opts.cOTUs_dir
    results_dir = opts.results_dir
    output_dir = opts.output_dir
    significance_level = float(opts.significance_level)
    taxonomy_fp = opts.taxonomy_fp

    from qiime.otu_category_significance import add_fdr_correction_to_results, \
        add_bonferroni_to_results, \
        fdr_correction

    # test input and output dirs

    if taxonomy_fp:
        try:
            # Opened once (it used to be opened twice) and closed as soon
            # as it has been parsed.
            with open(taxonomy_fp, 'Ur') as taxonomy_file:
                otu_to_taxonomy = parse_taxonomy(taxonomy_file)
        except IOError:
            # Call form of raise: valid on Python 2 and Python 3 alike.
            raise IOError(
                "Can't open taxonomy file (%s). Does it exist? Do you have read access?"
                % taxonomy_fp)
    else:
        otu_to_taxonomy = None

    if not os.path.isdir(cOTUs_dir):
        print("cOTUs_directory not a directory. Please try again.")
        exit(1)

    cOTUs_dir = os.path.abspath(cOTUs_dir)

    if not os.path.isdir(results_dir):
        print("results_directory not a directory. Please try again.")
        exit(1)

    results_dir = os.path.abspath(results_dir)

    try:
        os.makedirs(output_dir)
    except OSError:
        if not opts.force:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print("Output directory already exists. Please choose " +
                  "a different directory, or force overwrite with -f.")
            exit(1)

    output_dir = os.path.abspath(output_dir)

    # get results dict
    #   results dict is 2D, with key a sequential per-node UID and fields for:
    #   pOTU, uncorrected pval,  taxonomy, plus other results values

    results_list = []
    results_keys = []
    os.chdir(results_dir)
    # ``results_fname`` rather than ``file`` so the builtin is not shadowed.
    for results_fname in os.listdir('.'):
        if not results_fname.endswith("results.txt"):
            continue

        results_list += read_results_file(results_fname)

        # The keys are taken from the first results file that yields any.
        if not results_keys:
            results_keys = read_results_keys(results_fname)

    # do FDR correction
    #   now results dict has FDR and bonfo vals

    # we're making a dict here for all the nodes that have been tested, with
    # the key corresponding to the position in the results_list array of
    # dicts.
    p_dict = {}
    for node, node_result in enumerate(results_list):
        pval = float(node_result['p_vals'])
        p_dict[node] = [pval, pval]

    add_fdr_correction_to_results(p_dict)
    add_bonferroni_to_results(p_dict)

    # a previous iteration of the permutation test allowed 0.0 p_vals, which
    # gave 'NA' results for highly significant nodes. Retained for legacy
    # purposes.
    p_dict = de_NA(p_dict)

    os.chdir(output_dir)

    print_corrected_results_files(results_list, results_keys, p_dict)

    print_sig_lists(
        results_list, results_keys, p_dict, significance_level, otu_to_taxonomy)