Exemplo n.º 1
0
def ensure_dir(d):
    """
    Create the directory path ``d`` (including missing parents) if it does not
    already exist. OSError exceptions raised while creating it are caught and
    turned into a descriptive message instead of being re-raised.

    :type d: str
    :param d: It is the full path to a directory.

    :rtype: str or None
    :return: None when the path already exists or was created successfully;
             otherwise a message describing why it could not be created.
    """
    if not os.path.exists(d):
        try:
            os.makedirs(d)
        except OSError as oe:
            # The error code is carried by the exception instance. Comparing
            # the ``os.errno`` module alias (gone in newer Python versions)
            # against an int could never match.
            # ENOENT: No such file or directory
            # (should not happen with os.makedirs)
            if oe.errno == errno.ENOENT:
                msg = twdd("""One or more directories in the path ({}) do not exist. If
                           you are specifying a new directory for output, please ensure
                           all other directories in the path currently exist.""")
                return msg.format(d)
            else:
                msg = twdd("""An error occurred trying to create the output directory
                           ({}) with message: {}""")
                return msg.format(d, oe.strerror)
Exemplo n.º 2
0
def ensure_dir(d):
    """
    Create the directory path ``d`` (including missing parents) if it does not
    already exist. OSError exceptions raised while creating it are caught and
    turned into a descriptive message instead of being re-raised.

    :type d: str
    :param d: It is the full path to a directory.

    :rtype: str or None
    :return: None when the path already exists or was created successfully;
             otherwise a message describing why it could not be created.
    """
    if not os.path.exists(d):
        try:
            os.makedirs(d)
        except OSError as oe:
            # The error code is carried by the exception instance. Comparing
            # the ``os.errno`` module alias (gone in newer Python versions)
            # against an int could never match.
            # ENOENT: No such file or directory
            # (should not happen with os.makedirs)
            if oe.errno == errno.ENOENT:
                msg = twdd(
                    """One or more directories in the path ({}) do not exist. If
                           you are specifying a new directory for output, please ensure
                           all other directories in the path currently exist."""
                )
                return msg.format(d)
            else:
                msg = twdd(
                    """An error occurred trying to create the output directory
                           ({}) with message: {}""")
                return msg.format(d, oe.strerror)
Exemplo n.º 3
0
    def setUp(self):
        """
        Build the fixtures shared by the tests: two in-memory abundance tables
        (samples "A" and "B") passed through ``prep_clark_input``, the counts
        and taxa parsed from them, and the BIOM table created from both.
        """
        table_a = twdd(u"""\
            Name,TaxID,Lineage,Count,Proportion_All(%),Proportion_Classified(%)
            Achromobacter xylosoxidans,85698,Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Alcaligenaceae;Achromobacter,82,0.00142317,0.124620061
            Acinetobacter baumannii,470,Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Moraxellaceae;Acinetobacter,356,0.00617862,0.541033435
            Actinomyces cardiffensis,181487,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,1,1.74E-05,0.001519757
            Actinomyces dentalis,272548,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,15,0.000260335,0.022796353
            Actinomyces georgiae,52768,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,5,8.68E-05,0.007598784
            Actinomyces gerencseriae,52769,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,12,0.000208268,0.018237082
            Actinomyces israelii,1659,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,93,0.00161408,0.141337386
            Actinomyces johnsonii,544581,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,1,1.74E-05,0.001519757
            Actinomyces massiliensis,461393,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,12,0.000208268,0.018237082
            Actinomyces meyeri,52773,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,81,0.00140581,0.123100304
            UNKNOWN,UNKNOWN,UNKNOWN,658,92.0161,-
            """)
        self.krepA = prep_clark_input(io.StringIO(table_a))

        table_b = twdd(u"""\
            Name,TaxID,Lineage,Count,Proportion_All(%),Proportion_Classified(%)
            Achromobacter xylosoxidans,85698,Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Alcaligenaceae;Achromobacter,10,0.00142317,0.003241491
            Acinetobacter baumannii,470,Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Moraxellaceae;Acinetobacter,200,0.00617862,0.064829822
            Actinomyces viscosus,1656,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,5,8.68E-05,0.001620746
            Aggregatibacter actinomycetemcomitans,714,Bacteria;Proteobacteria;Gammaproteobacteria;Pasteurellales;Pasteurellaceae;Aggregatibacter,212,0.0036794,0.068719611
            Aggregatibacter aphrophilus,732,Bacteria;Proteobacteria;Gammaproteobacteria;Pasteurellales;Pasteurellaceae;Aggregatibacter,2630,0.0456454,0.852512156
            Aggregatibacter segnis,739,Bacteria;Proteobacteria;Gammaproteobacteria;Pasteurellales;Pasteurellaceae;Aggregatibacter,1,1.74E-05,0.000324149
            Agrobacterium fabrum,1176649,Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Rhizobiaceae;Agrobacterium,1,1.74E-05,0.000324149
            Agrobacterium tumefaciens,358,Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Rhizobiaceae;Agrobacterium,9,0.000156201,0.002917342
            Alloscardovia omnicolens,419015,Bacteria;Actinobacteria;Actinobacteria;Bifidobacteriales;Bifidobacteriaceae;Alloscardovia,1,1.74E-05,0.000324149
            Anaerococcus prevotii,33034,Bacteria;Firmicutes;Tissierellia;Tissierellales;Peptoniphilaceae;Anaerococcus,3,5.21E-05,0.000972447
            Arsenicicoccus sp. oral taxon 190,1658671,Bacteria;Actinobacteria;Actinobacteria;Micrococcales;Intrasporangiaceae;Arsenicicoccus,5,8.68E-05,0.001620746
            Atopobium parvulum,1382,Bacteria;Actinobacteria;Coriobacteriia;Coriobacteriales;Atopobiaceae;Atopobium,7,0.00012149,0.002269044
            Atopobium rimae,1383,Bacteria;Actinobacteria;Coriobacteriia;Coriobacteriales;Atopobiaceae;Atopobium,1,1.74E-05,0.000324149
            UNKNOWN,UNKNOWN,UNKNOWN,3085,92.0161,-
            """)
        self.krepB = prep_clark_input(io.StringIO(table_b))

        # containers filled from the parsed sample reports
        self.taxa = OrderedDict()
        self.sample_counts = OrderedDict()

        # parse both reports first, then merge taxa and counts in sample order
        parsed = [
            (label, cb.parse_clark_abundance_tbl(report))
            for label, report in (("A", self.krepA), ("B", self.krepB))
        ]
        for _, (_, sample_taxa) in parsed:
            self.taxa.update(sample_taxa)
        for label, (counts, _) in parsed:
            self.sample_counts[label] = counts

        # the BIOM table is derived from the merged counts and taxa
        self.biomT = cb.create_biom_table(self.sample_counts, self.taxa)
Exemplo n.º 4
0
def main():
    """
    Command-line entry point: convert kraken-report files into a BIOM table.

    Reads the options returned by ``handle_program_options()``, parses every
    report, builds the BIOM table together with the sample metadata, writes it
    to disk and optionally writes an OTU file. Exits through ``sys.exit`` with
    an error message when the rank bounds are out of order or when writing the
    OTU file raises ``RuntimeError``.
    """
    args = handle_program_options()

    if args.fmt == 'hdf5' and not HAVE_H5PY:
        args.fmt = 'json'
        msg = """\
        Library 'h5py' not found, unable to write BIOM 2.x (HDF5) files.
        Defaulting to BIOM 1.0 (JSON)."""
        print(twdd(msg))

    if ranks.index(args.max) > ranks.index(args.min):
        msg = "ERROR: Max and Min ranks are out of order: {} < {}"
        sys.exit(msg.format(args.max, args.min))

    # Work on a copy so that extending the list does not mutate ``args`` in
    # place, and tolerate the option being unset (None).
    reports = list(args.kraken_reports or [])
    if args.kraken_reports_fp:
        # glob() yields entries in arbitrary order and also matches
        # sub-directories: keep regular files only and sort them so the
        # sample order is reproducible between runs.
        report_dir = Path(args.kraken_reports_fp)
        reports += sorted(str(p) for p in report_dir.glob('*') if p.is_file())

    # load all kraken-report files and parse them
    sample_counts, taxa = process_samples(reports,
                                          max_rank=args.max,
                                          min_rank=args.min)

    # Make sample metadata. Reads the given file or
    # makes simple dummy metadata.
    sample_metadata = process_metadata(sample_counts, args.metadata)

    # create new BIOM table from sample counts and taxon ids
    # add taxonomy strings to row (taxon) metadata
    biomT = create_biom_table(sample_counts, taxa, sample_metadata)

    out_fp = write_biom(biomT, args.output_fp, args.fmt, args.gzip)

    if args.otu_fp:
        try:
            write_otu_file(list(taxa), args.otu_fp)
        except RuntimeError as err:
            msg = "ERROR creating OTU file: \n\t{}"
            sys.exit(msg.format(err))

    if args.verbose:
        # blank separator line before the summary
        print("")
        table_str = """\
        BIOM-format table written to: {out_fp}
        Table contains {rows} rows (OTUs) and {cols} columns (Samples)
        and is {density:.1%} dense.""".format(out_fp=out_fp,
                                              rows=biomT.shape[0],
                                              cols=biomT.shape[1],
                                              density=biomT.get_table_density())
        print(twdd(table_str))
Exemplo n.º 5
0
    def setUp(self):
        """
        Write two UTF-8 encoded abundance tables to temporary files and run
        ``cb.process_samples`` on them.

        Sets ``self.crepA`` / ``self.crepB`` (the encoded tables), ``self.fps``
        (temp file paths), ``self.fnames`` (their base names) and the
        resulting ``self.sample_counts`` / ``self.taxa``.
        """
        text_a = twdd(u"""\
            Name,TaxID,Lineage,Count,Proportion_All(%),Proportion_Classified(%)
            Achromobacter xylosoxidans,85698,Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Alcaligenaceae;Achromobacter,82,0.00142317,0.124620061
            Acinetobacter baumannii,470,Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Moraxellaceae;Acinetobacter,356,0.00617862,0.541033435
            Actinomyces cardiffensis,181487,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,1,1.74E-05,0.001519757
            Actinomyces dentalis,272548,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,15,0.000260335,0.022796353
            Actinomyces georgiae,52768,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,5,8.68E-05,0.007598784
            Actinomyces gerencseriae,52769,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,12,0.000208268,0.018237082
            Actinomyces israelii,1659,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,93,0.00161408,0.141337386
            Actinomyces johnsonii,544581,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,1,1.74E-05,0.001519757
            Actinomyces massiliensis,461393,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,12,0.000208268,0.018237082
            Actinomyces meyeri,52773,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,81,0.00140581,0.123100304
            UNKNOWN,UNKNOWN,UNKNOWN,658,92.0161,-
            """)
        self.crepA = text_a.encode("utf-8")

        text_b = twdd(u"""\
            Name,TaxID,Lineage,Count,Proportion_All(%),Proportion_Classified(%)
            Achromobacter xylosoxidans,85698,Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Alcaligenaceae;Achromobacter,10,0.00142317,0.003241491
            Acinetobacter baumannii,470,Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Moraxellaceae;Acinetobacter,200,0.00617862,0.064829822
            Actinomyces viscosus,1656,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,5,8.68E-05,0.001620746
            Aggregatibacter actinomycetemcomitans,714,Bacteria;Proteobacteria;Gammaproteobacteria;Pasteurellales;Pasteurellaceae;Aggregatibacter,212,0.0036794,0.068719611
            Aggregatibacter aphrophilus,732,Bacteria;Proteobacteria;Gammaproteobacteria;Pasteurellales;Pasteurellaceae;Aggregatibacter,2630,0.0456454,0.852512156
            Aggregatibacter segnis,739,Bacteria;Proteobacteria;Gammaproteobacteria;Pasteurellales;Pasteurellaceae;Aggregatibacter,1,1.74E-05,0.000324149
            Agrobacterium fabrum,1176649,Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Rhizobiaceae;Agrobacterium,1,1.74E-05,0.000324149
            Agrobacterium tumefaciens,358,Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Rhizobiaceae;Agrobacterium,9,0.000156201,0.002917342
            Alloscardovia omnicolens,419015,Bacteria;Actinobacteria;Actinobacteria;Bifidobacteriales;Bifidobacteriaceae;Alloscardovia,1,1.74E-05,0.000324149
            Anaerococcus prevotii,33034,Bacteria;Firmicutes;Tissierellia;Tissierellales;Peptoniphilaceae;Anaerococcus,3,5.21E-05,0.000972447
            Arsenicicoccus sp. oral taxon 190,1658671,Bacteria;Actinobacteria;Actinobacteria;Micrococcales;Intrasporangiaceae;Arsenicicoccus,5,8.68E-05,0.001620746
            Atopobium parvulum,1382,Bacteria;Actinobacteria;Coriobacteriia;Coriobacteriales;Atopobiaceae;Atopobium,7,0.00012149,0.002269044
            Atopobium rimae,1383,Bacteria;Actinobacteria;Coriobacteriia;Coriobacteriales;Atopobiaceae;Atopobium,1,1.74E-05,0.000324149
            UNKNOWN,UNKNOWN,UNKNOWN,3085,92.0161,-
            """)
        self.crepB = text_b.encode("utf-8")

        # Persist each table to its own temp file; delete=False keeps the
        # file on disk after the handle is closed so it can be re-opened by
        # path.
        temp_paths = []
        for payload in (self.crepA, self.crepB):
            with tempfile.NamedTemporaryFile(delete=False) as handle:
                handle.write(payload)
            temp_paths.append(handle.name)

        self.fps = temp_paths
        self.fnames = [osp.basename(path) for path in temp_paths]

        parsed = cb.process_samples(self.fps, store_pct=False)
        self.sample_counts, self.taxa = parsed
Exemplo n.º 6
0
    def get(self, request):
        """
        Render the SPARQL search page.

        The query is taken from the ``query`` GET parameter, falling back to a
        built-in example query. The query is run through
        ``core.AOM.run_sparql_query_and_translate_result``; if that raises, the
        error text is stored under ``err`` and both result lists are empty.

        :param request: the incoming request (its ``GET`` mapping is read)
        :return: ``TemplateResponse`` for ``ackrep_web/search_sparql.html``
        """
        # PREFIX P: <{OM.iri}>
        default_query = twdd(f"""
        # example query: select all possible tags

        PREFIX P: <https://ackrep.org/draft/ocse-prototype01#>
        SELECT ?entity
        WHERE {{
          ?entity rdf:type ?type.
          ?type rdfs:subClassOf* P:OCSE_Entity.
        }}
        """)
        query_source = request.GET.get("query", default_query)
        context = {"query": query_source}

        try:
            # unpacking stays inside the try so a malformed result is also
            # reported through ``err``
            found = core.AOM.run_sparql_query_and_translate_result(query_source)
            ackrep_entities, onto_entities = found
        except Exception as e:
            context["err"] = f"The following error occurred: {str(e)}"
            ackrep_entities = []
            onto_entities = []

        context["ackrep_entities"] = ackrep_entities
        context["onto_entities"] = onto_entities
        # this could be used for further options
        context["c"] = util.Container()

        template_name = "ackrep_web/search_sparql.html"
        return TemplateResponse(request, template_name, context)
Exemplo n.º 7
0
def main():
    """
    Command-line entry point: convert kraken-report files into a BIOM table.

    Reads the options returned by ``handle_program_options()``, parses every
    report, builds the BIOM table, writes it to disk and optionally writes an
    OTU file. Exits through ``sys.exit`` with an error message when the rank
    bounds are out of order or when writing the OTU file raises
    ``RuntimeError``.
    """
    args = handle_program_options()

    if args.fmt == 'hdf5' and not HAVE_H5PY:
        args.fmt = 'json'
        msg = """\
        Library 'h5py' not found, unable to write BIOM 2.x (HDF5) files.
        Defaulting to BIOM 1.0 (JSON)."""
        print(twdd(msg))

    if ranks.index(args.max) > ranks.index(args.min):
        msg = "ERROR: Max and Min ranks are out of order: {} < {}"
        sys.exit(msg.format(args.max, args.min))

    # Work on a copy so that extending the list does not mutate ``args`` in
    # place, and tolerate the option being unset (None).
    reports = list(args.kraken_reports or [])
    if args.kraken_reports_fp:
        # glob() yields entries in arbitrary order and also matches
        # sub-directories: keep regular files only and sort them so the
        # sample order is reproducible between runs.
        report_dir = Path(args.kraken_reports_fp)
        reports += sorted(str(p) for p in report_dir.glob('*') if p.is_file())

    # load all kraken-report files and parse them
    sample_counts, taxa = process_samples(reports,
                                          max_rank=args.max,
                                          min_rank=args.min)

    # create new BIOM table from sample counts and taxon ids
    # add taxonomy strings to row (taxon) metadata
    biomT = create_biom_table(sample_counts, taxa)

    out_fp = write_biom(biomT, args.output_fp, args.fmt, args.gzip)

    if args.otu_fp:
        try:
            write_otu_file(list(taxa), args.otu_fp)
        except RuntimeError as err:
            msg = "ERROR creating OTU file: \n\t{}"
            sys.exit(msg.format(err))

    if args.verbose:
        # blank separator line before the summary
        print("")
        table_str = """\
        BIOM-format table written to: {out_fp}
        Table contains {rows} rows (OTUs) and {cols} columns (Samples)
        and is {density:.1%} dense.""".format(out_fp=out_fp,
                                              rows=biomT.shape[0],
                                              cols=biomT.shape[1],
                                              density=biomT.get_table_density())
        print(twdd(table_str))
Exemplo n.º 8
0
def main():
    """
    Command-line entry point: build a BIOM table from the abundance tables
    listed in ``args.clark_abd_tbls``.

    Falls back to JSON output when HDF5 was requested but ``HAVE_H5PY`` is
    false, writes the table, optionally writes an OTU file (exiting through
    ``sys.exit`` if that raises ``RuntimeError``) and, in verbose mode, prints
    a short summary of the written table.
    """
    args = handle_program_options()

    wants_hdf5 = args.fmt == 'hdf5'
    if wants_hdf5 and not HAVE_H5PY:
        args.fmt = 'json'
        fallback_note = """\
        Library 'h5py' not found, unable to write BIOM 2.x (HDF5) files.
        Defaulting to BIOM 1.0 (JSON)."""
        print(twdd(fallback_note))

    # parse every abundance table
    sample_counts, taxa = process_samples(
        args.clark_abd_tbls, store_pct=args.store_pct
    )

    # build the table from sample counts and taxon ids, then write it out
    biomT = create_biom_table(sample_counts, taxa)
    out_fp = write_biom(biomT, args.output_fp, args.fmt, args.gzip)

    if args.otu_fp:
        try:
            write_otu_file(list(taxa), args.otu_fp)
        except RuntimeError as err:
            sys.exit("ERROR creating OTU file: \n\t{}".format(err))

    if not args.verbose:
        return

    # blank separator line, then the summary of what was written
    print("")
    n_rows = biomT.shape[0]
    n_cols = biomT.shape[1]
    summary = """\
        BIOM-format table written to: {out_fp}
        Table contains {rows} rows (OTUs) and {cols} columns (Samples)
        and is {density:.1%} dense.""".format(
        out_fp=out_fp,
        rows=n_rows,
        cols=n_cols,
        density=biomT.get_table_density(),
    )
    print(twdd(summary))
Exemplo n.º 9
0
 def setUp(self):
     """
     Pass an in-memory abundance table through ``prep_clark_input`` and keep
     the result as ``self.sample_clark_rep``.
     """
     report_text = twdd(u"""\
         Name,TaxID,Lineage,Count,Proportion_All(%),Proportion_Classified(%)
         Achromobacter xylosoxidans,85698,Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Alcaligenaceae;Achromobacter,82,0.00142317,0.124620061
         Acinetobacter baumannii,470,Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Moraxellaceae;Acinetobacter,356,0.00617862,0.541033435
         Actinomyces cardiffensis,181487,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,1,1.74E-05,0.001519757
         Actinomyces dentalis,272548,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,15,0.000260335,0.022796353
         Actinomyces georgiae,52768,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,5,8.68E-05,0.007598784
         Actinomyces gerencseriae,52769,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,12,0.000208268,0.018237082
         Actinomyces israelii,1659,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,93,0.00161408,0.141337386
         Actinomyces johnsonii,544581,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,1,1.74E-05,0.001519757
         Actinomyces massiliensis,461393,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,12,0.000208268,0.018237082
         Actinomyces meyeri,52773,Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Actinomycetaceae;Actinomyces,81,0.00140581,0.123100304
         UNKNOWN,UNKNOWN,UNKNOWN,658,92.0161,-
         """)
     report_stream = io.StringIO(report_text)
     self.sample_clark_rep = prep_clark_input(report_stream)
Exemplo n.º 10
0
    def setUp(self):
        """
        Build the fixtures shared by the tests: two in-memory kraken reports
        (samples "A" and "B") passed through ``prep_kraken_input``, the counts
        and taxa parsed from them between ranks "O" and "S", dummy sample
        metadata, and the BIOM table created from all of it.
        """
        report_a = twdd(u"""\
            100.00	6783846	6783846	U	0	unclassified
            0.00	130	18	-	1	root
            0.00	105	0	-	131567	  cellular organisms
            0.00	105	0	D	2	    Bacteria
            0.00	62	30	P	1239	      Firmicutes
            0.00	29	0	C	91061	        Bacilli
            0.00	29	0	O	186826	          Lactobacillales
            0.00	20	0	F	1300	            Streptococcaceae
            0.00	20	8	G	1301	              Streptococcus
            0.00	5	4	S	1304	                Streptococcus salivarius
            0.00	1	1	-	1048332	                  Streptococcus salivarius CCHSS3
            0.00	3	0	S	1303	                Streptococcus oralis
            0.00	3	3	-	927666	                  Streptococcus oralis Uo5
            0.00	2	2	S	1318	                Streptococcus parasanguinis
            0.00	1	0	S	1305	                Streptococcus sanguinis
            0.00	1	1	-	388919	                  Streptococcus sanguinis SK36
            0.00	1	1	S	1308	                Streptococcus thermophilus
            0.00	9	0	F	81852	            Enterococcaceae
            0.00	9	0	G	1350	              Enterococcus
            0.00	9	0	S	37734	                Enterococcus casseliflavus
            0.00	9	9	-	565655	                  Enterococcus casseliflavus EC20
            0.00	1	0	C	186801	        Clostridia
            0.00	1	1	O	186802	          Clostridiales
            0.00	40	0	P	1224	      Proteobacteria
            0.00	20	2	C	1236	        Gammaproteobacteria
            0.00	11	1	O	135622	          Alteromonadales
            0.00	10	0	F	72275	            Alteromonadaceae
            0.00	10	0	G	226	              Alteromonas
            0.00	10	0	S	28108	                Alteromonas macleodii
            0.00	10	10	-	1300257	                  Alteromonas macleodii str. 'Ionian Sea U8'
            0.00	7	0	O	91347	          Enterobacteriales
            0.00	7	6	F	543	            Enterobacteriaceae
            0.00	1	0	G	561	              Escherichia
            0.00	1	1	S	562	                Escherichia coli
            0.00	20	5	C	28216	        Betaproteobacteria
            0.00	15	0	O	80840	          Burkholderiales
            0.00	11	0	F	119060	            Burkholderiaceae
            0.00	11	0	G	48736	              Ralstonia
            0.00	11	10	S	329	                Ralstonia pickettii
            0.00	1	1	-	428406	                  Ralstonia pickettii 12D
            0.00	4	0	F	80864	            Comamonadaceae
            0.00	2	2	G	12916	              Acidovorax
            0.00	2	0	G	201096	              Alicycliphilus
            0.00	2	0	S	179636	                Alicycliphilus denitrificans
            0.00	2	2	-	596154	                  Alicycliphilus denitrificans K601
            0.00	3	0	P	201174	      Actinobacteria
            0.00	3	0	C	1760	        Actinobacteria
            0.00	2	0	-	84998	          Coriobacteridae
            0.00	2	0	O	84999	            Coriobacteriales
            0.00	2	0	-	255727	              Coriobacterineae
            0.00	2	0	F	84107	                Coriobacteriaceae
            0.00	2	0	G	1380	                  Atopobium
            0.00	2	0	S	1382	                    Atopobium parvulum
            0.00	2	2	-	521095	                      Atopobium parvulum DSM 20469
            0.00	1	0	-	85003	          Actinobacteridae
            0.00	1	0	O	2037	            Actinomycetales
            0.00	1	0	-	85009	              Propionibacterineae
            0.00	1	0	F	31957	                Propionibacteriaceae
            0.00	1	0	G	1743	                  Propionibacterium
            0.00	1	1	S	1747	                    Propionibacterium acnes
            0.00	20	0	D	2157	    Archaea
            0.00	20	7	P	28890	      Euryarchaeota
            0.00	8	0	C	183963	        Halobacteria
            0.00	8	0	O	1644060	          Natrialbales
            0.00	8	0	F	1644061	            Natrialbaceae
            0.00	8	0	G	29287	              Natronococcus
            0.00	8	0	S	29288	                Natronococcus occultus
            0.00	8	8	-	694430	                  Natronococcus occultus SP4
            0.00	4	0	C	224756	        Methanomicrobia
            0.00	4	0	O	94695	          Methanosarcinales
            0.00	4	0	F	2206	            Methanosarcinaceae
            0.00	3	0	G	101191	              Methanomethylovorans
            0.00	3	0	S	101192	                Methanomethylovorans hollandica
            0.00	3	3	-	867904	                  Methanomethylovorans hollandica DSM 15978
            0.00	1	0	G	2207	              Methanosarcina
            0.00	1	0	S	2214	                Methanosarcina acetivorans
            0.00	1	1	-	188937	                  Methanosarcina acetivorans C2A
            0.00	1	0	C	183939	        Methanococci
            0.00	1	0	O	2182	          Methanococcales
            0.00	1	0	F	2183	            Methanococcaceae
            0.00	1	0	G	2184	              Methanococcus
            0.00	1	0	S	2188	                Methanococcus voltae
            0.00	1	1	-	456320	                  Methanococcus voltae A3
            0.00	7	0	D	10239	  Viruses
            0.00	2	0	-	29258	    ssDNA viruses
            0.00	2	0	F	10841	      Microviridae
            0.00	2	0	G	10842	        Microvirus
            0.00	2	2	S	374840	          Enterobacteria phage phiX174 sensu lato
            0.00	2	0	-	35237	    dsDNA viruses, no RNA stage
            0.00	2	0	F	10482	      Polydnaviridae
            0.00	2	0	G	10483	        Ichnovirus
            0.00	2	2	S	265522	          Hyposoter fugitivus ichnovirus
            0.00	2	0	-	439488	    ssRNA viruses
            0.00	2	0	-	35278	      ssRNA positive-strand viruses, no DNA stage
            0.00	2	0	F	11018	        Togaviridae
            0.00	2	0	G	11019	          Alphavirus
            0.00	2	0	-	177872	            VEEV complex
            0.00	2	2	S	11036	              Venezuelan equine encephalitis virus
            0.00	1	0	-	35268	    Retro-transcribing viruses
            0.00	1	0	F	11632	      Retroviridae
            0.00	1	0	-	35276	        unclassified Retroviridae
            0.00	1	0	-	206037	          Human endogenous retroviruses
            0.00	1	0	S	45617	            Human endogenous retrovirus K
            0.00	1	1	-	166122	              Human endogenous retrovirus K113
            """)
        self.krepA = prep_kraken_input(io.StringIO(report_a))

        report_b = twdd(u"""\
            100.00	6783846	6783846	U	0	unclassified
            0.00	130	18	-	1	root
            0.00	105	0	-	131567	  cellular organisms
            0.00	105	0	D	2	    Bacteria
            0.00	62	30	P	1239	      Firmicutes
            0.00	29	0	C	91061	        Bacilli
            0.00	29	0	O	186826	          Lactobacillales
            0.00	20	0	F	1300	            Streptococcaceae
            0.00	20	8	G	1301	              Streptococcus
            0.00	5	4	S	1304	                Streptococcus salivarius
            0.00	1	1	-	1048332	                  Streptococcus salivarius CCHSS3
            0.00	3	0	S	1303	                Streptococcus oralis
            0.00	3	3	-	927666	                  Streptococcus oralis Uo5
            0.00	2	2	S	1318	                Streptococcus parasanguinis
            0.00	1	0	S	1305	                Streptococcus sanguinis
            0.00	1	1	-	388919	                  Streptococcus sanguinis SK36
            0.00	1	1	S	1308	                Streptococcus thermophilus
            0.00	9	0	F	81852	            Enterococcaceae
            0.00	9	0	G	1350	              Enterococcus
            0.00	9	0	S	37734	                Enterococcus casseliflavus
            0.00	9	9	-	565655	                  Enterococcus casseliflavus EC20
            0.00	2	0	C	909932	        Negativicutes
            0.00	2	0	O	909929	          Selenomonadales
            0.00	2	0	F	31977	            Veillonellaceae
            0.00	2	0	G	29465	              Veillonella
            0.00	2	0	S	29466	                Veillonella parvula
            0.00	2	2	-	479436	                  Veillonella parvula DSM 2008
            0.00	20	0	D	2157	    Archaea
            0.00	20	7	P	28890	      Euryarchaeota
            0.00	8	0	C	183963	        Halobacteria
            0.00	8	0	O	1644060	          Natrialbales
            0.00	8	0	F	1644061	            Natrialbaceae
            0.00	8	0	G	29287	              Natronococcus
            0.00	8	0	S	29288	                Natronococcus occultus
            0.00	8	8	-	694430	                  Natronococcus occultus SP4
            0.00	7	0	D	10239	  Viruses
            0.00	2	0	-	29258	    ssDNA viruses
            0.00	2	0	F	10841	      Microviridae
            0.00	2	0	G	10842	        Microvirus
            0.00	2	2	S	374840	          Enterobacteria phage phiX174 sensu lato
            0.00	2	0	-	35237	    dsDNA viruses, no RNA stage
            0.00	2	0	F	10482	      Polydnaviridae
            0.00	2	0	G	10483	        Ichnovirus
            0.00	2	2	S	265522	          Hyposoter fugitivus ichnovirus
            """)
        self.krepB = prep_kraken_input(io.StringIO(report_b))

        # containers filled from the parsed sample reports
        self.taxa = OrderedDict()
        self.sample_counts = OrderedDict()

        # parse both reports first, then merge taxa and counts in sample order
        parsed = [
            (label, kb.parse_kraken_report(report, max_rank="O", min_rank="S"))
            for label, report in (("A", self.krepA), ("B", self.krepB))
        ]
        for _, (_, sample_taxa) in parsed:
            self.taxa.update(sample_taxa)
        for label, (counts, _) in parsed:
            self.sample_counts[label] = counts

        # no metadata file is supplied, so dummy sample metadata is generated
        self.metadata = kb.process_metadata(self.sample_counts, None)

        # the BIOM table is derived from the merged counts, taxa and metadata
        self.biomT = kb.create_biom_table(
            self.sample_counts, self.taxa, self.metadata
        )
Exemplo n.º 11
0
    def setUp(self):
        self.krepA = twdd(u"""\
            100.00	6783846	6783846	U	0	unclassified
            0.00	130	18	-	1	root
            0.00	105	0	-	131567	  cellular organisms
            0.00	105	0	D	2	    Bacteria
            0.00	62	30	P	1239	      Firmicutes
            0.00	29	0	C	91061	        Bacilli
            0.00	29	0	O	186826	          Lactobacillales
            0.00	20	0	F	1300	            Streptococcaceae
            0.00	20	8	G	1301	              Streptococcus
            0.00	5	4	S	1304	                Streptococcus salivarius
            0.00	1	1	-	1048332	                  Streptococcus salivarius CCHSS3
            0.00	3	0	S	1303	                Streptococcus oralis
            0.00	3	3	-	927666	                  Streptococcus oralis Uo5
            0.00	2	2	S	1318	                Streptococcus parasanguinis
            0.00	1	0	S	1305	                Streptococcus sanguinis
            0.00	1	1	-	388919	                  Streptococcus sanguinis SK36
            0.00	1	1	S	1308	                Streptococcus thermophilus
            0.00	9	0	F	81852	            Enterococcaceae
            0.00	9	0	G	1350	              Enterococcus
            0.00	9	0	S	37734	                Enterococcus casseliflavus
            0.00	9	9	-	565655	                  Enterococcus casseliflavus EC20
            0.00	1	0	C	186801	        Clostridia
            0.00	1	1	O	186802	          Clostridiales
            0.00	40	0	P	1224	      Proteobacteria
            0.00	20	2	C	1236	        Gammaproteobacteria
            0.00	11	1	O	135622	          Alteromonadales
            0.00	10	0	F	72275	            Alteromonadaceae
            0.00	10	0	G	226	              Alteromonas
            0.00	10	0	S	28108	                Alteromonas macleodii
            0.00	10	10	-	1300257	                  Alteromonas macleodii str. 'Ionian Sea U8'
            0.00	7	0	O	91347	          Enterobacteriales
            0.00	7	6	F	543	            Enterobacteriaceae
            0.00	1	0	G	561	              Escherichia
            0.00	1	1	S	562	                Escherichia coli
            0.00	20	5	C	28216	        Betaproteobacteria
            0.00	15	0	O	80840	          Burkholderiales
            0.00	11	0	F	119060	            Burkholderiaceae
            0.00	11	0	G	48736	              Ralstonia
            0.00	11	10	S	329	                Ralstonia pickettii
            0.00	1	1	-	428406	                  Ralstonia pickettii 12D
            0.00	4	0	F	80864	            Comamonadaceae
            0.00	2	2	G	12916	              Acidovorax
            0.00	2	0	G	201096	              Alicycliphilus
            0.00	2	0	S	179636	                Alicycliphilus denitrificans
            0.00	2	2	-	596154	                  Alicycliphilus denitrificans K601
            0.00	3	0	P	201174	      Actinobacteria
            0.00	3	0	C	1760	        Actinobacteria
            0.00	2	0	-	84998	          Coriobacteridae
            0.00	2	0	O	84999	            Coriobacteriales
            0.00	2	0	-	255727	              Coriobacterineae
            0.00	2	0	F	84107	                Coriobacteriaceae
            0.00	2	0	G	1380	                  Atopobium
            0.00	2	0	S	1382	                    Atopobium parvulum
            0.00	2	2	-	521095	                      Atopobium parvulum DSM 20469
            0.00	1	0	-	85003	          Actinobacteridae
            0.00	1	0	O	2037	            Actinomycetales
            0.00	1	0	-	85009	              Propionibacterineae
            0.00	1	0	F	31957	                Propionibacteriaceae
            0.00	1	0	G	1743	                  Propionibacterium
            0.00	1	1	S	1747	                    Propionibacterium acnes
            0.00	20	0	D	2157	    Archaea
            0.00	20	7	P	28890	      Euryarchaeota
            0.00	8	0	C	183963	        Halobacteria
            0.00	8	0	O	1644060	          Natrialbales
            0.00	8	0	F	1644061	            Natrialbaceae
            0.00	8	0	G	29287	              Natronococcus
            0.00	8	0	S	29288	                Natronococcus occultus
            0.00	8	8	-	694430	                  Natronococcus occultus SP4
            0.00	4	0	C	224756	        Methanomicrobia
            0.00	4	0	O	94695	          Methanosarcinales
            0.00	4	0	F	2206	            Methanosarcinaceae
            0.00	3	0	G	101191	              Methanomethylovorans
            0.00	3	0	S	101192	                Methanomethylovorans hollandica
            0.00	3	3	-	867904	                  Methanomethylovorans hollandica DSM 15978
            0.00	1	0	G	2207	              Methanosarcina
            0.00	1	0	S	2214	                Methanosarcina acetivorans
            0.00	1	1	-	188937	                  Methanosarcina acetivorans C2A
            0.00	1	0	C	183939	        Methanococci
            0.00	1	0	O	2182	          Methanococcales
            0.00	1	0	F	2183	            Methanococcaceae
            0.00	1	0	G	2184	              Methanococcus
            0.00	1	0	S	2188	                Methanococcus voltae
            0.00	1	1	-	456320	                  Methanococcus voltae A3
            0.00	7	0	D	10239	  Viruses
            0.00	2	0	-	29258	    ssDNA viruses
            0.00	2	0	F	10841	      Microviridae
            0.00	2	0	G	10842	        Microvirus
            0.00	2	2	S	374840	          Enterobacteria phage phiX174 sensu lato
            0.00	2	0	-	35237	    dsDNA viruses, no RNA stage
            0.00	2	0	F	10482	      Polydnaviridae
            0.00	2	0	G	10483	        Ichnovirus
            0.00	2	2	S	265522	          Hyposoter fugitivus ichnovirus
            0.00	2	0	-	439488	    ssRNA viruses
            0.00	2	0	-	35278	      ssRNA positive-strand viruses, no DNA stage
            0.00	2	0	F	11018	        Togaviridae
            0.00	2	0	G	11019	          Alphavirus
            0.00	2	0	-	177872	            VEEV complex
            0.00	2	2	S	11036	              Venezuelan equine encephalitis virus
            0.00	1	0	-	35268	    Retro-transcribing viruses
            0.00	1	0	F	11632	      Retroviridae
            0.00	1	0	-	35276	        unclassified Retroviridae
            0.00	1	0	-	206037	          Human endogenous retroviruses
            0.00	1	0	S	45617	            Human endogenous retrovirus K
            0.00	1	1	-	166122	              Human endogenous retrovirus K113
            """).encode("utf-8")

        self.krepB = twdd(u"""\
            100.00	6783846	6783846	U	0	unclassified
            0.00	130	18	-	1	root
            0.00	105	0	-	131567	  cellular organisms
            0.00	105	0	D	2	    Bacteria
            0.00	62	30	P	1239	      Firmicutes
            0.00	29	0	C	91061	        Bacilli
            0.00	29	0	O	186826	          Lactobacillales
            0.00	20	0	F	1300	            Streptococcaceae
            0.00	20	8	G	1301	              Streptococcus
            0.00	5	4	S	1304	                Streptococcus salivarius
            0.00	1	1	-	1048332	                  Streptococcus salivarius CCHSS3
            0.00	3	0	S	1303	                Streptococcus oralis
            0.00	3	3	-	927666	                  Streptococcus oralis Uo5
            0.00	2	2	S	1318	                Streptococcus parasanguinis
            0.00	1	0	S	1305	                Streptococcus sanguinis
            0.00	1	1	-	388919	                  Streptococcus sanguinis SK36
            0.00	1	1	S	1308	                Streptococcus thermophilus
            0.00	9	0	F	81852	            Enterococcaceae
            0.00	9	0	G	1350	              Enterococcus
            0.00	9	0	S	37734	                Enterococcus casseliflavus
            0.00	9	9	-	565655	                  Enterococcus casseliflavus EC20
            0.00	1	0	C	186801	        Clostridia
            0.00	1	1	O	186802	          Clostridiales
            0.00	40	0	P	1224	      Proteobacteria
            0.00	20	2	C	1236	        Gammaproteobacteria
            0.00	11	1	O	135622	          Alteromonadales
            0.00	10	0	F	72275	            Alteromonadaceae
            0.00	10	0	G	226	              Alteromonas
            0.00	10	0	S	28108	                Alteromonas macleodii
            0.00	10	10	-	1300257	                  Alteromonas macleodii str. 'Ionian Sea U8'
            0.00	7	0	O	91347	          Enterobacteriales
            0.00	7	6	F	543	            Enterobacteriaceae
            0.00	1	0	G	561	              Escherichia
            0.00	1	1	S	562	                Escherichia coli
            0.00	20	5	C	28216	        Betaproteobacteria
            0.00	15	0	O	80840	          Burkholderiales
            0.00	11	0	F	119060	            Burkholderiaceae
            0.00	11	0	G	48736	              Ralstonia
            0.00	11	10	S	329	                Ralstonia pickettii
            0.00	1	1	-	428406	                  Ralstonia pickettii 12D
            0.00	4	0	F	80864	            Comamonadaceae
            0.00	2	2	G	12916	              Acidovorax
            0.00	2	0	G	201096	              Alicycliphilus
            0.00	2	0	S	179636	                Alicycliphilus denitrificans
            0.00	2	2	-	596154	                  Alicycliphilus denitrificans K601
            0.00	3	0	P	201174	      Actinobacteria
            0.00	3	0	C	1760	        Actinobacteria
            0.00	2	0	-	84998	          Coriobacteridae
            0.00	2	0	O	84999	            Coriobacteriales
            0.00	2	0	-	255727	              Coriobacterineae
            0.00	2	0	F	84107	                Coriobacteriaceae
            0.00	2	0	G	1380	                  Atopobium
            0.00	2	0	S	1382	                    Atopobium parvulum
            0.00	2	2	-	521095	                      Atopobium parvulum DSM 20469
            0.00	1	0	-	85003	          Actinobacteridae
            0.00	1	0	O	2037	            Actinomycetales
            0.00	1	0	-	85009	              Propionibacterineae
            0.00	1	0	F	31957	                Propionibacteriaceae
            0.00	1	0	G	1743	                  Propionibacterium
            0.00	1	1	S	1747	                    Propionibacterium acnes
            0.00	20	0	D	2157	    Archaea
            0.00	20	7	P	28890	      Euryarchaeota
            0.00	8	0	C	183963	        Halobacteria
            0.00	8	0	O	1644060	          Natrialbales
            0.00	8	0	F	1644061	            Natrialbaceae
            0.00	8	0	G	29287	              Natronococcus
            0.00	8	0	S	29288	                Natronococcus occultus
            0.00	8	8	-	694430	                  Natronococcus occultus SP4
            0.00	4	0	C	224756	        Methanomicrobia
            0.00	4	0	O	94695	          Methanosarcinales
            0.00	4	0	F	2206	            Methanosarcinaceae
            0.00	3	0	G	101191	              Methanomethylovorans
            0.00	3	0	S	101192	                Methanomethylovorans hollandica
            0.00	3	3	-	867904	                  Methanomethylovorans hollandica DSM 15978
            0.00	1	0	G	2207	              Methanosarcina
            0.00	1	0	S	2214	                Methanosarcina acetivorans
            0.00	1	1	-	188937	                  Methanosarcina acetivorans C2A
            0.00	1	0	C	183939	        Methanococci
            0.00	1	0	O	2182	          Methanococcales
            0.00	1	0	F	2183	            Methanococcaceae
            0.00	1	0	G	2184	              Methanococcus
            0.00	1	0	S	2188	                Methanococcus voltae
            0.00	1	1	-	456320	                  Methanococcus voltae A3
            0.00	7	0	D	10239	  Viruses
            0.00	2	0	-	29258	    ssDNA viruses
            0.00	2	0	F	10841	      Microviridae
            0.00	2	0	G	10842	        Microvirus
            0.00	2	2	S	374840	          Enterobacteria phage phiX174 sensu lato
            0.00	2	0	-	35237	    dsDNA viruses, no RNA stage
            0.00	2	0	F	10482	      Polydnaviridae
            0.00	2	0	G	10483	        Ichnovirus
            0.00	2	2	S	265522	          Hyposoter fugitivus ichnovirus
            0.00	2	0	-	439488	    ssRNA viruses
            0.00	2	0	-	35278	      ssRNA positive-strand viruses, no DNA stage
            0.00	2	0	F	11018	        Togaviridae
            0.00	2	0	G	11019	          Alphavirus
            0.00	2	0	-	177872	            VEEV complex
            0.00	2	2	S	11036	              Venezuelan equine encephalitis virus
            """).encode("utf-8")

        # create temp files containing the above kraken results
        tempf_krepA = tempfile.NamedTemporaryFile()
        tempf_krepA.write(self.krepA)

        tempf_krepB = tempfile.NamedTemporaryFile()
        tempf_krepB.write(self.krepB)

        self.fps = [tempf_krepA.name, tempf_krepB.name]
        self.fnames = [osp.split(fp)[1] for fp in self.fps]

        self.sample_counts, self.taxa = kb.process_samples(self.fps, 
                                                           max_rank="O", 
                                                           min_rank="S")
Exemplo n.º 12
0
 def setUp(self):
     """Load the inline kraken-report fixture into ``self.sample_kraken_rep``.

     The tab-separated report text below is passed through ``twdd``, wrapped
     in an in-memory text stream, and handed to ``prep_kraken_input``; the
     value that call returns is stored on the test instance.
     """
     report_text = twdd(u"""\
         100.00	6783846	6783846	U	0	unclassified
         0.00	130	18	-	1	root
         0.00	105	0	-	131567	  cellular organisms
         0.00	105	0	D	2	    Bacteria
         0.00	62	30	P	1239	      Firmicutes
         0.00	29	0	C	91061	        Bacilli
         0.00	29	0	O	186826	          Lactobacillales
         0.00	20	0	F	1300	            Streptococcaceae
         0.00	20	8	G	1301	              Streptococcus
         0.00	5	4	S	1304	                Streptococcus salivarius
         0.00	1	1	-	1048332	                  Streptococcus salivarius CCHSS3
         0.00	3	0	S	1303	                Streptococcus oralis
         0.00	3	3	-	927666	                  Streptococcus oralis Uo5
         0.00	2	2	S	1318	                Streptococcus parasanguinis
         0.00	1	0	S	1305	                Streptococcus sanguinis
         0.00	1	1	-	388919	                  Streptococcus sanguinis SK36
         0.00	1	1	S	1308	                Streptococcus thermophilus
         0.00	9	0	F	81852	            Enterococcaceae
         0.00	9	0	G	1350	              Enterococcus
         0.00	9	0	S	37734	                Enterococcus casseliflavus
         0.00	9	9	-	565655	                  Enterococcus casseliflavus EC20
         0.00	2	0	C	909932	        Negativicutes
         0.00	2	0	O	909929	          Selenomonadales
         0.00	2	0	F	31977	            Veillonellaceae
         0.00	2	0	G	29465	              Veillonella
         0.00	2	0	S	29466	                Veillonella parvula
         0.00	2	2	-	479436	                  Veillonella parvula DSM 2008
         0.00	1	0	C	186801	        Clostridia
         0.00	1	1	O	186802	          Clostridiales
         0.00	40	0	P	1224	      Proteobacteria
         0.00	20	2	C	1236	        Gammaproteobacteria
         0.00	11	1	O	135622	          Alteromonadales
         0.00	10	0	F	72275	            Alteromonadaceae
         0.00	10	0	G	226	              Alteromonas
         0.00	10	0	S	28108	                Alteromonas macleodii
         0.00	10	10	-	1300257	                  Alteromonas macleodii str. 'Ionian Sea U8'
         0.00	7	0	O	91347	          Enterobacteriales
         0.00	7	6	F	543	            Enterobacteriaceae
         0.00	1	0	G	561	              Escherichia
         0.00	1	1	S	562	                Escherichia coli
         0.00	20	5	C	28216	        Betaproteobacteria
         0.00	15	0	O	80840	          Burkholderiales
         0.00	11	0	F	119060	            Burkholderiaceae
         0.00	11	0	G	48736	              Ralstonia
         0.00	11	10	S	329	                Ralstonia pickettii
         0.00	1	1	-	428406	                  Ralstonia pickettii 12D
         0.00	4	0	F	80864	            Comamonadaceae
         0.00	2	2	G	12916	              Acidovorax
         0.00	2	0	G	201096	              Alicycliphilus
         0.00	2	0	S	179636	                Alicycliphilus denitrificans
         0.00	2	2	-	596154	                  Alicycliphilus denitrificans K601
         0.00	3	0	P	201174	      Actinobacteria
         0.00	3	0	C	1760	        Actinobacteria
         0.00	2	0	-	84998	          Coriobacteridae
         0.00	2	0	O	84999	            Coriobacteriales
         0.00	2	0	-	255727	              Coriobacterineae
         0.00	2	0	F	84107	                Coriobacteriaceae
         0.00	2	0	G	1380	                  Atopobium
         0.00	2	0	S	1382	                    Atopobium parvulum
         0.00	2	2	-	521095	                      Atopobium parvulum DSM 20469
         0.00	1	0	-	85003	          Actinobacteridae
         0.00	1	0	O	2037	            Actinomycetales
         0.00	1	0	-	85009	              Propionibacterineae
         0.00	1	0	F	31957	                Propionibacteriaceae
         0.00	1	0	G	1743	                  Propionibacterium
         0.00	1	1	S	1747	                    Propionibacterium acnes
         0.00	20	0	D	2157	    Archaea
         0.00	20	7	P	28890	      Euryarchaeota
         0.00	8	0	C	183963	        Halobacteria
         0.00	8	0	O	1644060	          Natrialbales
         0.00	8	0	F	1644061	            Natrialbaceae
         0.00	8	0	G	29287	              Natronococcus
         0.00	8	0	S	29288	                Natronococcus occultus
         0.00	8	8	-	694430	                  Natronococcus occultus SP4
         0.00	4	0	C	224756	        Methanomicrobia
         0.00	4	0	O	94695	          Methanosarcinales
         0.00	4	0	F	2206	            Methanosarcinaceae
         0.00	3	0	G	101191	              Methanomethylovorans
         0.00	3	0	S	101192	                Methanomethylovorans hollandica
         0.00	3	3	-	867904	                  Methanomethylovorans hollandica DSM 15978
         0.00	1	0	G	2207	              Methanosarcina
         0.00	1	0	S	2214	                Methanosarcina acetivorans
         0.00	1	1	-	188937	                  Methanosarcina acetivorans C2A
         0.00	1	0	C	183939	        Methanococci
         0.00	1	0	O	2182	          Methanococcales
         0.00	1	0	F	2183	            Methanococcaceae
         0.00	1	0	G	2184	              Methanococcus
         0.00	1	0	S	2188	                Methanococcus voltae
         0.00	1	1	-	456320	                  Methanococcus voltae A3
         0.00	7	0	D	10239	  Viruses
         0.00	2	0	-	29258	    ssDNA viruses
         0.00	2	0	F	10841	      Microviridae
         0.00	2	0	G	10842	        Microvirus
         0.00	2	2	S	374840	          Enterobacteria phage phiX174 sensu lato
         0.00	2	0	-	35237	    dsDNA viruses, no RNA stage
         0.00	2	0	F	10482	      Polydnaviridae
         0.00	2	0	G	10483	        Ichnovirus
         0.00	2	2	S	265522	          Hyposoter fugitivus ichnovirus
         0.00	2	0	-	439488	    ssRNA viruses
         0.00	2	0	-	35278	      ssRNA positive-strand viruses, no DNA stage
         0.00	2	0	F	11018	        Togaviridae
         0.00	2	0	G	11019	          Alphavirus
         0.00	2	0	-	177872	            VEEV complex
         0.00	2	2	S	11036	              Venezuelan equine encephalitis virus
         0.00	1	0	-	35268	    Retro-transcribing viruses
         0.00	1	0	F	11632	      Retroviridae
         0.00	1	0	-	35276	        unclassified Retroviridae
         0.00	1	0	-	206037	          Human endogenous retroviruses
         0.00	1	0	S	45617	            Human endogenous retrovirus K
         0.00	1	1	-	166122	              Human endogenous retrovirus K113
         0.00	3	1	D	2759	    Eukaryota
         0.00	1	0	P	3041	        Chlorophyta
         0.00	1	1	C	75966	          Trebouxiophyceae
         0.00	1	0	-	33682	      Euglenozoa
         0.00	1	0	O	5653	        Kinetoplastida
         0.00	1	0	F	5654	          Trypanosomatidae
         0.00	1	0	G	5690	            Trypanosoma
         0.00	1	0	-	47570	              Schizotrypanum
         0.00	1	0	S	5693	                Trypanosoma cruzi
         0.00	1	1	-	353153	                  Trypanosoma cruzi strain CL Brener
         """)
     # Expose the text through a file-like object before handing it over.
     report_stream = io.StringIO(report_text)
     self.sample_kraken_rep = prep_kraken_input(report_stream)
Exemplo n.º 13
0
def handle_program_options(argv=None):
    """
    Build the kraken-biom command-line parser and parse the supplied arguments.

    :type argv: list of str, or None
    :param argv: Argument strings to parse. When None (the default), argparse
                 falls back to the process command line, which matches the
                 behavior of the original zero-argument call.

    :return: The namespace returned by ``parse_args``. It carries the
             attributes kraken_reports, kraken_reports_fp, max, min,
             output_fp, otu_fp, fmt, gzip and verbose.
    """
    descr = """\
    Create BIOM-format tables (http://biom-format.org) from Kraken output 
    (http://ccb.jhu.edu/software/kraken/).

    The program takes as input, one or more files output from the kraken-report
    tool. Each file is parsed and the counts for each OTU (operational taxonomic
    unit) are recorded, along with database ID (e.g. NCBI), and lineage. The
    extracted data are then stored in a BIOM table where each count is linked
    to the Sample and OTU it belongs to. Sample IDs are extracted from the input
    filenames (everything up to the '.').

    OTUs are defined by the --max and --min arguments. By default these are
    set to Order and Species respectively. This means that counts assigned
    directly to an Order, Family, or Genus are recorded under the associated
    OTU ID, and counts assigned at or below the Species level are assigned to
    the OTU ID for the species. Setting a minimum rank below Species is not yet
    available.

    The BIOM format currently has two major versions. Version 1.0 uses the 
    JSON (JavaScript Object Notation) format as a base. Version 2.x uses the
    HDF5 (Hierarchical Data Format v5) as a base. The output format can be
    specified with the --fmt option. Note that a tab-separated (tsv) output
    format is also available. The resulting file will not contain most of the
    metadata, but can be opened by spreadsheet programs.

    Version 2 of the BIOM format is used by default for output, but requires the
    Python library 'h5py'. If the library is not installed, kraken-biom will 
    automatically switch to using version 1.0. Note that the output can 
    optionally be compressed with gzip (--gzip) for version 1.0 and TSV files. 
    Version 2 files are automatically compressed.

    Usage examples
    --------------
    1. Basic usage with default parameters:

    $ kraken-biom.py S1.txt S2.txt

      This produces a compressed BIOM 2.1 file: table.biom

    2. BIOM v1.0 output:

    $ kraken-biom.py S1.txt S2.txt --fmt json

      Produces a BIOM 1.0 file: table.biom

    3. Compressed TSV output:

    $ kraken-biom.py S1.txt S2.txt --fmt tsv --gzip -o table.tsv

      Produces a TSV file: table.tsv.gz

    4. Change the max and min OTU levels to Class and Genus:

    $ kraken-biom.py S1.txt S2.txt --max C --min G

    Program arguments
    -----------------"""

    # RawDescriptionHelpFormatter keeps the hand-formatted description above
    # as written instead of re-wrapping it.
    parser = argparse.ArgumentParser(
        description=twdd(descr),
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # Input selection: explicit report files and/or a folder of reports.
    parser.add_argument('kraken_reports',
                        nargs='*',
                        help="Results files from the kraken-report tool.")
    parser.add_argument('-k',
                        '--kraken_reports_fp',
                        metavar="REPORTS_FP",
                        help="Folder containing kraken reports")

    # Rank window; the final entry of the module-level `ranks` sequence is
    # not offered as a choice for either bound.
    parser.add_argument('--max',
                        default="O",
                        choices=ranks[:-1],
                        help="Assigned reads will be recorded only if \
                              they are at or below max rank. Default: O.")
    parser.add_argument('--min',
                        default="S",
                        choices=ranks[:-1],
                        help="Reads assigned at and below min rank \
                              will be recorded as being assigned to the \
                              min rank level. Default: S.")

    # Output destinations and format.
    parser.add_argument('-o',
                        '--output_fp',
                        default="table.biom",
                        help="Path to the BIOM-format file. By default, the\
                        table will be in the HDF5 BIOM 2.x format. Users can\
                        output to a different format using the --fmt option.\
                        The output can also be gzipped using the --gzip\
                        option. Default path is: ./table.biom")
    parser.add_argument('--otu_fp',
                        help="Create a file containing just the (NCBI) OTU IDs\
                        for use with a service such as phyloT \
                        (http://phylot.biobyte.de/) to generate a phylogenetic\
                        tree for use in downstream analysis such as UniFrac, \
                        iTol (itol.embl.de), or PhyloToAST (phylotoast.org).")
    parser.add_argument('--fmt',
                        default="hdf5",
                        choices=["hdf5", "json", "tsv"],
                        help="Set the output format of the BIOM table.\
                              Default is HDF5.")
    parser.add_argument('--gzip',
                        action='store_true',
                        help="Compress the output BIOM table with gzip.\
                              HDF5 BIOM (v2.x) files are internally\
                              compressed by default, so this option\
                              is not needed when specifying --fmt hdf5.")

    parser.add_argument('--version',
                        action='version',
                        version="kraken-biom version {}, {}".format(
                            __version__, __url__))
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help="Prints status messages during program \
                              execution.")

    # argv=None makes argparse read the process command line, so existing
    # zero-argument callers behave exactly as before.
    return parser.parse_args(argv)
Exemplo n.º 14
0
def handle_program_options(argv=None):
    """
    Build the kraken-biom command-line parser and parse the supplied arguments.

    :type argv: list of str, or None
    :param argv: Argument strings to parse. When None (the default), argparse
                 falls back to the process command line, which matches the
                 behavior of the original zero-argument call.

    :return: The namespace returned by ``parse_args``. It carries the
             attributes kraken_reports, kraken_reports_fp, max, min,
             output_fp, otu_fp, fmt, gzip and verbose.
    """
    descr = """\
    Create BIOM-format tables (http://biom-format.org) from Kraken output 
    (http://ccb.jhu.edu/software/kraken/).

    The program takes as input, one or more files output from the kraken-report
    tool. Each file is parsed and the counts for each OTU (operational taxonomic
    unit) are recorded, along with database ID (e.g. NCBI), and lineage. The
    extracted data are then stored in a BIOM table where each count is linked
    to the Sample and OTU it belongs to. Sample IDs are extracted from the input
    filenames (everything up to the '.').

    OTUs are defined by the --max and --min arguments. By default these are
    set to Order and Species respectively. This means that counts assigned
    directly to an Order, Family, or Genus are recorded under the associated
    OTU ID, and counts assigned at or below the Species level are assigned to
    the OTU ID for the species. Setting a minimum rank below Species is not yet
    available.

    The BIOM format currently has two major versions. Version 1.0 uses the 
    JSON (JavaScript Object Notation) format as a base. Version 2.x uses the
    HDF5 (Hierarchical Data Format v5) as a base. The output format can be
    specified with the --fmt option. Note that a tab-separated (tsv) output
    format is also available. The resulting file will not contain most of the
    metadata, but can be opened by spreadsheet programs.

    Version 2 of the BIOM format is used by default for output, but requires the
    Python library 'h5py'. If the library is not installed, kraken-biom will 
    automatically switch to using version 1.0. Note that the output can 
    optionally be compressed with gzip (--gzip) for version 1.0 and TSV files. 
    Version 2 files are automatically compressed.

    Usage examples
    --------------
    1. Basic usage with default parameters:

    $ kraken-biom S1.txt S2.txt

      This produces a compressed BIOM 2.1 file: table.biom

    2. BIOM v1.0 output:

    $ kraken-biom S1.txt S2.txt --fmt json

      Produces a BIOM 1.0 file: table.biom

    3. Compressed TSV output:

    $ kraken-biom S1.txt S2.txt --fmt tsv --gzip -o table.tsv

      Produces a TSV file: table.tsv.gz

    4. Change the max and min OTU levels to Class and Genus:

    $ kraken-biom S1.txt S2.txt --max C --min G

    Program arguments
    -----------------"""

    # RawDescriptionHelpFormatter keeps the hand-formatted description above
    # as written instead of re-wrapping it.
    parser = argparse.ArgumentParser(
        description=twdd(descr),
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # Input selection: explicit report files and/or a folder of reports.
    parser.add_argument('kraken_reports', nargs='*',
                        help="Results files from the kraken-report tool.")
    parser.add_argument('-k', '--kraken_reports_fp', metavar="REPORTS_FP",
                        help="Folder containing kraken reports")

    # Rank window; the final entry of the module-level `ranks` sequence is
    # not offered as a choice for either bound.
    parser.add_argument('--max', default="O", choices=ranks[:-1],
                        help="Assigned reads will be recorded only if \
                              they are at or below max rank. Default: O.")
    parser.add_argument('--min', default="S", choices=ranks[:-1],
                        help="Reads assigned at and below min rank \
                              will be recorded as being assigned to the \
                              min rank level. Default: S.")

    # Output destinations and format.
    parser.add_argument('-o', '--output_fp', default="table.biom",
                        help="Path to the BIOM-format file. By default, the\
                        table will be in the HDF5 BIOM 2.x format. Users can\
                        output to a different format using the --fmt option.\
                        The output can also be gzipped using the --gzip\
                        option. Default path is: ./table.biom")
    parser.add_argument('--otu_fp',
                        help="Create a file containing just the (NCBI) OTU IDs\
                        for use with a service such as phyloT \
                        (http://phylot.biobyte.de/) to generate a phylogenetic\
                        tree for use in downstream analysis such as UniFrac, \
                        iTol (itol.embl.de), or PhyloToAST (phylotoast.org).")
    parser.add_argument('--fmt', default="hdf5",
                        choices=["hdf5", "json", "tsv"],
                        help="Set the output format of the BIOM table.\
                              Default is HDF5.")
    parser.add_argument('--gzip', action='store_true',
                        help="Compress the output BIOM table with gzip.\
                              HDF5 BIOM (v2.x) files are internally\
                              compressed by default, so this option\
                              is not needed when specifying --fmt hdf5.")

    parser.add_argument('--version', action='version',
                        version="kraken-biom version {}, {}".format(
                            __version__, __url__))
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="Prints status messages during program \
                              execution.")

    # argv=None makes argparse read the process command line, so existing
    # zero-argument callers behave exactly as before.
    return parser.parse_args(argv)
Exemplo n.º 15
0
def handle_program_options(argv=None):
    """
    Build the clark-biom command-line parser and parse the supplied arguments.

    :type argv: list of str or None
    :param argv: The argument strings to parse. When None (the default), argparse
                 reads the arguments from sys.argv[1:], so calling this function
                 with no arguments behaves exactly as before.

    :return: The argparse.Namespace holding the parsed options: clark_abd_tbls,
             output_fp, otu_fp, fmt, store_pct, gzip and verbose.
    """
    # The description is shown verbatim (RawDescriptionHelpFormatter), so the
    # layout below is exactly what the user sees with -h/--help.
    descr = """\
    Create BIOM-format tables (http://biom-format.org) from CLARK output 
    (http://clark.cs.ucr.edu/).

    The program takes as input, one or more files output from CLARK's 
    estimate_abundance tool. Each file is parsed and the counts for each OTU 
    (operational taxonomic unit) are recorded, along with database ID (e.g. NCBI), 
    and lineage. The extracted data are then stored in a BIOM table where each count
    is linked to the Sample and OTU it belongs to. Sample IDs are extracted from the
    input filenames (everything up to the '.' preceding the extension).

    The BIOM format currently has two major versions. Version 1.0 uses the 
    JSON (JavaScript Object Notation) format as a base. Version 2.x uses the
    HDF5 (Hierarchical Data Format v5) as a base. The output format can be
    specified with the --fmt option. Note that a tab-separated (tsv) output
    format is also available. The resulting file will not contain most of the
    metadata, but can be opened by spreadsheet programs.

    Version 2 of the BIOM format is used by default for output, but requires the
    Python library 'h5py'. If the library is not installed, clark-biom will 
    automatically switch to using version 1.0. Note that the output can 
    optionally be compressed with gzip (--gzip) for version 1.0 and TSV files. 
    Version 2 files are automatically compressed.

    Currently the taxonomy for each OTU ID is stored as row metadata in the BIOM
    table using the seven-level format used by QIIME and metaphlan: k__K, p__P, ... 
    s__S. If you would like another format supported, please file an issue or send a
    pull request (note the contribution guidelines).

    Usage examples
    --------------

    1. Basic usage with default parameters::

        $ clark-biom S1.csv S2.csv

      This produces a compressed BIOM 2.1 file: table.biom
      with sample IDs: S1, S2.

    2. Process multiple samples from multiple groups::

        $ clark-biom groupA/*.csv groupB/*.csv -o groupsAB.biom

    3. BIOM v1.0 output::

        $ clark-biom S1.csv S2.csv --fmt json

      Produces a BIOM 1.0 file: table.biom

    4. Compressed TSV output::

        $ clark-biom S1.csv S2.csv --fmt tsv --gzip -o table.tsv

      Produces a TSV file: table.tsv.gz


    Program arguments
    -----------------"""

    parser = argparse.ArgumentParser(
        description=twdd(descr),
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # Positional: one or more CLARK abundance tables.
    parser.add_argument('clark_abd_tbls', nargs='+', metavar="TABLE-FILE",
                        help="Result file from estimate_abundance.sh.")

    # Output locations.
    # NOTE: adjacent string literals are concatenated with no separator, so
    # every fragment except the last must end with a space.
    parser.add_argument('-o', '--output_fp', default="table.biom",
                        metavar="COMBINED-OUTPUT-FILE",
                        help="Path to the BIOM-format file. By default, the "
                             "table will be in the HDF5 BIOM 2.x format. Users can "
                             "output to a different format using the --fmt option. "
                             "The output can also be gzipped using the --gzip "
                             "option. Default path is: ./table.biom")
    parser.add_argument('--otu-fp', dest="otu_fp", metavar="OTU-FILE",
                        help="Create a file containing just (NCBI) OTU IDs "
                             "for use with a service such as phyloT "
                             "(http://phylot.biobyte.de/) to generate phylogenetic "
                             "trees for use in downstream analysis such as "
                             "UniFrac, iTol (itol.embl.de), or PhyloToAST "
                             "(phylotoast.org).")

    # Output format and content.
    parser.add_argument('--fmt', default="hdf5",
                        choices=["hdf5", "json", "tsv"],
                        help="Set the output format of the BIOM table. "
                             "Default is HDF5.")
    parser.add_argument('--store-pct', dest="store_pct", action='store_true',
                        help="Record the relative abundances "
                             "('Proportion_Classified' column) instead of "
                             "the raw count ('Count' column) data.")
    parser.add_argument('--gzip', action='store_true',
                        help="Compress the output BIOM table with gzip. "
                             "HDF5 BIOM (v2.x) files are internally "
                             "compressed by default, so this option "
                             "is not needed when specifying --fmt hdf5.")

    # Informational flags.
    parser.add_argument('--version', action='version',
                        version="clark-biom version {}, {}".format(__version__,
                                                                   __url__))
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="Prints status messages during program "
                             "execution.")

    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)