def test_srnabench(self):
    """Run the sRNAbench importer over the sRNAbench example data.

    ``srnabench.read_file`` is handed to the module-level ``annotate``
    helper as the reader callback, together with ``create=False``.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    # The logger object itself is never used here; the call is kept, but it
    # no longer rebinds the name of the imported logger module.
    mylog.getLogger(__name__)
    from mirtop.importer import srnabench
    annotate("data/examples/srnabench", srnabench.read_file, create=False)
def main(**kwargs):
    """Command-line entry point: parse ``sys.argv`` and run one sub-command.

    Any keyword arguments passed in are discarded; ``kwargs`` is replaced
    by the result of ``parse_cl(sys.argv[1:])``. The first sub-command key
    found in that result selects the handler, which receives the parsed
    ``args`` object. The elapsed time in minutes is logged at the end.
    """
    kwargs = parse_cl(sys.argv[1:])
    opts = kwargs['args']
    initialize_logger(opts.out, opts.debug, opts.print_debug)
    log = mylog.getLogger(__name__)
    began = time.time()

    if "gff" in kwargs:
        log.info("Run annotation")
        reader(opts)
    elif "stats" in kwargs:
        log.info("Run stats.")
        stats(opts)
    elif "compare" in kwargs:
        log.info("Run compare.")
        compare(opts)
    elif "simulator" in kwargs:
        log.info("Run simulation")
        simulate(opts)
    elif "counts" in kwargs:
        log.info("Run convert of GFF to TSV containing expression")
        convert_gff_counts(opts)
    elif "export" in kwargs:
        log.info("Run export of GFF into other format.")
        isomirs.convert(opts)
    elif "validator" in kwargs:
        log.info("Run validator.")
        validator.check_multiple(opts)
    elif "query" in opts:
        # Unlike the branches above, this membership test is made on the
        # parsed args object rather than on the top-level mapping.
        log.info("Not yet ready: This will allow queries to GFF files.")

    elapsed_minutes = (time.time() - began) / 60
    log.info('It took %.3f minutes' % elapsed_minutes)
def test_srnabench(self):
    """Run the sRNAbench importer over the sRNAbench example data.

    ``srnabench.read_file`` is handed to the module-level ``annotate``
    helper as the reader callback, together with ``create=False``.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    # The logger object itself is never used here; the call is kept, but it
    # no longer rebinds the name of the imported logger module.
    mylog.getLogger(__name__)
    from mirtop.importer import srnabench
    annotate("data/examples/srnabench", srnabench.read_file, create=False)
def main(**kwargs):
    """Command-line entry point: parse ``sys.argv`` and run one sub-command.

    Any keyword arguments passed in are discarded; ``kwargs`` is replaced
    by the result of ``parse_cl(sys.argv[1:])``. The first sub-command key
    found in that result selects the handler, which receives the parsed
    ``args`` object. The elapsed time in minutes is logged at the end.
    """
    kwargs = parse_cl(sys.argv[1:])
    opts = kwargs['args']
    initialize_logger(opts.out, opts.debug, opts.print_debug)
    log = mylog.getLogger(__name__)
    began = time.time()

    if "gff" in kwargs:
        log.info("Run annotation")
        reader(opts)
    elif "stats" in kwargs:
        log.info("Run stats.")
        stats(opts)
    elif "compare" in kwargs:
        log.info("Run compare.")
        compare(opts)
    elif "simulator" in kwargs:
        log.info("Run simulation")
        simulate(opts)
    elif "counts" in kwargs:
        log.info("Run convert of GFF to TSV containing expression")
        convert_gff_counts(opts)
    elif "export" in kwargs:
        log.info("Run export of GFF into other format.")
        isomirs.convert(opts)
    elif "validator" in kwargs:
        log.info("Run validator.")
        validator.check_multiple(opts)
    elif "query" in opts:
        # Unlike the branches above, this membership test is made on the
        # parsed args object rather than on the top-level mapping.
        log.info("Not yet ready: This will allow queries to GFF files.")

    elapsed_minutes = (time.time() - began) / 60
    log.info('It took %.3f minutes' % elapsed_minutes)
def test_collapse(self):
    """Annotate the collapsing-isomirs SAM example through ``bam.read_bam``.

    The SAM path and the reader callback are passed to the module-level
    ``annotate`` helper. Always returns True.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    log = mylog.getLogger(__name__)
    from mirtop.bam import bam
    sam_path = "data/aligments/collapsing-isomirs.sam"
    annotate(sam_path, bam.read_bam)
    return True
def test_read_genomic(self):
    """Load the example GFF3 with ``mapper.read_gtf_to_mirna`` and print it.

    The loaded mapping is only printed, not validated (the value check is
    commented out below). Always returns True.
    """
    from mirtop.mirna import mapper
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test_read_files", True, True)
    gff_path = "data/examples/annotate/hsa.gff3"
    mirna_map = mapper.read_gtf_to_mirna(gff_path)
    print(mirna_map)
    # Disabled value check:
    # if mirna_map["hsa-let-7a-1"]["hsa-let-7a-5p"][0] != 5:
    #     raise ValueError("GFF is not loaded correctly.")
    return True
def test_read_genomic(self):
    """Load the example GFF3 with ``mapper.read_gtf_to_mirna`` and print it.

    The loaded mapping is only printed, not validated (the value check is
    commented out below). Always returns True.
    """
    from mirtop.mirna import mapper
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test_read_files", True, True)
    gff_path = "data/examples/annotate/hsa.gff3"
    mirna_map = mapper.read_gtf_to_mirna(gff_path)
    print(mirna_map)
    # Disabled value check:
    # if mirna_map["hsa-let-7a-1"]["hsa-let-7a-5p"][0] != 5:
    #     raise ValueError("GFF is not loaded correctly.")
    return True
def test_collapse(self):
    """Annotate the collapsing-isomirs SAM example through ``bam.read_bam``.

    The SAM path and the reader callback are passed to the module-level
    ``annotate`` helper. Always returns True.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    log = mylog.getLogger(__name__)
    from mirtop.bam import bam
    sam_path = "data/aligments/collapsing-isomirs.sam"
    annotate(sam_path, bam.read_bam)
    return True
def test_optimir(self):
    """Run the OptimiR importer over the synthetic OptimiR GFF3 example.

    ``optimir.read_file`` is passed as the reader callback to the
    module-level ``annotate`` helper with ``create=False``.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    log = mylog.getLogger(__name__)
    from mirtop.importer import optimir
    example_path = "data/examples/optimir/synthetic_100_full.gff3"
    annotate(example_path, optimir.read_file, create=False)
def test_seqbuster(self):
    """Run the seqbuster importer over the two seqbuster example files.

    For each example a banner is printed, then the file is passed to the
    module-level ``annotate`` helper with ``seqbuster.read_file`` as the
    reader callback.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    log = mylog.getLogger(__name__)
    from mirtop.importer import seqbuster
    # (banner, input file) pairs, processed in this order.
    cases = (
        ("\nperfect\n", "data/examples/seqbuster/reads20.mirna"),
        ("\naddition\n", "data/examples/seqbuster/readsAdd.mirna"),
    )
    for banner, mirna_path in cases:
        print(banner)
        annotate(mirna_path, seqbuster.read_file)
def test_seqbuster(self):
    """Run the seqbuster importer over the two seqbuster example files.

    For each example a banner is printed, then the file is passed to the
    module-level ``annotate`` helper with ``seqbuster.read_file`` as the
    reader callback.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    log = mylog.getLogger(__name__)
    from mirtop.importer import seqbuster
    # (banner, input file) pairs, processed in this order.
    cases = (
        ("\nperfect\n", "data/examples/seqbuster/reads20.mirna"),
        ("\naddition\n", "data/examples/seqbuster/readsAdd.mirna"),
    )
    for banner, mirna_path in cases:
        print(banner)
        annotate(mirna_path, seqbuster.read_file)
def main(**kwargs):
    """Command-line entry point: parse ``sys.argv`` and run one sub-command.

    Any keyword arguments passed in are discarded; ``kwargs`` is replaced
    by the result of ``parse_cl(sys.argv[1:])``. Depending on which key is
    present, either ``mirna.miraligner`` or ``simulate`` is called with the
    parsed ``args`` object. The elapsed time in minutes is logged at the
    end.
    """
    kwargs = parse_cl(sys.argv[1:])
    opts = kwargs['args']
    initialize_logger(opts.out, opts.debug, opts.print_debug)
    log = mylog.getLogger(__name__)
    began = time.time()

    if "annotate" in kwargs:
        log.info("Run annotation")
        mirna.miraligner(opts)
    elif "simulator" in kwargs:
        log.info("Run simulation")
        simulate(opts)

    elapsed_minutes = (time.time() - began) / 60
    log.info('It took %.3f minutes' % elapsed_minutes)
def test_prost(self):
    """Read the PROST! example file and pass the parsed reads to ``annotate``.

    The precursors are loaded from the example hairpin FASTA, the PROST!
    table is parsed with ``prost.read_file``, and the resulting reads are
    handed to the module-level ``annotate`` helper.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    # The logger object itself is never used; keep the call only.
    mylog.getLogger(__name__)
    from mirtop.mirna import fasta
    precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa",
                                      "hsa")
    fn = "data/examples/prost/prost.example.txt"
    from mirtop.importer import prost
    reads = prost.read_file(fn, precursors, "miRBasev21",
                            "data/examples/annotate/hsa.gff3")
    # Reuse ``fn`` instead of a retyped literal: the original call spelled
    # the directory "data/example/" (missing "s"), which did not match the
    # file that was actually parsed above.
    # NOTE(review): the third positional argument presumably tells
    # ``annotate`` that ``reads`` is already-loaded data rather than a
    # reader callback -- confirm against the helper's signature.
    annotate(fn, reads, True)
def test_prost(self):
    """Read the PROST! example file and pass the parsed reads to ``annotate``.

    The precursors are loaded from the example hairpin FASTA, the PROST!
    table is parsed with ``prost.read_file``, and the resulting reads are
    handed to the module-level ``annotate`` helper.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    # The logger object itself is never used; keep the call only.
    mylog.getLogger(__name__)
    from mirtop.mirna import fasta
    precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa",
                                      "hsa")
    fn = "data/examples/prost/prost.example.txt"
    from mirtop.importer import prost
    reads = prost.read_file(fn, precursors, "miRBasev21",
                            "data/examples/annotate/hsa.gff3")
    # Reuse ``fn`` instead of a retyped literal: the original call spelled
    # the directory "data/example/" (missing "s"), which did not match the
    # file that was actually parsed above.
    # NOTE(review): the third positional argument presumably tells
    # ``annotate`` that ``reads`` is already-loaded data rather than a
    # reader callback -- confirm against the helper's signature.
    annotate(fn, reads, True)
def test_read(self):
    """Check that the example GFF3 and hairpin FASTA can be loaded.

    Loads the GFF3 with ``mapper.read_gtf_to_precursor``, prints the
    mapping and verifies one known entry, then loads the hairpin FASTA.

    Returns:
        True when the check passes.

    Raises:
        ValueError: if the first field stored for ``hsa-let-7a-5p`` under
            ``hsa-let-7a-1`` is not 5.
    """
    from mirtop.mirna import mapper, fasta
    from mirtop.libs import logger
    logger.initialize_logger("test_read_files", True, True)
    map_mir = mapper.read_gtf_to_precursor(
        "data/examples/annotate/hsa.gff3")
    # print() call form: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print(map_mir)
    if map_mir["hsa-let-7a-1"]["hsa-let-7a-5p"][0] != 5:
        raise ValueError("GFF is not loaded correctly.")
    # The result is not inspected; the call only exercises the FASTA reader.
    fasta.read_precursor("data/examples/annotate/hairpin.fa", "hsa")
    # read data/aligments/let7-perfect.bam
    return True
def test_read(self):
    """Check loading of the example GFF3 and hairpin FASTA files.

    Verifies one known entry of the GFF3 mapping, then reads the hairpin
    FASTA twice -- once with species ``"hsa"`` and once with ``None`` --
    and requires both results to compare equal. Everything loaded is
    printed along the way.

    Returns:
        True when both checks pass.

    Raises:
        ValueError: if the GFF3 entry check fails or the two FASTA results
            differ.
    """
    from mirtop.mirna import mapper, fasta
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test_read_files", True, True)

    precursor_map = mapper.read_gtf_to_precursor(
        "data/examples/annotate/hsa.gff3")
    print(precursor_map)
    first_field = precursor_map["hsa-let-7a-1"]["hsa-let-7a-5p"][0]
    if first_field != 5:
        raise ValueError("GFF is not loaded correctly.")

    with_species = fasta.read_precursor(
        "data/examples/annotate/hairpin.fa", "hsa")
    print(with_species)
    without_species = fasta.read_precursor(
        "data/examples/annotate/hairpin.fa", None)
    print(without_species)
    if with_species != without_species:
        raise ValueError("species value generates two different dicts.")
    # read data/aligments/let7-perfect.bam
    return True
def test_alignment_genomic(self):
    """Annotate four let-7a SAM examples with ``genomic=True``.

    Inside the ``make_workdir()`` context, each example is passed to the
    module-level ``annotate`` helper with ``bam.read_bam`` as the reader
    and the miRBase GFF3 as ``gtf``; each result is printed.
    """
    from mirtop.bam import bam
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test_read_files", True, True)
    # print(annotate("data/examples/annotate/hsa-let-7a-5ploss1_neg.sam",
    #                bam.read_bam,
    #                gtf="data/db/hsa.gff3", genomic=True))
    print("\ngenomic\n")
    sam_names = (
        "hsa-let-7a-nm",
        "hsa-let-7a-5ploss1",
        "hsa-let-7a-3ploss1",
        "hsa-let-7a-5ploss1_neg",
    )
    with make_workdir():
        for sam_name in sam_names:
            sam_path = "data/examples/annotate/%s.sam" % sam_name
            result = annotate(sam_path,
                              bam.read_bam,
                              gtf="data/db/mirbase/hsa.gff3",
                              genomic=True)
            print(result)
def test_counts(self):
    """Run ``convert_gff_counts`` on the synthetic let7a-5p example.

    Builds the argument namespace the converter reads, runs the
    conversion, then deletes ``expression_counts.tsv`` from the output
    directory. Always returns True.
    """
    from mirtop.libs import logger as mylog
    from mirtop.gff.convert import convert_gff_counts
    import argparse
    mylog.initialize_logger("test counts", True, True)
    log = mylog.getLogger(__name__)
    # Same attributes as before, supplied as constructor keywords.
    args = argparse.Namespace(
        hairpin="data/examples/annotate/hairpin.fa",
        sps="hsa",
        gtf="data/examples/annotate/hsa.gff3",
        gff='data/examples/synthetic/let7a-5p.gtf',
        out='data/examples/synthetic',
        add_extra=True,
    )
    convert_gff_counts(args)
    counts_path = os.path.join(args.out, "expression_counts.tsv")
    os.remove(counts_path)
    return True
def main(**kwargs):
    """Command-line entry point: parse ``sys.argv`` and run one sub-command.

    Any keyword arguments passed in are discarded; ``kwargs`` is replaced
    by the result of ``parse_cl(sys.argv[1:])``. ``gff`` and ``simulator``
    run a handler; ``check``, ``query`` and ``convert`` only log a
    "not yet ready" message. The elapsed time in minutes is logged at the
    end.
    """
    kwargs = parse_cl(sys.argv[1:])
    opts = kwargs['args']
    initialize_logger(opts.out, opts.debug, opts.print_debug)
    log = mylog.getLogger(__name__)
    began = time.time()

    if "gff" in kwargs:
        log.info("Run annotation")
        reader(opts)
    elif "simulator" in kwargs:
        log.info("Run simulation")
        simulate(opts)
    # The remaining membership tests are made on the parsed args object,
    # not on the top-level mapping.
    elif "check" in opts:
        log.info("Not yet ready: This will check GFF files.")
    elif "query" in opts:
        log.info("Not yet ready: This will allow queries to GFF files.")
    elif "convert" in opts:
        log.info(
            "Not yet ready: This will output tabular format from GFF files.")

    elapsed_minutes = (time.time() - began) / 60
    log.info('It took %.3f minutes' % elapsed_minutes)
def test_counts(self):
    """Run ``convert_gff_counts`` on the synthetic let7a-5p example.

    Builds the argument namespace the converter reads, runs the
    conversion, then deletes ``expression_counts.tsv`` from the output
    directory. Always returns True.
    """
    from mirtop.libs import logger as mylog
    from mirtop.gff.convert import convert_gff_counts
    import argparse
    mylog.initialize_logger("test counts", True, True)
    log = mylog.getLogger(__name__)
    # Same attributes as before, supplied as constructor keywords.
    args = argparse.Namespace(
        hairpin="data/examples/annotate/hairpin.fa",
        sps="hsa",
        gtf="data/examples/annotate/hsa.gff3",
        gff='data/examples/synthetic/let7a-5p.gtf',
        out='data/examples/synthetic',
        add_extra=True,
    )
    convert_gff_counts(args)
    counts_path = os.path.join(args.out, "expression_counts.tsv")
    os.remove(counts_path)
    return True
def test_alignment(self):
    """Annotate a series of let-7 SAM examples and write a GFF3 for each.

    Precursors come from the example hairpin FASTA and matures from the
    example GFF3. Each SAM file is read with ``bam.read_bam``, annotated
    with ``bam.annotate`` and written by ``body.create`` to
    ``<sam file>.gff3``. A banner is printed before each case.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    # The logger object itself is never used; keep the call only.
    mylog.getLogger(__name__)
    from mirtop.mirna import fasta, mapper
    precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa",
                                      "hsa")
    matures = mapper.read_gtf_to_precursor(
        "data/examples/annotate/hsa.gff3")
    # matures = mirtop.mirna.read_mature("data/examples/annotate/mirnas.gff", "hsa")

    def annotate(fn, precursors, matures):
        """Read one SAM file, annotate it and write ``fn + ".gff3"``."""
        from mirtop.bam import bam
        from mirtop.gff import body
        reads = bam.read_bam(fn, precursors)
        ann = bam.annotate(reads, matures, precursors)
        # Return value intentionally ignored; the call is made for the
        # GFF3 output only.
        body.create(ann, "miRBase21", "example", fn + ".gff3", "#")

    # print() call form throughout: valid on Python 3 and, with a single
    # argument, prints the same text on Python 2.
    print("\nlast1D\n")
    annotate("data/aligments/let7-last1D.sam", precursors, matures)
    # mirna TGAGGTAGTAGGTTGTATAGTT
    # seq   AGAGGTAGTAGGTTGTA
    print("\n1D\n")
    annotate("data/aligments/let7-1D.sam", precursors, matures)
    # mirna TGAGGTAG-TAGGTTGTATAGTT
    # seq   TGAGGTAGGTAGGTTGTATAGTTA
    print("\nlast7M1I\n")
    annotate("data/aligments/let7-last7M1I.sam", precursors, matures)
    # mirna TGAGGTAGTAGGTTGTATAGTT
    # seq   TGAGGTAGTAGGTTGTA-AGT
    print("\nmiddle1D\n")
    annotate("data/aligments/let7-middle1D.sam", precursors, matures)
    # mirna TGAGGTAGTAGGTTGTATAGTT
    # seq   TGAGGTAGTAGGTTGTATAGTT
    print("\nperfect\n")
    annotate("data/aligments/let7-perfect.sam", precursors, matures)
    # mirna TGAGGTAGTAGGTTGTATAGTT
    # seq   TGAGGTAGTAGGTTGTATAG (3tt 3TT)
    print("\ntriming\n")
    annotate("data/aligments/let7-triming.sam", precursors, matures)
def test_srnabench(self):
    """Read the sRNAbench example annotation and run ``bam.annotate`` on it.

    Precursors come from the example hairpin FASTA and matures from the
    example GFF3; the reads are parsed with ``srnabench.read_file``.
    """
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test", True, True)
    # The logger object itself is never used; keep the call only.
    mylog.getLogger(__name__)
    from mirtop.mirna import fasta, mapper
    precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa",
                                      "hsa")
    matures = mapper.read_gtf_to_precursor(
        "data/examples/annotate/hsa.gff3")

    def annotate(fn, precursors, matures):
        """Parse one sRNAbench file and annotate its reads; return True."""
        from mirtop.importer import srnabench
        from mirtop.bam import bam
        reads = srnabench.read_file(fn, precursors)
        # The annotation result is not inspected; the call only checks
        # that annotation runs on the parsed reads.
        bam.annotate(reads, matures, precursors)
        return True

    # print() call form: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print("\nsRNAbench\n")
    annotate("data/examples/srnabench/reads.annotation", precursors, matures)
def test_collapse(self):
    """Annotate the collapsing-isomirs SAM example and write it as GFF.

    Reads the SAM file with ``bam.read_bam``, annotates it, builds a
    header with ``header.create`` and passes everything to
    ``body.create``, which is given ``<sam file>.gff`` as the output name.
    The value returned by ``body.create`` is printed.

    Returns:
        True.
    """
    from mirtop.libs import logger as mylog
    from mirtop.mirna import mapper, fasta
    from mirtop.gff import body, header
    mylog.initialize_logger("test", True, True)
    # The logger object itself is never used; keep the call only.
    mylog.getLogger(__name__)
    precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa",
                                      "hsa")
    # depend on https://github.com/miRTop/mirtop/issues/6
    matures = mapper.read_gtf_to_precursor(
        "data/examples/annotate/hsa.gff3")
    # matures = mirtop.mirna.read_mature("data/examples/annotate/mirnas.gff", "hsa")
    from mirtop.bam import bam
    bam_fn = "data/aligments/collapsing-isomirs.sam"
    reads = bam.read_bam(bam_fn, precursors)
    ann = bam.annotate(reads, matures, precursors)
    fn = bam_fn + ".gff"
    h = header.create(bam_fn, ["example"], "miRBase21")
    # Pass the header that was just built. The original passed the
    # ``header`` module object here and left ``h`` unused.
    gff = body.create(ann, "miRBase21", "example", fn, h)
    # print() call form: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print(gff)
    return True
def test_read_mir2genomic(self):
    """Load the example GFF3 with ``mapper.read_gtf_to_mirna`` and print it.

    The result is only printed; nothing is asserted or returned.
    """
    from mirtop.mirna import mapper
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test_read_files", True, True)
    gff_path = "data/examples/annotate/hsa.gff3"
    mirna_map = mapper.read_gtf_to_mirna(gff_path)
    print(mirna_map)
def test_read_hairpin_mirgenedb(self):
    """Load the MirGeneDB GFF with ``mapper.read_gtf_to_precursor`` and print it.

    The result is only printed; nothing is asserted or returned.
    """
    from mirtop.mirna import mapper
    from mirtop.libs import logger as mylog
    mylog.initialize_logger("test_read_files", True, True)
    gff_path = "data/db/mirgenedb/hsa.gff"
    precursor_map = mapper.read_gtf_to_precursor(gff_path)
    print(precursor_map)
help="File with mature sequences.", required=True) parser.add_argument("-s", "--size", default=22, help="Size of spike-ins to generate.") parser.add_argument("-n", "--number", default=16, help="Number of spike-ins to generate.") parser.add_argument("-o", "--out", default="spikeins.fa", help="Name used for output files.") parser.add_argument("--seed", help="set up seed for reproducibility.", default=42) parser.add_argument("--universe", help="Set up universe sequences to avoid duplication.", default=None) args = parser.parse_args() random.seed(args.seed) mylog.initialize_logger(os.path.dirname(os.path.abspath(args.out))) logger = mylog.getLogger(__name__) # Read file to get all sequences longer than size - 2 size = args.size - 2 source = _read_fasta(args.fa, size) logger.info("%s was read: %s sequences were loaded" % (args.fa, len(source))) source = _update_ends(source) logger.info("source updated with extended nts: %s" % source) # Map all vs all with razers3 modified = _write_fasta(source, os.path.join(os.path.dirname(args.out), "modified.fa")) sam = os.path.join(os.path.dirname(args.out), "modified.bam") runner.run(("razers3 -i 75 -rr 80 -f -so 1 -o {output} {target} {query}").format(output=sam, target=modified, query=modified)) uniques = _parse_hits(sam, source) print(uniques)