def test_intron_exon_reads():
    """Run ``intron_exon_reads.py`` on the gdc example data and check its output.

    The script is executed twice through ``sp.check_output`` with two worker
    processes: first without, then with, ``--stranded``.
    """
    # NOTE(review): the whitespace between each label and its count has to
    # match the script's stdout byte-for-byte -- confirm the separator.
    base_cmd = [
        'intron_exon_reads.py',
        '--gff', pybedtools.example_filename('gdc.gff'),
        '--bam', pybedtools.example_filename('gdc.bam'),
        '--processes', '2',
    ]

    unstranded = sp.check_output(base_cmd, universal_newlines=True)
    assert unstranded == dedent(
        """\
        exon_only 3
        intron_only 3
        intron_and_exon 1
        """)

    stranded = sp.check_output(base_cmd + ['--stranded'],
                               universal_newlines=True)
    assert stranded == dedent(
        """\
        exon_only 0
        intron_only 0
        intron_and_exon 0
        """)
def test_intron_exon_reads():
    """Check the read counts printed by ``intron_exon_reads.py``.

    Both the default and the ``--stranded`` invocation are compared against
    the exact text expected on stdout.
    """
    gff_path = pybedtools.example_filename("gdc.gff")
    bam_path = pybedtools.example_filename("gdc.bam")

    def _run_script(*extra_args):
        # Every invocation shares the same inputs and process count.
        argv = [
            "intron_exon_reads.py",
            "--gff", gff_path,
            "--bam", bam_path,
            "--processes", "2",
        ]
        argv.extend(extra_args)
        return sp.check_output(argv, universal_newlines=True)

    # NOTE(review): confirm the label/count separator against the script's
    # real stdout; the comparison below is exact.
    expected_default = dedent("""\
        exon_only 3
        intron_only 3
        intron_and_exon 1
        """)
    assert _run_script() == expected_default

    expected_stranded = dedent("""\
        exon_only 0
        intron_only 0
        intron_and_exon 0
        """)
    assert _run_script("--stranded") == expected_stranded
def test_bed_methods():
    """
    Generator that yields tests, inserting different versions of `bed` as needed

    For every ``(method, send_kwargs, expected)`` entry produced by
    ``parse_yaml(config_fn)`` that has a ``bed`` argument (and none of
    ``a``/``b``/``abam``/``i``), one test callable is yielded per input kind
    ('filename', 'generator', 'stream', 'gzip').
    """
    ignored_keys = ('a', 'b', 'abam', 'i')
    for method, send_kwargs, expected in parse_yaml(config_fn):
        # Entries using any of the ignored keys, or lacking `bed`, are skipped.
        if any(key in send_kwargs for key in ignored_keys):
            continue
        if 'bed' not in send_kwargs:
            continue

        # Expand example names into paths, in place.
        for list_key in ('files', 'bams'):
            if list_key in send_kwargs:
                send_kwargs[list_key] = [
                    pybedtools.example_filename(name)
                    for name in send_kwargs[list_key]]
        if 'fi' in send_kwargs:
            send_kwargs['fi'] = pybedtools.example_filename(send_kwargs['fi'])

        # `bed` is passed positionally to `run`, so drop it from the kwargs.
        orig_bed = pybedtools.example_bedtool(send_kwargs['bed'])
        del send_kwargs['bed']

        for kind_bed in ('filename', 'generator', 'stream', 'gzip'):
            bed = converter[kind_bed](orig_bed)
            kind = 'i=%s' % kind_bed
            f = partial(run, method, bed, expected, **send_kwargs)
            # Meaningful description for the test runner's report.
            f.description = '%s, %s, %s' % (method, kind, send_kwargs)
            yield (f, )
def main():
    """Parse the command line and draw the pie chart through ``make_pie``.

    With ``--test`` the bundled gdc example files are used and all other
    arguments are ignored. Otherwise both ``--bed`` and ``--gff`` are
    required: when either is missing the help text is printed and the
    process exits with status 1 instead of handing ``None`` to ``make_pie``.

    Raises
    ------
    ValueError
        If both ``--include`` and ``--exclude`` are given.
    """
    import sys  # local import: only needed for the usage-error exit below

    ap = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument('--bed', help='BED file of e.g. peaks')
    ap.add_argument('--gff', help='GFF file of e.g. annotations')
    ap.add_argument('--out', default='out.png', help='Output PNG file')
    ap.add_argument('--stranded', action='store_true',
                    help='Use strand-specific intersections')
    ap.add_argument('--include', nargs='*', help='Featuretypes to include')
    ap.add_argument('--exclude', nargs='*', help='Featuretypes to exclude')
    ap.add_argument('--thresh', type=float,
                    help='Threshold percentage below which output will be suppressed')
    ap.add_argument('--test', action='store_true',
                    help='Run test, overwriting all other args')
    args = ap.parse_args()

    if args.test:
        make_pie(bed=pybedtools.example_filename('gdc.bed'),
                 gff=pybedtools.example_filename('gdc.gff'),
                 stranded=True,
                 out='out.png',
                 include=['CDS', 'intron', 'five_prime_UTR',
                          'three_prime_UTR'])
        return

    # Without both inputs there is nothing to plot; show usage and stop.
    if not (args.bed and args.gff):
        ap.print_help()
        sys.exit(1)

    if args.include and args.exclude:
        raise ValueError('Cannot specify both --include and --exclude')

    make_pie(bed=args.bed,
             gff=args.gff,
             out=args.out,
             thresh=args.thresh,
             stranded=args.stranded,
             include=args.include,
             exclude=args.exclude)
def test_isBAM():
    """Check ``pybedtools.helpers.isBAM`` on a BAM, a BED and an empty file.

    The empty scratch file ``tiny.txt`` is created in the current directory
    and is removed even when one of the assertions fails.
    """
    bam = pybedtools.example_filename("x.bam")
    notabam = pybedtools.example_filename("a.bed")

    # Create an empty file to exercise the "too small to be a BAM" case.
    with open("tiny.txt", "w"):
        pass
    try:
        assert pybedtools.helpers.isBAM(bam)
        assert not pybedtools.helpers.isBAM(notabam)
        assert not pybedtools.helpers.isBAM("tiny.txt")
    finally:
        # Clean up regardless of the outcome so reruns start from a clean dir.
        os.unlink("tiny.txt")
def test_isBAM():
    """Verify ``pybedtools.helpers.isBAM`` accepts a BAM and rejects non-BAMs.

    Uses the example files ``x.bam`` and ``a.bed`` plus an empty scratch
    file, which is deleted in a ``finally`` clause so a failing assertion
    cannot leave it behind.
    """
    scratch = 'tiny.txt'
    bam = pybedtools.example_filename('x.bam')
    notabam = pybedtools.example_filename('a.bed')

    with open(scratch, 'w'):
        pass  # an empty file is all that is needed
    try:
        assert pybedtools.helpers.isBAM(bam)
        assert not pybedtools.helpers.isBAM(notabam)
        assert not pybedtools.helpers.isBAM(scratch)
    finally:
        os.unlink(scratch)
def main():
    """
    Command-line entry point: pie chart of features overlapping annotations.

    Prints the help and exits with status 1 unless both --bed and --gff are
    given (or --test is used); --test plots the bundled gdc example data.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--bed", help="BED file of e.g. peaks")
    ap.add_argument("--gff", help="GFF file of e.g. annotations")
    ap.add_argument("--out", default="out.png", help="Output PNG file")
    ap.add_argument("--stranded", action="store_true",
                    help="Use strand-specific intersections")
    ap.add_argument("--include", nargs="*", help="Featuretypes to include")
    ap.add_argument("--exclude", nargs="*", help="Featuretypes to exclude")
    ap.add_argument(
        "--thresh",
        type=float,
        help="Threshold percentage below which output will be "
        "suppressed",
    )
    ap.add_argument(
        "--test",
        action="store_true",
        help="Run test, overwriting all other args. Result will "
        'be "out.png" in current directory.',
    )
    args = ap.parse_args()

    # Test mode ignores every other argument.
    if args.test:
        make_pie(
            bed=pybedtools.example_filename("gdc.bed"),
            gff=pybedtools.example_filename("gdc.gff"),
            stranded=True,
            out="out.png",
            include=[
                "exon", "CDS", "intron", "five_prime_UTR", "three_prime_UTR"
            ],
        )
        return

    have_inputs = args.bed and args.gff
    if not have_inputs:
        ap.print_help()
        sys.exit(1)

    if args.include and args.exclude:
        raise ValueError("Cannot specify both --include and --exclude")

    make_pie(
        bed=args.bed,
        gff=args.gff,
        out=args.out,
        thresh=args.thresh,
        stranded=args.stranded,
        include=args.include,
        exclude=args.exclude,
    )
def test_issue_156():
    # NOTE: this isn't appropriate for including in the test_iter cases, since
    # that tests filenames, gzipped files, and iterators. There's no support
    # for "list of iterators" as the `b` argument. Plus, here we're not
    # concerned with the ability to handle those different input types -- just
    # that lists of filenames works.
    a = pybedtools.example_bedtool('a.bed')
    b = [pybedtools.example_filename('b.bed'),
         pybedtools.example_filename('c.gff')]

    # NOTE(review): expected records are written one per line with
    # space-separated fields; presumably `fix` normalizes the separators
    # before comparison -- confirm against its definition.

    # Plain intersection against both files at once. `res` is passed as the
    # assertion message so the observed output is shown on failure.
    res = str(a.intersect(b))
    assert res == fix(
        """
        chr1 59 100 feature1 0 +
        chr1 155 200 feature2 0 +
        chr1 173 200 feature2 0 +
        chr1 173 200 feature2 0 +
        chr1 100 200 feature2 0 +
        chr1 155 200 feature3 0 -
        chr1 464 500 feature3 0 -
        chr1 485 500 feature3 0 -
        chr1 173 326 feature3 0 -
        chr1 438 500 feature3 0 -
        chr1 495 500 feature3 0 -
        chr1 485 500 feature3 0 -
        chr1 173 326 feature3 0 -
        chr1 438 500 feature3 0 -
        chr1 150 269 feature3 0 -
        chr1 900 901 feature4 0 +
        chr1 900 913 feature4 0 +
        chr1 900 913 feature4 0 +
        chr1 900 950 feature4 0 +
        """), res

    # Same intersection with `wb=True` and `names`: each expected row carries
    # the `a` record, then "B" or "C", then the matching record from `b`.
    res = str(a.intersect(b, wb=True, names=['B', 'C']))
    assert res == fix(
        """
        chr1 59 100 feature1 0 + C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1 155 200 feature2 0 + B chr1 155 200 feature5 0 -
        chr1 173 200 feature2 0 + C chr1 ucb CDS 174 326 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 173 200 feature2 0 + C chr1 ucb mRNA 174 326 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 100 200 feature2 0 + C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1 155 200 feature3 0 - B chr1 155 200 feature5 0 -
        chr1 464 500 feature3 0 - C chr1 ucb gene 465 805 . + . ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805
        chr1 485 500 feature3 0 - C chr1 ucb CDS 486 605 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 173 326 feature3 0 - C chr1 ucb CDS 174 326 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 438 500 feature3 0 - C chr1 ucb CDS 439 630 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 495 500 feature3 0 - C chr1 ucb mRNA 496 576 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 485 500 feature3 0 - C chr1 ucb mRNA 486 605 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 173 326 feature3 0 - C chr1 ucb mRNA 174 326 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 438 500 feature3 0 - C chr1 ucb mRNA 439 899 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 150 269 feature3 0 - C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1 900 901 feature4 0 + B chr1 800 901 feature6 0 +
        chr1 900 913 feature4 0 + C chr1 ucb mRNA 631 913 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 900 913 feature4 0 + C chr1 ucb CDS 760 913 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 900 950 feature4 0 + C chr1 ucb CDS 706 1095 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        """), res
def test_issue_156():
    # NOTE: this isn't appropriate for including in the test_iter cases, since
    # that tests filenames, gzipped files, and iterators. There's no support
    # for "list of iterators" as the `b` argument. Plus, here we're not
    # concerned with the ability to handle those different input types -- just
    # that lists of filenames works.
    a = pybedtools.example_bedtool("a.bed")
    b = [
        pybedtools.example_filename("b.bed"),
        pybedtools.example_filename("c.gff")
    ]

    # NOTE(review): the expected tables list one record per line with
    # space-separated fields; `fix` presumably converts them to the form
    # produced by `str(BedTool)` -- confirm against its definition.

    # First case: intersect `a` with a list of two filenames. The trailing
    # `, res` makes the observed text the assertion message.
    res = str(a.intersect(b))
    assert res == fix("""
        chr1 59 100 feature1 0 +
        chr1 155 200 feature2 0 +
        chr1 173 200 feature2 0 +
        chr1 173 200 feature2 0 +
        chr1 100 200 feature2 0 +
        chr1 155 200 feature3 0 -
        chr1 464 500 feature3 0 -
        chr1 485 500 feature3 0 -
        chr1 173 326 feature3 0 -
        chr1 438 500 feature3 0 -
        chr1 495 500 feature3 0 -
        chr1 485 500 feature3 0 -
        chr1 173 326 feature3 0 -
        chr1 438 500 feature3 0 -
        chr1 150 269 feature3 0 -
        chr1 900 901 feature4 0 +
        chr1 900 913 feature4 0 +
        chr1 900 913 feature4 0 +
        chr1 900 950 feature4 0 +
        """), res

    # Second case: `wb=True` plus `names`; every expected row holds the `a`
    # record, the label "B" or "C", and the overlapping record from `b`.
    res = str(a.intersect(b, wb=True, names=["B", "C"]))
    assert res == fix("""
        chr1 59 100 feature1 0 + C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1 155 200 feature2 0 + B chr1 155 200 feature5 0 -
        chr1 173 200 feature2 0 + C chr1 ucb CDS 174 326 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 173 200 feature2 0 + C chr1 ucb mRNA 174 326 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 100 200 feature2 0 + C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1 155 200 feature3 0 - B chr1 155 200 feature5 0 -
        chr1 464 500 feature3 0 - C chr1 ucb gene 465 805 . + . ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805
        chr1 485 500 feature3 0 - C chr1 ucb CDS 486 605 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 173 326 feature3 0 - C chr1 ucb CDS 174 326 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 438 500 feature3 0 - C chr1 ucb CDS 439 630 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 495 500 feature3 0 - C chr1 ucb mRNA 496 576 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 485 500 feature3 0 - C chr1 ucb mRNA 486 605 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 173 326 feature3 0 - C chr1 ucb mRNA 174 326 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 438 500 feature3 0 - C chr1 ucb mRNA 439 899 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 150 269 feature3 0 - C chr1 ucb gene 60 269 . - . ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1 900 901 feature4 0 + B chr1 800 901 feature6 0 +
        chr1 900 913 feature4 0 + C chr1 ucb mRNA 631 913 . + . ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1 900 913 feature4 0 + C chr1 ucb CDS 760 913 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1 900 950 feature4 0 + C chr1 ucb CDS 706 1095 . + . Parent=AT1G01010.mRNA;rname=AT1G01010
        """), res
def test_gzipped_files_can_be_intersected():
    """Intersections must agree for every plain/gzipped pairing of a.bed, b.bed."""
    # Gzipped temporary copies, wrapped as BedTools.
    gz_a = pybedtools.BedTool(
        _make_temporary_gzip(pybedtools.example_filename('a.bed')))
    gz_b = pybedtools.BedTool(
        _make_temporary_gzip(pybedtools.example_filename('b.bed')))

    # Uncompressed references.
    plain_a = pybedtools.example_bedtool('a.bed')
    plain_b = pybedtools.example_bedtool('b.bed')

    assert (plain_a.intersect(plain_b)
            == gz_a.intersect(gz_b)
            == plain_a.intersect(gz_b)
            == gz_a.intersect(plain_b))
def test_links():
    """Compare the HTML written by ``BedTool.links()`` with the stored example.

    A copy of ``a.bed`` is made in the current directory, and removed again
    whether or not the comparison succeeds.
    """
    import shutil  # local import: replaces the former shell `cp` call

    # have to be careful about the path, since it is embedded in the HTML
    # output -- so make a copy of the example file, and delete when done.
    shutil.copy(pybedtools.example_filename('a.bed'), 'a.links.bed')
    try:
        a = pybedtools.BedTool('a.links.bed')
        a = a.links()
        with open(pybedtools.example_filename('a.links.html')) as expected_fh:
            exp = expected_fh.read()
        with open(a.links_html) as observed_fh:
            obs = observed_fh.read()
        # Single-argument print() behaves the same on Python 2 and 3.
        print(exp)
        print(obs)
        assert exp == obs
    finally:
        os.unlink('a.links.bed')
def test_gzip():
    """Gzipped copies of a.bed/b.bed are detected as BED and intersect identically.

    The compressed copies are written with the standard-library ``gzip``
    module, so the test needs neither an external ``gzip`` binary nor a
    shell, and a failed compression raises instead of passing silently.
    """
    import gzip
    import shutil

    def _gzipped_bedtool(example_name):
        # make a new gzipped file on the fly
        target = pybedtools.BedTool._tmp()
        with open(pybedtools.example_filename(example_name), 'rb') as src:
            with gzip.open(target, 'wb') as dst:
                shutil.copyfileobj(src, dst)
        return pybedtools.BedTool(target)

    agz = _gzipped_bedtool('a.bed')
    bgz = _gzipped_bedtool('b.bed')
    assert agz.file_type == bgz.file_type == 'bed'

    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    assert a.intersect(b) == agz.intersect(bgz) == a.intersect(bgz) == agz.intersect(b)
def test_gzip():
    """Check file-type detection and intersection results for gzipped BED files.

    Compression happens in-process through ``gzip.open`` rather than by
    shelling out, so paths with spaces or a missing ``gzip`` executable
    cannot silently produce empty inputs.
    """
    import gzip
    import shutil

    # make new gzipped files on the fly
    agz = pybedtools.BedTool._tmp()
    bgz = pybedtools.BedTool._tmp()
    for example_name, gz_path in (("a.bed", agz), ("b.bed", bgz)):
        with open(pybedtools.example_filename(example_name), "rb") as src:
            with gzip.open(gz_path, "wb") as dst:
                shutil.copyfileobj(src, dst)

    agz = pybedtools.BedTool(agz)
    bgz = pybedtools.BedTool(bgz)
    assert agz.file_type == bgz.file_type == "bed"

    a = pybedtools.example_bedtool("a.bed")
    b = pybedtools.example_bedtool("b.bed")
    assert a.intersect(b) == agz.intersect(bgz) == a.intersect(bgz) == agz.intersect(b)
def test_gzip():
    """Gzipped BED inputs must behave exactly like their plain counterparts.

    The gzipped copies are produced with the ``gzip`` module instead of an
    unchecked ``os.system`` shell pipeline, so a compression failure
    surfaces as an exception at the point where it happens.
    """
    import gzip
    import shutil

    def _compress(src_path, dest_path):
        # Stream the example file into a gzip container at `dest_path`.
        with open(src_path, 'rb') as src, gzip.open(dest_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)

    # make new gzipped files on the fly
    agz = pybedtools.BedTool._tmp()
    bgz = pybedtools.BedTool._tmp()
    _compress(pybedtools.example_filename('a.bed'), agz)
    _compress(pybedtools.example_filename('b.bed'), bgz)

    agz = pybedtools.BedTool(agz)
    bgz = pybedtools.BedTool(bgz)
    assert agz.file_type == bgz.file_type == 'bed'

    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    assert a.intersect(b) == agz.intersect(bgz) == a.intersect(
        bgz) == agz.intersect(b)
def test_issue_218():
    """Check how docstrings and methods react to changing the BEDTools path.

    The bedtools path is switched to a nonexistent location during the test;
    the path captured at the start is restored in a ``finally`` clause so a
    failing assertion cannot leave a broken path behind for later tests.
    """
    from pybedtools.helpers import set_bedtools_path, get_bedtools_path
    from pybedtools import BedTool  # noqa: F401 (referenced by the NOTE below)

    orig_path = get_bedtools_path()

    # As pointed out in #222, example_bedtool behaves differently from BedTool.
    # example_bedtool is defined in pybedtools.bedtool but pybedtools.BedTool
    # is imported in pybedtools.__init__. So check various constructors here.
    constructors = (
        lambda x: pybedtools.example_bedtool(x),
        lambda x: pybedtools.BedTool(pybedtools.example_filename(x)),
        lambda x: pybedtools.bedtool.BedTool(pybedtools.example_filename(x)),

        # NOTE: we likely need recursive reloading (like IPython.deepreload)
        # for this to work:
        #
        # lambda x: BedTool(pybedtools.example_filename(x)),
    )

    try:
        for constructor in constructors:
            x = constructor('x.bed')
            x.sort()
            assert "Original BEDTools help" in pybedtools.bedtool.BedTool.sort.__doc__
            assert "Original BEDTools help" in x.sort.__doc__

            set_bedtools_path('nonexistent')

            # Calling BEDTools with non-existent path, but the docstring should
            # not have been changed.
            with pytest.raises(OSError):
                x.sort()
            assert "Original BEDTools help" in x.sort.__doc__

            # The class's docstring should have been reset though.
            assert pybedtools.bedtool.BedTool.sort.__doc__ is None

            # Creating a new BedTool object now that bedtools is not on the
            # path should detect that, adding a method that raises
            # NotImplementedError...
            y = constructor('x.bed')
            with pytest.raises(NotImplementedError):
                y.sort()

            # ...and correspondingly no docstring
            assert y.sort.__doc__ is None
            assert pybedtools.bedtool.BedTool.sort.__doc__ is None

            # Reset the path, and ensure the resetting works
            set_bedtools_path()
            z = constructor('x.bed')
            z.sort()
    finally:
        # Put back the path that was active before the test started.
        set_bedtools_path(orig_path)
def test_issue_218():
    """Exercise docstring/method behavior when the BEDTools path changes.

    For each constructor the path is pointed at a nonexistent location and
    then reset. The path recorded on entry is reinstated in ``finally`` so
    that an assertion failure does not leak the 'nonexistent' setting.
    """
    from pybedtools.helpers import set_bedtools_path, get_bedtools_path
    from pybedtools import BedTool  # noqa: F401 (referenced by the NOTE below)

    orig_path = get_bedtools_path()

    def _check(constructor):
        # One full round trip: working path -> broken path -> reset path.
        x = constructor("x.bed")
        x.sort()
        assert "Original BEDTools help" in pybedtools.bedtool.BedTool.sort.__doc__
        assert "Original BEDTools help" in x.sort.__doc__

        set_bedtools_path("nonexistent")

        # Calling BEDTools with non-existent path, but the docstring should not
        # have been changed.
        with pytest.raises(OSError):
            x.sort()
        assert "Original BEDTools help" in x.sort.__doc__

        # The class's docstring should have been reset though.
        assert pybedtools.bedtool.BedTool.sort.__doc__ is None

        # Creating a new BedTool object now that bedtools is not on the path
        # should detect that, adding a method that raises
        # NotImplementedError...
        y = constructor("x.bed")
        with pytest.raises(NotImplementedError):
            y.sort()

        # ...and correspondingly no docstring
        assert y.sort.__doc__ is None
        assert pybedtools.bedtool.BedTool.sort.__doc__ is None

        # Reset the path, and ensure the resetting works
        set_bedtools_path()
        z = constructor("x.bed")
        z.sort()

    # As pointed out in #222, example_bedtool behaves differently from BedTool.
    # example_bedtool is defined in pybedtools.bedtool but pybedtools.BedTool
    # is imported in pybedtools.__init__. So check various constructors here.
    try:
        for constructor in (
                lambda x: pybedtools.example_bedtool(x),
                lambda x: pybedtools.BedTool(pybedtools.example_filename(x)),
                lambda x: pybedtools.bedtool.BedTool(
                    pybedtools.example_filename(x)),

                # NOTE: we likely need recursive reloading (like
                # IPython.deepreload) for this to work:
                #
                # lambda x: BedTool(pybedtools.example_filename(x)),
        ):
            _check(constructor)
    finally:
        # Always restore the path that was configured before the test.
        set_bedtools_path(orig_path)
def main():
    """
    Entry point: parse arguments, then plot a pie chart of features
    overlapping annotations (e.g., peaks in introns, exons, etc).
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument('--bed', help='BED file of e.g. peaks')
    ap.add_argument('--gff', help='GFF file of e.g. annotations')
    ap.add_argument('--out', default='out.png', help='Output PNG file')
    ap.add_argument('--stranded', action='store_true',
                    help='Use strand-specific intersections')
    ap.add_argument('--include', nargs='*', help='Featuretypes to include')
    ap.add_argument('--exclude', nargs='*', help='Featuretypes to exclude')
    ap.add_argument('--thresh', type=float,
                    help='Threshold percentage below which output will be '
                    'suppressed')
    ap.add_argument('--test', action='store_true',
                    help='Run test, overwriting all other args. Result will '
                    'be "out.png" in current directory.')
    args = ap.parse_args()

    if args.test:
        # Fixed arguments for the self-test; everything else is ignored.
        pie_kwargs = dict(
            bed=pybedtools.example_filename('gdc.bed'),
            gff=pybedtools.example_filename('gdc.gff'),
            stranded=True,
            out='out.png',
            include=[
                'exon', 'CDS', 'intron', 'five_prime_UTR', 'three_prime_UTR'
            ])
    else:
        # Both input files are mandatory outside of test mode.
        if not (args.bed and args.gff):
            ap.print_help()
            sys.exit(1)
        if args.include and args.exclude:
            raise ValueError('Cannot specify both --include and --exclude')
        pie_kwargs = dict(
            bed=args.bed,
            gff=args.gff,
            out=args.out,
            thresh=args.thresh,
            stranded=args.stranded,
            include=args.include,
            exclude=args.exclude)

    make_pie(**pie_kwargs)
def test_gzipped_files_are_iterable_as_normal():
    """Iterating a gzipped copy of a.bed yields the same items as the plain file."""
    gz_path = _make_temporary_gzip(pybedtools.example_filename('a.bed'))
    gz_bedtool = pybedtools.BedTool(gz_path)
    plain_bedtool = pybedtools.example_bedtool('a.bed')

    # Print each item from the gzipped file as it is read.
    for feature in gz_bedtool:
        print(feature)

    plain_items = list(plain_bedtool)
    gz_items = list(gz_bedtool)
    assert_list_equal(plain_items, gz_items)
def test_cat():
    """Check ``BedTool.cat`` with a BedTool or filename, with and without merging."""
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    b_fn = pybedtools.example_filename('b.bed')

    # A BedTool and its filename must be interchangeable as the argument.
    assert a.cat(b) == a.cat(b_fn)
    expected = fix("""
    chr1 1 500
    chr1 800 950
    """)
    assert a.cat(b) == expected

    # With postmerge=False every input record is expected in the result.
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    c = a.cat(b, postmerge=False)
    assert len(a) + len(b) == len(c), (len(a), len(b), len(c))

    # Single-argument print() is valid on both Python 2 and 3.
    print(c)

    assert c == fix("""
    chr1 1 100 feature1 0 +
    chr1 100 200 feature2 0 +
    chr1 150 500 feature3 0 -
    chr1 900 950 feature4 0 +
    chr1 155 200 feature5 0 -
    chr1 800 901 feature6 0 +
    """)
def test_issue_178():
    """Round-trip a BAM through bigWig and back to bedGraph (issue 178).

    The test passes silently when the conversion raises the "file not found"
    error described below. ``tmp.bw`` is removed whenever it was created,
    including when the comparison fails.
    """
    # Compatibility between py2/py3: py27 does not have FileNotFoundError, so
    # fall back to IOError (which does exist) there.
    #
    # The exception class is bound to a *different* local name on purpose:
    # assigning to ``FileNotFoundError`` inside this function would make that
    # name local, the probe would then always fail with UnboundLocalError (a
    # NameError subclass), and IOError would be selected on every Python
    # version -- on py3 that is OSError, hiding unrelated OS failures.
    try:
        missing_file_error = FileNotFoundError
    except NameError:
        missing_file_error = IOError

    try:
        fn = pybedtools.example_filename('gdc.othersort.bam')
        pybedtools.contrib.bigwig.bam_to_bigwig(fn, genome='dm3', output='tmp.bw')
        x = pybedtools.contrib.bigwig.bigwig_to_bedgraph('tmp.bw')
        assert x == fix(
            '''
            chr2L 70 75 1
            chr2L 140 145 1
            chr2L 150 155 1
            chr2L 160 165 1
            chr2L 210 215 1
            chrX 10 15 1
            chrX 70 75 1
            chrX 140 145 1
            ''')

    # If bedGraphToBigWig is not on the path, see
    # https://github.com/daler/pybedtools/issues/227
    except missing_file_error:
        pass
    finally:
        # The file only exists if bam_to_bigwig got far enough to write it.
        if os.path.exists('tmp.bw'):
            os.unlink('tmp.bw')
def test_gzipped_files_are_iterable_as_normal():
    """A gzipped a.bed must iterate to the same list of items as plain a.bed."""
    reference = pybedtools.example_bedtool('a.bed')
    compressed = pybedtools.BedTool(
        _make_temporary_gzip(pybedtools.example_filename('a.bed')))

    # Walk the gzipped file once, printing every item.
    for item in compressed:
        print(item)

    assert list(reference) == list(compressed)
def test_issue_178():
    """Convert a BAM to bigWig and back to bedGraph, checking the intervals.

    A "file not found" error from the conversion is tolerated (see the issue
    link below). The temporary ``tmp.bw`` is deleted in ``finally`` so it
    does not linger when the assertion fails.
    """
    # Compatibility between py2/py3: py27 does not have FileNotFoundError, so
    # use IOError (which does exist) there. Do NOT rebind the name
    # ``FileNotFoundError`` inside this function: that turns it into a local,
    # so merely reading it raises UnboundLocalError (a NameError) and the
    # IOError fallback -- OSError on py3 -- would be chosen unconditionally.
    try:
        not_found_error = FileNotFoundError
    except NameError:
        not_found_error = IOError

    bigwig_path = "tmp.bw"
    try:
        fn = pybedtools.example_filename("gdc.othersort.bam")
        pybedtools.contrib.bigwig.bam_to_bigwig(fn, genome="dm3",
                                                output=bigwig_path)
        x = pybedtools.contrib.bigwig.bigwig_to_bedgraph(bigwig_path)
        assert x == fix("""
            chr2L 70 75 1
            chr2L 140 145 1
            chr2L 150 155 1
            chr2L 160 165 1
            chr2L 210 215 1
            chrX 10 15 1
            chrX 70 75 1
            chrX 140 145 1
            """)

    # If bedGraphToBigWig is not on the path, see
    # https://github.com/daler/pybedtools/issues/227
    except not_found_error:
        pass
    finally:
        # Remove the bigWig only if the conversion actually produced it.
        if os.path.exists(bigwig_path):
            os.unlink(bigwig_path)
def test_getting_example_beds():
    """Example files resolve under ``testdir/data``; unknown names raise."""
    expected_path = os.path.join(testdir, 'data', 'a.bed')

    assert 'a.bed' in pybedtools.list_example_files()
    assert pybedtools.example_filename('a.bed') == expected_path
    assert pybedtools.example_bedtool('a.bed').fn == expected_path

    # complain appropriately if nonexistent paths are asked for
    if six.PY3:
        e = FileNotFoundError
    else:
        e = ValueError
    for func in (pybedtools.example_filename,
                 pybedtools.example_bedtool,
                 pybedtools.set_tempdir):
        with pytest.raises(e):
            func('nonexistent')
def fetchSequence(chrName, start, end, fastq):
    """Return the text written by ``BedTool.sequence`` for one interval.

    Parameters
    ----------
    chrName, start, end
        Interval fields; they are joined with single spaces into a one-line
        BED record. Non-string values (e.g. integer coordinates) are
        converted with ``str``.
    fastq
        Name passed to ``pybedtools.example_filename`` and then used as the
        ``fi`` argument of ``sequence``.
        NOTE(review): despite the parameter name this is used as a FASTA
        input -- confirm with callers.

    Returns
    -------
    str
        The full contents of the file at ``seqfn`` of the resulting BedTool.
    """
    position = ' '.join(str(field) for field in (chrName, start, end))
    bedpos = pybedtools.BedTool(position, from_string=True)
    fasta = pybedtools.example_filename(fastq)
    bedpos = bedpos.sequence(fi=fasta)
    # Close the sequence file deterministically instead of relying on GC.
    with open(bedpos.seqfn) as handle:
        sequence = handle.read()
    return sequence
def _classifier():
    """Classify gdc.bed against gdc.gff and verify all three result mappings.

    Checks ``class_counts``, ``feature_classes`` and ``class_features`` of
    the ``Classifier`` after ``classify()``; prints the indexed features and
    the observed class membership as a debugging aid.
    """
    c = Classifier(
        bed=pybedtools.example_filename("gdc.bed"),
        annotations=pybedtools.example_filename("gdc.gff"),
    )
    c.classify()

    bed = pybedtools.example_bedtool("gdc.bed")

    expected_counts = {
        frozenset({"UTR", "exon", "mRNA", "CDS", "tRNA", "gene"}): 1,
        frozenset({"intron", "gene", "mRNA"}): 3,
        frozenset(): 1,
        frozenset({"gene", "exon", "mRNA", "CDS"}): 2,
        frozenset({"exon", "mRNA", "CDS", "tRNA", "intron", "gene"}): 1,
    }
    assert c.class_counts == expected_counts

    expected_feature_classes = {
        bed[0]: {"."},
        bed[1]: {"gene", "exon", "mRNA", "CDS"},
        bed[2]: {"intron", "gene", "mRNA"},
        bed[3]: {"intron", "gene", "mRNA"},
        bed[4]: {"tRNA", "UTR", "exon", "mRNA", "CDS", "gene"},
        bed[5]: {"gene", "exon", "mRNA", "CDS"},
        bed[6]: {"intron", "gene", "mRNA"},
        bed[7]: {"tRNA", "intron", "exon", "mRNA", "CDS", "gene"},
    }
    assert c.feature_classes == expected_feature_classes

    print("use these indexes for debugging")
    for index, feature in enumerate(bed):
        print(index, feature)

    for klass, members in list(c.class_features.items()):
        print(klass)
        for member in members:
            print("\t" + str(member))

    expected_class_features = {
        frozenset(): [bed[0]],
        frozenset({"intron", "gene", "mRNA"}): [bed[6], bed[2], bed[3]],
        frozenset({"gene", "exon", "mRNA", "CDS"}): [bed[5], bed[1]],
        frozenset({"UTR", "exon", "mRNA", "CDS", "tRNA", "gene"}): [bed[4]],
        frozenset({"exon", "mRNA", "CDS", "tRNA", "intron", "gene"}): [bed[7]],
    }
    assert c.class_features == expected_class_features
def main():
    """
    Parse the command line and hand the result to ``make_pie``.

    Outside of --test, both --bed and --gff must be supplied (otherwise the
    help is shown and the exit status is 1) and --include/--exclude are
    mutually exclusive.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # (flag, keyword arguments) pairs, registered in this order.
    option_table = [
        ("--bed", dict(help="BED file of e.g. peaks")),
        ("--gff", dict(help="GFF file of e.g. annotations")),
        ("--out", dict(default="out.png", help="Output PNG file")),
        ("--stranded", dict(action="store_true",
                            help="Use strand-specific intersections")),
        ("--include", dict(nargs="*", help="Featuretypes to include")),
        ("--exclude", dict(nargs="*", help="Featuretypes to exclude")),
        ("--thresh", dict(type=float,
                          help="Threshold percentage below which output will be "
                          "suppressed")),
        ("--test", dict(action="store_true",
                        help="Run test, overwriting all other args. Result will "
                        'be "out.png" in current directory.')),
    ]
    for flag, options in option_table:
        ap.add_argument(flag, **options)

    args = ap.parse_args()

    missing_inputs = not (args.bed and args.gff)
    if missing_inputs and not args.test:
        ap.print_help()
        sys.exit(1)

    if args.test:
        make_pie(
            bed=pybedtools.example_filename("gdc.bed"),
            gff=pybedtools.example_filename("gdc.gff"),
            stranded=True,
            out="out.png",
            include=["exon", "CDS", "intron", "five_prime_UTR", "three_prime_UTR"],
        )
        return

    if args.include and args.exclude:
        raise ValueError("Cannot specify both --include and --exclude")
    make_pie(
        bed=args.bed,
        gff=args.gff,
        out=args.out,
        thresh=args.thresh,
        stranded=args.stranded,
        include=args.include,
        exclude=args.exclude,
    )
def test_links():
    """Compare the HTML produced by ``BedTool.links()`` with a.links.html.

    Both files are read inside ``with`` blocks so the handles are closed
    deterministically.
    """
    # have to be careful about the path, since it is embedded in the HTML
    # output.
    a = pybedtools.BedTool(
        os.path.join(os.path.relpath(pybedtools.data_dir()), 'a.bed'))
    a = a.links()
    with open(pybedtools.example_filename('a.links.html')) as expected_fh:
        exp = expected_fh.read()
    with open(a.links_html) as observed_fh:
        obs = observed_fh.read()
    assert exp == obs
def get_sequence(reference_fasta, coordinates, strand):
    """Takes coordinates and returns sequence.

    Parameters
    ----------
    reference_fasta
        Name passed to ``pybedtools.example_filename``; the result is used
        as the ``fi`` argument of ``BedTool.sequence``.
    coordinates
        BED record(s) as a string; bed_coor is space separated.
    strand
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    list of str
        Lines of the sequence output with every '>' removed, without the
        final element of the newline split.

    Notes
    -----
    ``pybedtools.cleanup(remove_all=True)`` is called on every exit path,
    including when extraction or reading fails.
    """
    bed_coor = pybedtools.BedTool(coordinates, from_string=True)
    fasta = pybedtools.example_filename(reference_fasta)
    try:
        seq = bed_coor.sequence(fi=fasta)
        # Read inside `with` so the handle is closed before cleanup runs.
        with open(seq.seqfn, 'r') as handle:
            seq_str = handle.read()
    finally:
        pybedtools.cleanup(remove_all=True)
    return seq_str.replace('>', '').split('\n')[0:-1]
def _classifier():
    """Run ``Classifier`` on the gdc example files and check its three mappings.

    ``class_counts``, ``feature_classes`` and ``class_features`` are compared
    with hard-coded expectations; indexed features and the observed classes
    are printed to help debugging.
    """
    bed_fn = pybedtools.example_filename('gdc.bed')
    gff_fn = pybedtools.example_filename('gdc.gff')
    c = Classifier(bed=bed_fn, annotations=gff_fn)
    c.classify()

    bed = pybedtools.example_bedtool('gdc.bed')

    # Featuretype combinations referenced by the expectations below.
    nothing = frozenset()
    intronic = frozenset({'intron', 'gene', 'mRNA'})
    coding = frozenset({'gene', 'exon', 'mRNA', 'CDS'})
    utr_trna = frozenset({'UTR', 'exon', 'mRNA', 'CDS', 'tRNA', 'gene'})
    intron_trna = frozenset({'exon', 'mRNA', 'CDS', 'tRNA', 'intron', 'gene'})

    assert c.class_counts == {
        utr_trna: 1,
        intronic: 3,
        nothing: 1,
        coding: 2,
        intron_trna: 1}

    assert c.feature_classes == {
        bed[0]: {'.'},
        bed[1]: {'gene', 'exon', 'mRNA', 'CDS'},
        bed[2]: {'intron', 'gene', 'mRNA'},
        bed[3]: {'intron', 'gene', 'mRNA'},
        bed[4]: {'tRNA', 'UTR', 'exon', 'mRNA', 'CDS', 'gene'},
        bed[5]: {'gene', 'exon', 'mRNA', 'CDS'},
        bed[6]: {'intron', 'gene', 'mRNA'},
        bed[7]: {'tRNA', 'intron', 'exon', 'mRNA', 'CDS', 'gene'},
    }

    print('use these indexes for debugging')
    for idx, feat in enumerate(bed):
        print(idx, feat)

    for key, features in list(c.class_features.items()):
        print(key)
        for feat in features:
            print('\t' + str(feat))

    assert c.class_features == {
        nothing: [bed[0]],
        intronic: [bed[6], bed[2], bed[3]],
        coding: [bed[5], bed[1]],
        utr_trna: [bed[4]],
        intron_trna: [bed[7]],
    }
def _get_sequence(chrom, pos, strand, genome):
    """Return the sequence context around ``pos`` as ``pre-nt-post``.

    A single record spanning ``pos-150`` to ``pos+150`` on ``strand`` is
    built, its sequence is extracted with ``BedTool.sequence(fi=..., s=True)``
    and the second line of the output is split at offset 150.

    Parameters
    ----------
    chrom : str
    pos : int
        Position at the centre of the window.
        NOTE(review): for ``pos < 150`` the start coordinate is negative --
        confirm callers never pass such positions.
    strand : str
    genome
        Name passed to ``pybedtools.example_filename`` to locate the FASTA.

    Returns
    -------
    list
        ``[chrom, str(pos), "<pre>-<nt>-<post>"]``.
    """
    a = pybedtools.BedTool(
        "{0}\t{1}\t{2}\t.\t.\t{3}".format(chrom, pos-150, pos+150, strand),
        from_string=True)
    fasta = pybedtools.example_filename(genome)
    a = a.sequence(fi=fasta, s=True)
    # Read inside `with` so the handle is closed deterministically.
    with open(a.seqfn) as handle:
        seq = handle.read().split("\n")
    # Line 0 is the record header; line 1 holds the extracted bases.
    bases = seq[1]
    pre = bases[:150]
    nt = bases[150]
    post = bases[151:]
    return [chrom , str(pos), "%s-%s-%s" % (pre, nt, post)]
def test_classifier():
    """Classify gdc.bed against gdc.gff and check the ``Classifier`` results.

    Debug output uses the ``print()`` function form, so the module also
    parses on Python 3.
    """
    c = Classifier(bed=pybedtools.example_filename('gdc.bed'),
                   annotations=pybedtools.example_filename('gdc.gff'))
    c.classify()

    bed = pybedtools.example_bedtool('gdc.bed')

    # Number of features seen for each combination of featuretypes.
    assert c.class_counts == {
        frozenset(['UTR', 'exon', 'mRNA', 'CDS', 'tRNA', 'gene']): 1,
        frozenset(['intron', 'gene', 'mRNA']): 3,
        frozenset([]): 1,
        frozenset(['gene', 'exon', 'mRNA', 'CDS']): 2,
        frozenset(['exon', 'mRNA', 'CDS', 'tRNA', 'intron', 'gene']): 1
    }

    # Featuretypes recorded for each individual feature.
    assert c.feature_classes == {
        bed[0]: set(['.']),
        bed[1]: set(['gene', 'exon', 'mRNA', 'CDS']),
        bed[2]: set(['intron', 'gene', 'mRNA']),
        bed[3]: set(['intron', 'gene', 'mRNA']),
        bed[4]: set(['tRNA', 'UTR', 'exon', 'mRNA', 'CDS', 'gene']),
        bed[5]: set(['gene', 'exon', 'mRNA', 'CDS']),
        bed[6]: set(['intron', 'gene', 'mRNA']),
        bed[7]: set(['tRNA', 'intron', 'exon', 'mRNA', 'CDS', 'gene']),
    }

    print('use these indexes for debugging')
    for i, f in enumerate(bed):
        print(i, f)

    for k, v in c.class_features.items():
        print(k)
        for i in v:
            print('\t' + str(i))

    # Features listed under each combination of featuretypes.
    assert c.class_features == {
        frozenset([]): [bed[0]],
        frozenset(['intron', 'gene', 'mRNA']): [bed[6], bed[2], bed[3]],
        frozenset(['gene', 'exon', 'mRNA', 'CDS']): [bed[5], bed[1]],
        frozenset(['UTR', 'exon', 'mRNA', 'CDS', 'tRNA', 'gene']): [bed[4]],
        frozenset(['exon', 'mRNA', 'CDS', 'tRNA', 'intron', 'gene']): [bed[7]],
    }
def _classifier():
    """Check ``Classifier`` output for the gdc example BED/GFF pair.

    Debug output is emitted with ``print()`` calls (not Python 2 print
    statements), matching the other variants of this check.
    """
    c = Classifier(bed=pybedtools.example_filename("gdc.bed"),
                   annotations=pybedtools.example_filename("gdc.gff"))
    c.classify()

    bed = pybedtools.example_bedtool("gdc.bed")

    # Number of features seen for each combination of featuretypes.
    assert c.class_counts == {
        frozenset(["UTR", "exon", "mRNA", "CDS", "tRNA", "gene"]): 1,
        frozenset(["intron", "gene", "mRNA"]): 3,
        frozenset([]): 1,
        frozenset(["gene", "exon", "mRNA", "CDS"]): 2,
        frozenset(["exon", "mRNA", "CDS", "tRNA", "intron", "gene"]): 1,
    }

    # Featuretypes recorded for each individual feature.
    assert c.feature_classes == {
        bed[0]: set(["."]),
        bed[1]: set(["gene", "exon", "mRNA", "CDS"]),
        bed[2]: set(["intron", "gene", "mRNA"]),
        bed[3]: set(["intron", "gene", "mRNA"]),
        bed[4]: set(["tRNA", "UTR", "exon", "mRNA", "CDS", "gene"]),
        bed[5]: set(["gene", "exon", "mRNA", "CDS"]),
        bed[6]: set(["intron", "gene", "mRNA"]),
        bed[7]: set(["tRNA", "intron", "exon", "mRNA", "CDS", "gene"]),
    }

    print("use these indexes for debugging")
    for i, f in enumerate(bed):
        print(i, f)

    for k, v in c.class_features.items():
        print(k)
        for i in v:
            print("\t" + str(i))

    # Features listed under each combination of featuretypes.
    assert c.class_features == {
        frozenset([]): [bed[0]],
        frozenset(["intron", "gene", "mRNA"]): [bed[6], bed[2], bed[3]],
        frozenset(["gene", "exon", "mRNA", "CDS"]): [bed[5], bed[1]],
        frozenset(["UTR", "exon", "mRNA", "CDS", "tRNA", "gene"]): [bed[4]],
        frozenset(["exon", "mRNA", "CDS", "tRNA", "intron", "gene"]): [bed[7]],
    }
def test_links():
    """The HTML from ``BedTool.links()`` must equal the stored a.links.html.

    File contents are read through a small helper that closes each handle
    as soon as it has been read.
    """
    def _slurp(path):
        # Read a whole text file and close it again immediately.
        with open(path) as handle:
            return handle.read()

    # have to be careful about the path, since it is embedded in the HTML
    # output.
    relative_bed = os.path.join(
        os.path.relpath(pybedtools.data_dir()), 'a.bed')
    a = pybedtools.BedTool(relative_bed)
    a = a.links()
    exp = _slurp(pybedtools.example_filename('a.links.html'))
    obs = _slurp(a.links_html)
    assert exp == obs
def test_gzipped_output():
    """``saveas(compressed=True)`` must write a gzip that decompresses to the input."""
    source_path = pybedtools.example_filename('a.bed')
    saved = pybedtools.BedTool(source_path).saveas(compressed=True)

    # Open gzipped file in text mode
    with gzip.open(saved.fn, 'rt') as gz_handle:
        round_tripped = gz_handle.read()

    with open(source_path) as plain_handle:
        original = plain_handle.read()

    assert_equal(original, round_tripped)
def test_a_b_methods():
    """
    Generator that yields tests, inserting different versions of `a` and `b`
    as needed.

    Each configuration from ``parse_yaml(config_fn)`` that has both `a` and
    `b` is expanded over every ordered pair of distinct input kinds; kinds
    not listed in ``supported_bam`` are skipped for BAM inputs.
    """
    input_kinds = ('filename', 'generator', 'stream', 'gzip')

    for method, send_kwargs, expected in parse_yaml(config_fn):
        # An `abam` entry is expanded to a path and doubles as `a`.
        a_isbam = 'abam' in send_kwargs
        if a_isbam:
            abam_path = pybedtools.example_filename(send_kwargs['abam'])
            send_kwargs['abam'] = abam_path
            send_kwargs['a'] = abam_path

        if 'a' not in send_kwargs or 'b' not in send_kwargs:
            continue

        # If abam, makes a BedTool out of it anyway.
        orig_a = pybedtools.example_bedtool(send_kwargs['a'])
        orig_b = pybedtools.example_bedtool(send_kwargs['b'])
        del send_kwargs['a']
        del send_kwargs['b']

        a_isbam = a_isbam or bool(orig_a._isbam)
        b_isbam = bool(orig_b._isbam)

        for kind_a, kind_b in itertools.permutations(input_kinds, 2):
            unsupported_a = a_isbam and (kind_a not in supported_bam)
            unsupported_b = b_isbam and (kind_b not in supported_bam)
            if unsupported_a or unsupported_b:
                continue

            # Convert to file/generator/stream
            bedtool = converter[kind_a](orig_a)
            send_kwargs['b'] = converter[kind_b](orig_b)

            kind = (
                'a=%(kind_a)s, b=%(kind_b)s abam=%(a_isbam)s bbam=%(b_isbam)s'
                % {'kind_a': kind_a, 'kind_b': kind_b,
                   'a_isbam': a_isbam, 'b_isbam': b_isbam})

            f = partial(run, method, bedtool, expected, **send_kwargs)

            # Meaningful description
            f.description = '%(method)s, %(kind)s, %(send_kwargs)s' % {
                'method': method, 'kind': kind, 'send_kwargs': send_kwargs}
            yield (f, )
def test_gzipped_output():
    """Saving a.bed with ``compressed=True`` must round-trip its text exactly."""
    example_path = pybedtools.example_filename('a.bed')
    compressed_bedtool = pybedtools.BedTool(example_path).saveas(
        compressed=True)

    # Open gzipped file in text mode
    with gzip.open(compressed_bedtool.fn, 'rt') as gz_file:
        decompressed_text = gz_file.read()
    with open(example_path) as plain_file:
        source_text = plain_file.read()

    assert source_text == decompressed_text
def test_getting_example_beds():
    """Check example-file lookup helpers and their errors for bad names."""
    assert 'a.bed' in pybedtools.list_example_files()

    a_fn = pybedtools.example_filename('a.bed')
    assert a_fn == os.path.join(testdir, 'data', 'a.bed')

    a = pybedtools.example_bedtool('a.bed')
    assert a.fn == os.path.join(testdir, 'data', 'a.bed')

    # complain appropriately if nonexistent paths are asked for
    for func in (pybedtools.example_filename,
                 pybedtools.example_bedtool,
                 pybedtools.set_tempdir):
        assert_raises(ValueError, func, 'nonexistent')
def test_i_methods():
    """
    Generator that yields tests, inserting different versions of `i` as needed
    """
    for method, send_kwargs, expected in parse_yaml(config_fn):
        i_isbam = False
        if 'ibam' in send_kwargs:
            i_isbam = True
            send_kwargs['ibam'] = pybedtools.example_filename(
                send_kwargs['ibam'])
            send_kwargs['i'] = send_kwargs['ibam']

        # Cases supplying both `a` and `b` are skipped by this generator.
        if ('a' in send_kwargs) and ('b' in send_kwargs):
            continue

        if ('i' not in send_kwargs) and ('ibam' not in send_kwargs):
            continue

        if 'files' in send_kwargs:
            send_kwargs['files'] = [
                pybedtools.example_filename(fn) for fn in send_kwargs['files']
            ]

        orig_i = pybedtools.example_bedtool(send_kwargs['i'])
        if orig_i._isbam:
            i_isbam = True
        del send_kwargs['i']

        for kind_i in ('filename', 'generator', 'stream', 'gzip'):
            # BAM input is only converted to the kinds in `supported_bam`.
            if i_isbam and (kind_i not in supported_bam):
                continue

            bedtool = converter[kind_i](orig_i)

            # Explicit mappings instead of `% locals()`, so the description
            # does not depend on the names of local variables.
            kind = 'i=%(kind_i)s ibam=%(i_isbam)s' % {
                'kind_i': kind_i,
                'i_isbam': i_isbam,
            }
            f = partial(run, method, bedtool, expected, **send_kwargs)
            f.description = '%(method)s, %(kind)s, %(send_kwargs)s' % {
                'method': method,
                'kind': kind,
                'send_kwargs': send_kwargs,
            }
            yield (f, )
def test_bed_methods():
    """
    Generator that yields tests, inserting different versions of `bed` as needed
    """
    # A case containing any of these keys is skipped by this generator.
    ignore = ['a', 'b', 'abam', 'i']

    for method, send_kwargs, expected in parse_yaml(config_fn):
        if any(key in send_kwargs for key in ignore):
            continue

        if 'bed' not in send_kwargs:
            continue

        # Resolve example-file names to full paths.
        if 'files' in send_kwargs:
            send_kwargs['files'] = [
                pybedtools.example_filename(fn) for fn in send_kwargs['files']
            ]

        if 'bams' in send_kwargs:
            send_kwargs['bams'] = [
                pybedtools.example_filename(fn) for fn in send_kwargs['bams']
            ]

        if 'fi' in send_kwargs:
            send_kwargs['fi'] = pybedtools.example_filename(send_kwargs['fi'])

        orig_bed = pybedtools.example_bedtool(send_kwargs['bed'])
        del send_kwargs['bed']

        for kind_bed in ('filename', 'generator', 'stream', 'gzip'):
            bed = converter[kind_bed](orig_bed)

            # Explicit mappings instead of `% locals()`; the format strings
            # themselves are unchanged.
            kind = 'i=%(kind_bed)s' % {'kind_bed': kind_bed}
            f = partial(run, method, bed, expected, **send_kwargs)
            f.description = '%(method)s, %(kind)s, %(send_kwargs)s' % {
                'method': method,
                'kind': kind,
                'send_kwargs': send_kwargs,
            }
            yield (f, )
def test_tabix():
    """Check `tabix()` and `tabix_intervals()` on the `a.bed` example.

    The `a.bed.gz` and `a.bed.gz.tbi` example paths are removed afterwards if
    they exist, whether or not the assertions pass.
    """
    a = pybedtools.example_bedtool('a.bed')
    try:
        t = a.tabix()
        assert t._tabixed()

        results = str(t.tabix_intervals('chr1:99-200'))
        # Function-call form: valid on Python 3 and prints the same text on
        # Python 2.
        print(results)
        assert results == fix("""
        chr1 1 100 feature1 0 +
        chr1 100 200 feature2 0 +
        chr1 150 500 feature3 0 -""")

        assert str(t.tabix_intervals(a[2])) == fix("""
        chr1 100 200 feature2 0 +
        chr1 150 500 feature3 0 -""")
    finally:
        # clean up
        fns = [
            pybedtools.example_filename('a.bed.gz'),
            pybedtools.example_filename('a.bed.gz.tbi'),
        ]
        for fn in fns:
            if os.path.exists(fn):
                os.unlink(fn)
def test_a_b_methods():
    """
    Generator that yields tests, inserting different versions of `a` and `b` as
    needed
    """
    for method, send_kwargs, expected in parse_yaml(config_fn):
        a_isbam = False
        b_isbam = False

        if 'abam' in send_kwargs:
            resolved_abam = pybedtools.example_filename(send_kwargs['abam'])
            send_kwargs['abam'] = resolved_abam
            send_kwargs['a'] = resolved_abam
            a_isbam = True

        has_both = ('a' in send_kwargs) and ('b' in send_kwargs)
        if not has_both:
            continue

        # If abam, makes a BedTool out of it anyway.
        name_a = send_kwargs.pop('a')
        name_b = send_kwargs.pop('b')
        orig_a = pybedtools.example_bedtool(name_a)
        orig_b = pybedtools.example_bedtool(name_b)

        if orig_a._isbam:
            a_isbam = True
        if orig_b._isbam:
            b_isbam = True

        kind_pairs = itertools.permutations(
            ('filename', 'generator', 'stream', 'gzip'), 2)
        for kind_a, kind_b in kind_pairs:
            if a_isbam and (kind_a not in supported_bam):
                continue
            if b_isbam and (kind_b not in supported_bam):
                continue

            # Convert to file/generator/stream
            first = converter[kind_a](orig_a)
            second = converter[kind_b](orig_b)

            kind = 'a=%(kind_a)s, b=%(kind_b)s abam=%(a_isbam)s bbam=%(b_isbam)s' % dict(
                kind_a=kind_a, kind_b=kind_b,
                a_isbam=a_isbam, b_isbam=b_isbam)

            send_kwargs['b'] = second
            case = partial(run, method, first, expected, **send_kwargs)

            # Meaningful description
            case.description = '%(method)s, %(kind)s, %(send_kwargs)s' % dict(
                method=method, kind=kind, send_kwargs=send_kwargs)
            yield (case, )
def get_trinuc(bed_i, fa_loc):
    """Get trinucleotide context per DNV.

    Returns a status string. If the output path derived from `bed_i` (its
    last four characters dropped) already exists, nothing is recomputed.
    """
    out_path = 'trinuc_out/{}_trinuc.txt'.format(bed_i[:-4])
    if os.path.exists(out_path):
        return 'trinuc already made for ' + bed_i

    # Apply mod_start_end to every interval (per the original note: start
    # becomes start-1, end becomes end+1), then save the result.
    widened = pybedtools.BedTool(bed_i).each(mod_start_end).saveas()

    # run getfasta using pybedtools wrapper
    fasta = pybedtools.example_filename(fa_loc)
    with_sequence = widened.sequence(fi=fasta)

    # save output sequence
    write_trinuc(with_sequence, out_path)
    return bed_i + ' trinuc done'
def test_i_methods():
    """
    Generator that yields tests, inserting different versions of `i` as needed
    """
    kinds = ('filename', 'generator', 'stream', 'gzip')

    for method, send_kwargs, expected in parse_yaml(config_fn):
        i_isbam = False
        if 'ibam' in send_kwargs:
            i_isbam = True
            send_kwargs['ibam'] = pybedtools.example_filename(send_kwargs['ibam'])
            send_kwargs['i'] = send_kwargs['ibam']

        # Cases supplying both `a` and `b` are skipped by this generator.
        if ('a' in send_kwargs) and ('b' in send_kwargs):
            continue

        if ('i' not in send_kwargs) and ('ibam' not in send_kwargs):
            continue

        if 'files' in send_kwargs:
            send_kwargs['files'] = [pybedtools.example_filename(fn)
                                    for fn in send_kwargs['files']]

        orig_i = pybedtools.example_bedtool(send_kwargs['i'])
        if orig_i._isbam:
            i_isbam = True
        del send_kwargs['i']

        for kind_i in kinds:
            # BAM input is only converted to the kinds in `supported_bam`.
            if i_isbam and (kind_i not in supported_bam):
                continue

            converted = converter[kind_i](orig_i)

            # Explicit mappings instead of `% locals()`, so the description
            # does not depend on the names of local variables.
            kind = 'i=%(kind_i)s ibam=%(i_isbam)s' % {
                'kind_i': kind_i,
                'i_isbam': i_isbam,
            }
            f = partial(run, method, converted, expected, **send_kwargs)
            f.description = '%(method)s, %(kind)s, %(send_kwargs)s' % {
                'method': method,
                'kind': kind,
                'send_kwargs': send_kwargs,
            }
            yield (f, )
def test_cat():
    """Check `cat()` with BedTool and filename arguments, and `postmerge=False`."""
    bed_a = pybedtools.example_bedtool('a.bed')
    bed_b = pybedtools.example_bedtool('b.bed')
    path_b = pybedtools.example_filename('b.bed')

    # Passing a BedTool or its filename should give equal results
    from_bedtool = bed_a.cat(bed_b)
    from_path = bed_a.cat(path_b)
    assert from_bedtool == from_path

    expected = fix("""
    chr1 1 500
    chr1 800 950
    """)
    assert bed_a.cat(bed_b) == expected

    bed_a = pybedtools.example_bedtool('a.bed')
    bed_b = pybedtools.example_bedtool('b.bed')
    unmerged = bed_a.cat(bed_b, postmerge=False)

    # With postmerge=False the lengths should simply add up
    n_a = len(bed_a)
    n_b = len(bed_b)
    n_unmerged = len(unmerged)
    assert n_a + n_b == n_unmerged, (n_a, n_b, n_unmerged)
def test_gzipping_is_default_when_extension_is_dot_gz():
    """Check that `saveas` to a `.gz` path yields gzip-readable content.

    No `compressed` argument is passed; the file is read back with `gzip`
    and compared with the original text.
    """
    _filename = pybedtools.example_filename('a.bed')
    with open(_filename) as f:
        expected_content = f.read()

    temp_fd, temp_filename = tempfile.mkstemp(suffix='.gz')
    # mkstemp returns an already-open OS-level descriptor. Only the path is
    # used below, so close it right away instead of leaking it.
    os.close(temp_fd)
    try:
        bedtool = pybedtools.BedTool(_filename)
        bedtool.saveas(fn=temp_filename)
        with gzip.open(temp_filename, 'rt') as gf:
            # gzip will fail next line if file is not gzipped
            actual_content = gf.read()
        assert expected_content == actual_content
    finally:
        if os.path.isfile(temp_filename):
            os.unlink(temp_filename)
def test_gzipping_can_be_turned_off_even_for_dot_gz():
    """Check that `compressed=False` gives plain text even for a `.gz` path.

    The saved file is read back with a plain `open` and compared with the
    original text.
    """
    _filename = pybedtools.example_filename('a.bed')
    with open(_filename) as f:
        expected_content = f.read()

    temp_fd, temp_filename = tempfile.mkstemp(suffix='.gz')
    # mkstemp returns an already-open OS-level descriptor. Only the path is
    # used below, so close it right away instead of leaking it.
    os.close(temp_fd)
    try:
        bedtool = pybedtools.BedTool(_filename)
        bedtool.saveas(fn=temp_filename, compressed=False)
        with open(temp_filename) as non_gz_f:
            # actual content will be jumbled if non_gz_f is unset
            actual_content = non_gz_f.read()
        assert expected_content == actual_content
    finally:
        if os.path.isfile(temp_filename):
            os.unlink(temp_filename)
def test_gzipping_can_be_turned_off_even_for_dot_gz():
    """Check that `compressed=False` gives plain text even for a `.gz` path.

    The saved file is read back with a plain `open` and compared with the
    original text.
    """
    _filename = pybedtools.example_filename('a.bed')
    with open(_filename) as f:
        expected_content = f.read()

    temp_fd, temp_filename = tempfile.mkstemp(suffix='.gz')
    # mkstemp returns an already-open OS-level descriptor. Only the path is
    # used below, so close it right away instead of leaking it.
    os.close(temp_fd)
    try:
        bedtool = pybedtools.BedTool(_filename)
        bedtool.saveas(fn=temp_filename, compressed=False)
        with open(temp_filename) as non_gz_f:
            # actual content will be jumbled if non_gz_f is unset
            actual_content = non_gz_f.read()
        assert_equal(expected_content, actual_content)
    finally:
        if os.path.isfile(temp_filename):
            os.unlink(temp_filename)
def test_gzipping_is_default_when_extension_is_dot_gz():
    """Check that `saveas` to a `.gz` path yields gzip-readable content.

    No `compressed` argument is passed; the file is read back with `gzip`
    and compared with the original text.
    """
    _filename = pybedtools.example_filename('a.bed')
    with open(_filename) as f:
        expected_content = f.read()

    temp_fd, temp_filename = tempfile.mkstemp(suffix='.gz')
    # mkstemp returns an already-open OS-level descriptor. Only the path is
    # used below, so close it right away instead of leaking it.
    os.close(temp_fd)
    try:
        bedtool = pybedtools.BedTool(_filename)
        bedtool.saveas(fn=temp_filename)
        with gzip.open(temp_filename, 'rt') as gf:
            # gzip will fail next line if file is not gzipped
            actual_content = gf.read()
        assert_equal(expected_content, actual_content)
    finally:
        if os.path.isfile(temp_filename):
            os.unlink(temp_filename)
def test_bam_to_fastq():
    """Compare `bam_to_fastq` output for `small.bam` with `small.fastq`."""
    x = pybedtools.example_bedtool('small.bam')
    tmpfn = pybedtools.BedTool._tmp()
    y = x.bam_to_fastq(fq=tmpfn)

    # Context managers close both handles instead of leaving that to garbage
    # collection.
    with open(y.fastq) as observed_file:
        observed = observed_file.read()
    with open(pybedtools.example_filename('small.fastq')) as expected_file:
        expected = expected_file.read()

    assert observed == expected
def test_igv():
    """Compare the file at `igv_script` with the stored `a.igv_script`."""
    a = pybedtools.example_bedtool('a.bed')
    a = a.igv()

    # Context managers close both handles instead of leaving that to garbage
    # collection.
    with open(a.igv_script) as obs_file:
        obs = obs_file.read()
    with open(pybedtools.example_filename('a.igv_script')) as exp_file:
        exp = exp_file.read()

    assert obs == exp
conf['method'] = method conf['method_kwargs'] = method_kwargs conf.update(kwargs) super(ConfiguredBedToolsDemo, self).__init__(**conf) if __name__ == "__main__": """ bts = [ pybedtools.example_bedtool('BEAF_Kc_Bushey_2009.bed'), pybedtools.example_bedtool('CTCF_Kc_Bushey_2009.bed'), pybedtools.example_bedtool('Cp190_Kc_Bushey_2009.bed'), pybedtools.example_bedtool('SuHw_Kc_Bushey_2009.bed'), ] names = ['BEAF', 'CTCF', 'Cp190', 'Su(Hw)'] #bts = [ # pybedtools.example_bedtool('a.bed'), # pybedtools.example_bedtool('b.bed')] #names = ['a','b'] d, m = binary_heatmap(bts, names) print binary_summary(d) """ conf_file = pybedtools.example_filename('democonfig.yaml') data_path = pybedtools.example_filename("") # dir name ax1 = ConfiguredBedToolsDemo(conf_file, method='intersect', method_kwargs={}, data_path=data_path).plot() ax2 = ConfiguredBedToolsDemo(conf_file, method='intersect', method_kwargs=dict(u=True), data_path=data_path).plot() plt.show()