Exemplo n.º 1
0
def iterator_over_file_from_extension(filename):
    """Build a record iterator for *filename*, chosen from its dotted name.

    The name is split on "."; a final component of "gz" selects
    ``gzip.open`` as the opener, anything else the builtin ``open``.
    The opener is handed, together with the filename, to
    ``iterator_over_file`` (defined elsewhere in this module).

    If "fa" or "fasta" appears among the dot-separated components, the
    result is wrapped in ``jbio.fasta.record_iterator``; otherwise, if
    "fq" or "fastq" appears, in ``jbio.fastq.record_iterator``.

    Raises:
        Exception: when none of those components is present; the message
            quotes the final dot-separated component.
    """
    import gzip

    parts = filename.split(".")
    last_part = parts[-1]

    if last_part == "gz":
        opener = gzip.open
    else:
        opener = open

    line_source = iterator_over_file(filename, opener)

    if "fa" in parts or "fasta" in parts:
        from jbio.fasta import record_iterator as make_records
        return make_records(line_source)

    if "fq" in parts or "fastq" in parts:
        from jbio.fastq import record_iterator as make_records
        return make_records(line_source)

    raise Exception("Unknown File Extension '%s'" % last_part)
Exemplo n.º 2
0
# Exactly two positional arguments are required; otherwise print the usage
# line and exit with status 1.
if len(sys.argv) != 3:
    print("gene_fasta.py input.fa input.gff")
    sys.exit(1)

# Attribute keys written into each header, in output order.
# A wider selection that is currently disabled:
#   ["ID", "Alias", "orf_classification", "gene", "Note"]
FIELDS = ["ID", "Note"]

# First argument: FASTA path; second argument: GFF path.
fa_fn = sys.argv[1]
gff_fn = sys.argv[2]

#read fasta records into memory
def fasta_clean_getter(fasta_entry):
    """Reduce a FASTA record to an ``(identifier, sequence)`` pair.

    The identifier is the first whitespace-delimited token of
    ``fasta_entry.name``; the sequence is ``fasta_entry.seq`` unchanged.
    A name with no tokens raises IndexError.
    """
    tokens = fasta_entry.name.split()
    identifier = tokens[0]
    return (identifier, fasta_entry.seq)

# Load every FASTA record into a dict keyed by the first word of its name
# (see fasta_clean_getter).
fasta_records = dict(
    fasta_clean_getter(entry)
    for entry in fasta_iterator(iterator_over_file(fa_fn))
)

# Lazily keep only the GFF records whose feature is exactly "gene".
gene_entries = (
    record
    for record in gff_iterator(iterator_over_file(gff_fn))
    if record.feature == "gene"
)

# For every gene record, build a FASTA-style header from its attributes and
# print it.
for gene_record in gene_entries:
    # Turn the attribute column ("key=value;key=value;...") into a dict.
    # partition("=") splits on the first "=" only, so a value that itself
    # contains "=" stays intact; [::2] keeps (key, value) and drops the
    # separator. Blank segments (e.g. the one left by a trailing ";") are
    # skipped, and a segment without "=" maps to an empty value. Each of
    # these cases previously made dict() raise ValueError.
    attrs = dict(
        segment.partition("=")[::2]
        for segment in gene_record.attribute.split(";")
        if segment.strip()
    )

    # The header starts with the Name attribute (KeyError if it is absent).
    header = ">" + attrs["Name"]
    fields = FIELDS

    # Attributes missing from the record are rendered as the text "None".
    # Only "Note" goes through unquote(), which is imported elsewhere in the
    # file (presumably urllib's percent-decoding -- confirm).
    field_getter = (
        unquote(attrs.get(name, "None")) if name == "Note"
        else attrs.get(name, "None")
        for name in fields
    )
    header += " " + " ".join(
        "[%s=%s]" % pair for pair in izip(fields, field_getter))

    # start/end are shifted down by one and the slice end is exclusive, so
    # the record's coordinates are treated as 1-based and inclusive.
    start, end = gene_record.start - 1, gene_record.end - 1
    # NOTE(review): seq is extracted but not emitted in this part of the
    # script -- confirm whether it should be printed after the header.
    seq = fasta_records[gene_record.seqname][start:end + 1]
    print(header)
Exemplo n.º 3
0
#!/usr/bin/env python

import sys

from jbio.io.file import iterator_over_file
from jbio.fasta import record_iterator as fasta_iterator

# Exactly one argument (the FASTA path) is required; otherwise exit with the
# usage string as the exit message.
if len(sys.argv) != 2:
    sys.exit("fasta_to_line.py in.fa")


# Write each record as a single line: name, a tab, then the sequence.
for record in fasta_iterator(iterator_over_file(sys.argv[1])):
    print("\t".join((record.name, record.seq)))
Exemplo n.º 4
0
    sys.exit(1)

# Attribute keys written into each output header, in this order.
# The commented-out line below is a wider selection that is currently disabled.
#FIELDS = ["ID","Alias","orf_classification","gene","Note"]
FIELDS = ["ID", "Note"]

# First command-line argument is the FASTA path, second is the GFF path.
fa_fn, gff_fn = sys.argv[1:3]


#read fasta records into memory
def fasta_clean_getter(fasta_entry):
    """Return ``(short_name, sequence)`` for one FASTA record.

    ``short_name`` is everything in ``fasta_entry.name`` up to the first
    run of whitespace (leading whitespace is ignored); ``sequence`` is
    ``fasta_entry.seq`` as-is. An empty or all-whitespace name raises
    IndexError.
    """
    short_name = fasta_entry.name.split(None, 1)[0]
    sequence = fasta_entry.seq
    return (short_name, sequence)


# Read the whole FASTA file into memory as {first word of name: sequence},
# using fasta_clean_getter to produce each pair.
fasta_records = dict(
    fasta_clean_getter(fasta_entry)
    for fasta_entry in fasta_iterator(iterator_over_file(fa_fn))
)

# Lazy stream of the GFF records whose feature equals "gene".
gene_entries = (
    gff_record
    for gff_record in gff_iterator(iterator_over_file(gff_fn))
    if gff_record.feature == "gene"
)

# Build a FASTA-style header for every gene record.
for gene_record in gene_entries:
    # Split the attribute string on ";" and each piece on "=" to form a dict.
    # NOTE(review): a piece that is empty or contains more than one "=" makes
    # dict() raise ValueError here.
    attrs = dict(map(lambda x: x.split("="), gene_record.attribute.split(";")))
    # The header starts with the Name attribute (KeyError if it is absent).
    header = ">" + attrs["Name"]
    fields = FIELDS
    # Attributes missing from the record are rendered as the text "None".
    # Only "Note" goes through unquote(), which is imported elsewhere in the
    # file (presumably urllib's percent-decoding -- confirm).
    field_getter_func = lambda x: unquote(attrs.get(
        x, "None")) if x == "Note" else attrs.get(x, "None")
    field_getter = imap(field_getter_func, fields)
    # Append one "[field=value]" token per entry of FIELDS, space separated.
    header += " " + " ".join(
        imap(lambda fv: "[%s=%s]" % fv, izip(fields, field_getter)))

    # Shift both coordinates down by one (the record's start/end appear to be
    # 1-based; how they are used afterwards is not visible here).
    start, end = gene_record.start - 1, gene_record.end - 1
Exemplo n.º 5
0
#!/usr/bin/env python

import sys

from itertools import imap

from jbio.io.file import iterator_over_file
from jbio.fasta import record_iterator as fasta_iterator

##Create Kmers

# Exactly two arguments are required: the k-mer size, then the FASTA path.
if len(sys.argv) != 3:
    sys.exit("Usage: kmer.py k-size in.fa\n")

fn = sys.argv[2]

# Validate k up front: a non-integer used to crash with a traceback, and a
# zero or negative k was accepted and produced blank or nonsensical slices.
try:
    ksize = int(sys.argv[1])
except ValueError:
    sys.exit("kmer.py: k-size must be an integer, got '%s'\n" % sys.argv[1])
if ksize < 1:
    sys.exit("kmer.py: k-size must be at least 1, got %d\n" % ksize)

# Print every window of length ksize of every record, one per line.
for record in fasta_iterator(iterator_over_file(fn)):
    seq = record.seq
    # Window start offsets; the range is empty when seq is shorter than ksize,
    # so such records print nothing.
    starts = range(len(seq) - ksize + 1)
    kmers = (seq[start:start + ksize] for start in starts)
    for kmer in kmers:
        print(kmer)