def __init__(self, filename):
    """Parse the exonerate output file *filename* into this object.

    The first two lines of the file are matched against the
    ``Command line: [...]`` and ``Hostname: [...]`` headers; the bracketed
    text is stored in ``self.commandline`` and ``self.hostname``.  The rest
    of the file is scanned line by line:

    * a line starting with ``C4_ALIGNMENT`` skips the following line and
      appends ``Query(f)`` to ``self.queries``;
    * a line starting with ``'vulgar:'`` stores the remainder of the line
      (stripped) as ``vulgar`` on the most recent query;
    * a line starting with ``START_GFF`` collects every line up to the one
      starting with ``END_GFF`` and stores ``parse_gff2`` of those lines as
      ``gff`` on the most recent query.

    Raises:
        AttributeError: a header line does not match its pattern
            (``re.match`` returned ``None``).
        IndexError: a ``vulgar:`` or GFF section appears before any
            alignment has been read.
        StopIteration: the file ends right after an alignment marker or
            inside a GFF section.
    """
    # The context manager guarantees the file handle is released, including
    # when one of the header matches or a nested parser raises.
    with open(filename) as f:
        self.commandline = re.match(
            r'Command line: \[([^]]*)', f.readline()).group(1)
        self.hostname = re.match(
            r'Hostname: \[([^]]*)', f.readline()).group(1)
        self.queries = []
        # Iterating the file and calling next(f) in the body advance the
        # same underlying iterator, so nested readers pick up where the
        # loop left off.
        for line in f:
            if line.startswith(C4_ALIGNMENT):
                next(f)  # skip ------
                # NOTE(review): assumes Query reads its section from f
                # inside its constructor -- confirm against Query.
                self.queries.append(Query(f))
            elif line.startswith('vulgar:'):
                # 7 == len('vulgar:')
                self.queries[-1].vulgar = line[7:].strip()
            elif line.startswith(START_GFF):
                gff = []
                line = next(f)
                while not line.startswith(END_GFF):
                    gff.append(line)
                    line = next(f)
                self.queries[-1].gff = parse_gff2(iter(gff))
def parse_exonerate(filename):
    """Return ``parse_gff2`` applied to ``exonerate_file(filename)``."""
    exonerate_source = exonerate_file(filename)
    return parse_gff2(exonerate_source)
# Tail of the option checks followed by the sequence-extraction step.
# The leading print / sys.exit(1) pair reports that both gff2 and gff3 were
# given; the condition guarding it lies outside this excerpt.
#
# Steps visible here:
#   1. Exit with status 1 when no fasta file was specified (o.fas is None).
#   2. Write to sys.stdout unless o.output is set, in which case that path is
#      opened for writing.
#   3. Load the fasta database from o.fas, then parse the annotations with the
#      gff2 parser when o.gff2 is set and with the gff3 parser otherwise.
#   4. For every parsed record build a fasta.Sequence whose first argument is
#      "<reference_sequence> <start> <stop> <strand>" and whose second is the
#      slice sequence[int(start) - 1:int(stop)] of the fasta entry looked up
#      by reference_sequence; print them, newline-joined, to o.output.
#
# g.start and g.stop are both concatenated with strings and passed to int(),
# so they are expected to be strings.
# NOTE(review): the "- 1" on start presumably converts 1-based inclusive GFF
# coordinates into a Python slice -- confirm.
# NOTE(review): a KeyError inside the try block (presumably from the
# fas[...] lookup) is only reported on stdout; nothing visible here exits
# with a non-zero status afterwards, and the output file opened above is not
# explicitly closed in this excerpt.
print "Specify either gff2 or gff3 but not both." sys.exit(1) if o.fas is None: print "Specify the fasta database file." sys.exit(1) if o.output is None: o.output = sys.stdout else: o.output = file(o.output, "w") fas = fasta.Fasta() fas.read_from(o.fas) if o.gff2: gff = parse_gff2.parse_gff2(o.gff2) else: gff = parse_gff3.parse_gff3(o.gff3) try: l = [fasta.Sequence( g.reference_sequence + ' ' + g.start + ' ' + g.stop + ' ' + g.strand, fas[g.reference_sequence].sequence[int(g.start) - 1:int(g.stop)]) for g in gff] print >> o.output, '\n'.join(imap(str, l)) except KeyError, ke: print "Sequence was not found in fasta file :", str(ke)
# Tail of the option checks followed by the sequence-extraction step.
# The leading sys.exit(1) closes a check whose condition and message lie
# outside this excerpt.
#
# Steps visible here:
#   1. Exit with status 1 when no fasta file was specified (o.fas is None).
#   2. Write to sys.stdout unless o.output is set, in which case that path is
#      opened for writing.
#   3. Load the fasta database from o.fas, then parse the annotations with the
#      gff2 parser when o.gff2 is set and with the gff3 parser otherwise.
#   4. For every parsed record build a fasta.Sequence whose first argument is
#      "<reference_sequence> <start> <stop> <strand>" and whose second is the
#      slice sequence[int(start) - 1:int(stop)] of the fasta entry looked up
#      by reference_sequence; print them, newline-joined, to o.output.
#
# g.start and g.stop are both concatenated with strings and passed to int(),
# so they are expected to be strings.
# NOTE(review): the "- 1" on start presumably converts 1-based inclusive GFF
# coordinates into a Python slice -- confirm.
# NOTE(review): a KeyError inside the try block (presumably from the
# fas[...] lookup) is only reported on stdout; nothing visible here exits
# with a non-zero status afterwards, and the output file opened above is not
# explicitly closed in this excerpt.
sys.exit(1) if o.fas is None: print "Specify the fasta database file." sys.exit(1) if o.output is None: o.output = sys.stdout else: o.output = file(o.output, "w") fas = fasta.Fasta() fas.read_from(o.fas) if o.gff2: gff = parse_gff2.parse_gff2(o.gff2) else: gff = parse_gff3.parse_gff3(o.gff3) try: l = [ fasta.Sequence( g.reference_sequence + ' ' + g.start + ' ' + g.stop + ' ' + g.strand, fas[g.reference_sequence].sequence[int(g.start) - 1:int(g.stop)]) for g in gff ] print >> o.output, '\n'.join(imap(str, l)) except KeyError, ke: print "Sequence was not found in fasta file :", str(ke)