def BEDIterator(handle):
    """Generate SeqFeature objects from tab-delimited, nine-column lines.

    Despite its name, this parser expects GFF3-style records: every data
    line must split on tabs into exactly nine fields (ref, source, type,
    start, end, score, strand, frame, attributes), and the attributes
    field is a ';'-separated list of ``key=value`` pairs.

    Blank lines and lines starting with '#' are skipped wherever they
    occur; iteration stops only at end of file.

    handle - input file-like object providing readline(). Its ``name``
             attribute, when present, is used in error messages.

    Each yielded SeqFeature is built from int(start) and int(end) (no
    coordinate adjustment is applied), the type, ref and source columns,
    and the strand column looked up in _gff3_strand_to_numeric. Its ``id``
    and ``name`` come from the "ID" and "Name" attributes (None when
    absent), and the full attribute dictionary is stored on
    ``attributes``. The score and frame columns are not used.

    Raises FormatError when a data line does not have nine tab-separated
    fields or when an attribute entry lacks an '='.
    """
    # Not every file-like object carries a name (e.g. in-memory buffers).
    source_name = getattr(handle, "name", "<unknown>")
    line_no = 0
    while True:
        raw_line = handle.readline()
        # readline() returns an empty string only at end of file; a blank
        # line still contains its newline, so test before stripping.
        if not raw_line:
            return
        line_no += 1
        line = raw_line.strip()
        if not line or line[0] == "#":
            continue

        fields = line.split("\t")
        attr_pairs = []
        if len(fields) == 9 and fields[8] != ".":
            # "." is the GFF3 placeholder for an empty column; empty
            # segments (e.g. from ";;") carry no information either.
            attr_pairs = [pair for pair in fields[8].strip(";").split(";")
                          if pair.strip()]
        if len(fields) != 9 or any("=" not in pair for pair in attr_pairs):
            raise FormatError("Problem with line %d in %s. Line was\n%s" %
                              (line_no, source_name, line))

        ref, source, feature_type, start, end = fields[:5]
        strand = fields[6]
        # Split on the first '=' only so values may themselves contain '='.
        attr_dict = dict(pair.split("=", 1) for pair in attr_pairs)

        result = SeqFeature(location=FeatureLocation(int(start), int(end)),
                            type=feature_type,
                            strand=_gff3_strand_to_numeric[strand],
                            ref=ref, ref_db=source)
        result.id = attr_dict.get("ID", None)
        result.name = attr_dict.get("Name", None)
        result.attributes = attr_dict  # not an official property of SeqFeature.
        yield result
def GTFIterator(handle):
    """Generate SeqFeature objects from the lines of a GTF-style file.

    Every data line must split on tabs into exactly nine fields (ref,
    source, type, start, end, score, strand, frame, attributes). The
    attributes field is a ';'-separated list of ``key value`` entries,
    where the key is separated from the value by the first space and
    surrounding double quotes are removed from the value. An entry with
    no space in it is stored under the 'transcript_id' key.

    Blank lines and lines starting with '#' are skipped.

    handle - iterable of text lines (e.g. an open file). Its ``name``
             attribute, when present, is used in error messages.

    Each yielded SeqFeature is built from int(start) and int(end) (no
    coordinate adjustment is applied), the type, ref and source columns,
    and the strand column looked up in _gff3_strand_to_numeric. Its ``id``
    and ``name`` are both set to the 'transcript_id' attribute (None when
    absent); the attribute dictionary is stored on ``attributes`` and the
    raw frame column on ``frame``. The score column is not used.

    Raises ValueError when a data line does not have nine tab-separated
    fields.
    """
    # Not every file-like object carries a name (e.g. in-memory buffers).
    source_name = getattr(handle, 'name', '<unknown>')
    # Plain iteration ends cleanly at EOF; calling handle.next() by hand
    # would leak StopIteration out of the generator body.
    for line_no, raw_line in enumerate(handle, 1):
        line = raw_line.strip()
        if not line or line[0] == '#':
            continue

        fields = line.split('\t')
        if len(fields) != 9:
            raise ValueError('Problem with line %d in %s. Line was\n%s' %
                             (line_no, source_name, line))
        ref, source, feature_type, start, end = fields[:5]
        strand, frame, attributes = fields[6:]

        attr_dict = {}
        for pair in attributes.strip(';').split(';'):
            pair = pair.strip()
            if not pair:
                # An empty segment (e.g. from ";;") must not overwrite a
                # real transcript_id through the fallback below.
                continue
            key, separator, value = pair.partition(' ')
            if separator:
                attr_dict[key] = value.strip('"')
            else:
                # Best-effort: a bare token is taken to be the transcript id.
                attr_dict['transcript_id'] = pair

        result = SeqFeature(location=FeatureLocation(int(start), int(end)),
                            type=feature_type,
                            strand=_gff3_strand_to_numeric[strand],
                            ref=ref, ref_db=source)
        result.name = result.id = attr_dict.get('transcript_id', None)
        result.attributes = attr_dict  # not an official property of SeqFeature.
        result.frame = frame
        yield result