def tab_parser(handle, quiet=False):
    """Parse a bare feature table ("tab" file) into a record with features.

    The handle is expected to be positioned at the first line of a feature
    table whose feature keys sit in columns 2-20 and whose continuation
    lines start with "FT" followed by 19 spaces (a 21 column indent).
    No sequence is read; the record's sequence is filled with "N" up to
    the largest feature end position.

    Arguments:
    handle - A handle with the feature table to parse.
    quiet - If true, the scanner's debug messages ("Found start of
    sequence" / "Found end of features") are not printed. The default
    (False) keeps them on.

    Returns the ``data`` attribute of the Bio.GenBank _FeatureConsumer
    after the features have been fed into it.
    """
    import sys

    from Bio.GenBank import Scanner
    from Bio.GenBank import _FeatureConsumer
    from Bio.GenBank.utils import FeatureValueCleaner

    def Si_parse_tab_features(scanner, skip=False):
        """Return the list of parsed features read from the scanner.

        Each feature is whatever ``scanner.parse_feature`` returns for
        the feature key and its lines (expected to be a tuple of key,
        location and qualifiers). Assumes ``scanner.line`` already holds
        the first line of the features table. Lines whose first token is
        "ID" or "source" are skipped.

        If skip is true the feature lines are consumed but not parsed,
        and an empty list is returned.

        Raises ValueError if a "//" marker is met, or if a line has no
        feature key between column 2 and the qualifier indent.
        """
        indent = scanner.FEATURE_QUALIFIER_INDENT
        spacer = scanner.FEATURE_QUALIFIER_SPACER
        features = []
        line = scanner.line
        # An empty string from readline() means EOF, which simply ends
        # the table here.
        while line:
            if line[:scanner.HEADER_WIDTH].rstrip() in scanner.SEQUENCE_HEADERS:
                if scanner.debug:
                    print("Found start of sequence")
                break
            line = line.rstrip()
            if line == "//":
                raise ValueError(
                    "Premature end of features table, marker '//' found")
            if line in scanner.FEATURE_END_MARKERS:
                if scanner.debug:
                    print("Found end of features")
                line = scanner.handle.readline()
                break
            if line[2:indent].strip() == "":
                raise ValueError(
                    "Expected a feature qualifier in line '%s'" % line)
            if line.split()[0] in ("ID", "source"):
                line = scanner.handle.readline()
                continue
            if skip:
                # Consume this feature and its continuation lines.
                line = scanner.handle.readline()
                while line[:indent] == spacer:
                    line = scanner.handle.readline()
                continue
            # Build up a list of the lines making up this feature:
            feature_key = line[2:indent].strip()
            feature_lines = [line[indent:]]
            line = scanner.handle.readline()
            # Continuation lines start with the spacer; blank lines in
            # the midst of a feature are tolerated. The "line and" guard
            # stops at EOF without dropping the feature being built.
            while line and (line[:indent] == spacer or line.rstrip() == ""):
                feature_lines.append(line[indent:].rstrip())
                line = scanner.handle.readline()
            # Every feature gets a dummy /seq qualifier appended.
            feature_lines.append('/seq="N"')
            sys.stdout.flush()
            features.append(scanner.parse_feature(feature_key, feature_lines))
        scanner.line = line
        return features

    def Si_feed(scanner, handle, consumer, do_features=True):
        """Feed the feature table held by the scanner into the consumer.

        Arguments:
        scanner - The scanner positioned at the start of the features.
        handle - Unused; kept so the call signature stays the same.
        consumer - The consumer that should be informed of events.
        do_features - Boolean, should the features be parsed? If false
        the feature lines are read and discarded.

        Always returns True.
        """
        features = Si_parse_tab_features(scanner, skip=not do_features)
        if do_features:
            scanner._feed_feature_table(consumer, features)
        # There is no sequence in the input, so hand over a placeholder
        # and close the record.
        consumer.sequence("N")
        consumer.record_end("//")
        # Replace the placeholder with a run of N long enough to cover
        # the largest feature end position.
        length = 0
        for feature in consumer.data.features:
            end = feature.location.nofuzzy_end
            if end > length:
                length = end
        consumer.data.seq = "N" * length
        return True

    myscanner = Scanner.InsdcScanner()
    myscanner.set_handle(handle)
    myscanner.line = myscanner.handle.readline()
    myscanner.FEATURE_QUALIFIER_INDENT = 21
    myscanner.FEATURE_QUALIFIER_SPACER = "FT" + " " * (
        myscanner.FEATURE_QUALIFIER_INDENT - 2)
    myscanner.debug = not quiet
    consumer = _FeatureConsumer(use_fuzziness=1,
                                feature_cleaner=FeatureValueCleaner())
    Si_feed(myscanner, handle, consumer)
    return consumer.data
def tab_parser(handle, quiet=False):
    """Parse a bare feature table ("tab" file) into a record with features.

    The handle is expected to be positioned at the first line of a feature
    table whose feature keys sit in columns 2-20 and whose continuation
    lines start with "FT" followed by 19 spaces (a 21 column indent).
    No sequence is read; the record's sequence is filled with "N" up to
    the largest feature end position.

    Arguments:
    handle - A handle with the feature table to parse.
    quiet - If true, the scanner's debug messages ("Found start of
    sequence" / "Found end of features") are not printed. The default
    (False) keeps them on.

    Returns the ``data`` attribute of the Bio.GenBank _FeatureConsumer
    after the features have been fed into it.
    """
    import sys

    from Bio.GenBank import Scanner
    from Bio.GenBank import _FeatureConsumer
    from Bio.GenBank.utils import FeatureValueCleaner

    def Drawer_parse_tab_features(scanner, skip=False):
        """Return the list of parsed features read from the scanner.

        Each feature is whatever ``scanner.parse_feature`` returns for
        the feature key and its lines. Assumes ``scanner.line`` already
        holds the first line of the features table.

        If skip is true the feature lines are consumed but not parsed,
        and an empty list is returned.

        Raises ValueError if a "//" marker is met, or if a line has no
        feature key between column 2 and the qualifier indent.
        """
        indent = scanner.FEATURE_QUALIFIER_INDENT
        spacer = scanner.FEATURE_QUALIFIER_SPACER
        features = []
        line = scanner.line
        # An empty string from readline() means EOF, which simply ends
        # the table here.
        while line:
            if line[:scanner.HEADER_WIDTH].rstrip() in scanner.SEQUENCE_HEADERS:
                if scanner.debug:
                    print("Found start of sequence")
                break
            line = line.rstrip()
            if line == "//":
                raise ValueError(
                    "Premature end of features table, marker '//' found")
            if line in scanner.FEATURE_END_MARKERS:
                if scanner.debug:
                    print("Found end of features")
                line = scanner.handle.readline()
                break
            if line[2:indent].strip() == "":
                raise ValueError(
                    "Expected a feature qualifier in line '%s'" % line)
            if skip:
                # Consume this feature and its continuation lines.
                line = scanner.handle.readline()
                while line[:indent] == spacer:
                    line = scanner.handle.readline()
                continue
            # Build up a list of the lines making up this feature:
            feature_key = line[2:indent].strip()
            feature_lines = [line[indent:]]
            line = scanner.handle.readline()
            # Continuation lines start with the spacer; blank lines in
            # the midst of a feature are tolerated. The "line and" guard
            # stops at EOF instead of treating the empty EOF string as
            # one more blank continuation line.
            while line and (line[:indent] == spacer or line.rstrip() == ""):
                feature_lines.append(line[indent:].rstrip())
                line = scanner.handle.readline()
            # Every feature gets a dummy /seq qualifier appended.
            feature_lines.append('/seq="N"')
            sys.stdout.flush()
            features.append(scanner.parse_feature(feature_key, feature_lines))
        scanner.line = line
        return features

    def Drawer_feed(scanner, handle, consumer, do_features=True):
        """Feed the feature table held by the scanner into the consumer.

        Arguments:
        scanner - The scanner positioned at the start of the features.
        handle - Unused; kept so the call signature stays the same.
        consumer - The consumer that should be informed of events.
        do_features - Boolean, should the features be parsed? If false
        the feature lines are read and discarded.

        Always returns True.
        """
        features = Drawer_parse_tab_features(scanner, skip=not do_features)
        if do_features:
            scanner._feed_feature_table(consumer, features)
        # There is no sequence in the input, so hand over a placeholder
        # and close the record.
        consumer.sequence("N")
        consumer.record_end("//")
        # Replace the placeholder with a run of N long enough to cover
        # the largest feature end position.
        length = 0
        for feature in consumer.data.features:
            end = feature.location.nofuzzy_end
            if end > length:
                length = end
        consumer.data.seq = "N" * length
        return True

    myscanner = Scanner.InsdcScanner()
    myscanner.set_handle(handle)
    myscanner.line = myscanner.handle.readline()
    myscanner.FEATURE_QUALIFIER_INDENT = 21
    myscanner.FEATURE_QUALIFIER_SPACER = "FT" + " " * (
        myscanner.FEATURE_QUALIFIER_INDENT - 2)
    myscanner.debug = not quiet
    consumer = _FeatureConsumer(use_fuzziness=1,
                                feature_cleaner=FeatureValueCleaner())
    Drawer_feed(myscanner, handle, consumer)
    return consumer.data