Exemplo n.º 1
0
def tab_parser(handle, quiet=False):
    """Parse a bare "FT"-style feature table from *handle* into a record.

    An ``InsdcScanner`` is pointed at *handle*, configured for a feature
    indent of 21 columns with an ``"FT"`` continuation prefix, and every
    feature found is fed to a ``_FeatureConsumer``.  No header or footer is
    parsed; the record's sequence is replaced by a run of ``"N"`` as long
    as the largest ``nofuzzy_end`` among the parsed features.

    Arguments:
    handle - A file-like object positioned at the first feature line.
    quiet - If true, the "Found ..." progress messages are suppressed.
            The default (False) prints them, as before.

    Returns ``consumer.data``, the record built by the consumer
    (presumably a SeqRecord - confirm against the installed Biopython).

    Raises ValueError on a "//" line inside the table or on a line whose
    feature-key columns (2..21) are empty.
    """
    import sys

    from Bio.GenBank import Scanner
    from Bio.GenBank import _FeatureConsumer
    from Bio.GenBank.utils import FeatureValueCleaner

    def Si_parse_tab_features(scanner, skip=False):
        """Return list of tuples for the features (if present).

        Each feature is returned as whatever ``scanner.parse_feature``
        yields for (feature key, feature lines).  Reading starts at
        ``scanner.line`` and stops at end of file, at a sequence header,
        or just after a feature end marker; ``scanner.line`` is updated to
        the line at which reading stopped.

        With ``skip`` true the lines are consumed but nothing is parsed
        and an empty list is returned.
        """
        indent = scanner.FEATURE_QUALIFIER_INDENT
        spacer = scanner.FEATURE_QUALIFIER_SPACER
        readline = scanner.handle.readline

        features = []
        line = scanner.line
        # End of file simply ends the table (no error is raised for it).
        while line:
            if line[:scanner.HEADER_WIDTH].rstrip() in scanner.SEQUENCE_HEADERS:
                if scanner.debug:
                    print("Found start of sequence")
                break
            line = line.rstrip()
            if line == "//":
                raise ValueError(
                    "Premature end of features table, marker '//' found")
            if line in scanner.FEATURE_END_MARKERS:
                if scanner.debug:
                    print("Found end of features")
                line = readline()
                break

            feature_key = line[2:indent].strip()
            if not feature_key:
                raise ValueError("Expected a feature qualifier in line '%s'" %
                                 line)

            # NOTE(review): this tests the first token of the whole line,
            # not the feature key - on an "FT   source ..." line the first
            # token is "FT".  Behaviour kept as-is; confirm the intent.
            if line.split()[0] in ("ID", "source"):
                line = readline()
                continue

            if skip:
                # Discard this feature's continuation lines.
                line = readline()
                while line and line[:indent] == spacer:
                    line = readline()
                continue

            # Build up a list of the lines making up this feature,
            # coping with blank lines in the midst of a feature.
            feature_lines = [line[indent:]]
            line = readline()
            while line and (line[:indent] == spacer or not line.rstrip()):
                feature_lines.append(line[indent:].rstrip())
                line = readline()

            # Every feature gets an extra /seq="N" qualifier line.
            feature_lines.append('/seq="N"')
            sys.stdout.flush()
            features.append(scanner.parse_feature(feature_key, feature_lines))

        scanner.line = line
        return features

    def Si_feed(scanner, handle, consumer, do_features=True):
        """Feed the feature table into the consumer.

        Arguments:
        scanner - The configured scanner, already holding its first line.
        handle - Unused here; the scanner reads from its own handle.
        consumer - The consumer that should be informed of events.
        do_features - Boolean, should the features be parsed?
                      Skipping the features can be much faster.

        Always returns True.
        """
        if do_features:
            scanner._feed_feature_table(
                consumer, Si_parse_tab_features(scanner, skip=False))
        else:
            Si_parse_tab_features(scanner, skip=True)  # ignore the data

        # There is no real sequence in the input: feed a placeholder and
        # close the record.
        consumer.sequence("N")
        consumer.record_end("//")

        # Make the placeholder sequence long enough to cover every feature.
        length = 0
        for feature in consumer.data.features:
            length = max(length, feature.location.nofuzzy_end)
        consumer.data.seq = "N" * length

        return True

    myscanner = Scanner.InsdcScanner()
    myscanner.set_handle(handle)

    myscanner.line = myscanner.handle.readline()
    myscanner.FEATURE_QUALIFIER_INDENT = 21
    myscanner.FEATURE_QUALIFIER_SPACER = "FT" + " " * (
        myscanner.FEATURE_QUALIFIER_INDENT - 2)

    myscanner.debug = not quiet

    consumer = _FeatureConsumer(use_fuzziness=1,
                                feature_cleaner=FeatureValueCleaner())

    Si_feed(myscanner, handle, consumer)

    return consumer.data
Exemplo n.º 2
0
def tab_parser(handle, quiet=False):
    """Parse a bare "FT"-style feature table from *handle* into a record.

    An ``InsdcScanner`` is pointed at *handle*, configured for a feature
    indent of 21 columns with an ``"FT"`` continuation prefix, and every
    feature found is fed to a ``_FeatureConsumer``.  No header or footer is
    parsed; the record's sequence is replaced by a run of ``"N"`` as long
    as the largest ``nofuzzy_end`` among the parsed features.

    Arguments:
    handle - A file-like object positioned at the first feature line.
    quiet - If true, the "Found ..." progress messages are suppressed.
            The default (False) prints them, as before.

    Returns ``consumer.data``, the record built by the consumer
    (presumably a SeqRecord - confirm against the installed Biopython).

    Raises ValueError on a "//" line inside the table or on a line whose
    feature-key columns (2..21) are empty.
    """
    import sys

    from Bio.GenBank import Scanner
    from Bio.GenBank import _FeatureConsumer
    from Bio.GenBank.utils import FeatureValueCleaner

    def Drawer_parse_tab_features(scanner, skip=False):
        """Return the list of parsed features (if present).

        Each entry is whatever ``scanner.parse_feature`` yields for
        (feature key, feature lines).  Reading starts at ``scanner.line``
        and stops at end of file, at a sequence header, or just after a
        feature end marker; ``scanner.line`` is updated to the line at
        which reading stopped.

        With ``skip`` true the lines are consumed but nothing is parsed
        and an empty list is returned.
        """
        indent = scanner.FEATURE_QUALIFIER_INDENT
        spacer = scanner.FEATURE_QUALIFIER_SPACER
        readline = scanner.handle.readline

        features = []
        line = scanner.line
        # End of file simply ends the table (no error is raised for it).
        while line:
            if line[:scanner.HEADER_WIDTH].rstrip() in scanner.SEQUENCE_HEADERS:
                if scanner.debug:
                    print("Found start of sequence")
                break
            line = line.rstrip()
            if line == "//":
                raise ValueError(
                    "Premature end of features table, marker '//' found")
            if line in scanner.FEATURE_END_MARKERS:
                if scanner.debug:
                    print("Found end of features")
                line = readline()
                break

            feature_key = line[2:indent].strip()
            if not feature_key:
                raise ValueError("Expected a feature qualifier in line '%s'" %
                                 line)

            if skip:
                # Discard this feature's continuation lines.
                line = readline()
                while line and line[:indent] == spacer:
                    line = readline()
                continue

            # Build up a list of the lines making up this feature,
            # coping with blank lines in the midst of a feature.  The
            # ``line and`` guard stops at end of file without appending a
            # spurious empty entry.
            feature_lines = [line[indent:]]
            line = readline()
            while line and (line[:indent] == spacer or not line.rstrip()):
                feature_lines.append(line[indent:].rstrip())
                line = readline()

            # Every feature gets an extra /seq="N" qualifier line.
            feature_lines.append('/seq="N"')
            sys.stdout.flush()
            features.append(scanner.parse_feature(feature_key, feature_lines))

        scanner.line = line
        return features

    def Drawer_feed(scanner, handle, consumer, do_features=True):
        """Feed the feature table into the consumer.

        Arguments:
        scanner - The configured scanner, already holding its first line.
        handle - Unused here; the scanner reads from its own handle.
        consumer - The consumer that should be informed of events.
        do_features - Boolean, should the features be parsed?

        Always returns True.
        """
        if do_features:
            scanner._feed_feature_table(
                consumer, Drawer_parse_tab_features(scanner, skip=False))
        else:
            Drawer_parse_tab_features(scanner, skip=True)  # ignore the data

        # There is no real sequence in the input: feed a placeholder and
        # close the record.
        consumer.sequence("N")
        consumer.record_end("//")

        # Make the placeholder sequence long enough to cover every feature.
        length = 0
        for feature in consumer.data.features:
            length = max(length, feature.location.nofuzzy_end)
        consumer.data.seq = "N" * length

        return True

    myscanner = Scanner.InsdcScanner()
    myscanner.set_handle(handle)

    myscanner.line = myscanner.handle.readline()
    myscanner.FEATURE_QUALIFIER_INDENT = 21
    myscanner.FEATURE_QUALIFIER_SPACER = "FT" + " " * (
        myscanner.FEATURE_QUALIFIER_INDENT - 2)

    myscanner.debug = not quiet

    consumer = _FeatureConsumer(use_fuzziness=1,
                                feature_cleaner=FeatureValueCleaner())

    Drawer_feed(myscanner, handle, consumer)

    return consumer.data