Exemplo n.º 1
0
 def parse_position(string: str):
     """Convert the string form of a position into a Position subclass instance.

     Recognised forms:
       * ``"<N"`` gives ``BeforePosition(N)``
       * ``">N"`` gives ``AfterPosition(N)``
       * the literal ``"UnknownPosition()"`` gives ``UnknownPosition()``
       * anything else is handed to ``int()`` and gives ``ExactPosition``

     Raises:
         ValueError: if ``string`` is empty, or if its numeric part cannot be
             converted by ``int()``.
     """
     # Reject empty input up front so it fails with the same exception type as
     # any other malformed position, instead of an IndexError from indexing.
     if not string:
         raise ValueError("cannot parse a position from an empty string")
     if string.startswith('<'):
         return BeforePosition(int(string[1:]))
     if string.startswith('>'):
         return AfterPosition(int(string[1:]))
     if string == "UnknownPosition()":
         return UnknownPosition()
     return ExactPosition(int(string))
Exemplo n.º 2
0
    def test_unknown_position(self):
        """Pass an exact-start / unknown-end location through self.convert.

        The converted location must still have an ExactPosition start equal
        to 1 and an UnknownPosition end.
        """
        start, end = ExactPosition(1), UnknownPosition()
        converted = self.convert(FeatureLocation(start, end, strand=1))

        # The start keeps both its type and its value.
        assert isinstance(converted.start, ExactPosition)
        assert converted.start == 1

        # The end is only checked for its type.
        assert isinstance(converted.end, UnknownPosition)
Exemplo n.º 3
0
    def test_start_before_end(self):
        """Check FeatureLocation's start/end ordering validation.

        Locations whose end lies before their start must raise ValueError
        with a message containing the expected fragment; locations involving
        an UnknownPosition, or with equal start and end, must construct
        without error.
        """
        message_fragment = "must be greater than or equal to start location"

        # Each entry defers construction so that it happens inside the
        # assertRaises context, one case at a time and in this order.
        invalid_builders = (
            lambda: FeatureLocation(42, 23, 1),
            lambda: FeatureLocation(42, 0, 1),
            lambda: FeatureLocation(BeforePosition(42), AfterPosition(23), -1),
            lambda: FeatureLocation(42, AfterPosition(0), 1),
        )
        for build in invalid_builders:
            with self.assertRaises(ValueError) as err:
                build()
            self.assertIn(message_fragment, str(err.exception))

        # Features with UnknownPositions should pass check
        FeatureLocation(42, UnknownPosition())
        FeatureLocation(UnknownPosition(), 42)

        # Same start and end should pass check
        FeatureLocation(42, 42)
Exemplo n.º 4
0
def _ft_strip_sequence_spaces(description):
    """Remove spaces that line wrapping inserted into an 'OLD -> NEW' sequence pair.

    During line carryover, the sequences in VARSPLIC/VAR_SEQ descriptions can
    get mangled with unwanted spaces like:
    'DISSTKLQALPSHGLESIQT -> PCRATGWSPFRRSSPC LPTH'
    A description that does not split into exactly two parts on " -> " is
    returned unchanged.
    """
    try:
        first_seq, second_seq = description.split(" -> ")
    except ValueError:
        return description
    extra_info = ""
    # we might have more information at the end of the
    # second sequence, which should be in parenthesis
    extra_info_pos = second_seq.find(" (")
    if extra_info_pos != -1:
        extra_info = second_seq[extra_info_pos:]
        second_seq = second_seq[:extra_info_pos]
    # now clean spaces out of the first and second string
    first_seq = first_seq.replace(" ", "")
    second_seq = second_seq.replace(" ", "")
    # reassemble the description
    return first_seq + " -> " + second_seq + extra_info


def _ft_parse_start(from_res):
    """Turn the textual start of a feature into a position object.

    Numeric values are reduced by one (Python zero-based counting).
    """
    if from_res == "?":
        return UnknownPosition()
    if from_res.startswith("?"):
        return UncertainPosition(int(from_res[1:]) - 1)
    if from_res.startswith("<"):
        return BeforePosition(int(from_res[1:]) - 1)
    return ExactPosition(int(from_res) - 1)


def _ft_parse_end(to_res, start):
    """Turn the textual end of a feature into a position object.

    When no end is given, the end is one past the already-parsed ``start``.
    Numeric end values are used as written (no zero-based adjustment).
    """
    if to_res == "":
        return ExactPosition(start + 1)
    if to_res == "?":
        return UnknownPosition()
    if to_res.startswith("?"):
        return UncertainPosition(int(to_res[1:]))
    if to_res.startswith(">"):
        return AfterPosition(int(to_res[1:]))
    return ExactPosition(int(to_res))


def _read_ft(record, line):
    """Process one FT (feature table) line and update ``record.features``.

    A line with a feature name in columns 5-13 starts a new feature, which is
    appended to ``record.features``.  A line without a name continues the most
    recently appended feature: it either sets the feature's ``id``, starts a
    new qualifier ("evidence" or "note"), or extends the text of an existing
    qualifier.  Both the old-style (fixed-column description) and the
    new-style (``/key="value"`` qualifiers) layouts are handled.

    Args:
        record: object with a ``features`` list; modified in place.
        line: the raw FT line.

    Returns:
        None.

    Raises:
        ValueError: if a numeric position cannot be converted by ``int()``.
        AssertionError: if a new-style qualifier value is not quoted as
            expected, or an "evidence"/"note" qualifier is given twice.
        IndexError: if a continuation line arrives before any feature.
    """
    name = line[5:13].rstrip()
    if name:
        # This line opens a new feature.
        if line[13:21] == " " * 8:  # new-style FT line
            location = line[21:80].rstrip()
            try:
                isoform_id, location = location.split(":")
            except ValueError:
                isoform_id = None
            try:
                from_res, to_res = location.split("..")
            except ValueError:
                # no ".." separator, so only a start was given
                from_res = location
                to_res = ""
            qualifiers = {}
        else:  # old-style FT line
            from_res = line[14:20].lstrip()
            to_res = line[21:27].lstrip()
            isoform_id = None
            qualifiers = {"description": line[34:75].rstrip()}
        # The start must be parsed first: a missing end is derived from it.
        start = _ft_parse_start(from_res)
        end = _ft_parse_end(to_res, start)
        location = FeatureLocation(start, end, ref=isoform_id)
        feature = FeatureTable(
            location=location, type=name, id=None, qualifiers=qualifiers
        )
        record.features.append(feature)
        return

    # this line is a continuation of the previous feature
    feature = record.features[-1]

    if line[5:34] == " " * 29:  # old-style FT line
        description = line[34:75].rstrip()
        if description.startswith("/FTId="):
            # store the FTId as the feature ID
            feature.id = description[6:].rstrip(".")
            return
        # this line is a continuation of the description of the previous feature
        old_description = feature.qualifiers["description"]
        # a trailing hyphen means the text was split mid-word: join directly
        separator = "" if old_description.endswith("-") else " "
        description = old_description + separator + description
        if feature.type in ("VARSPLIC", "VAR_SEQ"):  # special case
            description = _ft_strip_sequence_spaces(description)
        feature.qualifiers["description"] = description
        return

    # new-style FT line
    value = line[21:].rstrip()
    if value.startswith("/id="):
        value = value[4:]
        assert value.startswith('"')
        assert value.endswith('"')
        feature.id = value[1:-1]
        return
    for key, prefix in (("evidence", "/evidence="), ("note", "/note=")):
        if value.startswith(prefix):
            value = value[len(prefix):]
            assert value.startswith('"')
            if value.endswith('"'):
                value = value[1:-1]
            else:  # continues on the next line
                value = value[1:]
            assert key not in feature.qualifiers
            feature.qualifiers[key] = value
            return

    # this line is a continuation of the most recently added qualifier
    key = list(feature.qualifiers)[-1]
    description = value.rstrip('"')
    old_description = feature.qualifiers[key]
    # evidence text and hyphen-split words are joined without a space
    if key == "evidence" or old_description.endswith("-"):
        separator = ""
    else:
        separator = " "
    description = old_description + separator + description
    if feature.type == "VAR_SEQ":  # see VARSPLIC above
        description = _ft_strip_sequence_spaces(description)
    feature.qualifiers[key] = description