def parse_position(string: str):
    """Convert the string form of a position into a Position subclass instance.

    Recognised forms:

    - ``"<N"``                -> ``BeforePosition(N)``
    - ``">N"``                -> ``AfterPosition(N)``
    - ``"UnknownPosition()"`` -> ``UnknownPosition()``
    - ``"N"``                 -> ``ExactPosition(N)``

    Raises:
        ValueError: if ``string`` is empty, or if the numeric part cannot be
            converted by ``int()``.
    """
    # An empty string used to fail with IndexError on string[0]; report it the
    # same way as every other malformed position string instead.
    if string == "":
        raise ValueError("cannot parse a position from an empty string")
    marker = string[0]
    if marker == "<":
        return BeforePosition(int(string[1:]))
    if marker == ">":
        return AfterPosition(int(string[1:]))
    if string == "UnknownPosition()":
        return UnknownPosition()
    return ExactPosition(int(string))
def test_unknown_position(self):
    """Check that ``self.convert`` keeps an exact start and an unknown end."""
    converted = self.convert(
        FeatureLocation(ExactPosition(1), UnknownPosition(), strand=1)
    )
    # The start must still be an ExactPosition with the same value...
    start = converted.start
    assert isinstance(start, ExactPosition)
    assert start == 1
    # ...and the end must still be an UnknownPosition.
    assert isinstance(converted.end, UnknownPosition)
def test_start_before_end(self):
    """Check that FeatureLocation raises ValueError when end precedes start."""
    message = "must be greater than or equal to start location"
    # Deferred constructors, so every argument is still built inside the
    # assertRaises context exactly as in a hand-written ``with`` block.
    invalid_builders = (
        lambda: FeatureLocation(42, 23, 1),
        lambda: FeatureLocation(42, 0, 1),
        lambda: FeatureLocation(BeforePosition(42), AfterPosition(23), -1),
        lambda: FeatureLocation(42, AfterPosition(0), 1),
    )
    for build in invalid_builders:
        with self.assertRaises(ValueError) as caught:
            build()
        self.assertIn(message, str(caught.exception))

    # Features with UnknownPositions should pass check
    FeatureLocation(42, UnknownPosition())
    FeatureLocation(UnknownPosition(), 42)

    # Same start and end should pass check
    FeatureLocation(42, 42)
def _clean_var_seq_description(description):
    """Remove carry-over spaces from the sequences in a ``A -> B`` description.

    During line carryover, the sequences in VARSPLIC/VAR_SEQ descriptions can
    get mangled with unwanted spaces like:
    'DISSTKLQALPSHGLESIQT -> PCRATGWSPFRRSSPC LPTH'

    The description is returned unchanged unless it contains exactly one
    ``" -> "`` separator. Any trailing text starting at the first ``" ("`` of
    the second sequence is kept verbatim.
    """
    try:
        first_seq, second_seq = description.split(" -> ")
    except ValueError:
        return description
    extra_info = ""
    # we might have more information at the end of the
    # second sequence, which should be in parenthesis
    extra_info_pos = second_seq.find(" (")
    if extra_info_pos != -1:
        extra_info = second_seq[extra_info_pos:]
        second_seq = second_seq[:extra_info_pos]
    # now clean spaces out of the first and second string
    first_seq = first_seq.replace(" ", "")
    second_seq = second_seq.replace(" ", "")
    # reassemble the description
    return first_seq + " -> " + second_seq + extra_info


def _read_ft(record, line):
    """Parse one FT line and update ``record.features`` in place.

    A line with a feature name in columns 5-12 starts a new feature: its
    location is parsed, a ``FeatureTable`` is built and appended to
    ``record.features``. A line without a name continues the most recent
    feature (``record.features[-1]``), either setting its ``id`` or
    setting/extending one of its qualifiers.

    Two layouts are handled:

    - new-style: columns 13-20 of a feature line are blank, the location
      starts at column 21 (optionally ``ref:from..to``), and continuation
      lines carry ``/id=``, ``/evidence=`` or ``/note=`` qualifiers;
    - old-style: fixed-width from/to columns and a free-text description at
      columns 34-74, with ``/FTId=`` giving the feature id.

    Returns None; raises IndexError on a continuation line when
    ``record.features`` is empty.
    """
    name = line[5:13].rstrip()
    if name:
        # The slice is 8 characters wide, so it has to be compared against
        # 8 spaces (a single-space literal can never match a full line).
        if line[13:21] == " " * 8:  # new-style FT line
            location = line[21:80].rstrip()
            try:
                isoform_id, location = location.split(":")
            except ValueError:
                isoform_id = None
            try:
                from_res, to_res = location.split("..")
            except ValueError:
                # single-residue feature: only one coordinate is given
                from_res = location
                to_res = ""
            qualifiers = {}
        else:  # old-style FT line
            from_res = line[14:20].lstrip()
            to_res = line[21:27].lstrip()
            isoform_id = None
            description = line[34:75].rstrip()
            qualifiers = {"description": description}

        if from_res == "?":
            from_res = UnknownPosition()
        elif from_res.startswith("?"):
            position = int(from_res[1:]) - 1  # Python zero-based counting
            from_res = UncertainPosition(position)
        elif from_res.startswith("<"):
            position = int(from_res[1:]) - 1  # Python zero-based counting
            from_res = BeforePosition(position)
        else:
            position = int(from_res) - 1  # Python zero-based counting
            from_res = ExactPosition(position)

        # The end coordinate is not shifted: the end of the range is exclusive.
        if to_res == "":
            position = from_res + 1
            to_res = ExactPosition(position)
        elif to_res == "?":
            to_res = UnknownPosition()
        elif to_res.startswith("?"):
            position = int(to_res[1:])
            to_res = UncertainPosition(position)
        elif to_res.startswith(">"):
            position = int(to_res[1:])
            to_res = AfterPosition(position)
        else:
            position = int(to_res)
            to_res = ExactPosition(position)

        location = FeatureLocation(from_res, to_res, ref=isoform_id)
        feature = FeatureTable(
            location=location, type=name, id=None, qualifiers=qualifiers
        )
        record.features.append(feature)
        return

    # this line is a continuation of the previous feature
    feature = record.features[-1]
    # Columns 5-33 (29 characters) are blank on an old-style continuation.
    if line[5:34] == " " * 29:  # old-style FT line
        description = line[34:75].rstrip()
        if description.startswith("/FTId="):
            # store the FTId as the feature ID
            feature.id = description[6:].rstrip(".")
            return
        # this line is a continuation of the description of the previous feature
        old_description = feature.qualifiers["description"]
        if old_description.endswith("-"):
            # a trailing hyphen means the text was broken mid-word: no space
            description = "%s%s" % (old_description, description)
        else:
            description = "%s %s" % (old_description, description)
        if feature.type in ("VARSPLIC", "VAR_SEQ"):  # special case
            description = _clean_var_seq_description(description)
        feature.qualifiers["description"] = description
        return

    # new-style FT line
    value = line[21:].rstrip()
    if value.startswith("/id="):
        value = value[4:]
        assert value.startswith('"')
        assert value.endswith('"')
        feature.id = value[1:-1]
        return
    for qualifier in ("evidence", "note"):
        prefix = "/%s=" % qualifier
        if not value.startswith(prefix):
            continue
        value = value[len(prefix):]
        assert value.startswith('"')
        if value.endswith('"'):
            value = value[1:-1]
        else:  # continues on the next line
            value = value[1:]
        assert qualifier not in feature.qualifiers
        feature.qualifiers[qualifier] = value
        return

    # this line is a continuation of the most recently added qualifier
    key = list(feature.qualifiers)[-1]
    description = value.rstrip('"')
    old_description = feature.qualifiers[key]
    if key == "evidence" or old_description.endswith("-"):
        description = "%s%s" % (old_description, description)
    else:
        description = "%s %s" % (old_description, description)
    if feature.type == "VAR_SEQ":  # see VARSPLIC above
        description = _clean_var_seq_description(description)
    feature.qualifiers[key] = description