def test_tag_not_present_returns_default(self):
    """`get_tag` hands back the supplied default when the tag is absent."""
    record = PafRecord(tags={"de": Tag.from_str("de:f:0.1")})
    fallback = Tag.from_str("NM:i:0")

    # "NM" is not among the record's tags, so the fallback must come back.
    assert record.get_tag("NM", default=fallback) == fallback
def test_with_tags(self):
    """`str(record)` is the non-None default fields followed by every tag.

    The expected string is assembled from the string form of each default
    field that is not None, then each tag rendered with `str`.
    """
    tags = {"NM": Tag.from_str("NM:i:1"), "ms": Tag.from_str("ms:i:1906")}
    record = PafRecord(tags=tags)

    actual = str(record)

    core_fields = DELIM.join(
        str(x) for x in PafRecord._field_defaults.values() if x is not None
    )
    # Iterating a dict yields only its keys ("NM", "ms"), so joining `tags`
    # directly would drop the type and value of every tag. Join the Tag
    # objects' string forms instead.
    # NOTE(review): assumes PafRecord.__str__ writes each tag via str(tag),
    # i.e. as TAG:TYPE:VALUE -- confirm against the implementation.
    tag_fields = DELIM.join(str(tag) for tag in tags.values())
    expected = core_fields + DELIM + tag_fields

    assert actual == expected
def test_tag_present(self):
    """`get_tag` returns the stored tag when its key exists on the record."""
    wanted = Tag.from_str("de:f:0.1")
    record = PafRecord(tags={"de": wanted})

    assert record.get_tag("de") == wanted
def test_tag_with_string_value_parsed(self):
    """A `Z`-typed tag string parses into a Tag holding the string value."""
    name, kind, payload = "cg", "Z", "97M1I13M"

    parsed = Tag.from_str(f"{name}:{kind}:{payload}")

    assert parsed == Tag(name, kind, payload)
def test_tag_with_non_letter_char_value_parsed(self):
    """An `A`-typed tag whose value is the non-letter `*` still parses."""
    name, kind, payload = "tg", "A", "*"

    parsed = Tag.from_str(f"{name}:{kind}:{payload}")

    assert parsed == Tag(name, kind, payload)
def test_tag_with_inf_float_value_parsed(self):
    """An `f`-typed tag with the text `inf` parses to `float("inf")`."""
    name, kind = "de", "f"
    raw_value = "inf"

    parsed = Tag.from_str(f"{name}:{kind}:{raw_value}")

    assert parsed == Tag(name, kind, float(raw_value))
def test_tag_with_int_value_parsed(self):
    """An `i`-typed tag string parses into a Tag holding an `int` value."""
    name, kind = "NM", "i"
    number = 50

    parsed = Tag.from_str(f"{name}:{kind}:{number}")

    assert parsed == Tag(name, kind, number)
def test_line_with_tags(self):
    """A tab-separated line with two distinct tags parses with both kept."""
    line = "\t".join(
        (
            "query_name",
            "1239",
            "65",
            "1239",
            "-",
            "target_name",
            "4378340",
            "2555250",
            "2556472",
            "1139",
            "1228",
            "60",
            "NM:i:89",
            "ms:i:1906",
        )
    )

    parsed = PafRecord.from_str(line)

    wanted = PafRecord(
        "query_name",
        1239,
        65,
        1239,
        Strand.Reverse,
        "target_name",
        4378340,
        2555250,
        2556472,
        1139,
        1228,
        60,
        dict(NM=Tag.from_str("NM:i:89"), ms=Tag.from_str("ms:i:1906")),
    )
    assert parsed == wanted
def from_str(line: str) -> "PafRecord":
    """Parse a single line of text into a `PafRecord`.

    > *Note: When a SAM-like tag occurs more than once, only its last
    occurrence is kept.*

    ## Example
    ```py
    from pafpy import PafRecord

    line = "query_name\t123\t65\t123\t+\ttname\t43783\t25552\t25564\t1139\t1228\t60"
    record = PafRecord.from_str(line)

    assert record.qname == "query_name"
    assert record.mapq == 60
    ```

    ## Errors
    - If the line has fewer than the expected number of fields (12), a
    `MalformattedRecord` exception is raised.
    - If a tag is invalid, a `pafpy.tag.InvalidTagFormat` exception is raised.
    - If a numeric column cannot be converted by `int`, the resulting
    `ValueError` propagates to the caller.
    """
    columns = line.rstrip().split(DELIM)
    n_columns = len(columns)
    if n_columns < MIN_FIELDS:
        raise MalformattedRecord(
            f"Expected {MIN_FIELDS} fields, but got {n_columns}\n{line}"
        )

    # Everything after the twelfth column is a tag. Keying by tag name means
    # a repeated name is overwritten by its later occurrence.
    tags: Tags = {
        parsed.tag: parsed for parsed in map(Tag.from_str, columns[12:])
    }

    # Columns are converted left to right, after the tags have been parsed.
    qname = columns[0]
    qlen, qstart, qend = (int(columns[i]) for i in (1, 2, 3))
    strand = Strand(columns[4])
    tname = columns[5]
    tlen, tstart, tend, mlen, blen, mapq = (
        int(columns[i]) for i in range(6, 12)
    )

    return PafRecord(
        qname=qname,
        qlen=qlen,
        qstart=qstart,
        qend=qend,
        strand=strand,
        tname=tname,
        tlen=tlen,
        tstart=tstart,
        tend=tend,
        mlen=mlen,
        blen=blen,
        mapq=mapq,
        # An empty tag mapping is passed on as None rather than {}.
        tags=tags or None,
    )
def test_mapped_record(self):
    """A record with every field populated is not reported as unmapped."""
    fields = dict(
        qname="05f868dc-6760-47ec-b7e7-ab4054b0e4fe",
        qlen=4641,
        qstart=5,
        qend=4640,
        strand=Strand.Reverse,
        tname="NODE_1_length_4378477_cov_60.093643",
        tlen=4378340,
        tstart=1069649,
        tend=1074329,
        mlen=4499,
        blen=4740,
        mapq=60,
        tags={"tp": Tag.from_str("tp:A:P")},
    )
    record = PafRecord(**fields)

    assert not record.is_unmapped()
def test_line_with_dupliacte_tag_returns_last_one(self):
    """When a line repeats a tag name, the parsed record keeps the last one."""
    line = "\t".join(
        (
            "query_name",
            "1239",
            "65",
            "1239",
            "-",
            "target_name",
            "4378340",
            "2555250",
            "2556472",
            "1139",
            "1228",
            "60",
            "NM:i:89",
            "NM:i:2",
        )
    )

    parsed = PafRecord.from_str(line)

    # Only the second "NM" occurrence is expected to survive.
    surviving_tags = dict(NM=Tag.from_str("NM:i:2"))
    wanted = PafRecord(
        "query_name",
        1239,
        65,
        1239,
        Strand.Reverse,
        "target_name",
        4378340,
        2555250,
        2556472,
        1139,
        1228,
        60,
        surviving_tags,
    )
    assert parsed == wanted
def test_unknown_char_raises_error(self):
    """`is_inversion` raises `ValueError` for a `tp` tag valued `?`."""
    unknown = Tag.from_str("tp:A:?")

    with pytest.raises(ValueError):
        record = PafRecord(strand=Strand.Forward, tags={unknown.tag: unknown})
        record.is_inversion()
def test_primary_record_returns_true(self):
    """A record tagged `tp:A:P` is reported as primary."""
    primary = Tag.from_str("tp:A:P")
    tags = {primary.tag: primary}

    record = PafRecord(strand=Strand.Forward, tags=tags)

    assert record.is_primary()
def test_tag_type_too_long_raises_error(self):
    """A two-character type field (`ii`) is rejected as an invalid tag."""
    with pytest.raises(InvalidTagFormat):
        Tag.from_str("NM:ii:5")
def test_unknown_tag_type_raises_error(self):
    """The type character `x` is rejected as an invalid tag."""
    with pytest.raises(InvalidTagFormat):
        Tag.from_str("NM:x:5")
def test_tag_missing_field(self):
    """A tag string with an empty value part is rejected as invalid."""
    with pytest.raises(InvalidTagFormat):
        Tag.from_str("NM:i:")
def test_invalid_string_raises_error(self):
    """A string with no `:` separators at all is rejected as invalid."""
    with pytest.raises(InvalidTagFormat):
        Tag.from_str("foo")
def test_lower_case_secondary_returns_true(self):
    """A lower-case `s` in the `tp` tag is reported as secondary."""
    secondary = Tag.from_str("tp:A:s")
    tags = {secondary.tag: secondary}

    record = PafRecord(strand=Strand.Forward, tags=tags)

    assert record.is_secondary()
def test_lower_case_inversion_returns_true(self):
    """A lower-case `i` in the `tp` tag is reported as an inversion."""
    inversion = Tag.from_str("tp:A:i")
    tags = {inversion.tag: inversion}

    record = PafRecord(strand=Strand.Forward, tags=tags)

    assert record.is_inversion()
def test_primary_record_returns_false(self):
    """A record tagged `tp:A:P` is not reported as an inversion."""
    primary = Tag.from_str("tp:A:P")
    tags = {primary.tag: primary}

    record = PafRecord(strand=Strand.Forward, tags=tags)

    assert not record.is_inversion()