Exemplo n.º 1
0
def test_header_contig_header_line():
    """Check comparison, text forms, serialization and hashing of contig lines."""
    build = header.ContigHeaderLine.from_mapping
    first = build(vcfpy.OrderedDict([("ID", "1"), ("length", 234)]))
    twin = build(vcfpy.OrderedDict([("ID", "1"), ("length", 234)]))
    other = build(vcfpy.OrderedDict([("ID", "2"), ("length", 123)]))

    # Equal mappings give equal lines; a different mapping gives an unequal one.
    assert first == twin
    assert first != other

    # The rendered mapping differs between interpreters before and from 3.6.
    if sys.version_info < (3, 6):
        mapping_text = "OrderedDict([('ID', '1'), ('length', 234)]))"
    else:
        mapping_text = "{'ID': '1', 'length': 234})"
    expected = "ContigHeaderLine('contig', '<ID=1,length=234>', " + mapping_text

    # str() and repr() are expected to produce the same text.
    for render in (str, repr):
        assert render(first) == expected

    assert first.value == "<ID=1,length=234>"
    assert first.serialize() == "##contig=<ID=1,length=234>"

    # Hashing a header line must be rejected with TypeError.
    with pytest.raises(TypeError):
        hash(first)
Exemplo n.º 2
0
def test_header_info_header_line():
    """Check comparison, text forms, serialization and hashing of INFO lines."""
    build = header.InfoHeaderLine.from_mapping
    svtype = build(
        vcfpy.OrderedDict([("ID", "SVTYPE"), ("Number", 1), ("Type", "String")]))
    svtype_twin = build(
        vcfpy.OrderedDict([("ID", "SVTYPE"), ("Number", 1), ("Type", "String")]))
    end = build(
        vcfpy.OrderedDict([("ID", "END"), ("Number", 1), ("Type", "Integer")]))

    # Equal mappings give equal lines; a different mapping gives an unequal one.
    assert svtype == svtype_twin
    assert svtype != end

    # The rendered mapping differs between interpreters before and from 3.6.
    if sys.version_info < (3, 6):
        mapping_text = (
            "OrderedDict([('ID', 'SVTYPE'), ('Number', 1), ('Type', 'String')]))"
        )
    else:
        mapping_text = "{'ID': 'SVTYPE', 'Number': 1, 'Type': 'String'})"
    expected = (
        "InfoHeaderLine('INFO', '<ID=SVTYPE,Number=1,Type=String>', "
        + mapping_text
    )

    # str() and repr() are expected to produce the same text.
    for render in (str, repr):
        assert render(svtype) == expected

    assert svtype.value == "<ID=SVTYPE,Number=1,Type=String>"
    assert svtype.serialize() == "##INFO=<ID=SVTYPE,Number=1,Type=String>"

    # Hashing a header line must be rejected with TypeError.
    with pytest.raises(TypeError):
        hash(svtype)
Exemplo n.º 3
0
def test_header_format_header_line():
    """Check comparison, text forms, serialization and hashing of FORMAT lines."""
    build = header.FormatHeaderLine.from_mapping
    ad_line = build(
        vcfpy.OrderedDict([("ID", "AD"), ("Number", "R"), ("Type", "Integer")]))
    ad_twin = build(
        vcfpy.OrderedDict([("ID", "AD"), ("Number", "R"), ("Type", "Integer")]))
    dp_line = build(
        vcfpy.OrderedDict([("ID", "DP"), ("Number", 1), ("Type", "Integer")]))

    # Equal mappings give equal lines; a different mapping gives an unequal one.
    assert ad_line == ad_twin
    assert ad_line != dp_line

    # The rendered mapping differs between interpreters before and from 3.6.
    if sys.version_info < (3, 6):
        mapping_text = (
            "OrderedDict([('ID', 'AD'), ('Number', 'R'), ('Type', 'Integer')]))"
        )
    else:
        mapping_text = "{'ID': 'AD', 'Number': 'R', 'Type': 'Integer'})"
    expected = (
        "FormatHeaderLine('FORMAT', '<ID=AD,Number=R,Type=Integer>', "
        + mapping_text
    )

    # str() and repr() are expected to produce the same text.
    for render in (str, repr):
        assert render(ad_line) == expected

    assert ad_line.value == "<ID=AD,Number=R,Type=Integer>"
    assert ad_line.serialize() == "##FORMAT=<ID=AD,Number=R,Type=Integer>"

    # Hashing a header line must be rejected with TypeError.
    with pytest.raises(TypeError):
        hash(ad_line)
Exemplo n.º 4
0
def test_header_sample_header_line():
    """Check comparison, text forms, serialization and hashing of SAMPLE lines."""
    build = header.SampleHeaderLine.from_mapping
    sample1 = build(vcfpy.OrderedDict([("ID", "sample1")]))
    sample1_twin = build(vcfpy.OrderedDict([("ID", "sample1")]))
    sample2 = build(vcfpy.OrderedDict([("ID", "sample2")]))

    # Equal mappings give equal lines; a different mapping gives an unequal one.
    assert sample1 == sample1_twin
    assert sample1 != sample2

    # The rendered mapping differs between interpreters before and from 3.6.
    if sys.version_info < (3, 6):
        mapping_text = "OrderedDict([('ID', 'sample1')]))"
    else:
        mapping_text = "{'ID': 'sample1'})"
    expected = "SampleHeaderLine('SAMPLE', '<ID=sample1>', " + mapping_text

    # str() and repr() are expected to produce the same text.
    for render in (str, repr):
        assert render(sample1) == expected

    assert sample1.value == "<ID=sample1>"
    assert sample1.serialize() == "##SAMPLE=<ID=sample1>"

    # Hashing a header line must be rejected with TypeError.
    with pytest.raises(TypeError):
        hash(sample1)
Exemplo n.º 5
0
def test_header_pedigree_header_line():
    """Check comparison, text forms, serialization and hashing of PEDIGREE lines."""
    build = header.PedigreeHeaderLine.from_mapping
    child = build(vcfpy.OrderedDict([("ID", "child"), ("Father", "father")]))
    child_twin = build(
        vcfpy.OrderedDict([("ID", "child"), ("Father", "father")]))
    father = build(vcfpy.OrderedDict([("ID", "father")]))

    # Equal mappings give equal lines; a different mapping gives an unequal one.
    assert child == child_twin
    assert child != father

    # The rendered mapping differs between interpreters before and from 3.6.
    if sys.version_info < (3, 6):
        mapping_text = "OrderedDict([('ID', 'child'), ('Father', 'father')]))"
    else:
        mapping_text = "{'ID': 'child', 'Father': 'father'})"
    expected = (
        "PedigreeHeaderLine('PEDIGREE', '<ID=child,Father=father>', "
        + mapping_text
    )

    # str() and repr() are expected to produce the same text.
    for render in (str, repr):
        assert render(child) == expected

    assert child.value == "<ID=child,Father=father>"
    assert child.serialize() == "##PEDIGREE=<ID=child,Father=father>"

    # Hashing a header line must be rejected with TypeError.
    with pytest.raises(TypeError):
        hash(child)
Exemplo n.º 6
0
def test_header_filter_header_line():
    """Check comparison, text forms, serialization and hashing of FILTER lines."""
    build = header.FilterHeaderLine.from_mapping
    pass_line = build(
        vcfpy.OrderedDict([("ID", "PASS"),
                           ("Description", "All filters passed")]))
    pass_twin = build(
        vcfpy.OrderedDict([("ID", "PASS"),
                           ("Description", "All filters passed")]))
    q30_line = build(
        vcfpy.OrderedDict([("ID", "q30"), ("Description", "Phred score <30")]))

    # Equal mappings give equal lines; a different mapping gives an unequal one.
    assert pass_line == pass_twin
    assert pass_line != q30_line

    # The rendered mapping differs between interpreters before and from 3.6.
    if sys.version_info < (3, 6):
        mapping_text = (
            "OrderedDict([('ID', 'PASS'), ('Description', 'All filters passed')]))"
        )
    else:
        mapping_text = "{'ID': 'PASS', 'Description': 'All filters passed'})"
    expected = (
        "FilterHeaderLine('FILTER', '<ID=PASS,Description=\"All filters passed\">', "
        + mapping_text
    )

    # str() and repr() are expected to produce the same text.
    for render in (str, repr):
        assert render(pass_line) == expected

    # The description is wrapped in double quotes in the value and serialized form.
    assert pass_line.value == '<ID=PASS,Description="All filters passed">'
    serialized = pass_line.serialize()
    assert serialized == '##FILTER=<ID=PASS,Description="All filters passed">'

    # Hashing a header line must be rejected with TypeError.
    with pytest.raises(TypeError):
        hash(pass_line)
Exemplo n.º 7
0
def test_header_alt_allele_header_line():
    """Check comparison, text forms, serialization and hashing of ALT lines."""
    build = header.AltAlleleHeaderLine.from_mapping
    deletion = build(
        vcfpy.OrderedDict([("ID", "DEL"), ("Description", "deletion")]))
    deletion_twin = build(
        vcfpy.OrderedDict([("ID", "DEL"), ("Description", "deletion")]))
    duplication = build(
        vcfpy.OrderedDict([("ID", "DUP"), ("Description", "duplication")]))

    # Equal mappings give equal lines; a different mapping gives an unequal one.
    assert deletion == deletion_twin
    assert deletion != duplication

    # The rendered mapping differs between interpreters before and from 3.6.
    if sys.version_info < (3, 6):
        mapping_text = "OrderedDict([('ID', 'DEL'), ('Description', 'deletion')]))"
    else:
        mapping_text = "{'ID': 'DEL', 'Description': 'deletion'})"
    expected = (
        "AltAlleleHeaderLine('ALT', '<ID=DEL,Description=\"deletion\">', "
        + mapping_text
    )

    # str() and repr() are expected to produce the same text.
    for render in (str, repr):
        assert render(deletion) == expected

    assert deletion.value == '<ID=DEL,Description="deletion">'
    assert deletion.serialize() == '##ALT=<ID=DEL,Description="deletion">'

    # Hashing a header line must be rejected with TypeError.
    with pytest.raises(TypeError):
        hash(deletion)
Exemplo n.º 8
0
def test_header_without_lines():
    """header_without_lines() drops the listed lines and keeps the samples."""
    generic_lines = [
        header.HeaderLine("foo", "bar"),
        header.HeaderLine("foo2", "bar2"),
    ]
    sample_infos = header.SamplesInfos(["one", "two", "three"])
    original = header.Header(generic_lines, sample_infos)

    # Two generic lines plus two FILTER lines give four lines in total.
    for filter_id in ("PASS", "q30"):
        original.add_filter_line(vcfpy.OrderedDict([("ID", filter_id)]))
    assert len(original.lines) == 4

    # Remove one generic line and one FILTER line, each given as a 2-tuple.
    to_remove = [("foo", "bar"), ("FILTER", "q30")]
    reduced = header.header_without_lines(original, to_remove)

    assert len(reduced.lines) == 2
    assert reduced.samples == original.samples
Exemplo n.º 9
0
def test_add_contig_line_shortcut(vcf_header):
    """add_contig_line() appends a contig line and registers it in the index.

    ``vcf_header`` is supplied by the caller (presumably a pytest fixture --
    confirm in conftest); the test expects it to start with 18 lines and no
    contig with ID "20a".
    """
    contig_fields = [
        ("ID", "20a"),
        ("length", 62435964),
        ("assembly", "B36"),
        ("md5", "f126cdf8a6e0c7f379d618ff66beb2da"),
        ("species", "H**o sapiens"),
        ("taxonomy", "x"),
    ]

    # State before the addition.
    assert len(vcf_header.lines) == 18
    assert "20a" not in vcf_header._indices["contig"]

    vcf_header.add_contig_line(vcfpy.OrderedDict(contig_fields))

    # State after the addition: one more line, and the index entry for the
    # new ID is the very object stored last in the list of lines.
    assert len(vcf_header.lines) == 19
    assert "20a" in vcf_header._indices["contig"]
    assert vcf_header._indices["contig"]["20a"] is vcf_header.lines[-1]

    # Inspect the appended line itself.
    added = vcf_header.lines[-1]
    assert added.key == "contig"
    expected_value = (
        "<ID=20a,length=62435964,assembly=B36,"
        "md5=f126cdf8a6e0c7f379d618ff66beb2da,"
        'species="H**o sapiens",taxonomy=x>'
    )
    assert added.value == expected_value

    # The stored mapping must hold exactly the six fields that were passed in.
    assert len(added.mapping) == 6
    for field, expected in contig_fields:
        assert added.mapping[field] == expected
Exemplo n.º 10
0
def test_add_info_line_shortcut(vcf_header):
    """add_info_line() appends an INFO line and registers it in the index.

    ``vcf_header`` is supplied by the caller (presumably a pytest fixture --
    confirm in conftest); the test expects it to start with 18 lines.
    """
    # check header before adding
    assert len(vcf_header.lines) == 18

    # add header line
    VALUE = '<ID=DPa,Number=1,Type=Integer,Description="Total Depth">'
    mapping = vcfpy.OrderedDict([("ID", "DPa"), ("Number", 1),
                                 ("Type", "Integer"),
                                 ("Description", "Total Depth")])
    vcf_header.add_info_line(mapping)

    # check header after adding (the line count is asserted once, as in the
    # sibling shortcut tests)
    assert len(vcf_header.lines) == 19
    assert "DPa" in vcf_header._indices["INFO"]
    assert vcf_header._indices["INFO"]["DPa"] is vcf_header.lines[-1]

    # Check resulting added header line
    added = vcf_header.lines[-1]
    assert added.key == "INFO"
    assert added.value == VALUE
    assert len(added.mapping) == 4
    assert added.mapping["ID"] == "DPa"
    assert added.mapping["Number"] == 1
    assert added.mapping["Type"] == "Integer"
    assert added.mapping["Description"] == "Total Depth"
Exemplo n.º 11
0
def test_add_format_line(vcf_header):
    """add_line() with a FormatHeaderLine appends it and registers it in the index.

    ``vcf_header`` is supplied by the caller (presumably a pytest fixture --
    confirm in conftest); the test expects it to start with 18 lines.
    """
    format_fields = [
        ("ID", "GTa"),
        ("Number", 1),
        ("Type", "String"),
        ("Description", "Genotype"),
    ]
    expected_value = '<ID=GTa,Number=1,Type=String,Description="Genotype">'

    # State before the addition.
    assert len(vcf_header.lines) == 18

    # Build the line directly from key, value and mapping, then add it.
    new_line = header.FormatHeaderLine(
        "FORMAT", expected_value, vcfpy.OrderedDict(format_fields))
    vcf_header.add_line(new_line)

    # State after the addition: one more line, and the index entry for the
    # new ID is the very object stored last in the list of lines.
    assert len(vcf_header.lines) == 19
    assert "GTa" in vcf_header._indices["FORMAT"]
    assert vcf_header._indices["FORMAT"]["GTa"] is vcf_header.lines[-1]

    # Inspect the appended line itself.
    added = vcf_header.lines[-1]
    assert added.key == "FORMAT"
    assert added.value == expected_value
    assert len(added.mapping) == 4
    for field, expected in format_fields:
        assert added.mapping[field] == expected
Exemplo n.º 12
0
def test_header_has_header_line_positive():
    """has_header_line() is truthy for every (key, ID) pair present in the header."""
    format_dp = header.FormatHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "DP"), ("Number", "R"), ("Type", "Integer")]))
    info_ad = header.InfoHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "AD"), ("Number", "R"), ("Type", "Integer")]))
    filter_pass = header.FilterHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "PASS"),
                           ("Description", "All filters passed")]))
    contig_1 = header.ContigHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "1"), ("length", 234)]))

    hdr = header.Header(
        [format_dp, info_ad, filter_pass, contig_1],
        header.SamplesInfos(["one", "two", "three"]),
    )

    # One lookup per line type that was put into the header above.
    present = [
        ("FORMAT", "DP"),
        ("INFO", "AD"),
        ("FILTER", "PASS"),
        ("contig", "1"),
    ]
    for key, line_id in present:
        assert hdr.has_header_line(key, line_id)
Exemplo n.º 13
0
def build_rec(calls=None, format_extras=None):
    """Build a record.Record with REF "C" and the two SNV alternatives "T" and "A".

    Args:
        calls: calls to pass to the record; any falsy value is replaced by a
            new empty list.
        format_extras: list of entries appended after "GT" in the format
            argument; any falsy value is replaced by a new empty list.

    Returns:
        The constructed ``record.Record``.
    """
    if not calls:
        calls = []
    if not format_extras:
        format_extras = []

    alternatives = [
        record.Substitution(vcfpy.SNV, "T"),
        record.Substitution(vcfpy.SNV, "A"),
    ]
    format_entries = ["GT"] + format_extras

    # Arguments are passed positionally, in the same order as before.
    return record.Record(
        "2",
        100,
        [],
        "C",
        alternatives,
        None,
        [],
        vcfpy.OrderedDict(),
        format_entries,
        calls,
    )
Exemplo n.º 14
0
def test_add_filter_line_shortcut(vcf_header):
    """add_filter_line() appends a FILTER line and registers it in the index.

    ``vcf_header`` is supplied by the caller (presumably a pytest fixture --
    confirm in conftest); the test expects it to start with 18 lines.
    """
    filter_fields = [
        ("ID", "q10a"),
        ("Description", "Quality below 10"),
    ]

    # State before the addition.
    assert len(vcf_header.lines) == 18

    vcf_header.add_filter_line(vcfpy.OrderedDict(filter_fields))

    # State after the addition: one more line, and the index entry for the
    # new ID is the very object stored last in the list of lines.
    assert len(vcf_header.lines) == 19
    assert "q10a" in vcf_header._indices["FILTER"]
    assert vcf_header._indices["FILTER"]["q10a"] is vcf_header.lines[-1]

    # Inspect the appended line itself.
    added = vcf_header.lines[-1]
    assert added.key == "FILTER"
    expected_value = '<ID=q10a,Description="Quality below 10">'
    assert added.value == expected_value
    assert len(added.mapping) == 2
    for field, expected in filter_fields:
        assert added.mapping[field] == expected
Exemplo n.º 15
0
def test_gt_phase_char_slash():
    """A call with GT "0/1" exposes "/" as its gt_phase_char."""
    genotype = vcfpy.OrderedDict([("GT", "0/1")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.gt_phase_char == "/"
Exemplo n.º 16
0
def test_gt_phase_char_pipe():
    """A call with GT "0|1" exposes "|" as its gt_phase_char."""
    genotype = vcfpy.OrderedDict([("GT", "0|1")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.gt_phase_char == "|"
Exemplo n.º 17
0
def test_is_phased_false():
    """A call with GT "0/1" has is_phased equal to the False singleton."""
    genotype = vcfpy.OrderedDict([("GT", "0/1")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.is_phased is False
Exemplo n.º 18
0
def test_is_phased_mixed():
    """A call with the mixed GT "0/1|2" has is_phased equal to the True singleton."""
    genotype = vcfpy.OrderedDict([("GT", "0/1|2")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.is_phased is True
Exemplo n.º 19
0
def test_gt_type_filtered_pass():
    """A call whose FT entry is ["PASS"] is not reported as filtered.

    Despite the function name, this exercises ``is_filtered()``, not ``gt_type``.
    """
    call_data = vcfpy.OrderedDict([("GT", "1/1"), ("FT", ["PASS"])])
    sample_call = record.Call("sample", call_data)
    assert not sample_call.is_filtered()
Exemplo n.º 20
0
def test_gt_type_filtered_no_ft():
    """A call without any FT entry is not reported as filtered.

    Despite the function name, this exercises ``is_filtered()``, not ``gt_type``.
    """
    call_data = vcfpy.OrderedDict([("GT", "1/1")])
    sample_call = record.Call("sample", call_data)
    assert not sample_call.is_filtered()
Exemplo n.º 21
0
def test_is_variant_hom_alt():
    """A call with GT "1/1" has a truthy is_variant."""
    genotype = vcfpy.OrderedDict([("GT", "1/1")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.is_variant
Exemplo n.º 22
0
def test_is_variant_hom_ref():
    """A call with GT "0/0" has a falsy is_variant."""
    genotype = vcfpy.OrderedDict([("GT", "0/0")])
    sample_call = record.Call("sample", genotype)
    assert not sample_call.is_variant
Exemplo n.º 23
0
def test_gt_type_het():
    """A call with GT "0|1" has gt_type equal to vcfpy.HET."""
    genotype = vcfpy.OrderedDict([("GT", "0|1")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.gt_type == vcfpy.HET
Exemplo n.º 24
0
def test_gt_bases_1_1():
    """GT "1|1" yields gt_bases ("T", "T") once the call is placed in a record."""
    genotype = vcfpy.OrderedDict([("GT", "1|1")])
    sample_call = record.Call("sample", genotype)
    # The record itself is not needed afterwards; presumably constructing it
    # attaches the call to its site so bases can be resolved -- confirm in
    # record.Record.
    build_rec([sample_call])
    assert sample_call.gt_bases == ("T", "T")
Exemplo n.º 25
0
def test_gt_bases_0_2():
    """GT "0|2" yields gt_bases ("C", "A") once the call is placed in a record."""
    genotype = vcfpy.OrderedDict([("GT", "0|2")])
    sample_call = record.Call("sample", genotype)
    # The record itself is not needed afterwards; presumably constructing it
    # attaches the call to its site so bases can be resolved -- confirm in
    # record.Record.
    build_rec([sample_call])
    assert sample_call.gt_bases == ("C", "A")
Exemplo n.º 26
0
def test_gt_type_hom_alt():
    """A call with GT "1/1" has gt_type equal to vcfpy.HOM_ALT."""
    genotype = vcfpy.OrderedDict([("GT", "1/1")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.gt_type == vcfpy.HOM_ALT
Exemplo n.º 27
0
def test_gt_type_hom_ref():
    """A call with GT "0/0" has gt_type equal to vcfpy.HOM_REF."""
    genotype = vcfpy.OrderedDict([("GT", "0/0")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.gt_type == vcfpy.HOM_REF
Exemplo n.º 28
0
def test_is_het_het():
    """A call with GT "0|1" has a truthy is_het."""
    genotype = vcfpy.OrderedDict([("GT", "0|1")])
    sample_call = record.Call("sample", genotype)
    assert sample_call.is_het