Exemplo n.º 1
0
def test_writer(yrb_writer, capsys):
    """Write a header plus one variant to stdout and check the VCF text.

    The ``yrb_writer`` fixture argument is not used by the body: the test
    builds its own writer around ``sys.stdout``.
    NOTE(review): presumably so the writer binds to the stream that
    ``capsys`` captures -- confirm against the fixture definition.
    """
    samples = ('NA19238', 'NA19239', 'NA19240')

    writer = kevlar.vcf.VCFWriter(sys.stdout, source='py.test')
    for sample in samples:
        writer.register_sample(sample)
    writer.describe_format('GT', 'String', '1', 'Genotype')
    writer.write_header()

    variant = Variant('1', 12345, 'G', 'C')
    variant.annotate('PART', '42')
    variant.annotate('CONTIG', 'A' * 100)
    # All genotypes are set first, then all abundances, in sample order.
    for sample, genotype in zip(samples, ('0/0', '0/0', '0/1')):
        variant.format(sample, 'GT', genotype)
    for sample, abundance in zip(samples, ('12,9,8', '0,0,0', '0,0,0')):
        variant.format(sample, 'ALTABUND', abundance)
    writer.write(variant)

    captured, _ = capsys.readouterr()
    print(captured)

    lines = captured.strip().split('\n')

    # Header: two FORMAT declarations are expected, one of them for GT.
    format_headers = [line for line in lines if line.startswith('##FORMAT')]
    assert len(format_headers) == 2
    gt_line = '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'
    assert gt_line in format_headers

    # Body: a single record whose last four columns are the FORMAT key list
    # followed by one value column per registered sample.
    records = [line for line in lines if not line.startswith('#')]
    assert len(records) == 1
    columns = records[0].split('\t')
    assert len(columns) == 12
    assert columns[8:12] == [
        'ALTABUND:GT', '12,9,8:0/0', '0,0,0:0/0', '0,0,0:0/1'
    ]
Exemplo n.º 2
0
def test_writer_bad_fmt(yrb_writer):
    """Writing a variant with uneven per-sample FORMAT keys must fail.

    NA19238 is given only GT, NA19239 only ALTABUND, and NA19240 both; the
    write is expected to raise ``VariantAnnotationError``.
    """
    variant = Variant('1', 12345, 'G', 'C')
    variant.annotate('PART', '42')
    variant.annotate('CONTIG', 'A' * 100)

    uneven_calls = (
        ('NA19238', 'GT', '0/0'),
        ('NA19240', 'GT', '0/1'),
        ('NA19239', 'ALTABUND', '0,0,0'),
        ('NA19240', 'ALTABUND', '0,0,0'),
    )
    for sample, key, value in uneven_calls:
        variant.format(sample, key, value)

    expected = r'samples not annotated with the same FORMAT fields'
    with pytest.raises(kevlar.vcf.VariantAnnotationError, match=expected):
        yrb_writer.write(variant)
Exemplo n.º 3
0
def test_info():
    """Exercise storage and rendering of "info" attributes.

    Covers string conversion of FormattedList and the round trip through
    Variant.annotate() / Variant.attribute() for string, integer and float
    values, with and without ``replace=False``.
    """
    # --- FormattedList on its own -------------------------------------
    flist = FormattedList()
    assert str(flist) == '.'
    for item, rendered in ((42, '42'), (1776, '42,1776')):
        flist.append(item)
        assert str(flist) == rendered
    # Adding a string next to the integers makes str() raise.
    flist.append('B0gU$')
    with pytest.raises(kevlar.vcf.KevlarMixedDataTypeError):
        str(flist)

    # --- String-valued attribute --------------------------------------
    variant = Variant('1', 12345, 'G', 'C')
    assert variant.attribute('VW') is None

    variant.annotate('VW', 'AGTNNNNNNNNNNNNNNNNNNNNNTGA')
    assert variant.attribute('VW') == 'AGTNNNNNNNNNNNNNNNNNNNNNTGA'

    # A second annotate() call without replace=False overwrites the value.
    variant.annotate('VW', 'GATTACA')
    assert variant.attribute('VW') == 'GATTACA'
    assert variant.attribute('VW', pair=True) == 'VW=GATTACA'

    # With replace=False the new value is kept alongside the existing one.
    variant.annotate('VW', 'ATGCCCTAG', replace=False)
    two_windows = ['GATTACA', 'ATGCCCTAG']
    assert variant.info['VW'] == two_windows
    assert variant.attribute('VW') == two_windows
    assert variant.attribute('VW', string=True) == 'GATTACA,ATGCCCTAG'
    assert variant.attribute('VW', pair=True) == 'VW=GATTACA,ATGCCCTAG'

    variant.annotate('VW', 'AAAAAAAAA', replace=False)
    assert variant.attribute('VW') == ['GATTACA', 'ATGCCCTAG', 'AAAAAAAAA']
    assert (
        variant.attribute('VW', pair=True) == 'VW=GATTACA,ATGCCCTAG,AAAAAAAAA'
    )

    # --- Integer-valued attribute -------------------------------------
    variant.annotate('DROPPED', 3)
    assert variant.attribute('DROPPED') == 3
    assert variant.attribute('DROPPED', string=True) == '3'

    variant.annotate('DROPPED', 31, replace=False)
    assert variant.attribute('DROPPED') == [3, 31]
    assert variant.attribute('DROPPED', string=True) == '3,31'
    assert variant.attribute('DROPPED', pair=True) == 'DROPPED=3,31'

    # --- Float-valued attributes --------------------------------------
    # The expected strings carry three decimal places.
    for distance in (432.1234, 8765.4321):
        variant.annotate('MATEDIST', distance, replace=False)
    assert variant.attribute('MATEDIST', string=True) == '432.123,8765.432'

    variant.annotate('LLIH', -436.0111857750478)
    assert variant.attribute('LLIH', pair=True) == 'LLIH=-436.011'