Пример #1
0
        def anno(line):
            line = line.rstrip().split('\t')

            chr, pos, _, ref, alt = line[:5]

            line[-3] += ':UDP'  # format
            line[-2] += ':'  # gdna
            line[-1] += ':'  # cfdna

            for i, sam in enumerate((o.cfdna, o.gdna)):
                if sam != None:
                    reads = get_reads(o, sam, chr, pos)
                    unique_pairs, unique_single, *_ = aggregate_reads(
                        o, reads, None if o.fast else pad_softclip(sam))
                    mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa, _ = count_different_type(
                        o, unique_pairs, unique_single, alt, ref)
                    if o.simple:
                        line[-i - 1] += ','.join(
                            map(str, (moa, mna + msa, ooa, ona + osa)))
                    else:
                        line[-i - 1] += ','.join(
                            map(str, (mor, mnr, msr, oor, onr, osr, moa, mna,
                                      msa, ooa, ona, osa)))

            print(file=fout, sep='\t', *line)
Пример #2
0
def test_aggregate_reads_2():
    "it should aggregate singles"

    o = Namespace(verbos=False, qual=20)

    reads = (("r1", 'A', 60, 2, 11, 0, 0, False, False),
             ("r2", 'C', 60, 2, 11, 0, 0, False, False), ("r3", 'C', 60, 2, 11,
                                                          0, 0, True, False))

    _, unique_single, *_ = aggregate_reads(o, reads)

    assert len(unique_single) == 2
Пример #3
0
def test_aggregate_reads_1():
    "it should aggregate pairs"

    o = Namespace(verbos=False, qual=20)

    reads = (("r1", 'A', 60, 2, 11, 4, 11, False, True), ("r1", 'A', 60, 4, 13,
                                                          2, -11, False, True),
             ("r2", 'C', 60, 2, 11, 4, 11, False, True), ("r2", 'C', 60, 4, 13,
                                                          2, -11, False, True))

    unique_pairs, *_ = aggregate_reads(o, reads)

    assert len(unique_pairs) == 1
Пример #4
0
def test_aggregate_reads_4():
    "it should ignore when base in overlap area inconsistent between two reads"

    o = Namespace(verbos=False, qual=20)

    reads = (("r1", 'A', 60, 2, 11, 4, 11, False, True), ("r1", 'C', 60, 4, 13,
                                                          2, -11, False, True),
             ("r2", 'C', 60, 3, 12, 5, 11, False, True), ("r2", 'C', 60, 5, 14,
                                                          3, -11, False, True))

    unique_pairs, unique_single, *_, ninconsis = aggregate_reads(o, reads)

    assert len(unique_pairs) == 1
    assert ninconsis == 2
Пример #5
0
def test_aggregate_reads_3():
    "it should ignore when 3+ reads share the same name"

    o = Namespace(verbos=False, qual=20)

    reads = (("r1", 'A', 60, 2, 11, 2, 9, False, True), ("r1", 'C', 60, 2, 11,
                                                         2, -9, False, True),
             ("r1", 'C', 60, 2, 11, 2, 9, True, True), ("r2", 'C', 60, 2, 11,
                                                        0, 0, True, False))

    unique_pairs, unique_single, _, nerror, *_ = aggregate_reads(o, reads)

    assert len(unique_pairs) == 0
    assert len(unique_single) == 1
    assert nerror == 3
Пример #6
0
def test_aggregate_reads_5():
    "it should drop reads that has too much mismatch"

    o = Namespace(verbos=False, qual=20, mismatch_limit=2)

    reads = (("r1", 'C', 60, 2, 11, 1, 4, 11, False,
              True), ("r1", 'C', 60, 4, 13, 1, 2, -11, True,
                      True), ("r2", 'C', 60, 3, 12, 3, 5, 11, False, True),
             ("r2", 'C', 60, 5, 14, 1, 3, -11, True,
              True), ("r3", 'C', 60, 6, 14, 3, 0, 0, True,
                      False), ("r4", 'C', 60, 7, 14, 1, 0, 0, True, False))

    unique_pairs, unique_single, *_, nlowq, ninconsis = aggregate_reads(
        o, reads)

    assert len(unique_pairs) == 1
    assert len(unique_single) == 1
    assert nlowq == 3