Пример #1
0
def test_count_different_type_3():
    """Check the counts returned when ``allow_inconsist`` is switched on.

    Two paired entries each hold one 'A' and two 'T' observations.  With
    ``allow_inconsist=True`` nothing is reported as inconsistent and each
    entry contributes to the counters listed in ``expected`` below.
    """
    options = Namespace(verbos=False, allow_inconsist=True)

    observations = (('A', 60), ('T', 60), ('T', 60))
    pair = {
        key: list(observations)
        for key in (('ref', 2, True), ('ref', 3, False))
    }

    # Unpack all thirteen values so an arity change still fails loudly.
    (mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa,
     inconsis) = count_different_type(options, pair, {}, 'T', 'A')

    observed = {
        'mor': mor, 'mnr': mnr, 'msr': msr,
        'oor': oor, 'onr': onr, 'osr': osr,
        'moa': moa, 'mna': mna, 'msa': msa,
        'ooa': ooa, 'ona': ona, 'osa': osa,
        'inconsis': inconsis,
    }
    expected = {
        'mor': 0, 'mnr': 0, 'msr': 0,
        'oor': 1, 'onr': 1, 'osr': 0,
        'moa': 1, 'mna': 1, 'msa': 0,
        'ooa': 0, 'ona': 0, 'osa': 0,
        'inconsis': 0,
    }

    for name, want in expected.items():
        assert observed[name] == want, name
Пример #2
0
        def anno(line):
            """Annotate one tab-separated record with per-sample counts.

            The record is split on tabs; its first five fields are read as
            chromosome, position, (ignored), ref and alt.  ``:UDP`` is
            appended to the third-from-last field (format), and a ``:``
            followed by comma-joined counts is appended to the last field
            (cfdna) and the second-to-last field (gdna).  A sample that is
            ``None`` only receives the bare ``:``.  The finished record is
            written tab-separated to ``fout``.

            ``o`` and ``fout`` come from the enclosing scope.  When
            ``o.simple`` is set only four numbers are emitted
            (``moa, mna + msa, ooa, ona + osa``); otherwise all twelve
            counters from ``count_different_type`` are emitted.
            """
            line = line.rstrip().split('\t')

            # ``chrom`` rather than ``chr`` so the builtin is not shadowed.
            chrom, pos, _, ref, alt = line[:5]

            line[-3] += ':UDP'  # format
            line[-2] += ':'  # gdna
            line[-1] += ':'  # cfdna

            # Index 0 -> cfdna -> line[-1]; index 1 -> gdna -> line[-2].
            for i, sam in enumerate((o.cfdna, o.gdna)):
                if sam is None:
                    continue

                reads = get_reads(o, sam, chrom, pos)
                unique_pairs, unique_single, *_ = aggregate_reads(
                    o, reads, None if o.fast else pad_softclip(sam))
                (mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa,
                 _) = count_different_type(o, unique_pairs, unique_single,
                                           alt, ref)

                if o.simple:
                    counts = (moa, mna + msa, ooa, ona + osa)
                else:
                    counts = (mor, mnr, msr, oor, onr, osr, moa, mna, msa,
                              ooa, ona, osa)
                line[-i - 1] += ','.join(map(str, counts))

            print(*line, sep='\t', file=fout)
Пример #3
0
def test_count_different_type_1():
    """Entries with disagreeing bases are reported as inconsistent by default.

    Author's stated intent: DNA for which more than 10% of the reads show a
    different base must NOT be counted.  Both paired entries here mix one
    'A' with two 'T' observations, so both are expected to land in the
    inconsistency counter and every other counter stays at zero.
    """
    options = Namespace(verbos=False)

    observations = (('A', 60), ('T', 60), ('T', 60))
    pair = {
        key: list(observations)
        for key in (('ref', 2, True), ('ref', 3, False))
    }

    (mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa,
     inconsis) = count_different_type(options, pair, {}, 'T', 'A')

    assert inconsis == 2

    counted = (mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa)
    assert sum(counted) == 0
Пример #4
0
def test_count_different_type_2():
    """Basic non-trivial case mixing paired and single inputs.

    Three keys are supplied in both the paired and the single mapping, with
    a mixture of unanimous 'A', unanimous 'T' and split observations.  The
    expected value of every counter returned by ``count_different_type`` is
    listed in ``expected`` below.
    """
    options = Namespace(verbos=False)

    first, second, third = (2, 5, True), (3, 5, False), (4, 5, False)

    pair = {
        first: [('A', 60), ('A', 60)],
        second: [('T', 60), ('T', 60), ('T', 60)],
        third: [('T', 60)],
    }

    single = {
        first: [('T', 60), ('A', 60)],
        second: [('T', 60), ('T', 60), ('T', 60)],
        third: [('A', 60)],
    }

    (mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa,
     inconsis) = count_different_type(options, pair, single, 'T', 'A')

    observed = {
        'mor': mor, 'mnr': mnr, 'msr': msr,
        'oor': oor, 'onr': onr, 'osr': osr,
        'moa': moa, 'mna': mna, 'msa': msa,
        'ooa': ooa, 'ona': ona, 'osa': osa,
        'inconsis': inconsis,
    }
    expected = {
        'mor': 1, 'mnr': 0, 'msr': 0,
        'oor': 0, 'onr': 0, 'osr': 1,
        'moa': 0, 'mna': 1, 'msa': 1,
        'ooa': 0, 'ona': 1, 'osa': 0,
        'inconsis': 1,
    }

    for name, want in expected.items():
        assert observed[name] == want, name
Пример #5
0
def _continuous_sample_calls(o, sample, contig, poss, ref, alt):
    """Fetch the reads spanning ``poss`` from ``sample`` and run ``snv_mut``.

    ``sample`` only needs a ``fetch(contig, start, stop)`` method.  The
    return value is whatever ``snv_mut`` returns; the caller unpacks it as
    ``(mut_set, name_dict, unique_pairs, unique_single)``.
    """
    reads = list(sample.fetch(contig, poss[0], poss[1] + 1))
    new_reads = get_infor(o, reads, poss, ref)
    return snv_mut(new_reads, o, ref, alt, None)


def _continuous_sample_fields(o, name_dict, unique_pairs, unique_single, ref,
                              alt):
    """Return ``(counts, extra)`` comma-joined strings for one sample.

    ``counts`` holds the first twelve values of ``count_different_type``
    (the thirteenth is discarded); ``extra`` holds the eleven values of
    ``extra_info``.
    """
    (mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa,
     _) = count_different_type(o, unique_pairs, unique_single, alt, ref)
    (nq10_a, nterminal_a, nmulti_a, noverlap_pe_a, noverlap_se_a, numi_a,
     nCN_1_a, nCN_2_a, nNonterminalMolecule_a, ave_mapqual_a,
     ave_insertSize_a) = extra_info(o, name_dict, ref, alt)

    counts = ','.join(
        map(str, (mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa)))
    extra = ','.join(
        map(str, (nq10_a, nterminal_a, nmulti_a, noverlap_pe_a, noverlap_se_a,
                  numi_a, nCN_1_a, nCN_2_a, nNonterminalMolecule_a,
                  ave_mapqual_a, ave_insertSize_a)))
    return counts, extra


def output(poss, info, o, fout):
    """Merge two adjacent variant records into one and annotate it.

    Args:
        poss: positions of the candidate run; used as keys into ``info`` and
            as the fetch window ``[poss[0], poss[1] + 1)``.
        info: mapping from position to the tab-separated record text.
        o: options object; ``o.verbos``, ``o.cfdna`` and ``o.gdna`` are read
            here and ``o`` is passed through to the helper routines.
        fout: file object that receives the merged record.

    Behaviour:
        * With more than two positions the original records are printed
          unchanged to standard output and nothing is written to ``fout``.
        * Otherwise ref/alt of both records are concatenated and the cfdna
          reads are tested with ``snv_mut``.  If no mutation is found the
          original records are again printed unchanged to standard output.
        * On success the first record is rewritten (``:UDP`` appended to the
          third-from-last field, merged ref/alt, third field set to
          ``int(second field) + 1``), cfdna counts are appended to the last
          field, gdna counts to the second-to-last field, and the gdna and
          cfdna extra-info strings are added as two new trailing fields
          before the record is written tab-separated to ``fout``.
    """
    if o.verbos:
        print("continus", poss)

    # Runs longer than two positions are not merged; echo them untouched.
    if len(poss) > 2:
        for i in poss:
            print(info[i])
        return

    sep1 = info[poss[0]].split('\t')
    sep2 = info[poss[1]].split('\t')
    ref = sep1[3] + sep2[3]
    alt = sep1[4] + sep2[4]

    mut_set, name_dict, unique_pairs, unique_single = _continuous_sample_calls(
        o, o.cfdna, sep1[0], poss, ref, alt)
    if len(mut_set) == 0:
        if o.verbos:
            print('continus fail ', poss)
        for i in poss:
            print(info[i])
        return

    sep1[-3] += ':UDP'
    sep1[3] = ref
    sep1[4] = alt
    sep1[2] = int(sep1[1]) + 1

    if o.verbos:
        print("test\t", mut_set, sep1[0], poss[0], ref, sep1[3], sep1[4], alt,
              len(name_dict), len(unique_pairs), len(unique_single))

    # cfdna: counts go into the last existing field.
    counts_1, extra_1 = _continuous_sample_fields(
        o, name_dict, unique_pairs, unique_single, ref, alt)
    sep1[-1] += ':' + counts_1

    # gdna: same pipeline, counts go into the second-to-last field.
    _, name_dict, unique_pairs, unique_single = _continuous_sample_calls(
        o, o.gdna, sep1[0], poss, ref, alt)
    counts_2, extra_2 = _continuous_sample_fields(
        o, name_dict, unique_pairs, unique_single, ref, alt)
    sep1[-2] += ':' + counts_2

    # Extra-info columns are appended gdna first, then cfdna.
    sep1.append(extra_2)
    sep1.append(extra_1)
    print(*sep1, sep="\t", file=fout)
Пример #6
0
def output(chr, pos, alt, variation_info, o, fout, reads_pos, k, rep_ref):
    """Annotate the record(s) at one site for one sample and update the cache.

    Args:
        chr, pos, alt: site identity; ``chr + str(pos) + alt`` is the key
            into ``variation_info``.
        variation_info: dict mapping that key to the record as a list of
            fields.  It is mutated in place and also returned.
        o: options object; ``o.fast`` is read here and ``o`` is passed on to
            the helper routines.
        fout: file object receiving finished records (only when ``k == 1``).
        reads_pos: reads handed to ``get_infor`` for this position.
        k: sample pass.  ``0`` appends ``:UDP`` to the format field and
            stores every newly seen alternative; ``1`` additionally writes
            each record to ``fout`` and removes it from ``variation_info``.
            Per the original comments, pass 0 is gdna and pass 1 is cfdna.
        rep_ref: passed to ``repeat_area``.

    Returns:
        The same ``variation_info`` dict.

    Raises:
        Exception: if the stored gdna counts cannot be zeroed for an
            alternative first seen in pass 1; the original error is chained
            as ``__cause__``.
    """
    tmp = ''.join((chr, str(pos), alt))
    if k == 0:
        variation_info[tmp][-3] += ':UDP'
    line = deepcopy(variation_info[tmp])

    ref = variation_info[tmp][3]

    # NOTE(review): the result is not used below (it was only referenced by
    # a since-disabled variant of the zero placeholder that prefixed it).
    # The call is kept in case repeat_area has side effects -- confirm.
    repeat = repeat_area(rep_ref, chr, pos + 1, o)
    pos_set = [pos]
    # get_infor takes a list of positions so it can be shared with
    # continous.py.
    new_reads = get_infor(o, reads_pos, pos_set, ref)
    # NOTE(review): ``sam`` is not a parameter or local of this function; it
    # must exist as a global when ``o.fast`` is false, otherwise this line
    # raises NameError -- confirm against the rest of the module.
    mut_set, name_dict, unique_pairs, unique_single = snv_mut(
        new_reads, o, ref, alt, None if o.fast else pad_softclip(sam))

    for each_mut in mut_set:
        tmp = ''.join((chr, str(pos), each_mut))
        if each_mut != alt:
            # MNV (multiple nucleotide variation)
            if k == 0:
                # Novel mutation in gdna: start from a copy of the record.
                variation_info[tmp] = deepcopy(line)
                variation_info[tmp][4] = each_mut

            else:
                if tmp not in variation_info:
                    # cfdna has a novel mutation that gdna does not.
                    # (A mutation seen only in gdna is never output.)
                    variation_info[tmp] = deepcopy(line)
                    # The copy carries the gdna-produced fields.
                    variation_info[tmp][4] = each_mut

                    # Pre-bind the diagnostics so the handler below cannot
                    # itself fail on an unbound name and hide the real error.
                    gdna_alt = None
                    i = None
                    try:
                        gdna_alt = variation_info[tmp][-3].split(
                            ':')[-1].split(',')
                        # Zero the last six counters of the copied gdna
                        # field.
                        for i in range(-6, 0):
                            gdna_alt[i] = '0'
                        variation_info[tmp][-3] = ':'.join(
                            (variation_info[tmp][-3].split(':')[:-1]
                             )) + ":" + ','.join(gdna_alt)
                    except Exception as err:
                        # Narrowed from a bare ``except`` so that
                        # KeyboardInterrupt/SystemExit propagate untouched.
                        raise Exception(gdna_alt, i,
                                        variation_info[tmp]) from err

                    # NOTE(review): nine zeros are written here while
                    # extra_info below yields eleven values -- confirm the
                    # intended width.
                    extra_2 = ','.join(map(str, [0] * 9))
                    variation_info[tmp][-1] = extra_2

        mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa, _ = count_different_type(
            o, unique_pairs, unique_single, each_mut, ref)
        (nq10_a, nterminal_a, nmulti_a, noverlap_pe_a, noverlap_se_a, numi_a,
         nCN_1_a, nCN_2_a, nNonterminalMolecule_a, ave_mapqual_a,
         ave_insertSize_a) = extra_info(o, name_dict, ref, each_mut)
        variation_info[tmp][-2] += ':' + ','.join(
            map(str,
                (mor, mnr, msr, oor, onr, osr, moa, mna, msa, ooa, ona, osa)))
        extra_2 = ','.join(
            map(str,
                (nq10_a, nterminal_a, nmulti_a, noverlap_pe_a, noverlap_se_a,
                 numi_a, nCN_1_a, nCN_2_a, nNonterminalMolecule_a,
                 ave_mapqual_a, ave_insertSize_a)))
        variation_info[tmp].append(extra_2)

        if k == 1:
            # Second pass: the record is complete, emit it and drop it.
            print(file=fout, sep='\t', *variation_info[tmp])
            fout.flush()
            del variation_info[tmp]

    return variation_info