Exemplo n.º 1
0
def call_cn_var(cnvtag, var_alt, var_ref, alt_forward, alt_reverse, var_list,
                var_db):
    """
    Call the copy number of each variant site in non-homology regions.

    The per-site total CN comes from ``get_total_cn_per_site`` and is used
    both for the raw per-site call and for the final filtering step.
    The last argument handed to ``call_reg1_cn`` depends on the site class
    (2 for sites in CLEAN_VAR, 7 for sites in NOISY_VAR, 4 otherwise);
    presumably this is the minimum read cutoff.

    For sites in NOISY_VAR that have supporting reads, the forward/reverse
    split of the supporting reads is tested against an even split with
    Fisher's exact test. The variant count is zeroed when the test p-value
    is below P_CUTOFF or when either strand has at most one read.

    Returns the output of ``process_raw_call_denovo`` for all sites.
    """
    total_cn = get_total_cn_per_site(cnvtag, var_db, var_list)
    assert total_cn is not None

    cn_prob = []
    for idx, fwd_reads in enumerate(alt_forward):
        rev_reads = alt_reverse[idx]
        ref_reads = var_ref[idx]
        alt_reads = var_alt[idx]

        if alt_reads > 0 and var_list[idx] in NOISY_VAR:
            # Compare the observed strand split with a perfectly even one.
            half = (fwd_reads + rev_reads) / 2
            _, pvalue = fisher_exact([[fwd_reads, rev_reads], [half, half]])
            if pvalue < P_CUTOFF or fwd_reads <= 1 or rev_reads <= 1:
                # Strand-biased or barely supported: drop the variant reads.
                alt_reads = 0

        site = var_list[idx]
        if site in CLEAN_VAR:
            cutoff = 2
        elif site in NOISY_VAR:
            cutoff = 7
        else:
            cutoff = 4
        cn_prob.append(call_reg1_cn(total_cn[idx], alt_reads, ref_reads,
                                    cutoff))

    return process_raw_call_denovo(cn_prob, 0.8, 0.65, total_cn)
Exemplo n.º 2
0
def call_exon9gc(d6_count, d7_count, full_length_cn):
    """
    Call exon 9 conversion from per-site read counts.

    d6_count and d7_count are parallel sequences of read counts.
    Returns None when full_length_cn is None, when the stringent per-site
    calls do not agree on a single value, or when the call made from the
    averaged counts does not confirm that value. Otherwise returns the
    confirmed call.
    """
    if full_length_cn is None:
        return None

    total_cn = int(full_length_cn)
    d6_values = []
    site_probs = []
    for idx, d6_reads in enumerate(d6_count):
        d7_reads = d7_count[idx]
        # Share of the total CN attributed to the first count at this site.
        d6_values.append(total_cn * d6_reads / (d6_reads + d7_reads))
        site_probs.append(call_reg1_cn(total_cn, d6_reads, d7_reads, 3))
    stringent_calls = process_raw_call_gc(site_probs, 0.88)

    # Distinct non-None calls across sites; exactly one is required.
    distinct_calls = list(
        {call for call in stringent_calls if call is not None})
    if len(distinct_calls) != 1:
        return None

    mean_d6 = np.mean(d6_count)
    mean_d7 = np.mean(d7_count)
    averaged = process_raw_call_gc(
        [call_reg1_cn(total_cn, mean_d6, mean_d7, 3)], 0.75)
    consensus = averaged[0]
    if consensus is not None and consensus == distinct_calls[0]:
        if consensus == 1:
            # A call of 1 additionally requires every per-site value to
            # stay low.
            if min(d6_values) < 1.2 and max(d6_values) < 1.3:
                return consensus
        else:
            return consensus

    return None
Exemplo n.º 3
0
def call_cn_var(cnvtag, lsnp1, lsnp2, var_list, var_db):
    """
    Call the copy number of each variant site in non-homology regions.

    lsnp1 and lsnp2 are parallel sequences of per-site read counts.
    The last argument handed to ``call_reg1_cn`` is 2 for sites listed in
    CLEAN_VAR and 4 for every other site (presumably the minimum read
    cutoff). The per-site total CN from ``get_total_cn_per_site`` feeds both
    the raw call and the final filtering in ``process_raw_call_denovo``.
    """
    total_cn = get_total_cn_per_site(cnvtag, var_db, var_list)
    assert total_cn is not None

    raw_calls = [
        call_reg1_cn(total_cn[idx], reads1, lsnp2[idx],
                     2 if var_list[idx] in CLEAN_VAR else 4)
        for idx, reads1 in enumerate(lsnp1)
    ]
    return process_raw_call_denovo(raw_calls, 0.8, 0.65, total_cn)
Exemplo n.º 4
0
def call_cn_snp(total_cn, lsnp1, lsnp2, threshold=0.6):
    """
    Call CN at the SNP sites that differ between CYP2D6 and CYP2D7.

    Each pair of counts from lsnp1/lsnp2 is called against the same
    total_cn, and the raw calls are then filtered by ``process_raw_call_gc``
    with ``threshold``. The default cutoff is loose because the result is
    used for CNV/hybrid group calling.
    """
    raw_calls = [
        call_reg1_cn(total_cn, reads1, lsnp2[idx])
        for idx, reads1 in enumerate(lsnp1)
    ]
    return process_raw_call_gc(raw_calls, threshold)
Exemplo n.º 5
0
def call_cn_var_homo(total_cn, lsnp1, lsnp2):
    """
    Call CN for variant sites located in homology regions.

    Sites that ``process_raw_call_denovo`` leaves as None stay None; every
    other call is capped at ``total_cn - 2``.
    """
    raw_calls = [
        call_reg1_cn(total_cn, reads1, lsnp2[idx], 4)
        for idx, reads1 in enumerate(lsnp1)
    ]
    return [
        None if call is None else min(call, total_cn - 2)
        for call in process_raw_call_denovo(raw_calls, 0.8, 0.65)
    ]
Exemplo n.º 6
0
def call_exon9gc(d6_count, d7_count, full_length_cn):
    """
    Call exon 9 conversion from a single pair of read counts.

    Args:
        d6_count: read count passed to ``call_reg1_cn`` as the first count.
        d7_count: read count passed to ``call_reg1_cn`` as the second count.
        full_length_cn: total copy number (any value ``int()`` accepts),
            or None when it could not be called.

    Returns:
        The first element of ``process_raw_call_gc`` applied to the single
        raw call with a 0.9 cutoff, or None when ``full_length_cn`` is None.
    """
    if full_length_cn is None:
        # No total CN means no call can be made. Returning None here avoids
        # reaching the final return with the result never assigned, which
        # would raise UnboundLocalError.
        return None

    full_length_cn = int(full_length_cn)
    cn_prob = [call_reg1_cn(full_length_cn, d6_count, d7_count, 3)]
    return process_raw_call_gc(cn_prob, 0.9)[0]
Exemplo n.º 7
0
def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth):
    """Return the copy number call of SMN1, SMN2 and SMNstar.

    The result is an ``smn_call`` namedtuple with the fields SMN1, SMN2,
    SMNstar, isCarrier, isSMA, SMN1_CN_raw, Info, Confidence, g27134TG_raw
    and g27134TG_CN.

    When the full-length CN (``exon78_cn`` after ``update_full_length_cn``)
    is None, only the zero-copy test at the splice site is run and most
    fields are None. Otherwise per-site and combined calls are made, the
    targeted variant is called, and SMN2 / SMNstar are derived by
    subtraction.

    Raises Exception when the exon 1-6 CN is smaller than the full-length CN.
    """
    smn1_fraction = get_fraction(lsnp1, lsnp2)
    smn_call = namedtuple(
        'smn_call', 'SMN1 SMN2 SMNstar isCarrier isSMA \
        SMN1_CN_raw Info Confidence g27134TG_raw g27134TG_CN')
    raw_cn_call = update_full_length_cn(raw_cn_call)
    exon78_cn = raw_cn_call.exon78_cn

    if exon78_cn is None:
        # Full-length CN is a no-call: fall back to the depth value and only
        # test for zero copies of SMN1 at the splice variant site.
        tag = 'FLCNnoCall'
        depth_cn = raw_cn_call.exon78_depth
        raw_smn1_cn = get_raw_smn1_cn(depth_cn, smn1_fraction)
        zero_ratio = smn1_cn_zero(lsnp1[SPLICE_INDEX], lsnp2[SPLICE_INDEX],
                                  mdepth)

        smn1 = smn2 = carrier = sma = None
        if zero_ratio > 1 / SMA_CUTOFF:
            # Zero-copy test passed: report SMN1 as 0 and SMN2 as a range
            # around the depth value.
            smn1 = 0
            smn2 = '%i-%i' % (math.floor(depth_cn), math.ceil(depth_cn))
            carrier = False
            sma = True
        elif zero_ratio < SMA_CUTOFF:
            sma = False
        # Otherwise the ratio is inconclusive and every status stays None.

        return smn_call(
            SMN1=smn1, SMN2=smn2, SMNstar=None, isCarrier=carrier, isSMA=sma,
            SMN1_CN_raw=raw_smn1_cn, Info=tag,
            Confidence=[None] * TOTAL_NUM_SITES,
            g27134TG_raw=None, g27134TG_CN=None)

    total_cn = int(exon78_cn)
    raw_smn1_cn = get_raw_smn1_cn(total_cn, smn1_fraction)

    # Raw call at every site, then one call on the reads pooled over
    # SELECTED_SITES_INDEX.
    cn_prob = [
        call_reg1_cn(total_cn, lsnp1[site], lsnp2[site])
        for site in range(TOTAL_NUM_SITES)
    ]
    pooled_smn1 = sum(lsnp1[site] for site in SELECTED_SITES_INDEX)
    pooled_smn2 = sum(lsnp2[site] for site in SELECTED_SITES_INDEX)
    combined_call = call_reg1_cn(total_cn, pooled_smn1, pooled_smn2)

    tag, cn_smn1, lsitecall_loose = get_smn1_call_and_tag(
        cn_prob, combined_call)
    zero_ratio = smn1_cn_zero(lsnp1[SPLICE_INDEX], lsnp2[SPLICE_INDEX],
                              mdepth)
    is_sma = get_sma_status(lsitecall_loose, cn_prob, cn_smn1, zero_ratio)
    is_carrier = get_carrier_status(lsitecall_loose, cn_prob, cn_smn1,
                                    zero_ratio)

    # Targeted variant: raw CN estimate plus a filtered call.
    var_fraction = get_fraction(var_alt, var_ref)
    raw_var_cn = get_raw_smn1_cn(total_cn, var_fraction)[0]
    var_calls = process_raw_call_denovo(
        [call_reg1_cn(total_cn, var_alt[0], var_ref[0])],
        POSTERIOR_CUTOFF_MEDIUM,
        POSTERIOR_CUTOFF_LOOSE,
        keep_none=False)
    var_cn_confident = None
    if var_calls != []:
        var_cn_confident = var_calls[0]
    # The variant CN is capped at the SMN1 CN when both are known.
    if (var_cn_confident is not None and cn_smn1 is not None
            and cn_smn1 < var_cn_confident):
        var_cn_confident = cn_smn1

    # SMNstar is the excess of exon 1-6 CN over full-length CN; SMN2 is
    # whatever full-length CN is not SMN1.
    cn_smnstar = None
    if raw_cn_call.exon16_cn is not None:
        cn_smnstar = int(raw_cn_call.exon16_cn) - total_cn
        if cn_smnstar < 0:
            raise Exception(
                'Total SMN CN is smaller than full-length SMN CN.')
    cn_smn2 = None if cn_smn1 is None else total_cn - cn_smn1

    return smn_call(
        SMN1=cn_smn1, SMN2=cn_smn2, SMNstar=cn_smnstar,
        isCarrier=is_carrier, isSMA=is_sma,
        SMN1_CN_raw=raw_smn1_cn, Info=tag, Confidence=cn_prob,
        g27134TG_raw=raw_var_cn, g27134TG_CN=var_cn_confident)