Пример #1
0
def dprimecalc(record1, record2, snpcheck = True):
    '''Calculates Lewontin's D' statistic (D/Dmax) between two VCF records.

    Will check that records are biallelic (single-ALT) SNPs unless
    snpcheck = False.

    record1, record2 -- the two VCF records to compare.
    snpcheck -- when truthy, snpchecker(record1, record2) is run first.

    Returns d divided by its bound, where d comes from dcalc and the bound
    is built from the 'p1', 'p2', 'q1', 'q2' entries of the second element
    returned by freqsgetter. Returns 0 when the bound is 0.
    '''
    if snpcheck:
        snpchecker(record1, record2)
    # The check (if requested) has already happened above, so the helpers
    # are told to skip it.
    values = freqsgetter(record1, record2, snpcheck = False)[1] # get allele frequencies
    d = dcalc(record1, record2, snpcheck = False)
    # The normalising bound depends on the sign of d. A d of exactly 0 takes
    # the first branch and yields 0 either way (0/bound, or the guard below).
    if d >= 0:
        bound = min(values['p1'] * values['q2'], values['p2'] * values['q1'])
    else:
        bound = max(-1 * values['p1'] * values['q1'], -1 * values['p2'] * values['q2'])
    if bound == 0:
        # Avoid dividing by zero; such pairs are reported as D' = 0.
        return 0
    return d/bound
Пример #2
0
 def quick_dcalc(record1, record2):
     '''Calculates the D statistic between two VCF records without running
     the SNP check (freqsgetter is called with snpcheck = False).

     Uses the third element returned by freqsgetter, looked up by the keys
     'AB', 'ab', 'Ab' and 'aB'. Returns
     hap['AB'] * hap['ab'] - hap['Ab'] * hap['aB'], where a product counts
     as 0 if either of its two keys is missing.
     '''
     hap_freqs = freqsgetter(record1, record2, snpcheck = False)[2]

     def pair_product(first, second):
         # A haplotype key that is absent makes the whole product 0.
         try:
             return hap_freqs[first] * hap_freqs[second]
         except KeyError:
             return 0

     first_term = pair_product('AB', 'ab')
     second_term = pair_product('Ab', 'aB')
     return first_term - second_term
Пример #3
0
def freqscalc(record1, record2, snpcheck = True, aaf = False):
    '''Exploratory convenience function. Given two VCF records, prints their
    allele frequencies and the observed two-locus haplotype frequencies.

    Will check that records are biallelic (single-ALT) SNPs unless
    snpcheck = False.

    record1, record2 -- the two VCF records to compare.
    snpcheck -- when truthy, snpchecker(record1, record2) is run first.
    aaf -- when truthy, use the AF values hardcoded in the VCF itself
        (record.aaf[0]); otherwise (default) take the frequencies from
        freqsgetter instead (more accurate option).

    Nothing is returned; all output goes to stdout. Strains whose GT at
    either record is not '0' or '1' (e.g. missing calls, '.') are left out
    of the haplotype tally. Haplotypes are printed in first-seen order.
    '''
    from collections import Counter

    if snpcheck:
        snpchecker(record1, record2)
    # get allele frequencies
    if aaf:
        p2 = record1.aaf[0]
        q2 = record2.aaf[0]
        p = 1 - p2
        q = 1 - q2
    else:
        # The SNP check was either done above or explicitly declined by the
        # caller, so it is not repeated inside freqsgetter.
        values = freqsgetter(record1, record2, snpcheck = False)[1]
        p = values['p1']
        q = values['q1']
        p2 = values['p2']
        q2 = values['q2']
    print(record1.CHROM, record1.POS, '- ref', record1.REF, 'alt', record1.ALT[0])
    print(record2.CHROM, record2.POS, '- ref', record2.REF, 'alt', record2.ALT[0])
    print('p1 ', p, 'p2 ', p2)
    print('q1 ', q, 'q2 ', q2)
    # get samples + check for same samples b/w both records
    strainlist = straingetter(record1, record2)
    # map each GT code to the allele string it stands for at that record
    alleles1 = {'0': record1.REF, '1': str(record1.ALT[0])}
    alleles2 = {'0': record2.REF, '1': str(record2.ALT[0])}
    # score haplotypes
    haplist = []
    for strain in strainlist:
        gt1 = record1.genotype(strain)['GT']
        gt2 = record2.genotype(strain)['GT']
        # Skip missing ('.') and any other unrecognised call rather than
        # counting a haplotype left over from a previous strain.
        if gt1 not in alleles1 or gt2 not in alleles2:
            continue
        haplist.append(alleles1[gt1] + alleles2[gt2]) # create list of observed genotypes
    total = len(haplist)
    # An empty tally prints nothing, so total is never 0 in the division.
    for hap, count in Counter(haplist).items():
        print(hap, round(count/total, 5))
Пример #4
0
def r2calc(record1, record2, snpcheck = True):
    '''Calculates r^2 (correlation) between two VCF records.

    Will check that records are biallelic (single-ALT) SNPs unless
    snpcheck = False.

    Returns 0 if any of the 'p1', 'q1', 'p2', 'q2' frequencies from
    freqsgetter is 0; otherwise returns dcalc(...) squared divided by the
    product of those four frequencies.
    '''
    if snpcheck:
        snpchecker(record1, record2)
    freqs = freqsgetter(record1, record2, snpcheck = False)[1]
    # Guard: a zero frequency would make the denominator zero.
    if any(freqs[key] == 0 for key in ('p1', 'q1', 'p2', 'q2')):
        return 0
    denominator = freqs['p1'] * freqs['q1'] * freqs['p2'] * freqs['q2']
    d_value = dcalc(record1, record2, snpcheck = False)
    return d_value**2/denominator
Пример #5
0
def dcalc(record1, record2, snpcheck = True):
    '''Calculates D statistic between two VCF records.

    Will check that records are biallelic (single-ALT) SNPs unless
    snpcheck = False.

    Uses the third element returned by freqsgetter, looked up by the keys
    'AB', 'ab', 'Ab' and 'aB'. Returns
    hap['AB'] * hap['ab'] - hap['Ab'] * hap['aB'], where a product counts
    as 0 if either of its two keys is missing.
    '''
    if snpcheck:
        snpchecker(record1, record2)
    hap_freqs = freqsgetter(record1, record2, snpcheck = False)[2]
    terms = []
    for first, second in (('AB', 'ab'), ('Ab', 'aB')):
        try:
            product = hap_freqs[first] * hap_freqs[second]
        except KeyError: # either hap missing
            product = 0
        terms.append(product)
    first_term, second_term = terms
    return first_term - second_term