Exemplo n.º 1
0
def identify_zerops(region_list, zero_list):
    """Collect the regions that contain at least one entry of ``zero_list``.

    Every zero entry is checked against every region.  A region is flagged
    when the two chromosomes are equal as strings and
    ``pc_toolbox.in_interval`` accepts the entry's position for the region's
    start/end (all three are passed as floats).  A notice is printed the
    first time a region is flagged.

    Despite the wording of the printed notice, ``region_list`` itself is not
    modified by this function.

    Returns:
        A ``(region_list, flagged)`` pair: the untouched input list and the
        flagged regions in the order in which they were first matched.
    """
    notice = """
***********************************************************
    
        NOTE: REMOVING Region {0} from first list due to:
            {1} at chr{2}:{3} with p=0.
        
***********************************************************
"""
    flagged = []
    for zero in zero_list:
        for region in region_list:
            if str(zero.chro) != str(region.chro):
                continue
            inside = pc_toolbox.in_interval(
                float(zero.pos), float(region.start), float(region.end)
            )
            if not inside:
                continue
            if region in flagged:
                # Already reported for an earlier zero entry.
                continue
            flagged.append(region)
            print(notice.format(region.ID, zero.lz, zero.chro, zero.pos))
            sys.stdout.flush()
    sys.stdout.flush()
    return region_list, flagged
Exemplo n.º 2
0
def read_meta(meta_loc, region, outfolder, build, annot_dict=None):
    """Write the meta-analysis rows that fall inside ``region`` to a table.

    The output file is ``<outfolder>/<region.band>.tbl``.  It always starts
    with a tab-separated title row; one row is then appended for every line
    of ``meta_loc`` whose chromosome equals ``region.chro`` (compared as
    ints), whose position is accepted by ``pc_toolbox.in_interval`` and whose
    weight is greater than the module-level ``WEIGHT_MIN``.

    Args:
        meta_loc: Path of the whitespace-delimited meta table.  Its first
            line is handed to ``pc_toolbox.read_meta_titles`` to obtain the
            column indices.
        region: Object providing ``chro``, ``start``, ``end`` and ``band``.
            ``start`` and ``end`` are multiplied by 1e6 before being compared
            with the table's positions.
        outfolder: Directory in which the ``.tbl`` file is created.
        build: Label used in the title of the position column.
        annot_dict: Mapping from the table's SNP identifier to an object
            with ``rs`` and ``name`` attributes.  It is indexed for every
            row that is written, so the ``None`` default only works when no
            row qualifies.
    """
    standardizer = 1e6
    reg_chr = region.chro
    reg_start = int(float(region.start) * standardizer)
    reg_end = int(float(region.end) * standardizer)
    outfile = os.path.join(outfolder, region.band + '.tbl')
    title_list = ['imchip_name', 'rs_name', 'chr', '{0}_pos'.format(build),
                  'p-value', 'z-score', 'weight', 'a1', 'a2']

    meta_indices = None
    header_pending = True
    # Both files are managed by ``with`` so the output handle is closed even
    # if a malformed row raises part-way through.
    with open(outfile, mode="w") as out:
        out.write('\t'.join(title_list) + '\n')
        with open(meta_loc, mode="r") as meta:
            for line in meta:
                if header_pending:
                    meta_indices = pc_toolbox.read_meta_titles(line)
                    header_pending = False
                    continue
                line_list = line.strip().split()
                snp_pos = int(line_list[meta_indices['pos']])
                weight = line_list[meta_indices['weight']]
                same_chr = int(reg_chr) == int(line_list[meta_indices['chr']])
                if not (same_chr and pc_toolbox.in_interval(
                        snp_pos, reg_start, reg_end)):
                    continue
                if not float(weight) > WEIGHT_MIN:
                    continue
                p_val = line_list[meta_indices['p']]
                lz_snp = line_list[meta_indices['snp']]
                z_score = line_list[meta_indices['z']]
                a1 = line_list[meta_indices['a1']]
                a2 = line_list[meta_indices['a2']]
                snp_rs = annot_dict[lz_snp].rs
                snp_im = annot_dict[lz_snp].name
                # str() keeps the join working when region.chro is an int.
                out.write('\t'.join([snp_im, snp_rs, str(reg_chr),
                                     str(snp_pos), p_val, z_score, weight,
                                     a1, a2]) + '\n')
def snp_in_interval(region_tuple, line_list, index_dict):
    """Report whether the SNP on a split table row lies inside a region.

    Args:
        region_tuple: ``(chromosome, start, end)`` of the region.
        line_list: The row, already split into fields.
        index_dict: Maps ``'pos'`` and ``'chr'`` to field indices.

    Returns:
        ``True`` when the row's chromosome equals the region's (both
        compared as ints) and ``pc_toolbox.in_interval`` accepts the row's
        position for the region bounds; ``False`` otherwise.
    """
    chrom, lower, upper = region_tuple
    position = int(line_list[index_dict['pos']])
    same_chrom = int(chrom) == int(line_list[index_dict['chr']])
    if same_chrom and pc_toolbox.in_interval(position, lower, upper):
        return True
    return False
Exemplo n.º 4
0
def read_assoc(assoc_loc, gene_name, gene_region_dict):
    """Find the lowest-p-value SNP of an association table inside a region.

    Args:
        assoc_loc: Path of the whitespace-delimited association table.  The
            first line is handed to ``read_assoc_titles`` to obtain the
            column indices.
        gene_name: Key into ``gene_region_dict``; also stored in the result.
        gene_region_dict: Maps gene names to ``(chr, start, end)``.  ``chr``
            is compared directly with ``int(<chr column>)``, so it has to be
            an int for any row to match.

    Returns:
        ``(best, ties)``.  ``best`` is the list
        ``[chr, start, end, gene_name, snp, lz_snp, p, pos, note]`` (all
        strings) for the retained SNP, or ``[]`` when no row qualified.
        ``ties`` holds ``(snp, lz_snp, p)`` tuples for SNPs sharing the
        retained p-value; it is emptied whenever a strictly better SNP is
        found.

    Rows whose p-value column is ``'NA'`` are ignored.
    """
    reg_chr, reg_start, reg_end = gene_region_dict[gene_name]
    old_p = 1
    old_out = []
    multi_counter = 1
    sig_list = []
    assoc_indices = None
    header_pending = True

    with open(assoc_loc, mode="r") as assoc:
        for line in assoc:
            if header_pending:
                assoc_indices = read_assoc_titles(line)
                header_pending = False
                continue

            line_list = line.strip().split()
            snp_pos = int(line_list[assoc_indices['pos']])
            if line_list[assoc_indices['p']] == 'NA':
                continue
            in_region = (
                reg_chr == int(line_list[assoc_indices['chr']])
                and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)
            )
            if not in_region:
                continue

            cur_p = float(line_list[assoc_indices['p']])
            cur_snp, lz_snp = pc_toolbox.correct_snp(assoc_indices, line_list)
            cur_out = [str(reg_chr), str(reg_start), str(reg_end), gene_name,
                       cur_snp, lz_snp, str(cur_p), str(snp_pos), '--']

            # A SNP whose p-value equals the initial threshold of 1 has
            # nothing to tie with while no SNP is retained yet; treat it as
            # the first candidate instead of indexing the empty ``old_out``.
            first_candidate = cur_p == old_p and not old_out
            if cur_p < old_p or first_candidate:
                multi_counter = 1
                old_p = cur_p
                old_out = cur_out
                sig_list = []
            elif cur_p == old_p:
                multi_counter = multi_counter + 1
                cur_tup = (cur_snp, lz_snp, str(cur_p))
                old_tup = (old_out[4], old_out[5], str(old_p))
                sig_list.append(cur_tup)
                if old_tup not in sig_list:
                    sig_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(old_out[4],cur_snp, cur_p))
                print('There are no Z-scores with which to decide between them, so I arbitrarily retain {0}.'.format(old_out[4]))
                old_out[8] = '{0}SNPs(p={1})'.format(multi_counter, old_p)
                print(old_out[8])

    return old_out, sig_list
Exemplo n.º 5
0
def filter_by_region(region, file_in, file_out, index_dict):
    """Copy the first line and the in-region rows of a table to a new file.

    Every line written is the input line re-joined with tabs.  The first
    line of ``file_in`` is always copied.  Later lines are copied only when
    their chromosome equals ``region.chro`` (both compared as ints) and
    ``pc_toolbox.in_interval`` accepts their position for ``region.start``
    and ``region.end``, which are passed through unchanged.

    Args:
        region: Object providing ``chro``, ``start`` and ``end``.
        file_in: Path of the whitespace-delimited input table.
        file_out: Path of the file to create.
        index_dict: Maps ``'pos'`` and ``'chr'`` to column indices.
    """
    with open(file_in, mode="r") as source, open(file_out, mode="w") as sink:
        for row_number, raw in enumerate(source):
            fields = raw.strip().split()
            tabbed = '\t'.join(fields) + '\n'
            if row_number == 0:
                sink.write(tabbed)
                continue
            bp = fields[index_dict['pos']]
            chro = fields[index_dict['chr']]
            if int(region.chro) != int(chro):
                continue
            if pc_toolbox.in_interval(int(bp), region.start, region.end):
                sink.write(tabbed)
def read_table(table_loc, region_list, assoc, annot_dict):
    """Collect significant SNPs of a table that lie outside all given regions.

    A row is kept when its p-value field is numeric according to
    ``pc_toolbox.is_number``, is not greater than the module-level
    ``P_BOUND``, and the row does not fall inside any region of
    ``region_list``.  Kept rows are converted with ``store_line``.

    Args:
        table_loc: Path of the whitespace-delimited table.
        region_list: Regions providing ``chro``, ``start`` and ``end``;
            ``start``/``end`` are multiplied by 1e6 before being compared
            with the table's positions.
        assoc: When true the title line is parsed with
            ``pc_toolbox.read_assoc_titles(line, .95, 'logistic')``,
            otherwise with ``pc_toolbox.read_meta_titles(line)``.
        annot_dict: Passed through to ``store_line``.

    Returns:
        The kept records sorted by their ``chro`` and then ``p`` attributes.
    """
    standardizer = 1e6
    # Parse the region bounds once instead of once per table row.
    bounds = [
        (int(region.chro),
         int(float(region.start) * standardizer),
         int(float(region.end) * standardizer))
        for region in region_list
    ]
    snp_list = []
    table_indices = None
    header_pending = True

    with open(table_loc, mode="r") as table:
        for line in table:
            if header_pending:
                if assoc:
                    table_indices = pc_toolbox.read_assoc_titles(line, .95, 'logistic')
                else:
                    table_indices = pc_toolbox.read_meta_titles(line)
                header_pending = False
                continue

            line_list = line.strip().split()
            snp_pos = int(line_list[table_indices['pos']])
            chro = int(line_list[table_indices['chr']])
            p_str = line_list[table_indices['p']]
            if not pc_toolbox.is_number(p_str) or float(p_str) > P_BOUND:
                continue
            # NOTE(review): assumes pc_toolbox.in_interval has no side
            # effects, so the scan may stop at the first enclosing region.
            in_reg = any(
                reg_chr == chro
                and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)
                for reg_chr, reg_start, reg_end in bounds
            )
            if not in_reg:
                snp_list.append(store_line(line, table_indices, assoc, annot_dict))

    # Call-style print with a single argument emits the same text on
    # Python 2 and is valid syntax on Python 3.
    print(snp_list)
    sys.stdout.flush()
    tuple_list = sorted(snp_list, key=attrgetter('chro', 'p'))
    print(snp_list)
    print(tuple_list)
    sys.stdout.flush()
    return tuple_list
Exemplo n.º 7
0
def remove_zerops(region_list, zero_list):
    """Remove, in place, every region that contains an entry of ``zero_list``.

    A region matches a zero entry when their chromosomes are equal as floats
    and ``pc_toolbox.in_interval`` accepts the entry's position for the
    region's start/end (all passed as floats).  A notice is printed for each
    removal.

    Args:
        region_list: Mutable list of regions providing ``chro``, ``start``,
            ``end`` and ``band``.  It is modified in place.
        zero_list: Entries providing ``chro``, ``pos`` and ``lz``.

    Returns:
        The same ``region_list`` object, after the removals.
    """
    for zero in zero_list:
        # Iterate over a snapshot: removing from the list being iterated
        # would make the loop skip the element after each removed region.
        for region in list(region_list):
            if float(zero.chro) == float(region.chro) and pc_toolbox.in_interval(
                    float(zero.pos), float(region.start), float(region.end)):
                print('''
***********************************************************
    
        NOTE: REMOVING Region {0} from list due to:
            {1} at chr{2}:{3} with p=0.
        
***********************************************************
'''.format(region.band,zero.lz,zero.chro,zero.pos))
                sys.stdout.flush()
                region_list.remove(region)

    return region_list
Exemplo n.º 8
0
def read_table(table_loc, gene, table_type, annot_dict=None):
    """Pick the most significant SNP inside ``gene``'s region from a table.

    Rows are considered when their chromosome equals ``gene.chro`` (both
    compared as ints) and ``pc_toolbox.in_interval`` accepts their position
    for ``gene.start``/``gene.end`` multiplied by 1e6.  A row whose weight is
    below the module-level ``weight_min`` is marked ``checkme == '1'`` and
    can never replace the retained SNP; the best such row is tracked
    separately as the "low weight" SNP.

    Args:
        table_loc: Path of the whitespace-delimited table.
        gene: Object providing ``chro``, ``start``, ``end``, ``sym`` and
            ``ID``.
        table_type: ``'META'`` parses the title line with
            ``pc_toolbox.read_meta_titles``; ``'FAMILY'`` uses
            ``pc_toolbox.read_fam_titles``.  No other value is handled.
        annot_dict: Mapping from the table's SNP identifier to an object
            with ``rs`` and ``name`` attributes.  It is indexed for every
            in-region row, so the ``None`` default only works when no row
            falls inside the region.

    Returns:
        ``(best, ties, low_weight_lz)``.  ``best`` is a dict of strings
        describing the retained SNP (placeholder values with ``p == '1'``
        when nothing qualified).  ``ties`` lists ``ZTup(snp, lz, p, z, w)``
        records for SNPs sharing the retained p-value, ordered by decreasing
        ``|z|``.  ``low_weight_lz`` is the ``lz`` identifier of the tracked
        low-weight SNP, or ``None``.
    """
    standardizer = 1e6
    reg_chr = gene.chro
    reg_start = int(float(gene.start) * standardizer)
    reg_end = int(float(gene.end) * standardizer)
    ZTup = namedtuple('ZTup', 'snp,lz,p,z,w')

    # Both records exist before the file is read, so a completely empty file
    # returns the placeholders instead of failing on an unbound name.
    old_out = {'chr':'','ID':'','start':'',
               'end':'','sym':'','snp':'','im':'',
               'lz':'','p':'1','pos':'',
               '|z|':'0','z':'0','note':'--',
               'weight':'','maf':'','meta_a1':'',
               'meta_a2':'', 'checkme':''}
    low_weight_out = {'chr':'','ID':'','start':'',
                      'end':'','sym':'','snp':None,'im':None,
                      'lz':None,'p':'1','pos':'',
                      '|z|':'0','z':'0','note':'--',
                      'weight':'','maf':'','meta_a1':'',
                      'meta_a2':''}
    multi_counter = 1
    zero_list = []
    header_pending = True

    with open(table_loc, mode="r") as table:
        for line in table:
            if header_pending:
                if table_type == 'META':
                    index_dict = pc_toolbox.read_meta_titles(line)
                elif table_type == 'FAMILY':
                    index_dict = pc_toolbox.read_fam_titles(line)
                header_pending = False
                continue

            line_list = line.strip().split()
            snp_pos = int(line_list[index_dict['pos']])
            in_region = (
                int(reg_chr) == int(line_list[index_dict['chr']])
                and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)
            )
            if not in_region:
                continue

            old_p = float(old_out['p'])
            cur_p = float(line_list[index_dict['p']])
            lz_snp = line_list[index_dict['snp']]
            cur_w = float(line_list[index_dict['weight']])
            cur_a1 = line_list[index_dict['a1']]
            cur_a2 = line_list[index_dict['a2']]
            cur_z = line_list[index_dict['z']]
            cur_abs_z = abs(float(cur_z))
            cur_snp = annot_dict[lz_snp].rs
            cur_im = annot_dict[lz_snp].name
            cur_out = {'chr':gene.chro,'ID':gene.ID,'start':gene.start,
                       'end':gene.end,'sym':gene.sym,'snp':cur_snp,'im':cur_im,
                       'lz':lz_snp,'p':str(cur_p),'pos':str(snp_pos),
                       '|z|':str(cur_abs_z),'z':cur_z,'note':'--',
                       'weight':str(cur_w), 'meta_a1':cur_a1,'meta_a2':cur_a2}
            # '1' marks a SNP whose weight is too low to be retained.
            cur_out['checkme'] = '1' if cur_w < weight_min else '0'

            if cur_p == old_p:
                multi_counter = multi_counter + 1
                cur_tup = ZTup(snp=cur_out['snp'],lz=cur_out['lz'],
                               p=cur_out['p'],z=cur_out['|z|'],w=cur_out['weight'])
                old_tup = ZTup(snp=old_out['snp'],lz=old_out['lz'],
                               p=old_out['p'],z=old_out['|z|'],w=old_out['weight'])
                zero_list.append(cur_tup)
                if old_tup not in zero_list:
                    zero_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(old_out['snp'],cur_snp, cur_p))
                print('{0} has a z-score of: {1}'.format(old_out['snp'],old_out['z']))
                print('{0} has a z-score of: {1}'.format(cur_out['snp'],cur_out['z']))
                # |z| is stored as text; compare numerically so that e.g.
                # 10.5 beats 9.2 (as strings '10.5' < '9.2').
                if cur_abs_z > float(old_out['|z|']) and cur_out['checkme']=='0':
                    old_p = cur_p
                    old_out.update(cur_out)
                print('Retaining {0} as the significant SNP based on z-score value.'.format(old_out['snp']))
                old_out['note']='{0}SNPs(p={1})'.format(multi_counter, old_p)
            elif cur_p < old_p:
                if cur_out['checkme']=='0':
                    multi_counter = 1
                    old_p = cur_p
                    old_out.update(cur_out)
                    zero_list = []
                    if low_weight_out['p'] != '1' and cur_p < float(low_weight_out['p']):
                        # The new best SNP beats the tracked low-weight SNP;
                        # reset the fields that identify the latter.
                        low_weight_out['p']='1'
                        low_weight_out['lz']=None
                        print("Wiping low weight snp!")
                elif cur_p < float(low_weight_out['p']):
                    low_weight_out.update(cur_out)
                    print('''Adding {0} to low weight SNP list due to
weight = {1}
p-value = {2}'''.format(cur_snp, low_weight_out['weight'],low_weight_out['p']))

    # Order ties by numeric |z|, largest first.
    sorted_list = sorted(zero_list, key=lambda member: float(member[3]), reverse=True)
    return old_out, sorted_list, low_weight_out['lz']
Exemplo n.º 9
0
                             't':'0','note':'--','checkme':'0'}
            else:
                line_info = read_assoc(line,.95, 'logistic')
##
##
##
##
##NOTE NOTE NOTE THIS IS WHERE EDITING STOPPED!!!! RESUME CHANGES HERE!!!!!!!!!!!!!
##
##
##
##                
                line_list = line.strip().split()
                snp_pos = int(line_list[assoc_indices['pos']])
                if int(reg_chr) == int(line_list[assoc_indices['chr']]
                                  ) and pc_toolbox.in_interval(snp_pos, reg_start, reg_end):
                    
                    old_p = float(old_out['p'])
                    cur_p = float(line_list[assoc_indices['p']])
                    cur_snp = line_list[assoc_indices['snp']]
                    cur_t = float(line_list[assoc_indices['t']])
                    cur_hi = line_list[assoc_indices['hi']]
                    cur_lo = line_list[assoc_indices['lo']]
                    cur_a1 = line_list[assoc_indices['a1']]
                    cur_or = line_list[assoc_indices['or']]
##                    if not cur_snp.startswith('rs'):
##                        lz_snp = 'chr'+str(reg_chr)+':'+ str(snp_pos)
##                    else:
##                        lz_snp = cur_snp
##                    if cur_w > weight_min or (reg_chr in ['23','24'] and cur_w > sex_weight):
##                        cur_out = {'chr':gene.chro,'band':gene.band,'start':gene.start,