Exemplo n.º 1
0
def repair_for_zs(z_list, lw_list, table_loc, build, table_type):
    """Write a copy of a table with zero and low-weight p-values patched.

    The copy is written next to ``table_loc`` with ``_noZs`` inserted before
    the file extension.  The header line is copied unchanged; every other
    line is split on whitespace and re-written tab-separated after its
    p-value column is adjusted:

    * p-value is the literal string ``'0'``: it becomes ``'1e-101'`` when the
      SNP is in ``z_list`` and ``'1e-100'`` otherwise.
    * otherwise, if the SNP is in ``lw_list``, it becomes ``'NA'`` and a
      message is printed.

    Args:
        z_list: container of SNP names tested with ``in``.
        lw_list: container of SNP names tested with ``in``; also printed.
        table_loc: path of the table to read.
        build: not used by this function.
        table_type: ``'FAMILY'`` or ``'META'``; selects the header parser.
            With any other value no column mapping is created and the first
            data row raises ``NameError``.

    Returns:
        None.
    """
    print('\nLW_LIST is:')
    print(lw_list)
    base, ext = os.path.splitext(table_loc)
    new_loc = base + '_noZs' + ext
    # Both handles are owned by the ``with`` so they are closed even when a
    # malformed row raises half-way through the table.
    with open(new_loc, mode="w") as no_z, open(table_loc, mode="r") as tabby:
        header = next(tabby, None)
        if header is None:
            # Empty input: the (empty) output file has still been created.
            return
        if table_type == 'FAMILY':
            index_dict = pc_toolbox.read_fam_titles(header)
        if table_type == 'META':
            index_dict = pc_toolbox.read_meta_titles(header)
        no_z.write(header)
        for line in tabby:
            line_list = line.strip().split()
            snp = line_list[index_dict['snp']]
            p_col = index_dict['p']
            if line_list[p_col] == '0':
                line_list[p_col] = '1e-101' if snp in z_list else '1e-100'
            elif snp in lw_list:
                print('Changing p-value of {0} from {1} to NA due to low weight\n'
                      'coupled with high p-value.'.format(snp, line_list[p_col]))
                line_list[p_col] = 'NA'
            no_z.write('\t'.join(line_list) + '\n')
Exemplo n.º 2
0
def filter_table(meta_loc, list_loc, table_type='META'):
    """Copy the rows of a table whose SNP appears in a SNP list.

    The output is written next to ``meta_loc`` with ``_intersect`` inserted
    before the extension.  The header line is always copied; a data line is
    copied verbatim, exactly once, when its SNP column matches an entry
    returned by ``read_list(list_loc)``.

    Args:
        meta_loc: path of the table to filter.
        list_loc: path handed to ``read_list`` to obtain the SNPs to keep.
        table_type: ``'META'`` (default) or ``'FAMILY'``; selects the header
            parser.  Any other value leaves the column mapping undefined and
            the first data row raises ``NameError``.

    Returns:
        None.
    """
    # A set gives O(1) membership per row.  The previous nested loop used
    # ``continue`` where a ``break`` was needed, so a SNP listed twice was
    # written twice.
    # NOTE(review): assumes read_list yields hashable SNP names (strings).
    wanted = frozenset(read_list(list_loc))
    base, ext = os.path.splitext(meta_loc)
    new_meta_loc = base + '_intersect' + ext
    with open(meta_loc, mode="r") as meta, \
            open(new_meta_loc, mode="w") as new_meta:
        header = next(meta, None)
        if header is None:
            return
        if table_type == 'META':
            index_dict = pc_toolbox.read_meta_titles(header)
        elif table_type == 'FAMILY':
            index_dict = pc_toolbox.read_fam_titles(header)
        new_meta.write(header)
        for line in meta:
            snp = line.strip().split()[index_dict['snp']]
            if snp in wanted:
                new_meta.write(line)
Exemplo n.º 3
0
def read_meta(meta_loc, region, outfolder, build, annot_dict=None):
    """Extract the rows of a meta table that fall inside one region.

    Writes ``<outfolder>/<region.band>.tbl``: a tab-separated header followed
    by one line per input row that is on the region's chromosome, lies inside
    the region (per ``pc_toolbox.in_interval``) and whose weight exceeds the
    module-level ``WEIGHT_MIN``.

    Args:
        meta_loc: path of the meta table to read.
        region: object providing ``chro``, ``start``, ``end`` and ``band``.
            ``start``/``end`` are multiplied by 1e6 before being compared
            with the table's positions.
        outfolder: directory the output file is created in.
        build: only used to name the position column (``<build>_pos``).
        annot_dict: mapping from the table's SNP name to an object with
            ``rs`` and ``name`` attributes.  Despite the ``None`` default it
            is indexed for every row that passes the filters, so a real
            mapping is required whenever such a row exists.

    Returns:
        None.
    """
    standardizer = 1e6
    reg_chr = region.chro
    reg_chr_num = int(reg_chr)  # loop-invariant, parsed once
    reg_start = int(float(region.start) * standardizer)
    reg_end = int(float(region.end) * standardizer)
    outfile = os.path.join(outfolder, region.band + '.tbl')
    title_list = ['imchip_name', 'rs_name', 'chr', '{0}_pos'.format(build),
                  'p-value', 'z-score', 'weight', 'a1', 'a2']
    # ``with`` guarantees the output is flushed and closed even if a row
    # fails to parse.
    with open(outfile, mode="w") as out:
        out.write('\t'.join(title_list) + '\n')
        with open(meta_loc, mode="r") as meta:
            header = next(meta, None)
            if header is not None:
                meta_indices = pc_toolbox.read_meta_titles(header)
            for line in meta:
                line_list = line.strip().split()
                snp_pos = int(line_list[meta_indices['pos']])
                weight = line_list[meta_indices['weight']]
                if reg_chr_num != int(line_list[meta_indices['chr']]):
                    continue
                if not pc_toolbox.in_interval(snp_pos, reg_start, reg_end):
                    continue
                if not float(weight) > WEIGHT_MIN:
                    continue
                lz_snp = line_list[meta_indices['snp']]
                annot = annot_dict[lz_snp]
                out.write('\t'.join([
                    annot.name,
                    annot.rs,
                    reg_chr,
                    str(snp_pos),
                    line_list[meta_indices['p']],
                    line_list[meta_indices['z']],
                    weight,
                    line_list[meta_indices['a1']],
                    line_list[meta_indices['a2']],
                ]) + '\n')
Exemplo n.º 4
0
def create_index_dict(table_loc, table_type):
    """Return the column-index mapping parsed from a table's header line.

    Only the first line of the file is read.

    Args:
        table_loc: path of the table.
        table_type: ``'family'`` uses ``pc_toolbox.read_fam_titles``,
            ``'assoc'`` uses ``pc_toolbox.read_assoc_titles`` (with
            ``c_interval=None`` and ``plink_test='logistic'``); any other
            value uses ``pc_toolbox.read_meta_titles``.

    Returns:
        Whatever the selected ``pc_toolbox`` parser returns for the header.

    Raises:
        ValueError: if the file is empty (previously this surfaced as an
            ``UnboundLocalError`` on the return statement).
    """
    # The header is all that is needed, so stop after one line instead of
    # iterating over the whole table.
    with open(table_loc, mode="r") as table:
        header = table.readline()
    if not header:
        raise ValueError(
            "{0} is empty; no header line to parse.".format(table_loc))
    if table_type == 'family':
        return pc_toolbox.read_fam_titles(header)
    if table_type == 'assoc':
        return pc_toolbox.read_assoc_titles(
            header, c_interval=None, plink_test='logistic')
    return pc_toolbox.read_meta_titles(header)
Exemplo n.º 5
0
def fix_eur(orig_table, out_table, fix_dict):
    """Overlay replacement statistics onto a meta table.

    Every line of ``orig_table`` is re-written tab-separated to ``out_table``
    with an extra trailing column (titled ``eurlowP_SNP`` in the header).
    The first row found for each SNP in ``fix_dict`` has its ``z``,
    ``weight`` and ``a1`` columns replaced from the entry, its ``a2`` and
    ``direction`` columns set to ``'.'`` and the extra column set to
    ``'yes'``; all other rows get ``'.'`` in the extra column.

    SNPs in ``fix_dict`` that never appear in the table are reported and
    appended as new rows, with chromosome and position looked up in the hg18
    annotation dictionary obtained through ``fix_it``.

    Args:
        orig_table: path of the table to read.
        out_table: path of the table to write.
        fix_dict: mapping of SNP name to an object with ``z``, ``w`` and
            ``a1`` attributes (these are joined into the output line, so
            they are expected to be strings).

    Returns:
        None.
    """
    # Track outstanding SNPs in a set: membership and removal are O(1), and
    # it does not rely on ``dict.keys()`` being a mutable list.
    pending = set(fix_dict)
    with open(out_table, mode="w") as out:
        with open(orig_table, mode="r") as orig:
            header = next(orig, None)
            if header is not None:
                index_dict = pc_toolbox.read_meta_titles(header)
                titles = header.strip().split()
                titles.append('eurlowP_SNP')
                out.write('\t'.join(titles) + '\n')
            for line in orig:
                lsplit = line.strip().split()
                snp = lsplit[index_dict['snp']]
                if snp in pending:
                    entry = fix_dict[snp]
                    lsplit[index_dict['z']] = entry.z
                    lsplit[index_dict['weight']] = entry.w
                    lsplit[index_dict['a1']] = entry.a1
                    lsplit[index_dict['a2']] = '.'
                    lsplit[index_dict['direction']] = '.'
                    # Only the first occurrence of a SNP is patched.
                    pending.discard(snp)
                    lp_flag = 'yes'
                else:
                    lp_flag = '.'
                lsplit.append(lp_flag)
                out.write('\t'.join(lsplit) + '\n')
        # Keep fix_dict's own iteration order for the appended rows.
        missing = [key for key in fix_dict if key in pending]
        if missing:
            print("{0} keys not found.".format(len(missing)))
            annot_dict_loc = fix_it.locate_annot_dict('hg18')
            annot_dict = fix_it.build_annot_dict('MAP', annot_dict_loc)
            for key in missing:
                print(key)
                entry = fix_dict[key]
                key_info = [key, entry.a1, '.', entry.w, entry.z, '0', '.',
                            annot_dict[key].hg18_chr,
                            annot_dict[key].hg18_pos,
                            'yes']
                out.write('\t'.join(key_info) + '\n')
Exemplo n.º 6
0
def edit_table(table_loc, table_type):
    """Collect the rows of a table whose p-value is exactly zero.

    Args:
        table_loc: path of the table to scan.
        table_type: ``"assoc"`` parses the header with
            ``pc_toolbox.read_assoc_titles``, ``"eqtl"`` with
            ``achilleas_yank.read_eqtl_titles`` (passing the module-level
            ``perm``); anything else uses ``pc_toolbox.read_meta_titles``.
            The parsed mapping is printed for the latter two.

    Returns:
        A tuple ``(index_dict, ztup_sort, table_loc)`` where ``ztup_sort`` is
        a list of ``ZTup(lz, chro, pos, abs_z)`` named tuples, one per row
        with a numeric p-value equal to 0, sorted by ``abs_z`` descending.
        ``table_loc`` is returned unchanged.
    """
    # Build the record type once rather than on every matching row.
    ZTup = namedtuple("ZTup", "lz,chro,pos,abs_z")
    ztup_list = []
    line1 = True
    with open(table_loc, mode="r") as table:
        for line in table:
            line_split = line.strip().split()
            if line1:
                if table_type == "assoc":
                    index_dict = pc_toolbox.read_assoc_titles(
                        line, c_interval=None, plink_test="logistic")
                elif table_type == "eqtl":
                    index_dict = achilleas_yank.read_eqtl_titles(line, perm)
                    print(index_dict)
                else:
                    index_dict = pc_toolbox.read_meta_titles(line)
                    print(index_dict)
                line1 = False
                continue
            chro = line_split[index_dict["chr"]]
            pos = line_split[index_dict["pos"]]
            # Retained from the original: a row whose position is not an
            # integer aborts the scan with ValueError.
            int(pos)
            p_str = line_split[index_dict["p"]]
            if p_str == "NA" or float(p_str) != 0:
                continue
            # NOTE(review): ``assoc`` is not a parameter or local here, so it
            # must exist as a module-level name; it may have been meant to be
            # ``table_type == "assoc"`` - confirm against the callers.
            if assoc:
                z_or_t = abs(float(line_split[index_dict["t"]]))
            else:
                z_or_t = abs(float(line_split[index_dict["z"]]))
            ztup_list.append(ZTup(
                lz=line_split[index_dict["snp"]],
                chro=chro,
                pos=pos,
                abs_z=z_or_t,
            ))
    ztup_sort = sorted(ztup_list, key=lambda z: z.abs_z, reverse=True)
    return index_dict, ztup_sort, table_loc
def read_table(table_loc, region_list, assoc, annot_dict):
    """Collect significant SNPs that lie outside every given region.

    A row is kept when its p-value is numeric (per
    ``pc_toolbox.is_number``), is not greater than the module-level
    ``P_BOUND``, and its chromosome/position does not fall inside any region
    of ``region_list`` (per ``pc_toolbox.in_interval``).  Kept rows are
    converted with ``store_line``.

    Args:
        table_loc: path of the table to read.
        region_list: iterable of objects with ``chro``, ``start`` and ``end``;
            ``start``/``end`` are multiplied by 1e6 before comparison.
        assoc: truthy to parse the header with
            ``pc_toolbox.read_assoc_titles(line, .95, 'logistic')``, falsy
            for ``pc_toolbox.read_meta_titles``; also forwarded to
            ``store_line``.
        annot_dict: forwarded to ``store_line``.

    Returns:
        The kept records sorted by their ``chro`` then ``p`` attributes.
        The unsorted and sorted lists are also printed to stdout.
    """
    standardizer = 1e6
    # Region bounds do not depend on the row, so parse them once up front
    # instead of once per row per region.
    region_bounds = [
        (int(region.chro),
         int(float(region.start) * standardizer),
         int(float(region.end) * standardizer))
        for region in region_list
    ]
    snp_list = []
    line1 = True
    with open(table_loc, mode="r") as table:
        for line in table:
            if line1:
                if assoc:
                    table_indices = pc_toolbox.read_assoc_titles(
                        line, .95, 'logistic')
                else:
                    table_indices = pc_toolbox.read_meta_titles(line)
                line1 = False
                continue
            line_list = line.strip().split()
            snp_pos = int(line_list[table_indices['pos']])
            chro = int(line_list[table_indices['chr']])
            p_str = line_list[table_indices['p']]
            if not pc_toolbox.is_number(p_str) or float(p_str) > P_BOUND:
                continue
            in_reg = any(
                reg_chr == chro
                and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)
                for reg_chr, reg_start, reg_end in region_bounds
            )
            if not in_reg:
                snp_list.append(
                    store_line(line, table_indices, assoc, annot_dict))
    print(snp_list)
    sys.stdout.flush()
    tuple_list = sorted(snp_list, key=attrgetter('chro', 'p'))
    print(snp_list)
    print(tuple_list)
    sys.stdout.flush()
    return tuple_list
Exemplo n.º 8
0
def edit_table(table_loc, fix):
    """Scan a table for zero p-values, optionally writing a renamed copy.

    For every data row, a SNP name that does not start with ``rs`` is
    replaced by ``chr<chr>:<pos>``.  Rows whose p-value is numeric and equal
    to 0 are recorded.  When ``fix`` is truthy every line (header included)
    is re-written tab-separated to the path given by
    ``new_table_loc(table_loc)``.

    Args:
        table_loc: path of the table to read.
        fix: truthy to write the renamed copy.

    Returns:
        A tuple ``(index_dict, ztup_list, table_loc)``: the header mapping,
        a list of ``ZTup(lz, chro, pos)`` named tuples for the zero-p rows in
        file order, and the path of the copy when ``fix`` is truthy (the
        input path otherwise).
    """
    # Build the record type once rather than on every matching row.
    ZTup = namedtuple('ZTup', 'lz,chro,pos')
    ztup_list = []
    lz_table_loc = new_table_loc(table_loc)
    lz_table = None
    line1 = True
    with open(table_loc, mode='r') as table:
        try:
            if fix:
                lz_table = open(lz_table_loc, mode='w')
            for line in table:
                line_split = line.strip().split()
                if line1:
                    # ``assoc`` is read from module scope; it is not a
                    # parameter of this function.
                    if assoc:
                        index_dict = pc_toolbox.read_assoc_titles(
                            line, c_interval=None, plink_test='logistic')
                    else:
                        index_dict = pc_toolbox.read_meta_titles(line)
                    line1 = False
                else:
                    snp_col = index_dict['snp']
                    cur_chr = line_split[index_dict['chr']]
                    cur_pos = int(line_split[index_dict['pos']])
                    if not line_split[snp_col].startswith('rs'):
                        line_split[snp_col] = (
                            'chr' + cur_chr + ':' + str(cur_pos))
                    p_str = line_split[index_dict['p']]
                    if p_str != 'NA' and float(p_str) == 0:
                        ztup_list.append(ZTup(
                            lz=line_split[snp_col],
                            chro=line_split[index_dict['chr']],
                            pos=line_split[index_dict['pos']],
                        ))
                if fix:
                    lz_table.write('\t'.join(line_split) + '\n')
        finally:
            # Close the copy even when a malformed row raises mid-file.
            if lz_table is not None:
                lz_table.close()
    if fix:
        table_loc = lz_table_loc
    return index_dict, ztup_list, table_loc
Exemplo n.º 9
0
def edit_table(table_loc, change_loc):
    """Write a copy of a meta table with very small p-values floored.

    Every line (header included) is re-written tab-separated to
    ``change_loc``.  A data row whose p-value is not ``'NA'`` and is below
    1e-16 gets its p-value replaced by a fixed string: the per-SNP value in
    the table below, or ``'1.2e-16'`` for any other SNP.

    Args:
        table_loc: path of the table to read.
        change_loc: path of the table to write.

    Returns:
        None.  The parsed header mapping is printed to stdout.
    """
    # Replacement p-values for specific SNPs; any SNP not listed here is
    # given ``default_floor``.
    floor_by_snp = {
        'rs653178': '1e-16',
        'rs61839660': '1e-16',
        'rs9273363': '1e-16',
        'rs2476601': '1e-16',
        'rs3842727': '1e-16',
        'rs1701704': '1.1e-16',
        'rs3087243': '1e-16',
        'rs12416116': '1.1e-16',
        'rs3826110': '1e-16',
        'rs12927355': '1.1e-16',
    }
    default_floor = '1.2e-16'
    line1 = True
    # Both handles are closed by the ``with`` even if a row fails to parse.
    with open(change_loc, mode="w") as change_table, \
            open(table_loc, mode='r') as table:
        for line in table:
            line_split = line.strip().split()
            if line1:
                index_dict = pc_toolbox.read_meta_titles(line)
                print(index_dict)
                line1 = False
            else:
                p_col = index_dict['p']
                p_str = line_split[p_col]
                if p_str != 'NA' and float(p_str) < 1e-16:
                    snp = line_split[index_dict['snp']]
                    line_split[p_col] = floor_by_snp.get(snp, default_floor)
            change_table.write('\t'.join(line_split) + '\n')
Exemplo n.º 10
0
def read_table(table_loc, chromosome, index_dict):
    """Return the lowest and highest position seen on one chromosome.

    Args:
        table_loc: path of the table to scan.
        chromosome: chromosome label, compared with ``==`` against the raw
            text of each row's chromosome column.
        index_dict: overwritten by the mapping parsed from the header line,
            so the value passed in is only returned to nobody and never read
            once the table has at least one line.

    Returns:
        ``(low_pos, high_pos)``.  When no row matches, the initial sentinels
        ``(5000000000, 0)`` are returned.
    """
    lowest = 5000000000
    highest = 0
    with open(table_loc, mode='r') as table:
        for row_number, raw in enumerate(table):
            fields = raw.strip().split()
            if row_number == 0:
                # Header row: ``assoc`` is read from module scope.
                if assoc:
                    index_dict = pc_toolbox.read_assoc_titles(
                        raw, c_interval=None, plink_test='logistic')
                else:
                    index_dict = pc_toolbox.read_meta_titles(raw)
                continue
            row_chr = fields[index_dict['chr']]
            row_pos = int(fields[index_dict['pos']])
            if chromosome == row_chr:
                lowest = min(lowest, row_pos)
                highest = max(highest, row_pos)
    return (lowest, highest)
Exemplo n.º 11
0
def list_info(table_loc, table_type):
    """Return one ``SnpFo(snp, chro, pos)`` record per data row of a table.

    The first line of the file is always consumed as the header and produces
    no record.

    Args:
        table_loc: path of the table to read.
        table_type: ``'MAP'`` / ``'HAPMAP'`` use
            ``pc_toolbox.identify_map_indices()``, ``'META'`` uses
            ``pc_toolbox.read_meta_titles`` and ``'FAMILY'`` uses
            ``pc_toolbox.read_fam_titles``.  Any other value leaves the
            column mapping undefined, so the first data row raises
            ``NameError``.

    Returns:
        A list of ``SnpFo`` named tuples in file order; the fields hold the
        raw column text.
    """
    SnpFo = namedtuple('SnpFo', 'snp,chro,pos')
    records = []
    with open(table_loc, mode="r") as table:
        header = next(table, None)
        if header is None:
            return records
        # NOTE(review): for MAP/HAPMAP the first line is skipped although the
        # header text itself is not parsed - confirm those files really start
        # with a title line.
        if table_type in ('MAP', 'HAPMAP'):
            index_dict = pc_toolbox.identify_map_indices()
        elif table_type in ('META',):
            index_dict = pc_toolbox.read_meta_titles(header)
        elif table_type == 'FAMILY':
            index_dict = pc_toolbox.read_fam_titles(header)
        for row in table:
            fields = row.strip().split()
            records.append(SnpFo(chro=fields[index_dict['chr']],
                                 snp=fields[index_dict['snp']],
                                 pos=fields[index_dict['pos']]))
    return records
Exemplo n.º 12
0
def read_table(table_loc, gene, table_type, annot_dict=None):
    """Find the most significant adequately-weighted SNP inside a gene region.

    Rows on the gene's chromosome that fall inside the region (per
    ``pc_toolbox.in_interval``) are examined in file order:

    * A row whose weight is below the module-level ``weight_min`` is flagged
      ``checkme='1'``; it can never become the retained SNP but is remembered
      as the "low weight" SNP when its p-value beats both the retained one
      and any earlier low-weight one.
    * A row with a strictly smaller p-value and adequate weight becomes the
      retained SNP and resets the tie bookkeeping.  If it also beats the
      remembered low-weight SNP, that one is forgotten.
    * A row whose p-value equals the retained one is recorded as a tie; it
      replaces the retained SNP only when its |z| is numerically larger and
      its weight is adequate.

    Args:
        table_loc: path of the table to read.
        gene: object with ``chro``, ``start``, ``end``, ``sym`` and ``ID``;
            ``start``/``end`` are multiplied by 1e6 before comparison.
        table_type: ``'META'`` or ``'FAMILY'``; selects the header parser.
            Any other value leaves the column mapping undefined and the
            first data row raises ``NameError``.
        annot_dict: mapping from the table's SNP name to an object with
            ``rs`` and ``name`` attributes.  Despite the ``None`` default it
            is indexed for every in-region row.

    Returns:
        A tuple ``(old_out, sorted_list, low_weight_lz)``:
        the dict describing the retained SNP (placeholder values with
        ``p='1'`` when nothing qualified), the tied SNPs as
        ``ZTup(snp, lz, p, z, w)`` named tuples sorted by numeric |z|
        descending, and the ``lz`` name of the remembered low-weight SNP or
        ``None``.
    """
    ZTup = namedtuple('ZTup', 'snp,lz,p,z,w')
    standardizer = 1e6
    reg_chr_num = int(gene.chro)  # loop-invariant, parsed once
    reg_start = int(float(gene.start) * standardizer)
    reg_end = int(float(gene.end) * standardizer)
    multi_counter = 1
    zero_list = []
    # Placeholders are created up front so that an empty file yields the same
    # "nothing found" result as a header-only file instead of an
    # UnboundLocalError at the return statement.
    old_out = {'chr': '', 'ID': '', 'start': '',
               'end': '', 'sym': '', 'snp': '', 'im': '',
               'lz': '', 'p': '1', 'pos': '',
               '|z|': '0', 'z': '0', 'note': '--',
               'weight': '', 'maf': '', 'meta_a1': '',
               'meta_a2': '', 'checkme': ''}
    lw_blank = {'chr': '', 'ID': '', 'start': '',
                'end': '', 'sym': '', 'snp': None, 'im': None,
                'lz': None, 'p': '1', 'pos': '',
                '|z|': '0', 'z': '0', 'note': '--',
                'weight': '', 'maf': '', 'meta_a1': '',
                'meta_a2': ''}
    # Work on a copy so that updating the low-weight record never mutates
    # the blank template used to reset it.
    low_weight_out = dict(lw_blank)
    with open(table_loc, mode="r") as table:
        header = next(table, None)
        if header is not None:
            if table_type == 'META':
                index_dict = pc_toolbox.read_meta_titles(header)
            if table_type == 'FAMILY':
                index_dict = pc_toolbox.read_fam_titles(header)
        for line in table:
            line_list = line.strip().split()
            snp_pos = int(line_list[index_dict['pos']])
            if not (reg_chr_num == int(line_list[index_dict['chr']])
                    and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)):
                continue

            # The retained p-value is always re-read from its stored text so
            # comparisons see exactly what will be reported.
            old_p = float(old_out['p'])
            cur_p = float(line_list[index_dict['p']])
            lz_snp = line_list[index_dict['snp']]
            cur_w = float(line_list[index_dict['weight']])
            cur_a1 = line_list[index_dict['a1']]
            cur_a2 = line_list[index_dict['a2']]
            cur_z = line_list[index_dict['z']]
            cur_abs_z = abs(float(cur_z))
            cur_snp = annot_dict[lz_snp].rs
            cur_im = annot_dict[lz_snp].name
            cur_out = {'chr': gene.chro, 'ID': gene.ID, 'start': gene.start,
                       'end': gene.end, 'sym': gene.sym, 'snp': cur_snp,
                       'im': cur_im, 'lz': lz_snp, 'p': str(cur_p),
                       'pos': str(snp_pos), '|z|': str(cur_abs_z),
                       'z': cur_z, 'note': '--', 'weight': str(cur_w),
                       'meta_a1': cur_a1, 'meta_a2': cur_a2}
            if float(cur_out['weight']) < weight_min:
                cur_out['checkme'] = '1'
            else:
                cur_out['checkme'] = '0'

            if cur_p == old_p:
                multi_counter = multi_counter + 1
                cur_tup = ZTup(snp=cur_out['snp'], lz=cur_out['lz'],
                               p=cur_out['p'], z=cur_out['|z|'],
                               w=cur_out['weight'])
                old_tup = ZTup(snp=old_out['snp'], lz=old_out['lz'],
                               p=old_out['p'], z=old_out['|z|'],
                               w=old_out['weight'])
                zero_list.append(cur_tup)
                if old_tup not in zero_list:
                    zero_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(
                    old_out['snp'], cur_snp, cur_p))
                print('{0} has a z-score of: {1}'.format(
                    old_out['snp'], old_out['z']))
                print('{0} has a z-score of: {1}'.format(
                    cur_out['snp'], cur_out['z']))
                # |z| is stored as text; compare numerically, otherwise
                # e.g. '9.5' would rank above '10.2'.
                if (float(cur_out['|z|']) > float(old_out['|z|'])
                        and cur_out['checkme'] == '0'):
                    old_p = cur_p
                    old_out.update(cur_out)
                print('Retaining {0} as the significant SNP based on z-score value.'.format(old_out['snp']))
                old_out['note'] = '{0}SNPs(p={1})'.format(multi_counter, old_p)
            elif cur_p < old_p:
                if cur_out['checkme'] == '0':
                    multi_counter = 1
                    old_out.update(cur_out)
                    zero_list = []
                    if (low_weight_out['p'] != '1'
                            and cur_p < float(low_weight_out['p'])):
                        low_weight_out = dict(lw_blank)
                        print("Wiping low weight snp!")
                elif cur_p < float(low_weight_out['p']):
                    low_weight_out.update(cur_out)
                    print('Adding {0} to low weight SNP list due to\n'
                          'weight = {1}\n'
                          'p-value = {2}'.format(cur_snp,
                                                 low_weight_out['weight'],
                                                 low_weight_out['p']))

    # Sort ties by numeric |z| (the tuples themselves keep the text form).
    sorted_list = sorted(zero_list, key=lambda member: float(member.z),
                         reverse=True)
    return old_out, sorted_list, low_weight_out['lz']
Exemplo n.º 13
0
def fix_table(table_loc, annot_dict, purpose, build):
    """Rewrite a table with chromosome, position and SNP name from annotations.

    For ``purpose`` in ``MAP`` / ``HAPMAP`` / ``FAMMAP`` the input is first
    copied to a ``~`` backup (path chosen by ``rename_as_necessary``), the
    backup is read and the original path is overwritten; every line is a
    data line with chromosome, SNP and position in columns 0, 1 and 3.
    For any other purpose the output path comes from
    ``locate_fixed_meta(table_loc, build)``, the first line is a header
    (parsed for ``META`` / ``FAMILY``) that gains an ``annotation`` title,
    and every data row gains the value of ``get_lz_annot``.

    For each data row the chromosome and position columns are replaced from
    ``annot_dict`` for the requested build (``'hg18'`` or ``'hg19'``), with
    chromosome labels XY/X mapped to 23, Y to 24 and M to 26.  Rows whose SNP
    is missing from ``annot_dict`` are left as they are and listed in
    ``<input base>_NAMEERRORS.txt``.  Unless ``purpose`` is ``HAPMAP`` the
    SNP column is replaced by the annotation's ``lz`` name.

    Args:
        table_loc: path of the table to fix.
        annot_dict: mapping of SNP name to an annotation object with
            ``<build>_chr``, ``<build>_pos`` and ``lz`` attributes.
        purpose: one of the purposes described above.
        build: ``'hg18'`` or ``'hg19'``; any other value skips the
            chromosome/position replacement.

    Returns:
        None.
    """
    is_map = purpose in ('MAP', 'HAPMAP', 'FAMMAP')
    line1 = True
    counter = 0
    if is_map:
        basepath, ext = os.path.splitext(table_loc)
        new_loc = str(rename_as_necessary(basepath + '~', ext)) + ext
        shutil.copy(table_loc, new_loc)
        output_loc = table_loc
        table_loc = new_loc
    else:
        output_loc = locate_fixed_meta(table_loc, build)
    index_dict = {'chr': 0, 'pos': 3, 'snp': 1}
    error_list = []
    # Both handles are closed by the ``with`` even if a row raises.
    with open(table_loc, mode="r") as table, \
            open(output_loc, mode="w") as output:
        for line in table:
            # Progress trace: shows the previously processed row roughly
            # every 5000 rows.
            if counter > 5000:
                print(line_list)
            if line1 and not is_map:
                if purpose == "META":
                    index_dict = pc_toolbox.read_meta_titles(line)
                if purpose == "FAMILY":
                    index_dict = pc_toolbox.read_fam_titles(line)
                print(index_dict)
                line_list = line.strip().split()
                line_list.append('annotation')
                output.write('\t'.join(line_list) + '\n')
                index_dict['annot'] = len(line_list) - 1
                print(index_dict)
                line1 = False
            else:
                line_list = line.strip().split()
                snp = line_list[index_dict['snp']]
                if purpose in ('FAMILY', 'FAMMAP') and snp in FAM_DIFF:
                    print("MADE IT INTO CASE")
                    print(snp)
                    snp = FAM_DIFF[snp]
                    print(snp)
                if not is_map:
                    line_list.append(get_lz_annot(annot_dict, snp))
                if build in ('hg18', 'hg19'):
                    try:
                        raw_chr = getattr(annot_dict[snp], build + '_chr')
                        # 'XY' must be handled before 'X' and 'Y'; in the
                        # old order it could never match and 'XY' came out
                        # as '2324'.
                        line_list[index_dict['chr']] = (
                            raw_chr.replace('XY', '23')
                            .replace('X', '23')
                            .replace('Y', '24')
                            .replace('M', '26'))
                        line_list[index_dict['pos']] = getattr(
                            annot_dict[snp], build + '_pos')
                    except KeyError:
                        error_list.append(list(line_list))
                if purpose != 'HAPMAP':
                    try:
                        line_list[index_dict['snp']] = annot_dict[snp].lz
                    except KeyError:
                        print("{0} is missing from the dictionary!".format(
                            line_list[index_dict['snp']]))
                if counter > 5000:
                    counter = 0
                counter = 1 + counter
                output.write('\t'.join(line_list) + '\n')
    base, ext = os.path.splitext(table_loc)
    error_loc = base + '_NAMEERRORS.txt'
    with open(error_loc, mode="w") as efile:
        efile.write('\t'.join(['CHR', 'RS', 'cM', 'POS']) + '\n')
        for error in error_list:
            efile.write('\t'.join(error) + '\n')
0
def fix_table(table_loc, annot_dict, purpose, build):
    """Rewrite a table with chromosome, position and SNP name from annotations.

    For ``purpose`` in ``HAPMAP`` / ``MAP`` the input is first copied to a
    ``~`` backup (path chosen by ``rename_as_necessary``), the backup is read
    and the original path is overwritten.  For any other purpose the output
    path comes from ``locate_fixed_table(table_loc, build)``.

    The first line is always copied unchanged; for ``META`` / ``FAMILY`` it
    is also parsed for the column positions, otherwise chromosome, SNP and
    position are taken from columns 0, 1 and 3.  Each following row has its
    chromosome and position replaced from ``annot_dict`` for the requested
    build (``'hg18'`` or ``'hg19'``) and its SNP name replaced by the
    annotation's ``lz`` name.  Rows whose SNP is missing from ``annot_dict``
    are written unchanged, reported on stdout and listed in
    ``<input base>_NAMEERRORS.txt``.

    Args:
        table_loc: path of the table to fix.
        annot_dict: mapping of SNP name to an annotation object with
            ``<build>_chr``, ``<build>_pos`` and ``lz`` attributes.
        purpose: one of the purposes described above.
        build: ``'hg18'`` or ``'hg19'``; any other value skips the
            chromosome/position replacement.

    Returns:
        None.
    """
    if purpose in ("HAPMAP", "MAP"):
        basepath, ext = os.path.splitext(table_loc)
        new_loc = str(rename_as_necessary(basepath + '~', ext)) + ext
        shutil.copy(table_loc, new_loc)
        output_loc = table_loc
        table_loc = new_loc
    else:
        output_loc = locate_fixed_table(table_loc, build)
    index_dict = {'chr': 0, 'pos': 3, 'snp': 1}
    error_list = []
    # Both handles are closed by the ``with`` even if a row raises.
    with open(table_loc, mode="r") as table, \
            open(output_loc, mode="w") as output:
        header = next(table, None)
        if header is not None:
            if purpose == "META":
                index_dict = pc_toolbox.read_meta_titles(header)
            elif purpose == "FAMILY":
                index_dict = pc_toolbox.read_fam_titles(header)
            output.write(header)
        for line in table:
            line_list = line.strip().split()
            snp = line_list[index_dict['snp']]
            if build in ('hg18', 'hg19'):
                try:
                    line_list[index_dict['chr']] = getattr(
                        annot_dict[snp], build + '_chr')
                    line_list[index_dict['pos']] = getattr(
                        annot_dict[snp], build + '_pos')
                except KeyError:
                    # Unknown SNP: keep the row as read and record it.
                    error_list.append(list(line_list))
            try:
                line_list[index_dict['snp']] = annot_dict[snp].lz
            except KeyError:
                print("{0} is missing from the dictionary!".format(
                    line_list[index_dict['snp']]))
            output.write('\t'.join(line_list) + '\n')
    base, ext = os.path.splitext(table_loc)
    error_loc = base + '_NAMEERRORS.txt'
    with open(error_loc, mode="w") as efile:
        efile.write('\t'.join(['CHR', 'RS', 'cM', 'POS']) + '\n')
        for error in error_list:
            efile.write('\t'.join(error) + '\n')