def read_gm(opts, logger):
    """Run the ``opts.exposure_gm`` file through ``cleangwas.qc``.

    A deep copy of ``opts`` is used throughout, so the caller's options
    object is never modified. On that copy ``infp`` is pointed at
    ``exposure_gm``; once the header has been parsed, the ``rmpali``,
    ``rmindel`` and ``unique`` switches are forced to ``False`` before QC.

    Args:
        opts: Options object. ``exposure_gm`` and ``sep`` are read directly
            here; the (copied) object is also handed to cleangwas helpers.
        logger: Logger object forwarded to the cleangwas helpers.

    Returns:
        The value returned by ``cleangwas.qc`` (presumably a pandas
        DataFrame -- confirm against cleangwas).
    """
    gm_opts = copy.deepcopy(opts)
    gm_opts.infp = gm_opts.exposure_gm

    raw_header = cleangwas.read_header(gm_opts.infp, gm_opts.sep)
    column_names = cleangwas.parse_header(
        raw_header, gm_opts, logger, cleangwas.default_cnames)

    # These switches are cleared only after parse_header has seen the
    # options, matching the established call order.
    # NOTE(review): presumably they disable palindromic-SNP removal, indel
    # removal and de-duplication inside cleangwas.qc -- confirm there.
    for switch in ('rmpali', 'rmindel', 'unique'):
        setattr(gm_opts, switch, False)

    return cleangwas.qc(gm_opts, column_names, logger)
def read_exp(opts, logger):
    """Load the ``opts.exposure`` file and pass it through the cleangwas steps.

    Operates on a deep copy of ``opts`` (the caller's object is untouched),
    with ``infp`` set to ``exposure``. The parsed table goes through, in
    order: ``cleangwas.qc``, ``cleangwas.selectSNP``,
    ``cleangwas.resort_col`` and ``cleangwas.truncate``.

    Args:
        opts: Options object. ``exposure`` and ``sep`` are read directly
            here; the (copied) object is also handed to cleangwas helpers.
        logger: Logger object forwarded to the cleangwas helpers.

    Returns:
        The value returned by ``cleangwas.truncate`` (presumably a pandas
        DataFrame -- confirm against cleangwas).
    """
    exp_opts = copy.deepcopy(opts)
    exp_opts.infp = exp_opts.exposure

    raw_header = cleangwas.read_header(exp_opts.infp, exp_opts.sep)
    column_names = cleangwas.parse_header(
        raw_header, exp_opts, logger, cleangwas.default_cnames)

    # A single name is rebound at each stage so earlier intermediates can
    # be released as soon as the next stage has produced its result.
    snps = cleangwas.qc(exp_opts, column_names, logger)
    snps = cleangwas.selectSNP(exp_opts, snps, logger)
    snps = cleangwas.resort_col(snps)
    return cleangwas.truncate(exp_opts, snps)
def read_outc(opts, logger):
    """Read the outcome file in chunks, keeping only SNPs listed in 'include'.

    NOTE(review): this module defines ``read_outc`` a second time directly
    after this function. That later definition rebinds the name, so this
    chunked implementation is shadowed unless the later one is removed.

    Steps:
      1. Set every entry of ``cleangwas.del_no`` to 0 (module-level state).
      2. Work on a deep copy of ``opts`` with ``infp`` set to ``outcome``,
         ``pThresh`` set to 1 and ``include`` set to the file ``'include'``.
      3. Load the upper-cased RS identifiers from the ``'include'`` file.
      4. Stream the outcome file in chunks of 2,000,000 rows, apply the
         per-column converters from ``cleangwas.get_converter`` and keep
         only rows whose upper-cased ``RS`` is in the include list.
      5. If any rows survive, serialise them to an in-memory TSV and run
         ``qc`` -> ``selectSNP`` -> ``resort_col`` -> ``truncate`` on it.
      6. Delete the ``'include'`` file (best effort).

    Args:
        opts: Options object. ``outcome`` and ``sep`` are read directly
            here; the (copied) object is also handed to cleangwas helpers.
        logger: Object exposing a ``logger`` attribute with ``warning`` and
            ``info`` methods; also forwarded to the cleangwas helpers.

    Returns:
        The result of ``cleangwas.truncate`` when at least one SNP is
        shared with the include list, otherwise an empty ``pd.DataFrame``.

    Raises:
        SystemExit: with code 1 if a chunk has no ``RS`` column.
    """
    # Zero every entry of the shared cleangwas.del_no mapping in place.
    for key, _ in cleangwas.del_no.items():
        cleangwas.del_no[key] = 0

    opts = copy.deepcopy(opts)
    opts.infp = opts.outcome
    opts.pThresh = 1
    opts.include = 'include'

    header = cleangwas.read_header(opts.infp, opts.sep)
    cnames = cleangwas.parse_header(header, opts, logger,
                                    cleangwas.default_cnames)

    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    insnps = pd.read_csv(opts.include, header=None, names=["RS"], sep=r'\s+')
    insnps['RS'] = insnps['RS'].str.upper()
    converter = cleangwas.get_converter(cnames)

    # Collect the filtered chunks and concatenate once at the end.
    # DataFrame.append copied the accumulated frame on every call and no
    # longer exists in pandas >= 2.0.
    kept_chunks = []
    for chunk in pd.read_csv(opts.infp,
                             sep=opts.sep,
                             header=0,
                             names=cnames,
                             dtype=str,
                             iterator=True,
                             chunksize=2000000):
        for column, convert in converter.items():
            chunk[column] = chunk[column].apply(convert)

        if 'RS' not in chunk.columns:
            logger.logger.warning(
                'Can not identify RS column for {0}\nExiting'.format(
                    opts.infp))
            # Raise SystemExit directly; the bare exit() helper is only
            # installed by the site module.
            raise SystemExit(1)

        chunk['RS'] = chunk['RS'].str.upper()
        chunk = chunk[chunk['RS'].isin(insnps['RS'])]
        if not chunk.empty:
            kept_chunks.append(chunk)

    if kept_chunks:
        total_df = pd.concat(kept_chunks, sort=False)
        # Hand the filtered rows to cleangwas.qc as an in-memory TSV.
        content = total_df.to_csv(path_or_buf=None,
                                  sep='\t',
                                  na_rep='NA',
                                  float_format='%g',
                                  encoding='utf-8',
                                  index=False)
        opts.infp = StringIO(content)
        df = cleangwas.qc(opts, cnames, logger)
        df = cleangwas.selectSNP(opts, df, logger)
        df = cleangwas.resort_col(df)
        df = cleangwas.truncate(opts, df)
    else:
        logger.logger.info('No common SNPs among exposure and outcome.')
        df = pd.DataFrame()

    # Best-effort cleanup: ignore filesystem errors only, so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        os.remove('include')
    except OSError:
        pass
    return df
def read_outc(opts, logger):
    """Load the ``opts.outcome`` file restricted by the ``'include'`` file.

    NOTE(review): an earlier ``read_outc`` is defined immediately before
    this one in the module; this definition replaces it at import time.

    Every entry of the module-level ``cleangwas.del_no`` mapping is first
    set to 0. The function then works on a deep copy of ``opts`` with
    ``infp`` set to ``outcome``, ``pThresh`` set to 1 and ``include`` set
    to the file name ``'include'``, and runs ``cleangwas.qc`` ->
    ``selectSNP`` -> ``resort_col`` -> ``truncate``. Finally the
    ``'include'`` file is deleted on a best-effort basis.

    Args:
        opts: Options object. ``outcome`` and ``sep`` are read directly
            here; the (copied) object is also handed to cleangwas helpers.
        logger: Logger object forwarded to the cleangwas helpers.

    Returns:
        The value returned by ``cleangwas.truncate`` (presumably a pandas
        DataFrame -- confirm against cleangwas).
    """
    # Zero every entry of the shared cleangwas.del_no mapping in place.
    for key, _ in cleangwas.del_no.items():
        cleangwas.del_no[key] = 0

    opts = copy.deepcopy(opts)
    opts.infp = opts.outcome
    opts.pThresh = 1
    opts.include = 'include'

    header = cleangwas.read_header(opts.infp, opts.sep)
    cnames = cleangwas.parse_header(header, opts, logger,
                                    cleangwas.default_cnames)

    df = cleangwas.qc(opts, cnames, logger)
    df = cleangwas.selectSNP(opts, df, logger)
    df = cleangwas.resort_col(df)
    df = cleangwas.truncate(opts, df)

    # Best-effort cleanup: ignore filesystem errors only, so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        os.remove('include')
    except OSError:
        pass
    return df
def _concat_nonempty_frames(frames):
    """Stack the non-empty DataFrames in *frames*; empty DataFrame if none."""
    nonempty = [frame for frame in frames if not frame.empty]
    if not nonempty:
        return pd.DataFrame()
    # copy=True was the default and is omitted.
    return pd.concat(nonempty, axis=0, ignore_index=True, sort=False)


def _remove_quietly(path):
    """Delete *path* on a best-effort basis, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        pass


def mr4gm(opts, logger):
    """Run ``mr4general`` once per distinct ``Phenotype`` in ``exposure_gm``.

    Workflow:
      1. Parse the header of ``opts.exposure_gm`` and read the whole file
         (whitespace separated, all columns as strings).
      2. Write its ``RS`` column to the file ``'include'`` and call
         ``read_outc`` once to obtain the outcome table.
      3. For every distinct value of the ``Phenotype`` column, write the
         matching exposure rows to ``'.exposure_gm.txt'`` and the outcome
         table to ``'.outcome_gm.txt'``, point ``opts.exposure`` /
         ``opts.outcome`` at them, set ``opts.exp_name`` to the value,
         clear ``opts.exposure_gm`` and call ``mr4general``.
      4. Remove both temporary files and concatenate the per-phenotype
         outputs.

    Phenotype values are visited in order of first appearance, so the row
    order of the concatenated outputs is reproducible between runs.

    Args:
        opts: Options object; a deep copy is modified, the caller's object
            is left untouched. ``exposure_gm`` and ``heterogeneity`` are
            read directly here.
        logger: Logger object forwarded to the helpers.

    Returns:
        A 5-tuple ``(mr_results, mr_heters, mr_pleios, mr_datas,
        mr_pleiosnps)`` of pandas DataFrames. Each is the row-wise
        concatenation of the non-empty per-phenotype outputs, or an empty
        DataFrame when there are none. ``mr_pleiosnps`` is also empty
        whenever ``opts.heterogeneity == 'none'``.
    """
    exposure_tmp = '.exposure_gm.txt'
    outcome_tmp = '.outcome_gm.txt'
    tsv_kwargs = dict(sep='\t', na_rep='NA', float_format='%g',
                      encoding='utf-8', index=False)

    opts = copy.deepcopy(opts)
    opts.infp = opts.exposure_gm
    header = cleangwas.read_header(opts.infp, opts.sep)
    cnames = cleangwas.parse_header(header, opts, logger,
                                    cleangwas.default_cnames)

    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    all_df = pd.read_csv(opts.exposure_gm, header=0, names=cnames,
                         sep=r'\s+', dtype=str)
    all_df['RS'].to_csv('include', header=False, **tsv_kwargs)
    out_df = read_outc(opts, logger)

    # One list per mr4general output:
    # results, heterogeneity, pleiotropy, data, pleiotropic SNPs.
    collected = ([], [], [], [], [])

    # Only 'Phenotype' is used for grouping; 'Category' used to be an
    # additional candidate column.
    group_columns = [col for col in all_df.columns if col in ('Phenotype',)]
    try:
        for phe in group_columns:
            for value in all_df[phe].unique():
                all_df[all_df[phe] == value].to_csv(path_or_buf=exposure_tmp,
                                                    **tsv_kwargs)
                # Rewritten for every run: mr4general is opaque here and
                # may consume the file.
                out_df.to_csv(path_or_buf=outcome_tmp, **tsv_kwargs)
                opts.exposure = exposure_tmp
                opts.outcome = outcome_tmp
                opts.exp_name = value
                opts.exposure_gm = None
                outputs = mr4general(opts, logger)
                for bucket, output in zip(collected, outputs):
                    bucket.append(output)
    finally:
        # Remove each temp file independently, so a failure on the first
        # does not skip the second, and also clean up if a run raised.
        _remove_quietly(exposure_tmp)
        _remove_quietly(outcome_tmp)

    results, heters, pleios, datas, pleiosnps = collected
    mr_results = _concat_nonempty_frames(results)
    mr_heters = _concat_nonempty_frames(heters)
    mr_pleios = _concat_nonempty_frames(pleios)
    mr_datas = _concat_nonempty_frames(datas)
    if opts.heterogeneity != 'none':
        mr_pleiosnps = _concat_nonempty_frames(pleiosnps)
    else:
        mr_pleiosnps = pd.DataFrame()
    return mr_results, mr_heters, mr_pleios, mr_datas, mr_pleiosnps