def test_adafdr_retest():
    """Check that adafdr_retest agrees with a direct adafdr_test run.

    A model is fitted at alpha=0.1 and re-thresholded to alpha=0.02 with
    ``md.adafdr_retest``; the resulting decisions are compared against a
    separate ``md.adafdr_test`` run executed directly at alpha=0.02.  The
    test passes when fewer than 10 decisions differ.
    """
    # Only the p-values and covariates are needed from the loader.
    pvals, covariates, _, _, _ = dl.load_2d_bump_slope(n_sample=20000)

    base_result = md.adafdr_test(pvals, covariates, alpha=0.1,
                                 single_core=True)
    direct_result = md.adafdr_test(pvals, covariates, alpha=0.02,
                                   single_core=True)
    retest_result = md.adafdr_retest(base_result, alpha=0.02)

    direct_decision = direct_result['decision']
    retest_decision = retest_result['decision']
    # Number of hypotheses on which the two procedures disagree.
    n_mismatch = np.sum(direct_decision != retest_decision)

    print('adafdr_test discoveries at alpha=0.02:', np.sum(direct_decision))
    print('adafdr_retest discoveries at alpha=0.02:', np.sum(retest_decision))
    print('# diff', n_mismatch)
    assert n_mismatch < 10
def test_adafdr_test():
    """Check discovery count and false discovery proportion of adafdr_test.

    Runs ``md.adafdr_test`` on the 2d bump-slope data at alpha=0.1 and
    requires more than 700 rejections with an empirical FDP below 0.12.
    """
    pvals, covariates, truth, n_full, _ = dl.load_2d_bump_slope(n_sample=20000)
    result = md.adafdr_test(
        pvals, covariates, K=2, alpha=0.1, h=None, n_full=n_full,
        n_itr=50, verbose=False, random_state=0,
        fast_mode=False, single_core=True,
    )
    threshold = result['threshold']

    # A hypothesis is rejected when its p-value is below the threshold.
    rejected = pvals < threshold
    # Fraction of rejections whose label in ``truth`` equals 0.
    FDP = np.sum(rejected * (truth == 0)) / np.sum(rejected)
    n_rej = np.sum(rejected)

    print('n_rej', n_rej)
    assert n_rej > 700
    print('FDP', FDP)
    assert FDP < 0.12
def main(args):
    """Run BH, SBH and AdaFDR (fast and full mode) on one GTEx data set.

    Args:
        args: Parsed command-line arguments.  ``args.output_folder`` is used
            as the suffix of the result folder and of the pickle file name;
            ``args.data_name`` is passed to ``dl.load_GTEx``.

    Side effects:
        * Creates (or empties) ``<realpath('..')>/results/result_<name>`` and
          its ``_fast`` sibling folder.
        * Writes ``result.log`` into the result folder.
        * Pickles a dict mapping method name to ``{'h_hat': p < threshold}``
          to a hard-coded absolute path under ``/data3/martin/gtex_data``.
          NOTE(review): that directory is not created here and must exist.
    """
    # Set up parameters.
    alpha = 0.01
    n_itr = 1500

    # Set up the output folder.
    output_folder = os.path.realpath('..') + '/results/result_' + args.output_folder
    output_datafile = ('/data3/martin/gtex_data/results/result_'
                       + args.output_folder + '.pickle')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    else:
        # Start from an empty folder: drop the files of any previous run.
        for stale in os.listdir(output_folder):
            os.remove(os.path.join(output_folder, stale))

    # Load the data (the last returned value is not used here).
    p, x, n_full, cate_name, _ = dl.load_GTEx(args.data_name)

    # Logger.
    logging.basicConfig(level=logging.INFO, format='%(module)s:: %(message)s',
                        filename=output_folder+'/result.log', filemode='w')
    logger = logging.getLogger()
    result_dic = {}

    # An overview of the data.
    logger.info('# p: %s' % str(p[0:2]))
    logger.info('# x: %s' % str(x[0:2, :]))

    # Report the baseline methods.
    n_rej, t_rej = md.bh_test(p, alpha=alpha, n_full=n_full, verbose=False)
    logger.info('## BH, n_rej=%d, t_rej=%0.5f' % (n_rej, t_rej))
    result_dic['bh'] = {'h_hat': p < t_rej}
    n_rej, t_rej, pi0_hat = md.sbh_test(p, alpha=alpha, n_full=n_full,
                                        verbose=False)
    result_dic['sbh'] = {'h_hat': p < t_rej}
    logger.info('## SBH, n_rej=%d, t_rej=%0.5f, pi0_hat=%0.3f'
                % (n_rej, t_rej, pi0_hat))

    # Analysis.
    md.adafdr_explore(p, x, alpha=alpha, n_full=n_full, vis_dim=None,
                      cate_name=cate_name, output_folder=output_folder, h=None)

    # Fast mode: results go to a separate sibling folder.
    output_folder_fast = output_folder + '_fast'
    if not os.path.exists(output_folder_fast):
        os.makedirs(output_folder_fast)
    else:
        for stale in os.listdir(output_folder_fast):
            os.remove(os.path.join(output_folder_fast, stale))
    logger.info('# p: %s' % str(p[0:2]))
    logger.info('# x: %s' % str(x[0:2, :]))
    start_time = time.time()
    res = md.adafdr_test(p, x, K=5, alpha=alpha, h=None, n_full=n_full,
                         n_itr=n_itr, verbose=True,
                         output_folder=output_folder_fast, random_state=0,
                         fast_mode=True)
    n_rej = res['n_rej']
    t_rej = res['threshold']
    result_dic['nfdr (fast)'] = {'h_hat': p < t_rej}
    # res['n_rej'] holds two counts, which are logged separately and summed.
    logger.info('## nfdr2 (fast mode), n_rej1=%d, n_rej2=%d, n_rej_total=%d'
                % (n_rej[0], n_rej[1], n_rej[0] + n_rej[1]))
    logger.info('## Total time (fast mode): %0.1fs' % (time.time() - start_time))

    # Full mode.
    logger.info('# p: %s' % str(p[0:2]))
    logger.info('# x: %s' % str(x[0:2, :]))
    start_time = time.time()
    res = md.adafdr_test(p, x, K=5, alpha=alpha, h=None, n_full=n_full,
                         n_itr=n_itr, verbose=True,
                         output_folder=output_folder, random_state=0,
                         fast_mode=False, single_core=False)
    n_rej = res['n_rej']
    t_rej = res['threshold']
    result_dic['nfdr'] = {'h_hat': p < t_rej}
    logger.info('## nfdr2, n_rej1=%d, n_rej2=%d, n_rej_total=%d'
                % (n_rej[0], n_rej[1], n_rej[0] + n_rej[1]))
    logger.info('## Total time: %0.1fs' % (time.time() - start_time))

    # Store the result; the context manager closes the file even if
    # pickling fails part-way.
    with open(output_datafile, 'wb') as fil:
        pickle.dump(result_dic, fil)
def main(args):
    """Run BH, SBH and AdaFDR (fast and full mode) on a small data set.

    Args:
        args: Parsed command-line arguments.  ``args.output_folder`` is the
            suffix of the result folder.  ``args.data_loader`` selects the
            data: if it contains ``'GTEx'`` the data come from
            ``dl.load_GTEx_full`` (only the first three covariate columns are
            kept); otherwise it must be the name of a zero-argument loader
            function in ``dl`` returning ``(p, h, x)``.

    Side effects:
        Creates (or empties) ``<realpath('..')>/result_small_data/result_<name>``
        and its ``_fast`` sibling folder, and writes ``result.log`` into the
        former.

    Raises:
        AttributeError: if ``args.data_loader`` does not name an attribute
            of ``dl``.
    """
    # Set up parameters.
    alpha = 0.1
    n_itr = 1500

    # Set up the output folder.
    output_folder = (os.path.realpath('..') + '/result_small_data/result_'
                     + args.output_folder)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    else:
        # Start from an empty folder: drop the files of any previous run.
        for stale in os.listdir(output_folder):
            os.remove(os.path.join(output_folder, stale))

    # Load the data.
    if 'GTEx' in args.data_loader:
        p, x, n_full, cate_name = dl.load_GTEx_full(verbose=True)
        x = x[:, 0:3]
    else:
        # Look the loader up by name instead of eval()-ing text taken from
        # the command line, which would execute arbitrary expressions.
        # The second returned value is not used: every call below passes
        # h=None.
        p, _, x = getattr(dl, args.data_loader)()
        n_full = p.shape[0]
        cate_name = {}

    # Logger.
    logging.basicConfig(level=logging.INFO, format='%(module)s:: %(message)s',
                        filename=output_folder+'/result.log', filemode='w')
    logger = logging.getLogger()

    # An overview of the data.
    logger.info('# p: %s' % str(p[0:2]))
    logger.info('# x: %s' % str(x[0:2, :]))

    # Report the baseline methods.
    n_rej, t_rej = md.bh_test(p, alpha=alpha, n_full=n_full, verbose=False)
    logger.info('## BH, n_rej=%d, t_rej=%0.5f' % (n_rej, t_rej))
    n_rej, t_rej, pi0_hat = md.sbh_test(p, alpha=alpha, n_full=n_full,
                                        verbose=False)
    logger.info('## SBH, n_rej=%d, t_rej=%0.5f, pi0_hat=%0.3f'
                % (n_rej, t_rej, pi0_hat))

    # Analysis.
    md.adafdr_explore(p, x, alpha=alpha, n_full=n_full, vis_dim=None,
                      cate_name=cate_name, output_folder=output_folder, h=None)

    # Fast mode: results go to a separate sibling folder.
    output_folder_fast = output_folder + '_fast'
    if not os.path.exists(output_folder_fast):
        os.makedirs(output_folder_fast)
    else:
        for stale in os.listdir(output_folder_fast):
            os.remove(os.path.join(output_folder_fast, stale))
    logger.info('# p: %s' % str(p[0:2]))
    logger.info('# x: %s' % str(x[0:2, :]))
    start_time = time.time()
    res = md.adafdr_test(p, x, K=5, alpha=alpha, h=None, n_full=n_full,
                         n_itr=n_itr, verbose=True,
                         output_folder=output_folder_fast, random_state=0,
                         fast_mode=True)
    # res['n_rej'] holds two counts, which are logged separately and summed.
    n_rej = res['n_rej']
    logger.info('## AdaFDR (fast mode), n_rej1=%d, n_rej2=%d, n_rej_total=%d'
                % (n_rej[0], n_rej[1], n_rej[0] + n_rej[1]))
    logger.info('## Total time (fast mode): %0.1fs' % (time.time() - start_time))

    # Full mode.
    logger.info('# p: %s' % str(p[0:2]))
    logger.info('# x: %s' % str(x[0:2, :]))
    start_time = time.time()
    res = md.adafdr_test(p, x, K=5, alpha=alpha, h=None, n_full=n_full,
                         n_itr=n_itr, verbose=True,
                         output_folder=output_folder, random_state=0,
                         fast_mode=False, single_core=False)
    n_rej = res['n_rej']
    logger.info('## AdaFDR, n_rej1=%d, n_rej2=%d, n_rej_total=%d'
                % (n_rej[0], n_rej[1], n_rej[0] + n_rej[1]))
    logger.info('## Total time: %0.1fs' % (time.time() - start_time))
def main(args):
    """Benchmark BH, SBH, AdaFDR-fast and AdaFDR on every file in a folder.

    Args:
        args: Parsed command-line arguments.  ``args.input_folder`` is the
            folder of simulation files (entries starting with ``'.'`` are
            skipped), ``args.data_name`` is the suffix of the output folder,
            and ``args.alpha`` is either a single nominal level or ``None``
            to sweep 0.05, 0.1, 0.15 and 0.2.

    Side effects:
        Creates (or empties) ``./temp_result/res_<data_name>`` and writes
        ``result.log`` and ``result.pickle`` into it.  The pickle holds a dict
        mapping each method name to a list of
        ``[fdp, power, alpha, filename]`` records.
    """
    # Set up the parameters.
    input_folder = args.input_folder
    output_folder = './temp_result/res_' + args.data_name
    if args.alpha is not None:
        alpha_list = [args.alpha]
    else:
        alpha_list = [0.05, 0.1, 0.15, 0.2]
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    else:
        # Start from an empty folder: drop the files of any previous run.
        for stale in os.listdir(output_folder):
            os.remove(os.path.join(output_folder, stale))
    print('input_folder: %s'%input_folder)
    print('output_folder: %s'%output_folder)
    print('alpha_list: %s'%alpha_list)

    result_dic = {'bh': [], 'sbh': [], 'adafdr-fast': [], 'adafdr': []}

    # The context manager closes the log file even if one of the methods
    # raises in the middle of the sweep.
    with open(output_folder+'/result.log', 'w') as f_write:
        # Process all data in the folder.
        for filename in os.listdir(input_folder):
            if filename[0] == '.':
                continue
            file_path = input_folder + '/' + filename
            p, x, h = dl.load_simulation_data(file_path)
            for alpha in alpha_list:
                print('# Processing %s with alpha=%0.2f'%(filename, alpha))
                f_write.write('# Processing %s with alpha=%0.2f\n'%(filename, alpha))

                # BH result
                n_rej, t_rej = md.bh_test(p, alpha=alpha, verbose=False)
                fdp, power = get_fdp_and_power(h, p<=t_rej)
                result_dic['bh'].append([fdp, power, alpha, filename])
                f_write.write('## BH discoveries: %d, threshold=%0.3f\n'%(n_rej,t_rej))

                # SBH result
                n_rej, t_rej, pi0_hat = md.sbh_test(p, alpha=alpha, verbose=False)
                fdp, power = get_fdp_and_power(h, p<=t_rej)
                result_dic['sbh'].append([fdp, power, alpha, filename])
                f_write.write(
                    '## SBH discoveries: %d, threshold=%0.3f, pi0_hat=%0.3f\n'
                    % (n_rej, t_rej, pi0_hat))

                # AdaFDR-fast result
                start_time = time.time()
                res = md.adafdr_test(p, x, alpha=alpha, fast_mode=True)
                n_rej = res['n_rej']
                t_rej = res['threshold']
                fdp, power = get_fdp_and_power(h, p<=t_rej)
                result_dic['adafdr-fast'].append([fdp, power, alpha, filename])
                f_write.write(
                    '## AdaFDR-fast discoveries: fold_1=%d, fold_2=%d, total=%d\n'
                    % (n_rej[0], n_rej[1], n_rej[0]+n_rej[1]))
                # Terminate the line: without the newline the following
                # "## AdaFDR discoveries" record was glued onto this one.
                f_write.write('## Time: %0.1fs\n'%(time.time()-start_time))

                # AdaFDR result
                start_time = time.time()
                res = md.adafdr_test(p, x, alpha=alpha, fast_mode=False)
                n_rej = res['n_rej']
                t_rej = res['threshold']
                fdp, power = get_fdp_and_power(h, p<=t_rej)
                result_dic['adafdr'].append([fdp, power, alpha, filename])
                f_write.write(
                    '## AdaFDR discoveries: fold_1=%d, fold_2=%d, total=%d\n'
                    % (n_rej[0], n_rej[1], n_rej[0]+n_rej[1]))
                # This line is terminated by the separate '\n' write below.
                f_write.write('## Time: %0.1fs'%(time.time()-start_time))
                f_write.write('\n')

    # Store the result.
    with open(output_folder+'/result.pickle', 'wb') as fil:
        pickle.dump(result_dic, fil)
def main(args):
    """Run BH, SBH and AdaFDR (fast and full) over a folder of simulations.

    Args:
        args: Parsed command-line arguments.  ``args.input_folder`` is the
            folder of simulation files (entries starting with ``'.'`` are
            skipped) and ``args.output_folder`` is the suffix of the result
            folder.  When the input folder path contains ``'speed'``,
            ``'ntest'`` or ``'prop_alt'`` only alpha=0.1 is run; otherwise
            alpha sweeps 0.05, 0.1, 0.15 and 0.2.

    Side effects:
        Creates (or empties)
        ``<realpath('..')>/result_simulation/result_<name>`` and writes
        ``result.log`` and ``result_dic.pickle`` into it.  The pickle file
        contains TWO consecutive objects: ``result_dic``
        (method -> alpha -> list of ``[h, p <= threshold]``) followed by
        ``time_dic`` (method -> alpha -> file name -> seconds), so readers
        must call ``pickle.load`` twice.
    """
    # Set up parameters.
    n_itr = 1500

    # Set up the output folder.
    output_folder = (os.path.realpath('..') + '/result_simulation/result_'
                     + args.output_folder)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    else:
        # Start from an empty folder: drop the files of any previous run.
        for stale in os.listdir(output_folder):
            os.remove(os.path.join(output_folder, stale))

    # Get logger.
    logging.basicConfig(level=logging.INFO, format='%(message)s',
                        filename=output_folder+'/result.log', filemode='w')
    logger = logging.getLogger()

    # Run the methods on all data in the folder.
    file_list = os.listdir(args.input_folder)
    alpha_list = [0.05, 0.1, 0.15, 0.2]
    # These experiment families are only evaluated at a single level.
    if any(tag in args.input_folder for tag in ('speed', 'ntest', 'prop_alt')):
        alpha_list = [0.1]

    result_dic = {'bh': {}, 'sbh': {}, 'nfdr (fast)': {}, 'nfdr': {}}
    time_dic = {'nfdr (fast)': {}, 'nfdr': {}}
    for alpha in alpha_list:
        result_dic['bh'][alpha] = []
        result_dic['sbh'][alpha] = []
        result_dic['nfdr (fast)'][alpha] = []
        result_dic['nfdr'][alpha] = []
        time_dic['nfdr (fast)'][alpha] = {}
        time_dic['nfdr'][alpha] = {}
        for filename in file_list:
            if filename[0] == '.':
                continue
            print('# Processing %s with alpha=%0.2f'%(filename, alpha))
            logger.info('# Processing %s with alpha=%0.2f'%(filename, alpha))
            file_path = args.input_folder + '/' + filename
            p, x, h = dl.load_simulation_data(file_path)
            n_full = p.shape[0]

            # Report the baseline.
            n_rej, t_rej = md.bh_test(p, alpha=alpha, n_full=n_full,
                                      verbose=False)
            result_dic['bh'][alpha].append([h, p<=t_rej])
            logger.info('## BH, n_rej=%d, t_rej=%0.5f'%(n_rej,t_rej))
            n_rej, t_rej, pi0_hat = md.sbh_test(p, alpha=alpha, n_full=n_full,
                                                verbose=False)
            result_dic['sbh'][alpha].append([h, p<=t_rej])
            logger.info('## SBH, n_rej=%d, t_rej=%0.5f, pi0_hat=%0.3f'
                        % (n_rej, t_rej, pi0_hat))

            # Fast mode: train first, then test on the trained result.
            start_time = time.time()
            res_train = md.adafdr_train(p, x, K=5, alpha=alpha, h=h,
                                        n_full=n_full, n_itr=n_itr,
                                        verbose=False, output_folder=None,
                                        random_state=0, fast_mode=True)
            res = md.adafdr_test(res_train, alpha, n_full=n_full,
                                 output_folder=None)
            n_rej = res['n_rej']
            t_rej = res['threshold']
            # Timings are keyed by the bare file name, not the full path.
            time_dic['nfdr (fast)'][alpha][filename] = time.time()-start_time
            result_dic['nfdr (fast)'][alpha].append([h, p<=t_rej])
            logger.info('## nfdr2 (fast mode), n_rej1=%d, n_rej2=%d, n_rej_total=%d'
                        % (n_rej[0], n_rej[1], n_rej[0]+n_rej[1]))
            logger.info('## Total time (fast mode): %0.1fs'%(time.time()-start_time))

            # Full mode.
            start_time = time.time()
            res_train = md.adafdr_train(p, x, K=5, alpha=alpha, h=h,
                                        n_full=n_full, n_itr=n_itr,
                                        verbose=False, output_folder=None,
                                        random_state=0, fast_mode=False,
                                        single_core=False)
            res = md.adafdr_test(res_train, alpha, n_full=n_full,
                                 output_folder=None)
            n_rej = res['n_rej']
            t_rej = res['threshold']
            time_dic['nfdr'][alpha][filename] = time.time()-start_time
            result_dic['nfdr'][alpha].append([h, p<=t_rej])
            logger.info('## nfdr2, n_rej1=%d, n_rej2=%d, n_rej_total=%d'
                        % (n_rej[0], n_rej[1], n_rej[0]+n_rej[1]))
            logger.info('## Total time: %0.1fs'%(time.time()-start_time))
            logger.info('\n')

    # Store the result; the context manager closes the file even if
    # pickling fails part-way.
    with open(output_folder+'/result_dic.pickle', 'wb') as fil:
        pickle.dump(result_dic, fil)
        pickle.dump(time_dic, fil)
def main(args):
    """Run AdaFDR on GTEx data with each covariate alone and with all of them.

    After reporting the BH and SBH baselines, AdaFDR is run in fast and full
    mode once per entry of ``cov_list``: the first four runs use a single
    covariate column of ``x`` and the last run (``'all'``) uses the full
    covariate matrix.

    Args:
        args: Parsed command-line arguments.  ``args.output_folder`` is the
            suffix of the result folder and of the pickle file name;
            ``args.data_name`` is passed to ``dl.load_GTEx``.

    Side effects:
        * Creates (or empties)
          ``<realpath('..')>/results/result_univariate_<name>`` and writes
          ``result.log`` into it.
        * Pickles a dict mapping method name to ``{'h_hat': p < threshold}``
          to a hard-coded absolute path under ``/data3/martin/gtex_data``.
          NOTE(review): that directory is not created here and must exist.
    """
    # Set up parameters.
    alpha = 0.01
    n_itr = 1500

    # Set up the output folder.
    output_folder = (os.path.realpath('..') + '/results/result_univariate_'
                     + args.output_folder)
    output_datafile = ('/data3/martin/gtex_data/results_uni_covariate/result_'
                       + args.output_folder + '.pickle')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    else:
        # Start from an empty folder: drop the files of any previous run.
        for stale in os.listdir(output_folder):
            os.remove(os.path.join(output_folder, stale))

    # Load the data (the last returned value is not used here).
    p, x, n_full, cate_name, _ = dl.load_GTEx(args.data_name)

    # Logger.
    logging.basicConfig(level=logging.INFO, format='%(message)s',
                        filename=output_folder+'/result.log', filemode='w')
    logger = logging.getLogger()
    result_dic = {}

    # An overview of the data.
    logger.info('# p: %s' % str(p[0:2]))
    logger.info('# x: %s' % str(x[0:2, :]))

    # Report the baseline methods.
    n_rej, t_rej = md.bh_test(p, alpha=alpha, n_full=n_full, verbose=False)
    logger.info('## BH, n_rej=%d, t_rej=%0.5f' % (n_rej, t_rej))
    result_dic['bh'] = {'h_hat': p < t_rej}
    n_rej, t_rej, pi0_hat = md.sbh_test(p, alpha=alpha, n_full=n_full,
                                        verbose=False)
    result_dic['sbh'] = {'h_hat': p < t_rej}
    logger.info('## SBH, n_rej=%d, t_rej=%0.5f, pi0_hat=%0.3f\n'
                % (n_rej, t_rej, pi0_hat))

    # Analysis.
    md.adafdr_explore(p, x, alpha=alpha, n_full=n_full, vis_dim=None,
                      cate_name=cate_name, output_folder=output_folder, h=None)

    # Four covariates separately, then all of them together.
    cov_list = ['exp', 'maf', 'dist', 'chromotin', 'all']
    n_single = len(cov_list) - 1
    for i_cov, cov_name in enumerate(cov_list):
        logger.info('Covariate: %s' % cov_name)
        if i_cov < n_single:
            # Keep the covariate as a 2-D column.
            temp_x = x[:, i_cov].reshape([-1, 1])
        else:
            temp_x = x

        # Fast mode (no per-run output folder).
        logger.info('# p: %s' % str(p[0:2]))
        logger.info('# x: %s' % str(temp_x[0:2, :]))
        start_time = time.time()
        res = md.adafdr_test(p, temp_x, K=5, alpha=alpha, h=None,
                             n_full=n_full, n_itr=n_itr, verbose=True,
                             output_folder=None, random_state=0,
                             fast_mode=True)
        n_rej = res['n_rej']
        t_rej = res['threshold']
        result_dic['nfdr (fast)_%d' % i_cov] = {'h_hat': p < t_rej}
        logger.info(
            '## AdaFDR (fast mode), feature=%d, n_rej1=%d, n_rej2=%d, n_rej_total=%d'
            % (i_cov, n_rej[0], n_rej[1], n_rej[0] + n_rej[1]))
        logger.info('## Total time (fast mode): %0.1fs' % (time.time() - start_time))

        # Full mode.
        logger.info('# p: %s' % str(p[0:2]))
        logger.info('# x: %s' % str(temp_x[0:2, :]))
        start_time = time.time()
        res = md.adafdr_test(p, temp_x, K=5, alpha=alpha, h=None,
                             n_full=n_full, n_itr=n_itr, verbose=True,
                             output_folder=None, random_state=0,
                             fast_mode=False, single_core=False)
        n_rej = res['n_rej']
        t_rej = res['threshold']
        result_dic['nfdr_%d' % i_cov] = {'h_hat': p < t_rej}
        logger.info('## nfdr2, n_rej1=%d, n_rej2=%d, n_rej_total=%d'
                    % (n_rej[0], n_rej[1], n_rej[0] + n_rej[1]))
        logger.info('## Total time: %0.1fs' % (time.time() - start_time))
        logger.info(' ')

    # Store the result; the context manager closes the file even if
    # pickling fails part-way.
    with open(output_datafile, 'wb') as fil:
        pickle.dump(result_dic, fil)