def precompute_pca(self):
    """
    Compute principal components on the training SNPs and cache them on disk.

    The decomposition is skipped when ``self.eigen_fn`` already exists and
    ``self.force_recompute`` is falsy. Otherwise the standardized SNP matrix
    is read through ``self.snp_reader``, restricted to the columns returned
    as ``rest_idx`` by ``split_data_helper.split_chr1_chr2_rest``, passed
    through a default scikit-learn ``PCA`` and the transformed data is
    stored via ``save`` as ``{"pcs": ...}`` in ``self.eigen_fn``.

    Returns nothing; the result is only written to ``self.eigen_fn``.
    """
    logging.info("computing PCA on train set")
    start_time = time.time()

    # Guard clause: reuse the cached file unless a recompute is forced.
    if os.path.isfile(self.eigen_fn) and not self.force_recompute:
        logging.info("pc file already exists: %s" % (self.eigen_fn))
        return

    standardized_snps = self.snp_reader.read(order='C').standardize()
    genotypes = standardized_snps.val
    # Mark the matrix read-only so later steps cannot modify it by accident.
    genotypes.flags.writeable = False

    # Only the "rest" columns are used for training
    # (presumably everything outside chr1/chr2 -- confirm in split_data_helper).
    _, _, rest_idx = split_data_helper.split_chr1_chr2_rest(
        self.snp_reader.pos)
    train_genotypes = genotypes.take(rest_idx, axis=1)

    from sklearn.decomposition import PCA
    pcs = PCA().fit_transform(train_genotypes)

    logging.info("saving eigendecomp to file %s" % self.eigen_fn)
    save(self.eigen_fn, {"pcs": pcs})

    elapsed = time.time() - start_time
    logging.info("time taken for pc computation: " + str(elapsed))
def precompute_pca(self):
    """
    Compute principal components on the training SNPs and cache them on disk.

    The decomposition is skipped when ``self.eigen_fn`` already exists and
    ``self.force_recompute`` is falsy. Otherwise the standardized SNP matrix
    is read through ``self.snp_reader``, restricted to the columns returned
    as ``rest_idx`` by ``split_data_helper.split_chr1_chr2_rest``, passed
    through a default scikit-learn ``PCA`` and the transformed data is
    stored via ``save`` as ``{"pcs": ...}`` in ``self.eigen_fn``.

    Returns nothing; the result is only written to ``self.eigen_fn``.
    """
    logging.info("computing PCA on train set")
    start_time = time.time()

    # Guard clause: reuse the cached file unless a recompute is forced.
    if os.path.isfile(self.eigen_fn) and not self.force_recompute:
        logging.info("pc file already exists: %s" % (self.eigen_fn))
        return

    standardized_snps = self.snp_reader.read(order='C').standardize()
    genotypes = standardized_snps.val
    # Mark the matrix read-only so later steps cannot modify it by accident.
    genotypes.flags.writeable = False

    # Only the "rest" columns are used for training
    # (presumably everything outside chr1/chr2 -- confirm in split_data_helper).
    _, _, rest_idx = split_data_helper.split_chr1_chr2_rest(
        self.snp_reader.pos)
    train_genotypes = genotypes.take(rest_idx, axis=1)

    from sklearn.decomposition import PCA
    pcs = PCA().fit_transform(train_genotypes)

    logging.info("saving eigendecomp to file %s" % self.eigen_fn)
    save(self.eigen_fn, {"pcs": pcs})

    elapsed = time.time() - start_time
    logging.info("time taken for pc computation: " + str(elapsed))
def compute_core(input_tuple):
    """
    Run one simulation: draw causal SNPs, generate a phenotype and evaluate
    every benchmark method on it.

    Leave-two-chromosome-out evaluation scheme:
    Chr1: no causals, used for T1-error evaluation
    Chr2: has causals, not conditioned on, used for power evaluation
    Rest: has causals, conditioned on

      T1   Pow  [     cond     ]
    ===== ===== ===== .... =====
            x x   x x      xx

    Parameters
    ----------
    input_tuple : tuple
        ``(methods, snp_fn, eigen_fn, num_causal, num_pcs, seed, sim_id)``.
        ``methods`` is an iterable of callables invoked as
        ``method(test_snps, pheno, G0, covar)``, each returning two dicts.
        ``snp_fn`` is opened with ``pysnptools.snpreader.Bed``.
        ``eigen_fn`` is read with ``load`` and must contain the key
        ``"pcs"``. ``seed`` may be ``None``, in which case the global NumPy
        random state is left untouched. ``sim_id`` is not used in this
        function; it is only carried along inside ``input_tuple``.

    Returns
    -------
    (result, indices) : tuple of dict
        ``result`` merges the first dict returned by every method.
        ``indices`` holds ``causal_idx``, ``chr1_idx``, ``chr2_idx``, the
        original ``input_tuple`` and ``fs_result`` (the merged second dicts).
    """
    methods, snp_fn, eigen_fn, num_causal, num_pcs, seed, sim_id = input_tuple

    # partially load bed file
    from pysnptools.snpreader import Bed
    snp_reader = Bed(snp_fn)

    # determine indices for generation and evaluation
    ##################################################################
    chr1_idx, chr2_idx, rest_idx = split_data_helper.split_chr1_chr2_rest(
        snp_reader.pos)

    # Causal SNPs are drawn from chr2 and the rest, never from chr1.
    # To only compute T1-error (condition on all chromosomes with causals
    # on them), restrict the candidates to rest_idx instead.
    causal_candidates_idx = np.concatenate((chr2_idx, rest_idx))

    test_idx = np.concatenate((chr1_idx, chr2_idx))

    if seed is not None:
        # np.random.seed only accepts integers in [0, 2**32 - 1]. The former
        # modulus, sys.maxint, does not exist on Python 3 and on 64-bit
        # Python 2 is far larger than that range, so large or negative seeds
        # raised an error. Seeds already within range map to the same value.
        np.random.seed(int(seed % 2 ** 32))

    causal_idx = np.random.permutation(causal_candidates_idx)[0:num_causal]

    # generate phenotype
    ###################################################################
    genetic_var = 0.5
    noise_var = 0.5

    y = generate_phenotype(
        Bed(snp_fn).read(order='C').standardize(),
        causal_idx, genetic_var, noise_var)
    y.flags.writeable = False

    ############### only alter part until here --> modularize this

    # load pcs
    ###################################################################
    logging.info("loading eigendecomp from file %s" % eigen_fn)
    eig_dec = load(eigen_fn)
    G_pc = eig_dec["pcs"]
    G_pc.flags.writeable = False

    # Keep only the leading num_pcs components. A copy is standardized
    # because G_pc was just marked read-only (presumably standardize works
    # in place -- confirm against DiagKtoN).
    G_pc_ = G_pc[:, 0:num_pcs]
    G_pc_norm = DiagKtoN(G_pc_.shape[0]).standardize(G_pc_.copy())
    G_pc_norm.flags.writeable = False

    # run feature selection
    #########################################################

    # generate pheno data structure
    pheno = {"iid": snp_reader.iid, "vals": y, "header": []}
    covar = {"iid": snp_reader.iid, "vals": G_pc_norm, "header": []}

    # subset readers
    G0 = snp_reader[:, rest_idx]
    test_snps = snp_reader[:, test_idx]

    result = {}
    fs_result = {}

    # additional methods can be defined and included in the benchmark
    for method_function in methods:
        result_, fs_result_ = method_function(test_snps, pheno, G0, covar)
        result.update(result_)
        fs_result.update(fs_result_)

    # save indices (test_idx is not stored; it is the concatenation of
    # chr1_idx and chr2_idx)
    indices = {
        "causal_idx": causal_idx,
        "chr1_idx": chr1_idx,
        "chr2_idx": chr2_idx,
        "input_tuple": input_tuple,
        "fs_result": fs_result,
    }

    return result, indices
def compute_core(input_tuple):
    """
    Run one simulation: draw causal SNPs, generate a phenotype and evaluate
    every benchmark method on it.

    Leave-two-chromosome-out evaluation scheme:
    Chr1: no causals, used for T1-error evaluation
    Chr2: has causals, not conditioned on, used for power evaluation
    Rest: has causals, conditioned on

      T1   Pow  [     cond     ]
    ===== ===== ===== .... =====
            x x   x x      xx

    Parameters
    ----------
    input_tuple : tuple
        ``(methods, snp_fn, eigen_fn, num_causal, num_pcs, seed, sim_id)``.
        ``methods`` is an iterable of callables invoked as
        ``method(test_snps, pheno, G0, covar)``, each returning two dicts.
        ``snp_fn`` is opened with ``pysnptools.snpreader.Bed``.
        ``eigen_fn`` is read with ``load`` and must contain the key
        ``"pcs"``. ``seed`` may be ``None``, in which case the global NumPy
        random state is left untouched. ``sim_id`` is not used in this
        function; it is only carried along inside ``input_tuple``.

    Returns
    -------
    (result, indices) : tuple of dict
        ``result`` merges the first dict returned by every method.
        ``indices`` holds ``causal_idx``, ``chr1_idx``, ``chr2_idx``, the
        original ``input_tuple`` and ``fs_result`` (the merged second dicts).
    """
    methods, snp_fn, eigen_fn, num_causal, num_pcs, seed, sim_id = input_tuple

    # partially load bed file
    from pysnptools.snpreader import Bed
    snp_reader = Bed(snp_fn)

    # determine indices for generation and evaluation
    ##################################################################
    chr1_idx, chr2_idx, rest_idx = split_data_helper.split_chr1_chr2_rest(
        snp_reader.pos)

    # Causal SNPs are drawn from chr2 and the rest, never from chr1.
    # To only compute T1-error (condition on all chromosomes with causals
    # on them), restrict the candidates to rest_idx instead.
    causal_candidates_idx = np.concatenate((chr2_idx, rest_idx))

    test_idx = np.concatenate((chr1_idx, chr2_idx))

    if seed is not None:
        # np.random.seed only accepts integers in [0, 2**32 - 1]. The former
        # modulus, sys.maxint, does not exist on Python 3 and on 64-bit
        # Python 2 is far larger than that range, so large or negative seeds
        # raised an error. Seeds already within range map to the same value.
        np.random.seed(int(seed % 2 ** 32))

    causal_idx = np.random.permutation(causal_candidates_idx)[0:num_causal]

    # generate phenotype
    ###################################################################
    genetic_var = 0.5
    noise_var = 0.5

    y = generate_phenotype(
        Bed(snp_fn).read(order='C').standardize(),
        causal_idx, genetic_var, noise_var)
    y.flags.writeable = False

    ############### only alter part until here --> modularize this

    # load pcs
    ###################################################################
    logging.info("loading eigendecomp from file %s" % eigen_fn)
    eig_dec = load(eigen_fn)
    G_pc = eig_dec["pcs"]
    G_pc.flags.writeable = False

    # Keep only the leading num_pcs components. A copy is standardized
    # because G_pc was just marked read-only (presumably standardize works
    # in place -- confirm against DiagKtoN).
    G_pc_ = G_pc[:, 0:num_pcs]
    G_pc_norm = DiagKtoN(G_pc_.shape[0]).standardize(G_pc_.copy())
    G_pc_norm.flags.writeable = False

    # run feature selection
    #########################################################

    # generate pheno data structure
    pheno = {"iid": snp_reader.iid, "vals": y, "header": []}
    covar = {"iid": snp_reader.iid, "vals": G_pc_norm, "header": []}

    # subset readers
    G0 = snp_reader[:, rest_idx]
    test_snps = snp_reader[:, test_idx]

    result = {}
    fs_result = {}

    # additional methods can be defined and included in the benchmark
    for method_function in methods:
        result_, fs_result_ = method_function(test_snps, pheno, G0, covar)
        result.update(result_)
        fs_result.update(fs_result_)

    # save indices (test_idx is not stored; it is the concatenation of
    # chr1_idx and chr2_idx)
    indices = {
        "causal_idx": causal_idx,
        "chr1_idx": chr1_idx,
        "chr2_idx": chr2_idx,
        "input_tuple": input_tuple,
        "fs_result": fs_result,
    }

    return result, indices