def __init__(
    self,
    entries_path,
    phenotype_path=None,
    covariate_path=None,
    ibd_path=None,
    one_hot_covariates=None,
    bool_covariates=None,
    drop_covariates=None,
):
    """
    Creates a Population class instance from on-disk inputs.

    :param entries_path: Path handed to ``_load_entries``; the result is
        stored as ``self.entries``
    :param phenotype_path: Path handed to ``_load_phenotype``
    :param covariate_path: Path handed to ``_load_covariates``
    :param ibd_path: Path handed to ``load_sparse_csr``; when falsy no IBD
        matrix is loaded and ``self.ibd`` is None
    :param one_hot_covariates: List stored as ``self.one_hot_covariates``;
        None (the default) yields a fresh empty list
    :param bool_covariates: List stored as ``self.bool_covariates``;
        None (the default) yields a fresh empty list
    :param drop_covariates: List stored as ``self.drop_covariates``;
        None (the default) yields a fresh empty list
    """
    self.entries = self._load_entries(entries_path)
    # Reverse lookup: values of self.entries -> their index labels.
    self.entry_map = pd.Series(
        self.entries.index, index=self.entries.values, name='id'
    )
    self.phenotype = self._load_phenotype(phenotype_path)

    self.covariate_info = None
    # A new list per instance: the previous ``[]`` defaults were single
    # objects shared by every instance constructed without these arguments.
    # Lists supplied by the caller are still stored by reference.
    self.one_hot_covariates = (
        [] if one_hot_covariates is None else one_hot_covariates
    )
    self.drop_covariates = [] if drop_covariates is None else drop_covariates
    self.bool_covariates = [] if bool_covariates is None else bool_covariates
    # Kept after the option attributes above, as in the original ordering
    # (presumably _load_covariates reads them -- confirm).
    self.covariates = self._load_covariates(covariate_path)

    # NOTE(review): unlike _load_ibd, the matrix is not subset to
    # self.entries here -- confirm which behaviour is intended.
    self.ibd = load_sparse_csr(ibd_path) if ibd_path else None

    self.he = None
    self.results = None
    self.informative_indices = self._informative_indices()
def _load_ibd(self, ibd_path): """ Loads in IBD matrix :param ibd_path: Path to IBD file """ if ibd_path is None: return None ibd = load_sparse_csr(ibd_path) return ibd[self.entries.to_numpy()][:, self.entries.to_numpy()]
def SciLMM(
    simulate=False,
    sample_size=100000,
    sparsity_factor=0.001,
    gen_exp=1.4,
    init_keep_rate=0.8,
    fam=None,
    ibd=False,
    epis=False,
    dom=False,
    ibd_path=False,
    epis_path=False,
    dom_path=False,
    gen_y=False,
    y=None,
    cov=None,
    he=False,
    lmm=False,
    reml=False,
    sim_num=100,
    intercept=False,
    verbose=False,
    output_folder=".",
    remove_cycles=False,
    check_num_parents=False,
):
    """
    Build covariance matrices from a pedigree and optionally run estimators.

    The pedigree comes from ``fam`` (via ``read_fam``/``organize_rel``) or,
    failing that, from ``simulate_tree`` when ``simulate`` is set. The IBD,
    epistasis and dominance matrices are each loaded from, or computed and
    saved into, ``output_folder``. Results of ``compute_HE`` and ``LMM``
    are printed; nothing is returned.

    :param simulate: Simulate a pedigree (only consulted when ``fam`` is falsy)
    :param sample_size: Forwarded to ``simulate_tree``; must be > 0
    :param sparsity_factor: Forwarded to ``simulate_tree``; must be in (0, 1)
    :param gen_exp: Forwarded to ``simulate_tree``; must be > 0
    :param init_keep_rate: Forwarded to ``simulate_tree``; must be in (0, 1]
    :param fam: Path passed to ``read_fam``
    :param ibd: Compute the IBD matrix and save IBD.npz, L.npz and D.npz
    :param epis: Compute pairwise epistasis and save Epistasis.npz
    :param dom: Compute dominance and save Dominance.npz
    :param ibd_path: When truthy, load ``IBD.npz`` from ``output_folder``
        (the value itself is not used as a path)
    :param epis_path: When truthy, load ``Epistasis.npz`` from ``output_folder``
    :param dom_path: When truthy, load ``Dominance.npz`` from ``output_folder``
    :param gen_y: Simulate a phenotype and save it as ``y.npy``
    :param y: Path loaded with ``np.load`` when ``gen_y`` is not set
    :param cov: Path to covariates loaded with ``np.load``; when None the
        ``sex`` vector is used as the single covariate column
    :param he: Print the result of ``compute_HE``
    :param lmm: Print the result of ``LMM``
    :param reml: Forwarded to ``LMM``
    :param sim_num: Forwarded to ``LMM``
    :param intercept: Forwarded to the phenotype simulation and estimators
    :param verbose: Not referenced in this function body
    :param output_folder: Directory used for every file read or written
    :param remove_cycles: Forwarded to ``organize_rel``
    :param check_num_parents: Forwarded to ``organize_rel``
    :raises Exception: On a missing output folder, a missing target value,
        out-of-range simulation parameters, or a missing matrix that a
        requested computation depends on
    """
    if (ibd or epis or dom) and not os.path.exists(output_folder):
        raise Exception("The output folder does not exists")

    if (he or lmm) and y is None and gen_y is False:
        raise Exception("Can't estimate without a target value (--y)")

    rel, interest_in_relevant = None, None
    if fam:
        rel_org, sex, interest, entries_dict = read_fam(fam_file_path=fam)
        rel, interest_in_relevant = organize_rel(
            rel_org,
            interest,
            remove_cycles=remove_cycles,
            check_num_parents=check_num_parents,
        )
        # TODO: have to do sex as well in this version
        entries_list = np.array(list(entries_dict.values()))[
            interest_in_relevant
        ]
        np.save(os.path.join(output_folder, "entries_ids.npy"), entries_list)
    elif simulate:
        if sample_size <= 0:
            raise Exception("Sample size should be a positive number")
        if (sparsity_factor <= 0) or (sparsity_factor >= 1):
            raise Exception("Sparsity factor is within the range (0, 1)")
        if gen_exp <= 0:
            raise Exception("gen_exp is a positive number")
        if (init_keep_rate <= 0) or (init_keep_rate > 1):
            raise Exception("init_keep_rate is within the range (0, 1)")
        rel, sex, _ = simulate_tree(
            sample_size, sparsity_factor, gen_exp, init_keep_rate
        )
        write_fam(os.path.join(output_folder, "rel.fam"), rel, sex, None)

    # if no subset of interest has been specified, keep all indices
    if interest_in_relevant is None:
        # The builtin bool replaces np.bool, which NumPy 1.24 removed.
        interest_in_relevant = np.ones(rel.shape[0], dtype=bool)

    if ibd_path:
        ibd = load_sparse_csr(os.path.join(output_folder, "IBD.npz"))
    elif ibd:
        if rel is None:
            raise Exception("No relationship matrix given")
        ibd, L, D = simple_numerator(rel)
        # keep the original L and D because they are useless otherwise
        save_sparse_csr(os.path.join(output_folder, "IBD.npz"), ibd)
        save_sparse_csr(os.path.join(output_folder, "L.npz"), L)
        save_sparse_csr(os.path.join(output_folder, "D.npz"), D)
    else:
        ibd = None

    if epis_path:
        epis = load_sparse_csr(os.path.join(output_folder, "Epistasis.npz"))
    elif epis:
        if ibd is None:
            raise Exception("Pairwise-epistasis requires an ibd matrix")
        epis = pairwise_epistasis(ibd)
        save_sparse_csr(os.path.join(output_folder, "Epistasis.npz"), epis)
    else:
        epis = None

    if dom_path:
        dom = load_sparse_csr(os.path.join(output_folder, "Dominance.npz"))
    elif dom:
        if ibd is None or rel is None:
            raise Exception(
                "Dominance requires both an ibd matrix and a relationship matrix"
            )
        dom = dominance(rel, ibd)
        save_sparse_csr(os.path.join(output_folder, "Dominance.npz"), dom)
    else:
        dom = None

    # Only the matrices that were actually loaded or computed take part.
    covariance_matrices = [
        mat for mat in (ibd, epis, dom) if mat is not None
    ]

    if cov is not None:
        # NOTE(review): ``cov`` is still the path value when it is stacked
        # here, which looks unintended (perhaps sex[:, np.newaxis] was
        # meant as the first operand) -- confirm before changing.
        cov = np.hstack((cov, np.load(cov)))
    else:
        cov = sex[:, np.newaxis]

    if gen_y:
        sigs = np.random.rand(len(covariance_matrices) + 1)
        sigs /= sigs.sum()
        fe = np.random.rand(cov.shape[1] + intercept) / 100
        print(
            "Generating y with fixed effects: {} and sigmas : {}".format(
                fe, sigs
            )
        )
        y = simulate_phenotype(covariance_matrices, cov, sigs, fe, intercept)
        np.save(os.path.join(output_folder, "y.npy"), y)
    elif y is not None:
        # A caller-supplied target is a path; it was previously reset to
        # None before this point and therefore never loaded.
        y = np.load(y)

    if he:
        print(compute_HE(y, cov, covariance_matrices, intercept))
    if lmm:
        print(
            LMM(
                SparseCholesky(),
                covariance_matrices,
                cov,
                y,
                with_intercept=intercept,
                reml=reml,
                sim_num=sim_num,
            )
        )
help='Gen size = gen_exp X prev gen size') parser.add_argument( '--init_keep_rate', dest='init_keep_rate', type=float, default=0.8, help='1 - number of edges to remove before iteration begins') parser.add_argument('--save_folder', dest='save_folder', type=str, default='.', help='which folder it should save the output to.') args = parser.parse_args() if args.sample_size <= 0: raise Exception("Sample size should be a positive number") if (args.sparsity_factor <= 0) or (args.sparsity_factor >= 1): raise Exception("Sparsity factor is within the range (0, 1)") if args.gen_exp <= 0: raise Exception("gen_exp is a positive number") if (args.init_keep_rate <= 0) or (args.init_keep_rate > 1): raise Exception("init_keep_rate is within the range (0, 1)") rel = load_sparse_csr('rel.npz') print(count_IBD_nonzero(rel)) # rel, sex, gen_ind = simulate_tree(args.sample_size, args.sparsity_factor, args.gen_exp, args.init_keep_rate) # save_sparse_csr(os.path.join(args.save_folder, 'rel.npz'), rel) # np.save(os.path.join(args.save_folder, 'sex.npy'), sex) # np.save(os.path.join(args.save_folder, 'gen_ind.npy'), gen_ind)
dest="init_keep_rate", type=float, default=0.8, help="1 - number of edges to remove before iteration begins", ) parser.add_argument( "--save_folder", dest="save_folder", type=str, default=".", help="which folder it should save the output to.", ) args = parser.parse_args() if args.sample_size <= 0: raise Exception("Sample size should be a positive number") if (args.sparsity_factor <= 0) or (args.sparsity_factor >= 1): raise Exception("Sparsity factor is within the range (0, 1)") if args.gen_exp <= 0: raise Exception("gen_exp is a positive number") if (args.init_keep_rate <= 0) or (args.init_keep_rate > 1): raise Exception("init_keep_rate is within the range (0, 1)") rel = load_sparse_csr("rel.npz") print(count_IBD_nonzero(rel)) # rel, sex, gen_ind = simulate_tree(args.sample_size, args.sparsity_factor, args.gen_exp, args.init_keep_rate) # save_sparse_csr(os.path.join(args.save_folder, 'rel.npz'), rel) # np.save(os.path.join(args.save_folder, 'sex.npy'), sex) # np.save(os.path.join(args.save_folder, 'gen_ind.npy'), gen_ind)