def test_genotypes_exact():
    """
    Test whether genotype knockoffs with true HMM are accurate

    An HMM is fitted with fastPHASE on a training sample, and the test data
    are then drawn from that *fitted* model. The knockoffs are therefore
    generated with the exact model of the data they are checked against,
    which is why a tight tolerance is used.
    """
    import os  # local import: only needed to close the mkstemp() descriptors

    p = 10
    K = 3
    M = 3
    n_train = 1000
    n_test = 100000

    # Training data from a randomly generated HMM
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n_train)

    # mkstemp() returns an already-open descriptor together with the path.
    # Only the path is used below, so close the descriptor instead of leaking it.
    fd, Xfp_file = tempfile.mkstemp()
    os.close(fd)
    fp.writeXtoInp(X, Xfp_file)

    fastphase = "fastphase"  # Name of fastPhase executable
    fd, out_path = tempfile.mkstemp()
    os.close(fd)
    fp.runFastPhase(Xfp_file, out_path, fastphase=fastphase, K=5, numit=25)

    # Parameter files loaded below, all derived from the output prefix
    r_file = out_path + "_rhat.txt"
    alpha_file = out_path + "_alphahat.txt"
    theta_file = out_path + "_thetahat.txt"
    char_file = out_path + "_origchars"

    # Knockoff generator built from the compact parametrization
    hmm = fp.loadHMM(r_file, alpha_file, theta_file, char_file)
    knockoffs = knockoffGenotypes(hmm["r"], hmm["alpha"], hmm["theta"], seed=123)

    # Same fitted model in full form, used to sample the test data
    hmm_hat = fp.loadHMM(r_file, alpha_file, theta_file, char_file, compact=False)
    modelX_hat = models.HMM(hmm_hat["pInit"], hmm_hat["Q"], hmm_hat["pEmit"])
    X_new = modelX_hat.sample(n_test)

    Xk_new = knockoffs.sample(X_new)
    verify_exchangeability(X_new, Xk_new, tolerance=1e-3)
def test_haplotypes_fastphase():
    """
    Test whether haplotype knockoffs with HMM fitted by fastPHASE are accurate

    The knockoffs are generated for the same sample the model was fitted on,
    so the model is only an estimate of the data distribution and a loose
    tolerance is used in the exchangeability check.
    """
    import os  # local import: only needed to close the mkstemp() descriptors

    p = 10
    K = 3
    M = 2
    n = 1000

    # Data from a randomly generated HMM
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n)

    # mkstemp() returns an already-open descriptor together with the path.
    # Only the path is used below, so close the descriptor instead of leaking it.
    fd, Xfp_file = tempfile.mkstemp()
    os.close(fd)
    fp.writeXtoInp(X, Xfp_file, phased=True)

    fastphase = "fastphase"  # Name of fastPhase executable
    fd, out_path = tempfile.mkstemp()
    os.close(fd)
    fp.runFastPhase(Xfp_file, out_path, fastphase=fastphase, phased=True, K=5, numit=25)

    # Parameter files loaded below, all derived from the output prefix
    r_file = out_path + "_rhat.txt"
    alpha_file = out_path + "_alphahat.txt"
    theta_file = out_path + "_thetahat.txt"
    char_file = out_path + "_origchars"

    hmm = fp.loadHMM(r_file, alpha_file, theta_file, char_file, phased=True)
    knockoffs = knockoffHaplotypes(hmm["r"], hmm["alpha"], hmm["theta"], seed=123)
    Xk = knockoffs.sample(X)
    verify_exchangeability(X, Xk, tolerance=1e-1)
def test_haplotypes_hmm():
    """
    Test whether specialized haplotype knockoff algorithm agrees with special case

    The same fastPHASE fit is loaded in both compact and full form. With the
    same groups and the same seed, knockoffHaplotypes (compact form) and
    knockoffHMM (full form) must produce identical knockoffs.
    """
    import os  # local import: only needed to close the mkstemp() descriptors

    p = 10
    K = 5
    M = 2
    n_train = 1000

    # Data from a randomly generated HMM
    pInit, Q, pEmit = generate_HMM(p, K, M)
    modelX = models.HMM(pInit, Q, pEmit)
    X = modelX.sample(n_train)

    # mkstemp() returns an already-open descriptor together with the path.
    # Only the path is used below, so close the descriptor instead of leaking it.
    fd, Xfp_file = tempfile.mkstemp()
    os.close(fd)
    fp.writeXtoInp(X, Xfp_file, phased=True)

    fastphase = "fastphase"  # Name of fastPhase executable
    fd, out_path = tempfile.mkstemp()
    os.close(fd)
    fp.runFastPhase(Xfp_file, out_path, fastphase=fastphase, phased=True, K=5, numit=25)

    # Parameter files loaded below, all derived from the output prefix
    r_file = out_path + "_rhat.txt"
    alpha_file = out_path + "_alphahat.txt"
    theta_file = out_path + "_thetahat.txt"
    char_file = out_path + "_origchars"

    # Consecutive variables are grouped three at a time (last group is shorter)
    groups = np.repeat(np.arange(p), 3)[:p]

    # NOTE(review): the compact model is loaded without phased=True, unlike the
    # full model below -- confirm this is intended.
    hmm_compact = fp.loadHMM(r_file, alpha_file, theta_file, char_file)
    hmm = fp.loadHMM(r_file, alpha_file, theta_file, char_file, compact=False, phased=True)

    knockoffs = knockoffHMM(hmm["pInit"], hmm["Q"], hmm["pEmit"], groups=groups, seed=123)
    knockoffs_hap = knockoffHaplotypes(
        hmm_compact["r"], hmm_compact["alpha"], hmm_compact["theta"],
        groups=groups, seed=123)

    Xk = knockoffs.sample(X)
    Xk_compact = knockoffs_hap.sample(X)
    assert np.array_equal(Xk, Xk_compact), "Knockoffs with trivial groups do not match"
def test_HMM():
    """
    Check that knockoffs generated for a random HMM are exchangeable with the data
    """
    n_vars, n_states, n_symbols = 10, 4, 5
    n_obs = 100000

    # Random model and a sample drawn from it
    init_probs, transitions, emissions = generate_HMM(n_vars, n_states, n_symbols)
    data = models.HMM(init_probs, transitions, emissions).sample(n_obs)

    # Knockoffs built from the very same model parameters
    generator = knockoffHMM(init_probs, transitions, emissions, seed=123)
    data_knockoffs = generator.sample(data)

    verify_exchangeability(data, data_knockoffs)
def test_HMM_groups():
    """
    Check that group knockoffs generated for a random HMM are exchangeable with the data
    """
    n_vars, n_states, n_symbols = 10, 4, 5
    n_obs = 100000

    # Consecutive variables are grouped three at a time (last group is shorter)
    group_labels = np.repeat(np.arange(n_vars), 3)[:n_vars]

    # Random model and a sample drawn from it
    init_probs, transitions, emissions = generate_HMM(n_vars, n_states, n_symbols)
    data = models.HMM(init_probs, transitions, emissions).sample(n_obs)

    # Knockoffs built from the very same model parameters, with the groups above
    generator = knockoffHMM(init_probs, transitions, emissions,
                            groups=group_labels, seed=123)
    data_knockoffs = generator.sample(data)

    verify_exchangeability(data, data_knockoffs, groups=group_labels)
def __init__(self, num_samples, num_dim, num_hidden_states, num_emission_states=3, **sampler_x_args):
    """
    Store the problem sizes, draw HMM parameters, build the model and sample from it.

    The sizes are kept as attributes, the parameters come from
    sample_parameters(), the model is a models.HMM built from the
    'pInit', 'Q' and 'pEmit' entries, and self.sample() is called at the end.
    Extra keyword arguments are accepted but not used in this method.
    """
    self.n_samples = num_samples
    self.dim = num_dim
    self.n_hidden_states = num_hidden_states
    self.n_emission_states = num_emission_states

    # Draw the HMM parameters for the stored sizes
    self.parameters = sample_parameters(
        self.dim,
        self.n_hidden_states,
        self.n_emission_states,
    )

    # Build the model from the drawn parameters
    params = self.parameters
    self.modelX = models.HMM(params['pInit'], params['Q'], params['pEmit'])

    self.sample()
def test_HMM_basic():
    """
    Smoke test: HMM knockoffs are generated without crashing, are finite, and
    do not change when every variable is placed in its own group
    """
    n_vars, n_states, n_symbols = 50, 4, 5
    n_obs = 100

    # Random model and a small sample drawn from it
    init_probs, transitions, emissions = generate_HMM(n_vars, n_states, n_symbols)
    data = models.HMM(init_probs, transitions, emissions).sample(n_obs)

    # Knockoffs without groups
    plain_generator = knockoffHMM(init_probs, transitions, emissions, seed=123)
    plain = plain_generator.sample(data)

    # Knockoffs with one group per variable, same seed
    singleton_groups = np.arange(n_vars)
    grouped_generator = knockoffHMM(init_probs, transitions, emissions,
                                    groups=singleton_groups, seed=123)
    grouped = grouped_generator.sample(data)

    assert np.array_equal(plain, grouped), "Knockoffs with trivial groups do not match"
    assert np.isfinite(plain).all(), "Knockoffs are not finite"
# Emission probabilities, one (M, K) matrix per variable
pEmit = np.zeros((p, M, K))
# One random diagonal boost per variable
gamma = np.random.uniform(low=0, high=10, size=p)

# Transition matrices: random entries plus the diagonal boost, rows normalized
for j in range(p - 1):
    Q[j] = np.random.uniform(size=K * K).reshape(K, K)
    Q[j] += np.diag(np.full(K, gamma[j]))
    Q[j] /= Q[j].sum(axis=1, keepdims=True)

# Emission matrices: random entries plus the diagonal boost, columns normalized
for j in range(p):
    pEmit[j] = np.random.uniform(size=M * K).reshape(M, K)
    pEmit[j] += np.diag(np.full(K, gamma[j]))
    pEmit[j] /= pEmit[j].sum(axis=0)

# All initial mass on the first hidden state
pInit = np.zeros(K)
pInit[0] = 1

# Draw the original variables
n = 10000
modelX = models.HMM(pInit, Q, pEmit)
X = modelX.sample(n)

# Draw their knockoffs
knockoffs = knockoffHMM(pInit, Q, pEmit)
Xk = knockoffs.sample(X)

# Path plot
util.plotPaths(X, Xk)

# Diagnostics comparing originals and knockoffs
util.compare_marginals(X, Xk)
util.compare_cons_corr(X, Xk)
util.compare_cross_corr(X, Xk)
util.compare_cross_corr(X, Xk, dist=0)
from SNPknock.fastphase import loadHMM
from SNPknock import models
from SNPknock import knockoffHMM, knockoffHaplotypes, knockoffGenotypes
import util
import pdb

# Files holding the fitted HMM parameters
r_file = "data/haplotypes_rhat.txt"
alpha_file = "data/haplotypes_alphahat.txt"
theta_file = "data/haplotypes_thetahat.txt"
char_file = "data/haplotypes_origchars"

# Same parameters loaded twice: compact form and full form
hmm = loadHMM(r_file, alpha_file, theta_file, char_file, compact=True)
hmm_full = loadHMM(r_file, alpha_file, theta_file, char_file, compact=False)

# Draw a small sample from the full model
n = 10
modelX = models.HMM(
    hmm_full['pInit'],
    hmm_full['Q'],
    hmm_full['pEmit'],
)
X = modelX.sample(n)

# Generic HMM knockoffs (currently disabled)
#knockoffs = knockoffHMM(hmm_full['pInit'], hmm_full['Q'], hmm_full['pEmit'])
#Xk = knockoffs.sample(X)
#print("Generated knockoffs")

# Genotype knockoffs from the compact parametrization
knockoffs_gen = knockoffGenotypes(hmm['r'], hmm['alpha'], hmm['theta'])
Xk = knockoffs_gen.sample(X)
print("Generated knockoffs (genotypes)")

# Haplotype input: every 2 is replaced by 1.
# H is bound to the same object as X (no copy is made), so X is modified too.
H = X
H[X == 2] = 1