def test_replacement(self):
    """Compare replace_missing_values.replace_missing output with stored results.

    Three cases are run, each with a different input file, missing-value
    marker and pair of numeric arguments. For every case the output must
    have the same shape as the reference loaded with loadtxt, and every
    row pair must satisfy tools.correlation.
    """

    def check_rows(reference, produced, third_arg):
        # third_arg is forwarded unchanged to tools.correlation
        # (presumably a tolerance - TODO confirm against tools).
        for row in range(reference.shape[0]):
            assert tools.correlation(reference[row, :], produced[row, :], third_arg)

    logging.info("Start test test_replacement")

    # case 1: string marker "NA"
    produced = replace_missing_values.replace_missing(
        self.TEST_STR_IND, "NA", 0.42, 0.42, " ")
    reference = loadtxt(self.TEST_042)
    assert reference.shape == produced.shape
    # 1e-2 because the reference data was first generated with float64
    # and the code was later changed to use float32
    check_rows(reference, produced, 1e-2)

    # case 2: marker given as the string "9.0"
    produced = replace_missing_values.replace_missing(
        self.TEST_FLOAT_IND, "9.0", 0.5, 0.5, " ")
    reference = loadtxt(self.TEST_05)
    assert reference.shape == produced.shape
    check_rows(reference, produced, 1e-12)

    # case 3: marker given as the integer 9, two different numeric arguments
    reference = loadtxt(self.TEST_042_05)
    produced = replace_missing_values.replace_missing(
        self.TEST_FLOAT_IND, 9, 0.42, 0.5, " ")
    assert reference.shape == produced.shape
    check_rows(reference, produced, 1e-12)

    logging.info("PASS")
def _test_regress_out(self):
    """Check linear regression (y-x) via LinearRegression.regress_out.

    Loads y, x and the reference residuals with loadtxt, then verifies
    regress_out for a 1-dim y, for the same y reshaped to a single
    column, and for a two-column y built by stacking y with itself.

    Raises:
        NotImplementedError: if the loaded y is not one-dimensional;
            the checks below are only written for that case.
    """
    logging.info("Testing linear regression: regress_out")
    y = loadtxt(self.LIN_REG_Y)
    x = loadtxt(self.LIN_REG_X)
    orig_residuals = loadtxt(self.LIN_REG_RESIDUALS)

    if y.ndim != 1:
        # Raising a bare string is itself a TypeError in Python 3, so use
        # a real exception class with the original message.
        raise NotImplementedError("TEST WASNT IMPLEMENTED")

    # test 1 dim
    residuals = LinearRegression.regress_out(y, x)
    # the same data as an (n, 1) column must give the same residuals
    y_2dim = y.reshape(-1, 1)
    residuals2 = LinearRegression.regress_out(y_2dim, x)
    assert tools.correlation(orig_residuals, residuals)
    assert tools.correlation(residuals2, residuals)
    assert len(residuals) == len(x)

    # test 2 dim
    y2 = column_stack((y, y))
    residuals = LinearRegression.regress_out(y2, x)
    # every column of the result is checked against the reference residuals
    for i in range(y2.shape[1]):
        assert tools.correlation(orig_residuals, residuals[:, i])
        assert len(residuals[:, i]) == len(x)

    logging.info("PASS")
def test_impute(self):
    """Run self.module.impute and compare site_imputation with a stored result.

    The reference matrix is loaded from MANUALLY_IMPUTED_FILE. Both
    dimensions must match, then every column pair and every row pair
    must satisfy tools.correlation.
    """
    logging.info("Testing test_impute...")
    expected = loadtxt(self.MANUALLY_IMPUTED_FILE)
    self.module.impute(
        self.MIN_SCORE,
        self.SNPS_FILE,
        self.GENO_FILE,
        self.IND_FILE,
        self.MIN_MISSING_VALUES,
    )
    actual = self.module.site_imputation

    # dimensions are checked one at a time, first axis first
    assert expected.shape[0] == actual.shape[0]
    assert expected.shape[1] == actual.shape[1]

    # column-wise comparison
    for col in range(expected.shape[1]):
        assert tools.correlation(expected[:, col], actual[:, col])

    # row-wise comparison
    for row in range(expected.shape[0]):
        assert tools.correlation(expected[row, :], actual[row, :])

    logging.info("PASS")
def test_euclidean_distance(self):
    """Compare tools.euclidean_distance output with the stored EUC_DIST result.

    The two inputs are the transposes of the first 500 rows and of the
    remaining rows of self.meth_data.data.
    """
    logging.info("Testing euclidean distance...")
    expected = loadtxt(self.EUC_DIST)

    head = self.meth_data.data[:500, :].transpose()
    tail = self.meth_data.data[500:, :].transpose()
    distances = tools.euclidean_distance(head, tail)

    assert tests_tools.correlation(distances, expected)
    logging.info("PASS")
def test_exclude_sites_with_low_mean(self):
    """Check exclude_sites_with_low_mean against the stored FAKE_DATA_MIN_MEANS.

    Works on a copy of self.meth_data, applies the filter with
    MIN_MEAN_TH and compares the remaining data row by row with the
    reference file.
    """
    logging.info("Testing excluded mean below 0.5...")
    filtered = self.meth_data.copy()
    filtered.exclude_sites_with_low_mean(self.MIN_MEAN_TH)
    expected = loadtxt(self.FAKE_DATA_MIN_MEANS)

    # one comparison per row of the reference matrix
    for row in range(expected.shape[0]):
        assert tools.correlation(expected[row, :], filtered.data[row, :], 1e-14)

    logging.info("PASS")
def test_remove_lowest_std_sites():
    """Check remove_lowest_std_sites(0.02) against the stored STDTH_RES result.

    Loads DATA_STDTH through MethylationDataLoader, applies the filter
    and compares the resulting data row by row with the reference file.
    """
    logging.info("Testing stdth again...")
    loader = methylation_data.MethylationDataLoader(datafile=DATA_STDTH)
    loader.remove_lowest_std_sites(0.02)
    expected = loadtxt(STDTH_RES)

    # one comparison per row of the reference matrix
    for row in range(expected.shape[0]):
        assert tools.correlation(expected[row, :], loader.data[row, :], 1e-12)

    logging.info("PASS")
def test_low_rank_approx(self):
    """Compare tools.low_rank_approximation with the stored LOW_RANK_APPROX.

    The approximation is computed on the transposed data of a copy of
    self.meth_data with rank argument self.K, transposed back, and then
    compared row by row for sites_size rows.
    """
    logging.info("Testing low rank approximation")
    expected = loadtxt(self.LOW_RANK_APPROX)
    data_copy = self.meth_data.copy()

    # computed on the transpose, then flipped back before comparing
    approx = tools.low_rank_approximation(data_copy.data.transpose(), self.K)
    approx = approx.transpose()

    for site in range(data_copy.sites_size):
        assert tests_tools.correlation(approx[site, :], expected[site, :])

    logging.info("PASS")
def __init__(self):
    """Run the PCA check at construction time.

    Loads the reference matrix from PCA_P_RES, runs pca.PCA on the
    transposed data loaded from DATA_FILE, and compares the first 10
    columns of the resulting P with the reference columns.
    """
    logging.info("Testing Started on PCATester")
    expected_p = loadtxt(self.PCA_P_RES)
    loader = methylation_data.MethylationDataLoader(datafile=self.DATA_FILE)
    result = pca.PCA(loader.data.transpose())

    # only the first 10 columns are compared
    for column in range(10):
        assert tools.correlation(result.P[:, column], expected_p[:, column])

    logging.info("PASS")
    logging.info("Testing Finished on PCATester")
def test_components(self):
    """Compare houseman.Houseman components with the stored OUT_COMP result.

    A Houseman module is built from self.meth_data and the default
    reference file; its components must have the reference shape and
    every column pair must satisfy tools.correlation.
    """
    logging.info("Testing houseman components...")
    comp = loadtxt(self.OUT_COMP)

    # Open the reference inside a context manager so the handle is always
    # closed, even when construction or an assertion fails.
    with open(houseman_parser.HOUSEMAN_DEFAULT_REFERENCE, 'r') as reference_file:
        module = houseman.Houseman(self.meth_data, reference_file)
        # Read components while the file is still open, in case Houseman
        # consumes the reference lazily.
        components = module.components

    assert components.shape == comp.shape
    for i in range(components.shape[1]):
        assert tools.correlation(components[:, i], comp[:, i], 1e-2)

    logging.info("PASS")
def test_remove_covariates(self):
    """Check regress_out on methylation data against a per-site regression.

    A copy of self.meth_data has all covariates regressed out in one
    call. The covariates are then loaded from DEMO_COVAR and regressed
    out of each original site with LinearRegression.regress_out; each
    result must match the corresponding row of the copy.
    """
    logging.info("Testing removing covariates...")
    regressed = self.meth_data.copy()
    # regress out all covariates
    regressed.regress_out(self.meth_data.covar)

    # the second returned value (covariate names) is not needed here
    covar_values, _covar_names = regressed._load_and_validate_covar(
        [self.DEMO_COVAR],
        regressed.samples_size,
        regressed.samples_ids,
    )

    # remove "manually", one site at a time
    for index, site in enumerate(self.meth_data.data):
        manual = LinearRegression.regress_out(site, covar_values)
        assert len(manual) == len(site)
        assert tools.correlation(manual, regressed.data[index])

    logging.info("PASS")
def test_senario1(self):
    """Run Refactor with k=5, t=500, stdth=0 and no phenotypes or covariates.

    The ranked list is validated with self._validate_ranked_lists
    against RES1_RANKED, and the components are compared column by
    column with the matrix stored in RES1.
    """
    logging.info("Testing clean refactor components...")
    data_copy = self.meth_data.copy()
    expected_components = loadtxt(self.RES1)
    expected_ranked = loadtxt(self.RES1_RANKED, dtype=str)

    module = refactor.Refactor(
        methylation_data=data_copy,
        k=5,
        t=500,
        stdth=0,
        bad_probes_list=self.bad_probes,
        use_phenos=None,
        use_covars=None,
    )
    module.run()

    assert self._validate_ranked_lists(module, expected_ranked)
    assert module.components.shape == expected_components.shape
    for column in range(module.components.shape[1]):
        assert tools.correlation(module.components[:, column],
                                 expected_components[:, column])

    logging.info("PASS")
def test_senario3(self):
    """Run Refactor with feature_selection='controls' and phenotype 'p2'.

    Uses k=5, t=500, stdth=0 and an empty covariate list. The ranked
    list is validated with self._validate_ranked_lists against
    RES3_RANKED, and the components are compared column by column with
    the matrix stored in RES3.
    """
    logging.info("Testing controls feature selection...")
    data_copy = self.meth_data.copy()
    expected_components = loadtxt(self.RES3)
    expected_ranked = loadtxt(self.RES3_RANKED, dtype=str)

    module = refactor.Refactor(
        methylation_data=data_copy,
        k=5,
        t=500,
        stdth=0,
        bad_probes_list=self.bad_probes,
        feature_selection='controls',
        use_phenos=['p2'],
        use_covars=[],
    )
    module.run()

    assert self._validate_ranked_lists(module, expected_ranked)
    assert module.components.shape == expected_components.shape
    for column in range(module.components.shape[1]):
        assert tools.correlation(module.components[:, column],
                                 expected_components[:, column])

    logging.info("PASS")
def test_get_mean_per_site(self):
    """Compare meth_data.get_mean_per_site() with the stored FAKE_DATA_MEANS."""
    logging.info("Testing mean oer site...")
    computed_means = self.meth_data.get_mean_per_site()
    expected_means = loadtxt(self.FAKE_DATA_MEANS)
    assert tools.correlation(expected_means, computed_means)
    logging.info("PASS")