def test_exclude_bad_probes(self):
    """
    tests that Refactor._exclude_bad_probes removes the bad probes:
    - the remaining data equals the reference data loaded from DEMO_DATA_NO_BAD_PROBES
    - the sites list equals the original list without the bad probes
    - the sites size dropped by the number of bad probes that were actually present
    """
    logging.info("Testing removing bad probes...")
    probes_meth_data = self.meth_data.copy()
    data_no_bad_probes = methylation_data.MethylationDataLoader(datafile=self.DEMO_DATA_NO_BAD_PROBES)

    # use a context manager so the bad probes file is closed even if load() fails
    with open(self.BAD_PROBES, 'r') as bad_probes_file:
        bad_probes = load(bad_probes_file)

    module = refactor.Refactor(methylation_data=probes_meth_data, k=5, bad_probes_list=bad_probes)
    module._exclude_bad_probes()
    assert array_equal(data_no_bad_probes.data, module.meth_data.data)

    # tests sites list has changed
    remove_count = len(bad_probes)
    orig_sites_before = []
    orig_sites_before.extend(self.meth_data.cpgnames)
    orig_sites_after = []
    orig_sites_after.extend(module.meth_data.cpgnames)

    for probe in bad_probes:
        try:
            orig_sites_before.remove(probe)
        except ValueError:
            # list.remove raises ValueError when the probe is not in the original
            # sites list; such a probe was never there so it must not be counted
            remove_count -= 1

    assert orig_sites_after == orig_sites_before

    # test sites size
    assert self.meth_data.sites_size - remove_count == module.meth_data.sites_size
    logging.info("PASS")
def test_low_rank_approx_distances(self):
    """
    checks that _calc_low_rank_approx_distances returns one distance per site,
    i.e. the size of the result equals the sites size of the data it was given
    """
    logging.info("Testing low rank approx distances...")

    data_copy = self.meth_data.copy()
    refactor_module = refactor.Refactor(methylation_data=data_copy, k=5)
    site_distances = refactor_module._calc_low_rank_approx_distances(data_copy)

    assert site_distances.size == data_copy.sites_size, "there must be distances as the number of sites"

    logging.info("PASS")
def test_senario1(self):
    """
    runs Refactor with k=5, t=500, stdth=0, the bad probes list and no phenotypes / covariates,
    then compares the output against the expected results stored in RES1 and RES1_RANKED:
    - the ranked list is validated with _validate_ranked_lists
    - the components must have the expected shape
    - every component column is checked against the expected column with tools.correlation
    """
    logging.info("Testing clean refactor components...")

    data_copy = self.meth_data.copy()
    expected_components = loadtxt(self.RES1)
    expected_ranked = loadtxt(self.RES1_RANKED, dtype=str)

    refactor_module = refactor.Refactor(
        methylation_data=data_copy,
        k=5,
        t=500,
        stdth=0,
        bad_probes_list=self.bad_probes,
        use_phenos=None,
        use_covars=None,
    )
    refactor_module.run()

    assert self._validate_ranked_lists(refactor_module, expected_ranked)
    assert refactor_module.components.shape == expected_components.shape

    # compare column by column
    number_of_columns = refactor_module.components.shape[1]
    for column in range(number_of_columns):
        assert tools.correlation(refactor_module.components[:, column], expected_components[:, column])

    logging.info("PASS")
def test_senario3(self):
    """
    runs Refactor with feature_selection='controls' on phenotype 'p2' with an empty covariates list
    (k=5, t=500, stdth=0, with the bad probes list), then compares the output against the
    expected results stored in RES3 and RES3_RANKED:
    - the ranked list is validated with _validate_ranked_lists
    - the components must have the expected shape
    - every component column is checked against the expected column with tools.correlation
    """
    logging.info("Testing controls feature selection...")

    data_copy = self.meth_data.copy()
    expected_components = loadtxt(self.RES3)
    expected_ranked = loadtxt(self.RES3_RANKED, dtype=str)

    refactor_module = refactor.Refactor(
        methylation_data=data_copy,
        k=5,
        t=500,
        stdth=0,
        bad_probes_list=self.bad_probes,
        feature_selection='controls',
        use_phenos=['p2'],
        use_covars=[],
    )
    refactor_module.run()

    assert self._validate_ranked_lists(refactor_module, expected_ranked)
    assert refactor_module.components.shape == expected_components.shape

    # compare column by column
    number_of_columns = refactor_module.components.shape[1]
    for column in range(number_of_columns):
        assert tools.correlation(refactor_module.components[:, column], expected_components[:, column])

    logging.info("PASS")
def run(self, args, meth_data, output_perfix = None):
    """
    builds a Refactor module from the parsed arguments and runs it.

    args - the parsed arguments; reads pheno, k, t, stdth, fs, numcomp and covar
    meth_data - the methylation data to run on
    output_perfix - prefix of the output file names, "output" is used if it is empty or None

    the created module is kept in self.module.
    any exception is logged and raised again to the caller.
    """
    try:
        if args.pheno is not None and meth_data.phenotype is None:
            common.terminate("There is no phenotype in the data, use --phenofile to add phenotype.")

        if not output_perfix:
            output_perfix = "output"

        # collect the probes from all the bad probes files, a set drops duplicates between files
        # (plain loop: a list comprehension here would only build a useless list of None)
        unique_bad_probes = set()
        for probes_file in BAD_PROBES_FILES:
            unique_bad_probes.update(loadtxt(probes_file, dtype=str))
        bad_probes_list = array(list(unique_bad_probes))

        self.module = refactor.Refactor(methylation_data = meth_data,
                                        k = args.k,
                                        t = args.t,
                                        stdth = args.stdth,
                                        feature_selection = args.fs.lower().strip(),
                                        num_components = args.numcomp,
                                        use_covars = args.covar,
                                        use_phenos = args.pheno,
                                        bad_probes_list = bad_probes_list,
                                        ranked_output_filename = output_perfix + "." + refactor.RANKED_FILENAME,
                                        components_output_filename = output_perfix + "." + refactor.COMPONENTS_FILENAME)
        self.module.run()
    except Exception :
        # log with the traceback and let the caller decide what to do
        logging.exception("in refactor")
        raise