def too_slow_test_peertopeer(self):
    """Run ``single_snp_scale`` twice against a ``PeerToPeer`` cache.

    ``self.bed`` is first written as a ``DistributedBed`` (two pieces per
    chromosome) into the ``test_snps`` sub-store. The scan then runs once
    after removing the ``cache`` sub-store and once without removing it,
    both times with a five-task ``LocalMultiProc`` runner. Each result is
    checked with ``self.compare_files`` against the "old" reference.

    The ``too_slow_`` name prefix presumably keeps unittest discovery from
    picking this method up -- confirm before renaming.
    """
    logging.info("test_peertopeer")
    output_file = self.file_name("peertopeer")

    def id_and_path_function():
        """Return ``(id, directory)`` for the current peer."""
        from pysnptools.util.filecache import ip_address_pid

        peer_id = ip_address_pid()
        # Need to put the 'cache_top' here explicitly.
        return peer_id, f'peertopeer/{peer_id}'

    storage = PeerToPeer(
        common_directory='peertopeer/common',
        id_and_path_function=id_and_path_function,
    )

    # Start from an empty 'test_snps' store before writing the SNP pieces.
    snp_store = storage.join('test_snps')
    snp_store.rmtree()
    test_snps = DistributedBed.write(
        snp_store,
        self.bed,
        piece_per_chrom_count=2,
    )

    # Run on 5 additional Python processes
    runner = LocalMultiProc(taskcount=5)

    # First pass wipes the cache; second pass leaves it in place.
    for clear_cache in (True, False):
        if clear_cache:
            storage.join('cache').rmtree()
        results_df = single_snp_scale(
            test_snps=test_snps,
            pheno=self.phen_fn,
            covar=self.cov_fn,
            cache=storage.join('cache'),
            output_file_name=output_file,
            runner=runner,
        )
        self.compare_files(results_df, "old")
def test_one_chrom(self):
    """Scan only chromosome 3, in memory and distributed, with and without cache clearing.

    Four scenarios are run in order: the in-memory chromosome-3 subset and
    its ``DistributedBed`` copy, each first with ``clear_cache=True`` and
    then with ``clear_cache=False`` passed to ``self._cache_dict``. All
    four use ``self.bed`` as ``K0`` and are compared against the "old_one"
    reference.
    """
    logging.info("test_one_chrom")
    output_file = self.file_name("one_chrom")

    storage = LocalCache("local_cache/one_chrom")
    test_storage = storage.join('test_snps')
    test_storage.rmtree('')

    # Test only on chromosome 3 (first column of ``pos`` is compared to 3).
    on_chrom3 = self.bed.pos[:, 0] == 3
    test_snps3 = self.bed[:, on_chrom3]
    test_snps3_dist = DistributedBed.write(
        test_storage,
        test_snps3,
        piece_per_chrom_count=2,
    )

    # Every scenario is checked against the same reference.
    ref = "old_one"
    scenarios = [
        (test_snps3, True, "Run with just chrom3"),
        (test_snps3_dist, True, "Run with distributed test SNPs"),
        (test_snps3, False, "Run with just chrom3 (use cache)"),
        (test_snps3_dist, False, "Run with distributed test SNPs (use cache)"),
    ]

    for test_snps, clear_cache, name in scenarios:
        logging.info(f"=========== {name} ===========")
        results_df = single_snp_scale(
            test_snps=test_snps,
            pheno=self.phen_fn,
            covar=self.cov_fn,
            K0=self.bed,
            cache=self._cache_dict(storage, clear_cache=clear_cache),
            output_file_name=output_file,
        )
        self.compare_files(results_df, ref)
def test_local_distribute(self):
    """Scan a distributed copy of ``self.bed``, then ``self.bed`` itself.

    The first run reads test SNPs from a ``DistributedBed`` written under
    ``local_cache/local_distribute`` and passes ``clear_cache=True`` to
    ``self._cache_dict``. The second run reads ``self.bed`` directly and
    passes ``clear_cache=False``. Both use ``self.bed`` as ``G0`` and are
    compared against the "old" reference.
    """
    logging.info("test_local_distribute")
    output_file = self.file_name("local_distribute")

    storage = LocalCache("local_cache/local_distribute")
    snp_store = storage.join('test_snps')
    snp_store.rmtree('')
    distributed_snps = DistributedBed.write(
        snp_store,
        self.bed,
        piece_per_chrom_count=2,
    )

    # Run 1: distributed test SNPs, clear_cache=True.
    # Only this run passes force_python_only explicitly.
    first_df = single_snp_scale(
        test_snps=distributed_snps,
        pheno=self.phen_fn,
        covar=self.cov_fn,
        G0=self.bed,
        cache=self._cache_dict(storage, clear_cache=True),
        output_file_name=output_file,
        force_python_only=False,
    )
    self.compare_files(first_df, "old")

    # Run 2: the original reader as test SNPs, clear_cache=False.
    second_df = single_snp_scale(
        test_snps=self.bed,
        pheno=self.phen_fn,
        covar=self.cov_fn,
        G0=self.bed,
        cache=self._cache_dict(storage, clear_cache=False),
        output_file_name=output_file,
    )
    self.compare_files(second_df, "old")
def snpsA(seed, iid_count, sid_count, use_distributed):
    """Create (or reopen) a synthetic SNP dataset plus random pheno/covar data.

    The SNP file lives under the module-level ``cache_top`` and is generated
    with ``SnpGen`` only when it does not already exist; otherwise the
    existing file is opened. ``cache_top`` is used with ``/`` and
    ``.exists()``, so it is presumably a ``pathlib.Path`` -- confirm.

    Args:
        seed: Seed for ``SnpGen`` and for the NumPy global random generator.
        iid_count: Number of individuals passed to ``SnpGen``.
        sid_count: Number of SNPs passed to ``SnpGen``.
        use_distributed: If true, store/open the SNPs as a ``DistributedBed``
            (path suffix ``_db``); otherwise as a ``Bed`` file (suffix
            ``.bed``) written from a ``float32`` read of the generator.

    Returns:
        A ``(test_snps, pheno, covar)`` tuple. ``pheno`` is a one-column
        ``SnpData`` with values ``randn * 3 + 2``; ``covar`` is a two-column
        ``SnpData`` with values ``randn * 2 - 3``. Both share the SNP
        reader's ``iid``.

    Note:
        This function calls ``np.random.seed(seed)``, so it reseeds NumPy's
        global random state as a side effect.
    """
    import numpy as np
    from pysnptools.snpreader import Bed, DistributedBed, SnpData, SnpGen

    chrom_count = 10
    count_A1 = False

    # ``cache_top`` is only read here, so no ``global`` declaration is needed.
    # (The previous ``global top_cache`` named a variable that was never used.)
    suffix = "_db" if use_distributed else ".bed"
    test_snp_path = (
        cache_top / f"snpsA_{seed}_{chrom_count}_{iid_count}_{sid_count}{suffix}"
    )

    if test_snp_path.exists():
        # Reuse the file produced by an earlier call with the same arguments.
        if use_distributed:
            test_snps = DistributedBed(str(test_snp_path))
        else:
            test_snps = Bed(str(test_snp_path), count_A1=count_A1)
    else:
        snpgen = SnpGen(
            seed=seed,
            iid_count=iid_count,
            sid_count=sid_count,
            chrom_count=chrom_count,
            block_size=1000,
        )
        if use_distributed:
            test_snps = DistributedBed.write(str(test_snp_path), snpgen)
        else:
            test_snps = Bed.write(
                str(test_snp_path),
                snpgen.read(dtype="float32"),
                count_A1=count_A1,
            )

    # Seed first, then draw pheno before covar: the draw order fixes the values.
    np.random.seed(seed)
    pheno = SnpData(
        iid=test_snps.iid,
        sid=["pheno"],
        val=np.random.randn(test_snps.iid_count, 1) * 3 + 2,
    )
    covar = SnpData(
        iid=test_snps.iid,
        sid=["covar1", "covar2"],
        val=np.random.randn(test_snps.iid_count, 2) * 2 - 3,
    )
    return test_snps, pheno, covar
def test_one_fast(self):
    """Scan a distributed chromosome-3 subset without passing a ``cache`` argument.

    The chromosome-3 SNPs of ``self.bed`` are written as a
    ``DistributedBed`` under ``local_cache/one_fast`` and scanned with
    ``self.bed`` as ``G0``. The result is compared against the "old_one"
    reference.
    """
    logging.info("test_one_fast")
    output_file = self.file_name("one_fast")

    storage = LocalCache("local_cache")
    snp_store = storage.join('one_fast')
    snp_store.rmtree()

    # Test only on chromosome 3 (first column of ``pos`` is compared to 3).
    on_chrom3 = self.bed.pos[:, 0] == 3
    chrom3_snps = self.bed[:, on_chrom3]
    chrom3_distributed = DistributedBed.write(
        snp_store,
        chrom3_snps,
        piece_per_chrom_count=2,
    )

    results_df = single_snp_scale(
        test_snps=chrom3_distributed,
        pheno=self.phen_fn,
        covar=self.cov_fn,
        G0=self.bed,
        output_file_name=output_file,
    )
    self.compare_files(results_df, "old_one")
def test1(self):
    """Write generated SNPs as a ``DistributedBed`` and compare with stored references.

    A 100 x 100 ``SnpGen`` dataset (seed 0) is written to a fresh
    ``tempdir/distributed_bed_test1`` directory and read back. The values
    must match two reference datasets located relative to this file: a
    ``DistributedBed`` directory and a ``Bed`` file, both compared with
    ``allclose(..., equal_nan=True)``.
    """
    logging.info("in TestDistributedBed test1")
    from pysnptools.snpreader import SnpGen, DistributedBed

    # Reference datasets are addressed relative to this source file.
    here = os.path.dirname(os.path.realpath(__file__))

    snpgen = SnpGen(seed=0, iid_count=100, sid_count=100)

    # Remove any output left over from a previous run.
    temp_dir = 'tempdir/distributed_bed_test1'
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)

    distributed_bed = DistributedBed.write(
        temp_dir,
        snpgen,
        piece_per_chrom_count=2,
    )
    snpdata = distributed_bed.read()

    ref1_reader = DistributedBed(
        here + '/../../tests/datasets/distributed_bed_test1'
    )
    ref1 = ref1_reader.read()
    assert snpdata.allclose(ref1, equal_nan=True)

    ref2_reader = Bed(
        here + '/../../tests/datasets/distributed_bed_test1_X',
        count_A1=False,
    )
    ref2 = ref2_reader.read()
    assert snpdata.allclose(ref2, equal_nan=True)
test_suite = unittest.TestSuite([]) test_suite.addTests( unittest.TestLoader().loadTestsFromTestCase(TestDistributedBed)) return test_suite if __name__ == "__main__": import doctest logging.basicConfig(level=logging.INFO) if False: from pysnptools.snpreader import DistributedBed, Bed import shutil directory = 'tempdir/toydataSkip10.distributedbed' if os.path.exists(directory): shutil.rmtree(directory) snpreader = Bed( '../examples/toydata.5chrom.bed', count_A1=False)[:, ::10] # Read every 10 snps from Bed format DistributedBed.write( directory, snpreader, piece_per_chrom_count=5) # Write data in DistributedBed format result = doctest.testmod(optionflags=doctest.ELLIPSIS) assert result.failed == 0, "failed doc test: " + __file__ suites = getTestSuite() r = unittest.TextTestRunner(failfast=True) ret = r.run(suites) assert ret.wasSuccessful()