def download_precomputed_taxonomy(self):
    """Download the precomputed TSV taxonomy DB to ``self.taxonomy_tsv_path``.

    When ``self.taxonomy_tsv_path`` is missing or smaller than 1,000,000
    bytes, the gzipped TSV is fetched from ``taxonomy_tsv_gz_url1``, falling
    back to ``taxonomy_tsv_gz_url2`` and then ``taxonomy_tsv_gz_url3``, and
    decompressed to ``self.taxonomy_tsv_path``. Any ``<taxonomy_tsv_path>.gz``
    file is removed before returning.

    Raises:
        Exception: the error raised by the last mirror when all three
            downloads fail.
    """

    Logger.instance().debug(
        "file: {}; line: {}; Downloading taxonomy tsv".format(
            __file__, inspect.currentframe().f_lineno,
        ))

    ############################################################################################
    #
    # Download precomputed taxonomy TSV
    #
    ############################################################################################

    taxonomy_tsv_gz_path = '{}.gz'.format(self.taxonomy_tsv_path)

    # Use the local TSV when it looks complete; otherwise try each mirror in turn
    tsv_is_missing = not os.path.isfile(self.taxonomy_tsv_path)
    if tsv_is_missing or pathlib.Path(self.taxonomy_tsv_path).stat().st_size < 1000000:
        mirror_urls = (taxonomy_tsv_gz_url1, taxonomy_tsv_gz_url2, taxonomy_tsv_gz_url3)
        for attempt, url in enumerate(mirror_urls, start=1):
            try:
                with tqdm(...) as t:
                    # Label the progress bar with the mirror actually being fetched
                    t.set_description(os.path.basename(url))
                    urllib.request.urlretrieve(
                        url, taxonomy_tsv_gz_path, reporthook=tqdm_hook(t))
            except Exception:
                # Only the failure of the last mirror is fatal
                if attempt == len(mirror_urls):
                    raise
            else:
                break

        # Decompress only after a download: the .gz exists only in this branch
        with gzip.open(taxonomy_tsv_gz_path, 'rb') as fin, \
                open(self.taxonomy_tsv_path, 'wb') as fout:
            shutil.copyfileobj(fin, fout)

    # Remove the compressed copy; it may be absent when nothing was downloaded
    try:
        pathlib.Path(taxonomy_tsv_gz_path).unlink()
    except FileNotFoundError:
        pass
def setUpClass(cls):
    """Install the project with pip, reset the output directory and fetch the sorted dataset.

    Sets ``cls.package_path``, ``cls.test_path``, ``cls.outdir_path`` and
    ``cls.outdir_data_path``, then downloads ``sorted.tar.gz`` (three mirrors
    tried in order) and extracts it into ``cls.outdir_data_path``.

    Raises:
        Exception: the error raised by the last mirror when all three
            downloads fail.
    """
    # Pass an argv list instead of splitting a formatted string, so an
    # interpreter path containing spaces is not broken into several arguments.
    pip_args = [
        sys.executable, '-m', 'pip', 'install', '.', '-q', '--upgrade',
        '--use-feature=in-tree-build',
    ]
    subprocess.run(args=pip_args, cwd=PathManager.get_project_path())

    cls.package_path = os.path.join(PathManager.get_package_path())
    cls.test_path = os.path.join(PathManager.get_test_path())
    cls.outdir_path = os.path.join(cls.test_path, 'outdir')
    cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
    shutil.rmtree(cls.outdir_path, ignore_errors=True)
    pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

    ############################################################################################
    #
    # Download sorted reads dataset
    #
    ############################################################################################

    sorted_tar_path = os.path.join(cls.outdir_data_path, "sorted.tar.gz")

    # Use the local archive when it looks complete; otherwise try each mirror in turn.
    # The output directory was removed just above, so normally no local copy exists.
    archive_is_missing = not os.path.isfile(sorted_tar_path)
    if archive_is_missing or pathlib.Path(sorted_tar_path).stat().st_size < 1000000:
        mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2, sorted_tar_gz_url3)
        for attempt, url in enumerate(mirror_urls, start=1):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(sorted_tar_path))
                    urllib.request.urlretrieve(
                        url, sorted_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Only the failure of the last mirror is fatal
                if attempt == len(mirror_urls):
                    raise
            else:
                break

    # Context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts member paths inside the archive;
    # presumably the mirrors are project-controlled - confirm.
    with tarfile.open(sorted_tar_path, "r:gz") as tar:
        tar.extractall(path=cls.outdir_data_path)
def setUpClass(cls):
    """Install vtam, reset the output directory and fetch the sorted reads dataset.

    Sets ``cls.package_path``, ``cls.test_path``, ``cls.outdir_path`` and
    ``cls.outdir_data_path``, then downloads ``sorted.tar.gz`` (three mirrors
    tried in order) and extracts it into ``cls.outdir_data_path``.

    Raises:
        Exception: the error raised by the last mirror when all three
            downloads fail.
    """

    ########################################################################
    #
    # These tests need the vtam command in the path
    #
    ########################################################################

    pip_install_vtam_for_tests()

    cls.package_path = os.path.join(PathManager.get_package_path())
    cls.test_path = os.path.join(PathManager.get_test_path())
    cls.outdir_path = os.path.join(cls.test_path, 'outdir')
    cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
    shutil.rmtree(cls.outdir_path, ignore_errors=True)
    pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

    ############################################################################################
    #
    # Download sorted reads dataset (Updated Oct 10, 2020)
    #
    ############################################################################################

    sorted_tar_path = os.path.join(cls.outdir_data_path, "sorted.tar.gz")

    # Use the local archive when it looks complete; otherwise try each mirror in turn.
    # The output directory was removed just above, so normally no local copy exists.
    archive_is_missing = not os.path.isfile(sorted_tar_path)
    if archive_is_missing or pathlib.Path(sorted_tar_path).stat().st_size < 1000000:
        mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2, sorted_tar_gz_url3)
        for attempt, url in enumerate(mirror_urls, start=1):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(sorted_tar_path))
                    urllib.request.urlretrieve(
                        url, sorted_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Only the failure of the last mirror is fatal
                if attempt == len(mirror_urls):
                    raise
            else:
                break

    # Context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts member paths inside the archive;
    # presumably the mirrors are project-controlled - confirm.
    with tarfile.open(sorted_tar_path, "r:gz") as tar:
        tar.extractall(path=cls.outdir_data_path)
def __download_ncbi_taxonomy_dump(self):
    """Fetch the NCBI ``new_taxdump.tar.gz`` archive into ``self.tempdir``.

    The download is skipped when a file with the archive's name already
    exists in ``self.tempdir``.

    Returns:
        Path of the archive inside ``self.tempdir``.
    """
    ncbi_dump_url = "ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz"
    archive_path = os.path.join(self.tempdir, os.path.basename(ncbi_dump_url))

    debug_msg = "file: {}; line: {}; Downloading NCBI taxonomy dump".format(
        __file__, inspect.currentframe().f_lineno)
    Logger.instance().debug(debug_msg)

    # Nothing to do when the archive is already present
    if os.path.isfile(archive_path):
        return archive_path

    Logger.instance().info("Downloading NCBI taxonomy dump")
    with tqdm(...) as progress:
        progress.set_description(os.path.basename(archive_path))
        urllib.request.urlretrieve(
            ncbi_dump_url, archive_path, reporthook=tqdm_hook(progress))
    return archive_path
def setUpClass(cls):
    """Install vtam, fetch the fastq dataset and build the data tree with snakemake.

    Sets the ``cls.*_path`` attributes and ``cls.args``, downloads
    ``fastq.tar.gz`` into ``cls.outdir_download_path`` (three mirrors tried in
    order) unless a copy of at least 1,000,000 bytes is already there,
    extracts it into ``cls.outdir_path`` and runs snakemake up to the
    ``all_one_marker_makeknownoccurrences`` target.

    Raises:
        Exception: the error raised by the last mirror when all three
            downloads fail.
        subprocess.CalledProcessError: when the snakemake command exits
            with a non-zero status.
    """

    ########################################################################
    #
    # These tests need the vtam command in the path
    #
    ########################################################################

    pip_install_vtam_for_tests()  # vtam needs to be in the path

    cls.package_path = PathManager.get_package_path()
    cls.test_path = PathManager.get_test_path()
    cls.outdir_path = os.path.join(cls.test_path, 'outdir')
    shutil.rmtree(cls.outdir_path, ignore_errors=True)
    cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
    pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)
    # The download directory sits outside 'outdir', so it is not removed above
    cls.outdir_download_path = os.path.join(cls.test_path, 'outdir_download')
    pathlib.Path(cls.outdir_download_path).mkdir(parents=True, exist_ok=True)

    cls.snakefile_tuto_data = os.path.join(
        cls.package_path, "data/snake.tuto.data_makeknownoccurrences.yml")

    ############################################################################################
    #
    # Set command args
    #
    ############################################################################################

    cls.args = {}
    cls.args['package_path'] = cls.package_path
    cls.args['snake_tuto_data'] = cls.snakefile_tuto_data

    ############################################################################################
    #
    # Download fastq test dataset
    #
    ############################################################################################

    fastq_tar_path = os.path.join(cls.outdir_download_path, "fastq.tar.gz")

    # Use the local archive when it looks complete; otherwise try each mirror in turn
    archive_is_missing = not os.path.isfile(fastq_tar_path)
    if archive_is_missing or pathlib.Path(fastq_tar_path).stat().st_size < 1000000:
        mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2, fastq_tar_gz_url3)
        for attempt, url in enumerate(mirror_urls, start=1):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(fastq_tar_path))
                    urllib.request.urlretrieve(
                        url, fastq_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Only the failure of the last mirror is fatal
                if attempt == len(mirror_urls):
                    raise
            else:
                break

    # Context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts member paths inside the archive;
    # presumably the mirrors are project-controlled - confirm.
    with tarfile.open(fastq_tar_path, "r:gz") as tar:
        tar.extractall(path=cls.outdir_path)

    ############################################################################################
    #
    # Copy data to directory tree
    #
    ############################################################################################

    cmd = "snakemake --cores 1 -s {snake_tuto_data} --config MARKER=mfzr " \
          "PROJECT=asper1 PACKAGE_PATH={package_path} --until all_one_marker_makeknownoccurrences".format(**cls.args)

    # On Windows the command is passed as a single string; elsewhere it is split into argv
    if sys.platform.startswith("win"):
        args = cmd
    else:
        args = shlex.split(cmd)
    subprocess.run(args=args, check=True, cwd=cls.outdir_path)
def setUp(self):
    """Install the project with pip, reset the output directory and fetch the sorted dataset.

    Sets ``self.package_path``, ``self.test_path``, ``self.outdir_path``,
    ``self.outdir_data_path``, ``self.asvtable_path`` and ``self.args``;
    sets the ``VTAM_LOG_VERBOSITY`` environment variable to ``"10"``;
    downloads ``sorted.tar.gz`` (three mirrors tried in order) and extracts
    it into ``self.outdir_path``.

    Raises:
        Exception: the error raised by the last mirror when all three
            downloads fail.
    """
    # vtam needs to be on the PATH.
    # Pass an argv list instead of splitting a formatted string, so an
    # interpreter path containing spaces is not broken into several arguments.
    pip_args = [
        sys.executable, '-m', 'pip', 'install', '.', '-q', '--upgrade',
        '--use-feature=in-tree-build',
    ]
    subprocess.run(args=pip_args, cwd=PathManager.get_project_path())

    self.package_path = os.path.join(PathManager.get_package_path())
    self.test_path = os.path.join(PathManager.get_test_path())
    self.outdir_path = os.path.join(self.test_path, 'outdir')
    self.outdir_data_path = os.path.join(self.outdir_path, 'data')
    # during development of the test, this prevents errors
    shutil.rmtree(self.outdir_path, ignore_errors=True)
    pathlib.Path(self.outdir_data_path).mkdir(parents=True, exist_ok=True)
    os.environ['VTAM_LOG_VERBOSITY'] = str(10)

    ############################################################################################
    #
    # Download sorted fasta test dataset
    #
    ############################################################################################

    sorted_tar_path = os.path.join(self.outdir_data_path, "sorted.tar.gz")

    # Use the local archive when it looks complete; otherwise try each mirror in turn.
    # The output directory was removed just above, so normally no local copy exists.
    archive_is_missing = not os.path.isfile(sorted_tar_path)
    if archive_is_missing or pathlib.Path(sorted_tar_path).stat().st_size < 1000000:
        mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2, sorted_tar_gz_url3)
        for attempt, url in enumerate(mirror_urls, start=1):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(sorted_tar_path))
                    urllib.request.urlretrieve(
                        url, sorted_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Only the failure of the last mirror is fatal
                if attempt == len(mirror_urls):
                    raise
            else:
                break

    # Context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts member paths inside the archive;
    # presumably the mirrors are project-controlled - confirm.
    with tarfile.open(sorted_tar_path, "r:gz") as tar:
        tar.extractall(path=self.outdir_path)

    ############################################################################################
    #
    # Paths
    #
    ############################################################################################

    self.asvtable_path = os.path.join(self.outdir_path, "asvtable_default.tsv")

    # Input files that live next to this test module
    here = os.path.dirname(__file__)
    self.args = {}
    self.args['sortedinfo'] = os.path.join(here, "sortedinfo.tsv")
    self.args['params'] = os.path.join(here, "params_min_replicate_number1.yml")
    self.args['params_lfn_variant'] = os.path.join(here, "params_lfn_variant.yml")
    self.args['params_lfn_variant_replicate'] = os.path.join(
        here, "params_lfn_variant_replicate.yml")
def setUpClass(cls):
    """Install vtam, fetch the fastq dataset and run ``vtam merge`` on it.

    Sets the ``cls.*_path`` attributes and ``cls.args``, downloads
    ``fastq.tar.gz`` (three mirrors tried in order), extracts it into
    ``cls.outdir_path`` and runs the ``vtam merge`` command.

    Raises:
        Exception: the error raised by the last mirror when all three
            downloads fail.
    """
    pip_install_vtam_for_tests()  # vtam needs to be in the path

    cls.package_path = PathManager.get_package_path()
    cls.test_path = os.path.join(PathManager.get_test_path())
    cls.outdir_path = os.path.join(cls.test_path, 'outdir')
    cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
    # during development of the test, this prevents errors
    shutil.rmtree(cls.outdir_path, ignore_errors=True)
    pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

    ############################################################################################
    #
    # Download fastq test dataset
    #
    ############################################################################################

    fastq_tar_path = os.path.join(cls.outdir_data_path, "fastq.tar.gz")

    # Use the local archive when it looks complete; otherwise try each mirror in turn.
    # The output directory was removed just above, so normally no local copy exists.
    archive_is_missing = not os.path.isfile(fastq_tar_path)
    if archive_is_missing or pathlib.Path(fastq_tar_path).stat().st_size < 1000000:
        mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2, fastq_tar_gz_url3)
        for attempt, url in enumerate(mirror_urls, start=1):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(fastq_tar_path))
                    urllib.request.urlretrieve(
                        url, fastq_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Only the failure of the last mirror is fatal
                if attempt == len(mirror_urls):
                    raise
            else:
                break

    # Context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts member paths inside the archive;
    # presumably the mirrors are project-controlled - confirm.
    with tarfile.open(fastq_tar_path, "r:gz") as tar:
        tar.extractall(path=cls.outdir_path)

    # Set test paths
    cls.fastqinfo_path = os.path.join(PathManager.get_package_path(), "data/example/fastqinfo.tsv")
    cls.fastqdir_path = os.path.join(cls.outdir_path, "fastq")
    cls.fastainfo_path = os.path.join(cls.outdir_path, "fastainfo.tsv")
    cls.fastadir_path = os.path.join(cls.outdir_path, "merged")

    cls.sorted_dir_path = os.path.join(cls.outdir_path, "sorted")
    cls.sortedinfo_path = os.path.join(cls.sorted_dir_path, "sortedinfo.tsv")

    cls.log_path = os.path.join(cls.outdir_path, "vtam.log")

    cls.asvtable_path = os.path.join(cls.outdir_path, "asvtable_default.tsv")

    cls.args = {}
    cls.args['fastqinfo'] = cls.fastqinfo_path
    cls.args['fastqdir'] = cls.fastqdir_path
    cls.args['fastainfo'] = cls.fastainfo_path
    cls.args['fastadir'] = cls.fastadir_path
    cls.args['sorted'] = cls.sorted_dir_path
    cls.args['db'] = os.path.join(cls.outdir_path, "db.sqlite")
    cls.args['sortedinfo'] = cls.sortedinfo_path
    cls.args['sorteddir'] = cls.sorted_dir_path
    cls.args['asvtable'] = cls.asvtable_path
    cls.args['log'] = cls.log_path

    ################################################################################################################
    #
    # Command Merge
    #
    ################################################################################################################

    cmd = "vtam merge --fastqinfo {fastqinfo} --fastqdir {fastqdir} --fastainfo {fastainfo} --fastadir {fastadir} " \
          "-v --log {log}".format(**cls.args)

    # On Windows the command is passed as a single string; elsewhere it is split into argv
    if sys.platform.startswith("win"):
        args = cmd
    else:
        args = shlex.split(cmd)
    # NOTE(review): run without check=True, so a failing merge does not raise here - confirm intended
    subprocess.run(args=args)
def main(outdir):
    """Download the fastq dataset into *outdir* and build the data tree with snakemake.

    Creates *outdir* if needed, downloads ``fastq.tar.gz`` (three mirrors
    tried in order) unless a copy of at least 1,000,000 bytes is already
    there, extracts it into *outdir*, deletes the archive and runs snakemake
    up to the ``all_one_marker`` target with *outdir* as working directory.

    Args:
        outdir: Directory that receives the extracted data and in which
            snakemake is run.

    Raises:
        Exception: the error raised by the last mirror when all three
            downloads fail.
        subprocess.CalledProcessError: when the snakemake command exits
            with a non-zero status.
    """
    package_path = PathManager.get_package_path()
    pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)

    #######################################################################
    #
    # Download fastq
    #
    #######################################################################

    fastq_tar_path = os.path.join(outdir, "fastq.tar.gz")

    # Use the local archive when it looks complete; otherwise try each mirror in turn
    archive_is_missing = not os.path.isfile(fastq_tar_path)
    if archive_is_missing or pathlib.Path(fastq_tar_path).stat().st_size < 1000000:
        mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2, fastq_tar_gz_url3)
        for attempt, url in enumerate(mirror_urls, start=1):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(fastq_tar_path))
                    urllib.request.urlretrieve(
                        url, fastq_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Only the failure of the last mirror is fatal
                if attempt == len(mirror_urls):
                    raise
            else:
                break

    # Context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts member paths inside the archive;
    # presumably the mirrors are project-controlled - confirm.
    with tarfile.open(fastq_tar_path, "r:gz") as tar:
        tar.extractall(path=outdir)
    os.remove(fastq_tar_path)

    #######################################################################
    #
    # Set command args
    #
    #######################################################################

    snake_args = {}
    snake_args['package_path'] = package_path
    snake_args['snake_tuto_data'] = os.path.join(package_path, "data/snake.tuto.data.yml")

    #######################################################################
    #
    # Copy data to directory tree
    #
    #######################################################################

    cmd = "snakemake --cores 1 -s {snake_tuto_data} --config MARKER=mfzr " \
          "PROJECT=asper1 PACKAGE_PATH={package_path} --until all_one_marker".format(**snake_args)

    # On Windows the command is passed as a single string; elsewhere it is split into argv
    if sys.platform.startswith("win"):
        run_args = cmd
    else:
        run_args = shlex.split(cmd)
    subprocess.run(args=run_args, check=True, cwd=outdir)