def main(outdir):
    """Download the example fastq archive into *outdir* and build the data tree.

    The archive ``fastq.tar.gz`` is fetched from the first mirror that works,
    unpacked into ``outdir`` and then deleted. Afterwards snakemake is run
    inside ``outdir`` on the packaged ``data/snake.tuto.data.yml`` workflow.

    :param outdir: Directory receiving the data; created if it does not exist
    :type outdir: str
    :raises Exception: The error of the last mirror if all three downloads fail
    :raises subprocess.CalledProcessError: If snakemake exits with a non-zero status
    :rtype: None
    """
    package_path = PathManager.get_package_path()
    pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)

    #######################################################################
    #
    # Download fastq
    #
    #######################################################################

    fastq_tar_path = os.path.join(outdir, "fastq.tar.gz")
    # Reuse a local archive unless it is absent or smaller than 1000000 bytes
    # (presumably an incomplete download); otherwise try the remote URLs in order.
    if not os.path.isfile(fastq_tar_path) or pathlib.Path(fastq_tar_path).stat().st_size < 1000000:
        mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2, fastq_tar_gz_url3)
        last_idx = len(mirror_urls) - 1
        for idx, url in enumerate(mirror_urls):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(fastq_tar_path))
                    urllib.request.urlretrieve(url, fastq_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Fall through to the next mirror; only the last failure is fatal.
                if idx == last_idx:
                    raise
            else:
                break

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(fastq_tar_path, "r:gz") as tar:
        tar.extractall(path=outdir)
    os.remove(fastq_tar_path)

    #######################################################################
    #
    # Set command args
    #
    #######################################################################

    cmd_fields = {
        'package_path': package_path,
        'snake_tuto_data': os.path.join(package_path, "data/snake.tuto.data.yml"),
    }

    #######################################################################
    #
    # Copy data to directory tree
    #
    #######################################################################

    cmd = "snakemake --cores 1 -s {snake_tuto_data} --config MARKER=mfzr " \
          "PROJECT=asper1 PACKAGE_PATH={package_path} --until all_one_marker".format(**cmd_fields)

    # On Windows the command is passed as a single string, elsewhere as an argv list.
    if sys.platform.startswith("win"):
        run_args = cmd
    else:
        run_args = shlex.split(cmd)
    subprocess.run(args=run_args, check=True, cwd=outdir)
def setUpClass(cls):
    """Install the project with pip, reset the output tree and unpack the sorted reads.

    Sets ``cls.package_path``, ``cls.test_path``, ``cls.outdir_path`` and
    ``cls.outdir_data_path``. The archive ``sorted.tar.gz`` is downloaded into
    ``cls.outdir_data_path`` from the first mirror that works and extracted there.

    :raises Exception: The error of the last mirror if all three downloads fail
    :rtype: None
    """
    # Install the project from its source tree with the running interpreter.
    pip_cmd = '{} -m pip install . -q --upgrade --use-feature=in-tree-build'.format(
        sys.executable)
    # On Windows the command is passed as a single string, elsewhere as an argv list.
    if sys.platform.startswith("win"):
        pip_args = pip_cmd
    else:
        pip_args = shlex.split(pip_cmd)
    subprocess.run(args=pip_args, cwd=PathManager.get_project_path())

    cls.package_path = os.path.join(PathManager.get_package_path())
    cls.test_path = os.path.join(PathManager.get_test_path())
    cls.outdir_path = os.path.join(cls.test_path, 'outdir')
    cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
    shutil.rmtree(cls.outdir_path, ignore_errors=True)
    pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

    ############################################################################################
    #
    # Download sorted reads dataset
    #
    ############################################################################################

    sorted_tar_path = os.path.join(cls.outdir_data_path, "sorted.tar.gz")
    # Reuse a local archive unless it is absent or smaller than 1000000 bytes
    # (presumably an incomplete download); otherwise try the remote URLs in order.
    # NOTE(review): the archive lives under outdir_path, which was removed just
    # above, so a local copy is normally never found here.
    if not os.path.isfile(sorted_tar_path) or pathlib.Path(
            sorted_tar_path).stat().st_size < 1000000:
        mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2, sorted_tar_gz_url3)
        last_idx = len(mirror_urls) - 1
        for idx, url in enumerate(mirror_urls):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(sorted_tar_path))
                    urllib.request.urlretrieve(url, sorted_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Fall through to the next mirror; only the last failure is fatal.
                if idx == last_idx:
                    raise
            else:
                break

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(sorted_tar_path, "r:gz") as tar:
        tar.extractall(path=cls.outdir_data_path)
def setUpClass(cls):
    """Install vtam for the tests, reset the output tree and unpack the sorted reads.

    Sets ``cls.package_path``, ``cls.test_path``, ``cls.outdir_path`` and
    ``cls.outdir_data_path``. The archive ``sorted.tar.gz`` is downloaded into
    ``cls.outdir_data_path`` from the first mirror that works and extracted there.

    :raises Exception: The error of the last mirror if all three downloads fail
    :rtype: None
    """
    ########################################################################
    #
    # These tests need the vtam command in the path
    #
    ########################################################################

    pip_install_vtam_for_tests()

    cls.package_path = os.path.join(PathManager.get_package_path())
    cls.test_path = os.path.join(PathManager.get_test_path())
    cls.outdir_path = os.path.join(cls.test_path, 'outdir')
    cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
    shutil.rmtree(cls.outdir_path, ignore_errors=True)
    pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

    ############################################################################################
    #
    # Download sorted reads dataset (Updated Oct 10, 2020)
    #
    ############################################################################################

    sorted_tar_path = os.path.join(cls.outdir_data_path, "sorted.tar.gz")
    # Reuse a local archive unless it is absent or smaller than 1000000 bytes
    # (presumably an incomplete download); otherwise try the remote URLs in order.
    # NOTE(review): the archive lives under outdir_path, which was removed just
    # above, so a local copy is normally never found here.
    if not os.path.isfile(sorted_tar_path) or pathlib.Path(
            sorted_tar_path).stat().st_size < 1000000:
        mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2, sorted_tar_gz_url3)
        last_idx = len(mirror_urls) - 1
        for idx, url in enumerate(mirror_urls):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(sorted_tar_path))
                    urllib.request.urlretrieve(url, sorted_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Fall through to the next mirror; only the last failure is fatal.
                if idx == last_idx:
                    raise
            else:
                break

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(sorted_tar_path, "r:gz") as tar:
        tar.extractall(path=cls.outdir_data_path)
def setUp(self):
    """Store the package path and the table of paths used to build test commands."""
    self.package_path = PathManager.get_package_path()
    test_path = PathManager.get_test_path()

    # Minimal merge command
    this_file = __file__
    self.foopaths = {
        'foofile': os.path.relpath(this_file, self.package_path),
        'foodir': os.path.relpath(os.path.dirname(this_file), self.package_path),
        'outdir': 'tests/output',
        'sortedinfo_tsv': "data/example/sortedinfo_mfzr.tsv",
        'tsv_path': "data/example/sortedinfo_mfzr.tsv",
        'known_occurrences': "data/example/known_occurrences.tsv",
    }
def test_wopmars_runner_filter_with_cutoff_specific(self):
    """Check that ``--cutoff_specific`` ends up in the FilterLFN rule of the wopfile."""
    cmd = 'filter --sortedinfo {sortedinfo_tsv} --sorteddir {foodir} --asvtable asvtableoutput.tsv' \
          ' --cutoff_specific {optimize_lfn_variant_specific}'.format(**self.foopaths)

    # Arguments are parsed from the package directory. The try/finally restores
    # the previous working directory even if parsing fails (argparse raises
    # SystemExit), so a failure here cannot change the cwd of later tests.
    cwd = os.getcwd()
    os.chdir(self.package_path)
    try:
        args = ArgParser.get_main_arg_parser().parse_args(cmd.split(" "))
    finally:
        os.chdir(cwd)

    wopmars_runner = RunnerWopmars(command='filter', cli_args_dic=vars(args))
    wopfile_path = os.path.relpath(
        os.path.join(PathManager.get_package_path(), "tests/output/wopfile"),
        PathManager.get_package_path())
    wopfile_path, wopfile_content = wopmars_runner.create_wopfile(
        path=wopfile_path)

    wopfile = yaml.load(wopfile_content, Loader=yaml.SafeLoader)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(
        wopfile['rule FilterLFN']['params']['lfn_variant_specific_cutoff'],
        self.foopaths['optimize_lfn_variant_specific'])
def setUpClass(cls):
    """Store package/test paths, the command path table and cutoff constants on the class."""
    cls.package_path = PathManager.get_package_path()
    cls.test_path = PathManager.get_test_path()

    this_file = __file__
    cls.foopaths = {
        'foofile': os.path.relpath(this_file, cls.package_path),
        'foodir': os.path.relpath(os.path.dirname(this_file), cls.package_path),
        'sorteddir': 'output',
        'sortedinfo_tsv': "data/example/sortedinfo_mfzr.tsv",
        'optimize_lfn_variant_specific':
            "tests/test_files_dryad.f40v5_small/run1_mfzr_zfzr/optimize_lfn_variant_specific.tsv",
    }

    # Parameter values referenced by the tests
    cls.minseqlength_value_32, cls.minseqlength_value_40 = 32, 40
    cls.lfn_variant_replicate_cutoff = 0.002
def __init__(self, taxonomy_tsv=None):
    """Record where the taxonomy TSV lives and where its packaged gzip is expected.

    :param taxonomy_tsv: Path to the taxonomy TSV file. When None, the path
        ``taxonomy.tsv`` in the current working directory is used. Default None
    :type taxonomy_tsv: str
    :rtype: None
    """
    if taxonomy_tsv is not None:
        # Explicit target: remember it and make sure its parent directory exists
        self.taxonomy_tsv_path = taxonomy_tsv
        parent_dir = os.path.dirname(taxonomy_tsv)
        pathlib.Path(parent_dir).mkdir(parents=True, exist_ok=True)
    else:
        # No target given: fall back to the current working directory
        self.taxonomy_tsv_path = os.path.join(os.getcwd(), "taxonomy.tsv")

    self.tempdir = PathManager.instance().get_tempdir()

    # The gzip path is built one level above the package directory, under data/
    package_root = os.path.join(PathManager.get_package_path())
    self.taxonomy_tsv_gz_path = os.path.join(
        package_root, "..", "data", "taxonomy.tsv.gz")
def setUpClass(cls):
    """Install vtam, fetch the fastq dataset and build the data tree with snakemake.

    Sets ``cls.package_path``, ``cls.test_path``, ``cls.outdir_path``,
    ``cls.outdir_data_path``, ``cls.outdir_download_path``,
    ``cls.snakefile_tuto_data`` and ``cls.args``. The archive is kept in
    ``outdir_download`` (which is not wiped here) and extracted into ``outdir``.

    :raises Exception: The error of the last mirror if all three downloads fail
    :raises subprocess.CalledProcessError: If snakemake exits with a non-zero status
    :rtype: None
    """
    ########################################################################
    #
    # These tests need the vtam command in the path
    #
    ########################################################################

    pip_install_vtam_for_tests()  # vtam needs to be in the path

    cls.package_path = PathManager.get_package_path()
    cls.test_path = PathManager.get_test_path()
    cls.outdir_path = os.path.join(cls.test_path, 'outdir')
    shutil.rmtree(cls.outdir_path, ignore_errors=True)
    cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
    pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)
    cls.outdir_download_path = os.path.join(cls.test_path, 'outdir_download')
    pathlib.Path(cls.outdir_download_path).mkdir(parents=True, exist_ok=True)

    cls.snakefile_tuto_data = os.path.join(
        cls.package_path, "data/snake.tuto.data_makeknownoccurrences.yml")

    ############################################################################################
    #
    # Set command args
    #
    ############################################################################################

    cls.args = {
        'package_path': cls.package_path,
        'snake_tuto_data': cls.snakefile_tuto_data,
    }

    ############################################################################################
    #
    # Download fastq test dataset
    #
    ############################################################################################

    fastq_tar_path = os.path.join(cls.outdir_download_path, "fastq.tar.gz")
    # Reuse a local archive unless it is absent or smaller than 1000000 bytes
    # (presumably an incomplete download); otherwise try the remote URLs in order.
    if not os.path.isfile(fastq_tar_path) or pathlib.Path(
            fastq_tar_path).stat().st_size < 1000000:
        mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2, fastq_tar_gz_url3)
        last_idx = len(mirror_urls) - 1
        for idx, url in enumerate(mirror_urls):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(fastq_tar_path))
                    urllib.request.urlretrieve(url, fastq_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Fall through to the next mirror; only the last failure is fatal.
                if idx == last_idx:
                    raise
            else:
                break

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(fastq_tar_path, "r:gz") as tar:
        tar.extractall(path=cls.outdir_path)

    ############################################################################################
    #
    # Copy data to directory tree
    #
    ############################################################################################

    cmd = "snakemake --cores 1 -s {snake_tuto_data} --config MARKER=mfzr " \
          "PROJECT=asper1 PACKAGE_PATH={package_path} --until all_one_marker_makeknownoccurrences".format(**cls.args)

    # On Windows the command is passed as a single string, elsewhere as an argv list.
    if sys.platform.startswith("win"):
        run_args = cmd
    else:
        run_args = shlex.split(cmd)
    subprocess.run(args=run_args, check=True, cwd=cls.outdir_path)
def setUp(self):
    """Install vtam, reset the output directory and fill a fresh SQLite DB with fixtures.

    Sets ``self.test_path``, ``self.package_path``, ``self.outdir_path``,
    ``self.args`` (keys ``runmarker`` and ``db``), ``self.engine`` and
    ``self.sample_list``. Every table is written with ``if_exists='replace'``.

    :rtype: None
    """
    pip_install_vtam_for_tests()

    self.test_path = PathManager.get_test_path()
    self.package_path = PathManager.get_package_path()
    self.outdir_path = os.path.join(self.test_path, 'outdir')
    shutil.rmtree(self.outdir_path, ignore_errors=True)
    pathlib.Path(self.outdir_path).mkdir(parents=True, exist_ok=True)

    self.args = {}
    self.args['runmarker'] = os.path.join(self.package_path, "data", "example", "pool_run_marker.tsv")
    self.args['db'] = os.path.join(self.outdir_path, "db.sqlite")

    ############################################################################################
    #
    # Init DB
    #
    ############################################################################################

    fixture_dir = os.path.join(self.test_path, "test_files_dryad.f40v5_small", "run1_mfzr_zfzr")
    filter_codon_stop_path = os.path.join(fixture_dir, "filter_codon_stop.tsv")
    variant_path = os.path.join(fixture_dir, "variant_filter_codon_stop.tsv")
    sample_information_path = os.path.join(fixture_dir, "sample_information.tsv")
    filter_chimera_borderline_path = os.path.join(
        fixture_dir, "filter_chimera_borderline_and_filter_codon_stop.tsv")

    self.engine = sqlalchemy.create_engine('sqlite:///{}'.format(
        self.args['db']), echo=False)

    sample_information_df = pandas.read_csv(sample_information_path, sep="\t", header=0)
    run_df = pandas.DataFrame({'name': ['run1']}, index=range(1, 2))
    marker_df = pandas.DataFrame({'name': ['MFZR', 'ZFZR']}, index=range(1, 3))
    sample_df = pandas.DataFrame(
        {'name': ['tpos1_run1', 'tnegtag_run1', '14ben01', '14ben02']}, index=range(1, 5))
    variant_df = pandas.read_csv(variant_path, sep="\t", header=0, index_col='id')
    filter_codon_stop_df = pandas.read_csv(filter_codon_stop_path, sep="\t", header=0)
    filter_chimera_borderline_db = pandas.read_csv(
        filter_chimera_borderline_path, sep="\t", header=0)

    # (dataframe, target table, extra to_sql keyword arguments), in insertion order.
    # Tables given index_label='id' store the dataframe index as their 'id' column.
    table_loads = [
        (sample_information_df, SampleInformation.__tablename__, {}),
        (run_df, Run.__tablename__, {'index_label': 'id'}),
        (marker_df, Marker.__tablename__, {'index_label': 'id'}),
        (sample_df, Sample.__tablename__, {'index_label': 'id'}),
        (variant_df, Variant.__tablename__, {'index_label': 'id'}),
        (filter_codon_stop_df, FilterCodonStop.__tablename__, {}),
        (filter_chimera_borderline_db, FilterChimeraBorderline.__tablename__, {}),
    ]
    # One managed connection for all inserts: it is closed on exit instead of
    # leaving a new, never-closed connection behind for every table.
    with self.engine.connect() as conn:
        for frame, table_name, extra_kwargs in table_loads:
            frame.to_sql(name=table_name, con=conn, if_exists='replace', **extra_kwargs)

    self.sample_list = ['tpos1_run1', 'tnegtag_run1', '14ben01', '14ben02']
def setUp(self):
    """Install the project with pip, reset the output tree and unpack the sorted fasta data.

    Sets ``self.package_path``, ``self.test_path``, ``self.outdir_path``,
    ``self.outdir_data_path``, ``self.asvtable_path`` and ``self.args``, and
    sets the environment variable ``VTAM_LOG_VERBOSITY`` to ``"10"``. The
    archive is downloaded into ``outdir/data`` but extracted into ``outdir``.

    :raises Exception: The error of the last mirror if all three downloads fail
    :rtype: None
    """
    # vtam needs to be in the path
    pip_cmd = '{} -m pip install . -q --upgrade --use-feature=in-tree-build'.format(
        sys.executable)
    # On Windows the command is passed as a single string, elsewhere as an argv list.
    if sys.platform.startswith("win"):
        pip_args = pip_cmd
    else:
        pip_args = shlex.split(pip_cmd)
    subprocess.run(args=pip_args, cwd=PathManager.get_project_path())

    self.package_path = os.path.join(PathManager.get_package_path())
    self.test_path = os.path.join(PathManager.get_test_path())
    self.outdir_path = os.path.join(self.test_path, 'outdir')
    self.outdir_data_path = os.path.join(self.outdir_path, 'data')
    # during development of the test, this prevents errors
    shutil.rmtree(self.outdir_path, ignore_errors=True)
    pathlib.Path(self.outdir_data_path).mkdir(parents=True, exist_ok=True)
    os.environ['VTAM_LOG_VERBOSITY'] = str(10)

    ############################################################################################
    #
    # Download sorted fasta test dataset
    #
    ############################################################################################

    sorted_tar_path = os.path.join(self.outdir_data_path, "sorted.tar.gz")
    # Reuse a local archive unless it is absent or smaller than 1000000 bytes
    # (presumably an incomplete download); otherwise try the remote URLs in order.
    # NOTE(review): the archive lives under outdir_path, which was removed just
    # above, so a local copy is normally never found here.
    if not os.path.isfile(sorted_tar_path) or pathlib.Path(
            sorted_tar_path).stat().st_size < 1000000:
        mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2, sorted_tar_gz_url3)
        last_idx = len(mirror_urls) - 1
        for idx, url in enumerate(mirror_urls):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(sorted_tar_path))
                    urllib.request.urlretrieve(url, sorted_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Fall through to the next mirror; only the last failure is fatal.
                if idx == last_idx:
                    raise
            else:
                break

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(sorted_tar_path, "r:gz") as tar:
        tar.extractall(path=self.outdir_path)

    ############################################################################################
    #
    # Paths
    #
    ############################################################################################

    self.asvtable_path = os.path.join(self.outdir_path, "asvtable_default.tsv")

    here = os.path.dirname(__file__)
    self.args = {}
    self.args['sortedinfo'] = os.path.join(here, "sortedinfo.tsv")
    self.args['params'] = os.path.join(here, "params_min_replicate_number1.yml")
    self.args['params_lfn_variant'] = os.path.join(here, "params_lfn_variant.yml")
    self.args['params_lfn_variant_replicate'] = os.path.join(
        here, "params_lfn_variant_replicate.yml")
def setUpClass(cls):
    """Install vtam, unpack the fastq dataset and run ``vtam merge`` once for the class.

    Sets the path attributes ``cls.package_path``, ``cls.test_path``,
    ``cls.outdir_path``, ``cls.outdir_data_path``, ``cls.fastqinfo_path``,
    ``cls.fastqdir_path``, ``cls.fastainfo_path``, ``cls.fastadir_path``,
    ``cls.sorted_dir_path``, ``cls.sortedinfo_path``, ``cls.log_path``,
    ``cls.asvtable_path`` and the command field table ``cls.args``.

    :raises Exception: The error of the last mirror if all three downloads fail
    :rtype: None
    """
    pip_install_vtam_for_tests()  # vtam needs to be in the path

    cls.package_path = PathManager.get_package_path()
    cls.test_path = os.path.join(PathManager.get_test_path())
    cls.outdir_path = os.path.join(cls.test_path, 'outdir')
    cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
    # during development of the test, this prevents errors
    shutil.rmtree(cls.outdir_path, ignore_errors=True)
    pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

    ############################################################################################
    #
    # Download fastq test dataset
    #
    ############################################################################################

    fastq_tar_path = os.path.join(cls.outdir_data_path, "fastq.tar.gz")
    # Reuse a local archive unless it is absent or smaller than 1000000 bytes
    # (presumably an incomplete download); otherwise try the remote URLs in order.
    # NOTE(review): the archive lives under outdir_path, which was removed just
    # above, so a local copy is normally never found here.
    if not os.path.isfile(fastq_tar_path) or pathlib.Path(
            fastq_tar_path).stat().st_size < 1000000:
        mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2, fastq_tar_gz_url3)
        last_idx = len(mirror_urls) - 1
        for idx, url in enumerate(mirror_urls):
            try:
                with tqdm(...) as t:
                    t.set_description(os.path.basename(fastq_tar_path))
                    urllib.request.urlretrieve(url, fastq_tar_path, reporthook=tqdm_hook(t))
            except Exception:
                # Fall through to the next mirror; only the last failure is fatal.
                if idx == last_idx:
                    raise
            else:
                break

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(fastq_tar_path, "r:gz") as tar:
        tar.extractall(path=cls.outdir_path)

    # Set test paths
    cls.fastqinfo_path = os.path.join(PathManager.get_package_path(), "data/example/fastqinfo.tsv")
    cls.fastqdir_path = os.path.join(cls.outdir_path, "fastq")
    cls.fastainfo_path = os.path.join(cls.outdir_path, "fastainfo.tsv")
    cls.fastadir_path = os.path.join(cls.outdir_path, "merged")

    cls.sorted_dir_path = os.path.join(cls.outdir_path, "sorted")
    cls.sortedinfo_path = os.path.join(cls.sorted_dir_path, "sortedinfo.tsv")

    cls.log_path = os.path.join(cls.outdir_path, "vtam.log")

    cls.asvtable_path = os.path.join(cls.outdir_path, "asvtable_default.tsv")

    cls.args = {}
    cls.args['fastqinfo'] = cls.fastqinfo_path
    cls.args['fastqdir'] = cls.fastqdir_path
    cls.args['fastainfo'] = cls.fastainfo_path
    cls.args['fastadir'] = cls.fastadir_path
    cls.args['sorted'] = cls.sorted_dir_path
    cls.args['db'] = os.path.join(cls.outdir_path, "db.sqlite")
    cls.args['sortedinfo'] = cls.sortedinfo_path
    cls.args['sorteddir'] = cls.sorted_dir_path
    cls.args['asvtable'] = cls.asvtable_path
    cls.args['log'] = cls.log_path

    ################################################################################################################
    #
    # Command Merge
    #
    ################################################################################################################

    cmd = "vtam merge --fastqinfo {fastqinfo} --fastqdir {fastqdir} --fastainfo {fastainfo} --fastadir {fastadir} " \
          "-v --log {log}".format(**cls.args)

    # On Windows the command is passed as a single string, elsewhere as an argv list.
    if sys.platform.startswith("win"):
        merge_args = cmd
    else:
        merge_args = shlex.split(cmd)
    subprocess.run(args=merge_args)
def setUp(self):
    """Create the main argument parser and the table of fixture paths for the tests.

    Also creates the directory ``<test_path>/outdir/emptydir``.
    """
    self.parser = ArgParser.get_main_arg_parser()

    package_path = PathManager.get_package_path()
    test_path = PathManager.get_test_path()
    self.test_path = test_path
    outdir_path = os.path.join(test_path, "outdir")
    emptydir_path = os.path.join(outdir_path, 'emptydir')

    def in_test_files(*parts):
        # Path below <test_path>/test_files
        return os.path.join(test_path, "test_files", *parts)

    def in_package(*parts):
        # Path below the package directory
        return os.path.join(package_path, *parts)

    self.foopaths = {
        'filedoesnotexist': "filedoesnotexist",
        'dirdoesnotexist': "dirdoesnotexist",
        'fileisempty': in_test_files("emptyfile"),
        'filenottsv': __file__,
        'fastainfo_tsv': os.path.join(test_path, "data/example/sortedinfo_mfzr.tsv"),
        'sortedinfo_tsv': in_package("data/example/sortedinfo_mfzr.tsv"),
        'sortedinfo_duplicated_sample_names':
            in_test_files("sortedinfo_mfzr_duplicated_sample_names.tsv"),
        'params_yml': in_package("data/example/params_mfzr.yml"),
        'params_wrong_yml': os.path.join(test_path, "test_params_file/params_wrong.yml"),
        'known_occurrences': in_package("data/example/known_occurrences.tsv"),
        'asvtable_tsv': os.path.join(
            test_path, "test_files_dryad.f40v5_small", "run1_mfzr_zfzr/asvtable_default.tsv"),
        'fastqinfo': in_test_files("fastqinfo.tsv"),
        # NOTE(review): this points at the mergedinfo_* file, same as the
        # 'mergedinfo_duplicated_sample_names' entry below; confirm it is intended.
        'fastqinfo_duplicated_sample_names':
            in_test_files("mergedinfo_duplicated_sample_names.tsv"),
        'fastqdir': in_test_files("fastq"),
        'mergedinfo': in_test_files("mergedinfo.tsv"),
        'mergedinfo_duplicated_sample_names':
            in_test_files("mergedinfo_duplicated_sample_names.tsv"),
        'mergeddir': in_test_files("merged"),
        'runmarker_tsv': in_package("data/example", "pool_run_marker.tsv"),
        'taxonomy_tsv': os.path.join(
            PathManager.get_test_path(), "test_files_dryad.f40v5_small", "taxonomy.tsv"),
        'foodir': package_path,
        'sorteddir': outdir_path,
        'emptydir': emptydir_path,
        'blastdb': os.path.relpath(
            os.path.join(PathManager.get_test_path(), 'test_files', 'blastdb'),
            PathManager.get_package_path()),
    }

    pathlib.Path(emptydir_path).mkdir(parents=True, exist_ok=True)
def setUp(self):
    """Install vtam, reset the output directory and build a one-row-per-table SQLite DB.

    Sets ``self.test_path``, ``self.package_path``, ``self.outdir_path``,
    ``self.engine`` and ``self.session``; creates the schema from wopmars'
    ``Base`` metadata; appends the inline fixtures to the Run, Marker, Sample,
    FilterIndel and Variant tables; and writes an empty ``params.yml`` plus a
    ``sortedinfo.tsv`` into the output directory.

    :rtype: None
    """
    pip_install_vtam_for_tests()

    self.test_path = PathManager.get_test_path()
    self.package_path = PathManager.get_package_path()
    self.outdir_path = os.path.join(self.test_path, 'outdir')
    shutil.rmtree(self.outdir_path, ignore_errors=True)
    pathlib.Path(self.outdir_path).mkdir(exist_ok=True, parents=True)

    # Inline fixtures: space-separated, one header row and one data row each.
    # NOTE(review): the line breaks inside the triple-quoted fixtures were
    # reconstructed from a whitespace-collapsed view of this file; confirm them
    # against the repository version.
    marker_str = "id name\n1 IIICBR"
    run_str = "id name\n1 TAS2"
    sample_str = "id name\n1 S21"
    variant_str = """id sequence
1 ATTGTCAGACACTCCGTACCATTAGGGTGCTGCAGTCGACTAGTCTATTTTAAGCTTACACGTAGCCGGAATTAGTTCATTACTGGGGTCAATTAATATCATAACAACGATCATTAACTAGAGGGCCCCAGGAATGACCTGGGAGAACTTACCGTTATTCGTGTGGGCTGTATTTATTACAGCGTGGTTACTTGTACTGTCTTTACCAGTACTAGCTGGTGCGATTACCATGCTGCTAACAGATAGGAACTAGAATACTAGTTTCTACGACCCGAACGGAGGAGGAGATCCTCTGCTATACCAGCATCTATTC"""
    filter_indel_str = """id run_id marker_id variant_id sample_id replicate read_count filter_delete
1 1 1 1 1 1 50 0"""

    from sqlalchemy import create_engine
    db_path = os.path.join(self.outdir_path, 'db.sqlite')
    self.engine = create_engine('sqlite:///{}'.format(db_path), echo=True)
    from wopmars.Base import Base
    Session = sqlalchemy.orm.sessionmaker(bind=self.engine)
    self.session = Session()
    # Create the tables before appending rows to them
    Base.metadata.create_all(self.engine)

    from io import StringIO
    # (fixture text, target table), in insertion order
    table_fixtures = [
        (run_str, Run.__tablename__),
        (marker_str, Marker.__tablename__),
        (sample_str, Sample.__tablename__),
        (filter_indel_str, FilterIndel.__tablename__),
        (variant_str, Variant.__tablename__),
    ]
    # One managed connection for all inserts: it is closed on exit instead of
    # leaving a new, never-closed connection behind for every table.
    with self.engine.connect() as conn:
        for fixture_str, table_name in table_fixtures:
            fixture_df = pandas.read_csv(StringIO(fixture_str), sep=" ")
            fixture_df.to_sql(name=table_name, con=conn, if_exists='append', index=False)

    pathlib.Path(os.path.join(self.outdir_path, "params.yml")).touch()

    # NOTE(review): the file is named .tsv but the separators below are single
    # spaces as seen in the collapsed view; the repository version may use tabs.
    sortereadinfo_str = """run marker sample replicate sortedfasta
TAS2 IIICBR S21 1 TAS2-R1_S1_L001_R1_001_000.fasta"""
    with open(os.path.join(self.outdir_path, "sortedinfo.tsv"), 'w') as fout:
        fout.write(sortereadinfo_str)