def ul_cmd(sample_name):
    """
    Upload reads to remote storage
    """
    # Build two views of the same sample: one rooted at the local data path
    # and one rooted at the remote URL, then hand both to the uploader.
    config = TenxApp.config
    local_sample = TenxSample(base_path=config.get("TENX_DATA_PATH"), name=sample_name)
    remote_sample = TenxSample(base_path=config.get("TENX_REMOTE_URL"), name=sample_name)
    ul(local_sample, remote_sample)
def test5_run_cleanup(self, call_patch, check_call_patch, check_output_patch):
    # run_cleanup must refuse to delete anything while the (patched) listing
    # does not report four mkoutput fasta files, and must run the full gsutil
    # sequence once it does. The working directory must be unchanged afterwards.
    call_patch.return_value = 0
    check_call_patch.return_value = 0
    check_output_patch.return_value = b'0'
    pwd = os.getcwd()

    err = io.StringIO()
    saved_stderr = sys.stderr
    sys.stderr = err
    try:
        sample = TenxSample(name="TESTER", base_path=TenxApp.config.get("TENX_REMOTE_URL"))
        asm = sample.assembly()

        with self.assertRaisesRegex(
                Exception,
                "Failed to find 4 mkoutput fasta files. Refusing to remove post assembly files"):
            assembly.run_cleanup(asm)

        # Now report four fasta files so the cleanup is allowed to proceed.
        check_output_patch.return_value = b'a.fasta.gz\na.fasta.gz\na.fasta.gz\na.fasta.gz\n'
        # Rewind the capture buffer so the second run overwrites the partial
        # output written by the failed first run.
        err.seek(0, 0)
        assembly.run_cleanup(asm)
    finally:
        # Always undo the redirection; previously stderr stayed pointed at the
        # StringIO after this test, swallowing output of whatever ran next.
        sys.stderr = saved_stderr

    self.maxDiff = 10000
    expected_err = (
        "Cleanup assembly for TESTER ...\n"
        "Assembly remote URL: gs://data/TESTER/assembly\n"
        "Checking if gsutil is installed...\n"
        "RUNNING: which gsutil\n"
        "Checking mkfastq files exist.\n"
        "RUNNING: gsutil ls gs://data/TESTER/assembly/mkoutput/*fasta.gz\n"
        "Removing ASSEMBLER_CS logs path.\n"
        "RUNNING: gsutil -m rm -r gs://data/TESTER/assembly/ASSEMBLER_CS\n"
        "Moving outs / assembly / stats to outs.\n"
        "RUNNING: gsutil -m mv gs://data/TESTER/assembly/outs/assembly/stats gs://data/TESTER/assembly/outs\n"
        "Removing outs / assembly path\n"
        "RUNNING: gsutil -m rm -r gs://data/TESTER/assembly/outs/assembly\n"
        "Cleanup assembly ... OK\n"
    )
    self.assertEqual(err.getvalue(), expected_err)
    self.assertEqual(os.getcwd(), pwd)
def test10_alignment(self):
    # An alignment built from a sample keeps the reference it was given and
    # derives its paths from the sample path.
    tester = TenxSample(base_path=TenxApp.config['TENX_DATA_PATH'], name="TESTER")
    aln = tester.alignment(self.ref)

    self.assertEqual(aln.ref, self.ref)

    expected_aln_path = os.path.join(tester.path, "alignment")
    self.assertEqual(expected_aln_path, aln.path)

    expected_outs_path = os.path.join(aln.path, "outs")
    self.assertEqual(expected_outs_path, aln.outs_path)
def setUp(self):
    # Fixture: a sample in a fresh temporary directory, plus an assembly and
    # an alignment (against a named reference) derived from it.
    test_dn = os.path.dirname(__file__)
    self.data_dn = os.path.join(test_dn, "data", "app")
    self.temp_d = tempfile.TemporaryDirectory()
    self.sample = TenxSample(base_path=self.temp_d.name, name="__TEST__")
    self.asm = self.sample.assembly()
    self.ref = TenxReference(name="__REF__")
    self.aln = self.sample.alignment(ref=self.ref)
    # Every test starts without an application config.
    TenxApp.config = None
def aln_align_cmd(sample_name, ref_name):
    """
    Create alignments with longranger.
    """
    config = TenxApp.config
    assert config, "Must provide tenx yaml config file!"
    # Local sample + named reference -> alignment object, which is then run.
    data_root = config["TENX_DATA_PATH"]
    local_sample = TenxSample(base_path=data_root, name=sample_name)
    reference = TenxReference(name=ref_name)
    alignment.run_align(local_sample.alignment(ref=reference))
def test11_is_successful(self):
    # With only the assembly directory present the assembly is not
    # successful; after outs/assembly is created it is.
    data_root = TenxApp.config.get("TENX_DATA_PATH")
    asm = TenxSample(name="TESTER", base_path=data_root).assembly()

    os.makedirs(asm.path)
    self.assertFalse(asm.is_successful())

    outs_assembly_dn = os.path.join(asm.path, "outs", "assembly")
    os.makedirs(outs_assembly_dn)
    self.assertTrue(asm.is_successful())
def asm_download_cmd(sample_name):
    """
    Download an Assembly
    """
    config = TenxApp.config
    assert config, "Must provide tenx yaml config file!"
    # Same sample name, two roots: the remote URL and the local data path.
    remote_sample = TenxSample(base_path=config["TENX_REMOTE_URL"], name=sample_name)
    remote_asm = remote_sample.assembly()
    local_sample = TenxSample(base_path=config["TENX_DATA_PATH"], name=sample_name)
    local_asm = local_sample.assembly()
    asm_download(remote_asm, local_asm)
def asm_upload_cmd(sample_name):
    """
    Upload an assembly from local disk to cloud storage.
    """
    config = TenxApp.config
    assert config, "Must provide tenx yaml config file!"
    # Same sample name, two roots: the local data path and the remote URL.
    local_sample = TenxSample(base_path=config["TENX_DATA_PATH"], name=sample_name)
    local_asm = local_sample.assembly()
    remote_sample = TenxSample(base_path=config["TENX_REMOTE_URL"], name=sample_name)
    remote_asm = remote_sample.assembly()
    run_upload(local_asm, remote_asm)
def setUp(self):
    # Fixture: an assembly whose directory exists inside a fresh temporary
    # directory, and an application config pointing at it.
    self.temp_d = tempfile.TemporaryDirectory()
    temp_dn = self.temp_d.name

    self.asm = TenxSample(base_path=temp_dn, name="__SAMPLE__").assembly()
    os.makedirs(self.asm.path)

    data_path = os.path.join(temp_dn, "__SAMPLE__", "assembly")
    cromwell_path = os.path.join(os.path.dirname(__file__), "data", "app")
    tenx.app.TenxApp.config = {
        "TENX_DATA_PATH": data_path,
        "TENX_REMOTE_URL": "gs://data",
        "TENX_CROMWELL_PATH": cromwell_path,
    }
def test1_run_assemble_fails_without_supernova(self):
    # With no patching in place, run_assemble is expected to fail with
    # "No such file or directory" and must not have created the sample
    # directory.
    captured = io.StringIO()
    sys.stderr = captured

    data_root = TenxApp.config.get("TENX_DATA_PATH")
    sample = TenxSample(base_path=data_root, name="TESTER")
    asm = sample.assembly()

    with self.assertRaisesRegex(Exception, "No such file or directory"):
        run_assemble(asm)
    self.assertFalse(os.path.exists(sample.path))

    expected_err = (
        "Checking if supernova is in PATH...\n"
        "RUNNING: supernova --help\n"
    )
    self.assertEqual(captured.getvalue(), expected_err)
    sys.stderr = sys.__stderr__
def setUp(self):
    # Fixture: a local sample with one fastq file in its reads directory and
    # a remote sample of the same name.
    self.temp_d = tempfile.TemporaryDirectory()

    # Presumably instantiating TenxApp initialises TenxApp.config - the item
    # assignments below rely on it being subscriptable.
    if TenxApp.config is None:
        TenxApp()
    TenxApp.config['TENX_DATA_PATH'] = self.temp_d.name
    TenxApp.config['TENX_REMOTE_URL'] = 'gs://data'

    local_root = TenxApp.config.get("TENX_DATA_PATH")
    self.lsample = TenxSample(name='TEST-001', base_path=local_root)
    os.makedirs(self.lsample.reads_path)
    fastq_fn = os.path.join(self.lsample.reads_path, 'read1.fastq')
    with open(fastq_fn, 'w') as fastq_f:
        fastq_f.write("FASTQ\n")  # a fastq file

    remote_root = TenxApp.config.get("TENX_REMOTE_URL")
    self.rsample = TenxSample(name='TEST-001', base_path=remote_root)
def test2_download_no_fastqs(self, test_patch):
    # Downloading must raise when no fastqs end up locally, but the local
    # reads directory is still expected to exist afterwards.
    test_patch.return_value = 1

    local_root = TenxApp.config.get("TENX_DATA_PATH")
    remote_root = TenxApp.config.get("TENX_REMOTE_URL")
    lsample = TenxSample(name='TESTER', base_path=local_root)
    rsample = TenxSample(name='TESTER', base_path=remote_root)

    captured = io.StringIO()
    sys.stderr = captured
    with self.assertRaisesRegex(Exception, 'Failed to download read fastqs'):
        reads.download(lsample, rsample)
    self.assertTrue(os.path.exists(lsample.reads_path))

    template = (
        "Fetching {} fastqs from the object store...\n"
        "Entering {}\n"
        "Checking for sample reads at {} ...\n"
    )
    expected_err = template.format(rsample.name, lsample.reads_path, rsample.reads_path)
    self.assertEqual(captured.getvalue(), expected_err)
    sys.stderr = sys.__stderr__
def test31_run_mkoutput_fails_with_incomplete_assembly(self, check_call_patch):
    # run_mkoutput must refuse to run for an assembly that has produced
    # nothing on disk, after logging only its opening message.
    check_call_patch.return_value = '0'
    captured = io.StringIO()
    sys.stderr = captured

    data_root = TenxApp.config.get("TENX_DATA_PATH")
    asm = TenxSample(base_path=data_root, name="TESTER").assembly()

    expected_failure = "Assembly is not complete! Cannot run mkoutput!"
    with self.assertRaisesRegex(Exception, expected_failure):
        assembly.run_mkoutput(asm)

    self.assertEqual(captured.getvalue(), "Running mkoutput for TESTER...\n")
    sys.stderr = sys.__stderr__
def setUp(self):
    # Fixture: an alignment (sample + reference) whose directory exists
    # inside a fresh temporary directory, and an application config pointing
    # at that temporary directory.
    self.temp_d = tempfile.TemporaryDirectory()
    temp_dn = self.temp_d.name

    reference = TenxReference(name="__REF__")
    self.aln = TenxSample(base_path=temp_dn, name="__SAMPLE__").alignment(ref=reference)
    os.makedirs(self.aln.path)

    cromwell_path = os.path.join(os.path.dirname(__file__), "data", "app")
    tenx.app.TenxApp.config = {
        "TENX_DATA_PATH": temp_dn,
        "TENX_REMOTE_URL": "gs://data",
        "TENX_REMOTE_REFSU_URL": "gs://resources/refs",
        "TENX_CROMWELL_PATH": cromwell_path,
    }
def test3_run_assemble_success(self, check_call_patch):
    # With the outs/assembly directory already in place and the subprocess
    # call patched, run_assemble completes and logs both supernova commands.
    check_call_patch.return_value = '0'
    captured = io.StringIO()
    sys.stderr = captured

    data_root = TenxApp.config.get("TENX_DATA_PATH")
    sample = TenxSample(base_path=data_root, name="TESTER")
    asm = sample.assembly()
    os.makedirs(asm.outs_assembly_path)

    run_assemble(asm)
    self.assertTrue(os.path.exists(sample.path))

    template = (
        "Checking if supernova is in PATH...\n"
        "RUNNING: supernova --help\n"
        "RUNNING: supernova run --id=assembly --fastqs={} --uiport=18080 --nodebugmem --localcores=2 --localmem=2\n"
    )
    expected_err = template.format(sample.reads_path)
    self.assertEqual(captured.getvalue(), expected_err)
    sys.stderr = sys.__stderr__
def test3_download(self, test_patch):
    # When a fastq is already present in the local reads directory the
    # download finishes without raising and logs the closing OK message.
    test_patch.return_value = 1

    local_root = TenxApp.config.get("TENX_DATA_PATH")
    remote_root = TenxApp.config.get("TENX_REMOTE_URL")
    lsample = TenxSample(name='TESTER', base_path=local_root)
    rsample = TenxSample(name='TESTER', base_path=remote_root)

    os.makedirs(lsample.reads_path)
    fastq_fn = os.path.join(lsample.reads_path, 'read1.fastq')
    with open(fastq_fn, 'w') as fastq_f:
        fastq_f.write("FASTQ\n")  # a fastq file

    captured = io.StringIO()
    sys.stderr = captured
    reads.download(lsample, rsample)

    template = (
        "Fetching {} fastqs from the object store...\n"
        "Entering {}\n"
        "Checking for sample reads at {} ...\n"
        "Fetching fastqs from the object store...OK\n"
    )
    expected_err = template.format(lsample.name, lsample.reads_path, rsample.reads_path)
    self.assertEqual(captured.getvalue(), expected_err)
    sys.stderr = sys.__stderr__
def test2_run_align(self, test_patch):
    # With outs/summary.csv already present and the subprocess call patched,
    # run_align completes and logs the longranger command line.
    test_patch.return_value = '0'
    captured = io.StringIO()
    sys.stderr = captured

    data_root = TenxApp.config['TENX_DATA_PATH']
    aln = TenxSample(base_path=data_root, name="TEST_SUCCESS").alignment(self.ref)

    os.makedirs(os.path.join(aln.path, "outs"))
    summary_fn = os.path.join(aln.path, "outs", "summary.csv")
    with open(summary_fn, "w") as summary_f:
        summary_f.write("SUCCESS!")

    alignment.run_align(aln)

    template = (
        "Creating alignments for TEST_SUCCESS\n"
        "Entering {}\n"
        "Running longranger wgs --id=alignment --sample=TEST_SUCCESS --reference={} --fastqs={} --vcmode=freebayes --disable-ui --jobmode=local --localmem=6 --localcores=1 ...\n"
    )
    expected_err = template.format(
        aln.sample.path, self.ref.directory(), aln.sample.reads_path)
    self.assertEqual(captured.getvalue(), expected_err)
    sys.stderr = sys.__stderr__
def test1_aln_asm(self):
    # A sample can produce an alignment (for a reference) and an assembly;
    # each is truthy, has the expected class name, lives under the sample
    # path and points back at the sample.
    data_root = TenxApp.config.get("TENX_DATA_PATH")
    sample = TenxSample(name="TEST-001", base_path=data_root)

    # --- alignment ---
    ref = TenxReference(name='refdata-GRCh38-2.1.0')
    aln = sample.alignment(ref=ref)
    self.assertTrue(bool(aln))
    self.assertEqual(aln.__class__.__name__, "TenxAlignment")
    expected_aln_path = os.path.join(sample.path, "alignment")
    self.assertEqual(expected_aln_path, aln.path)
    self.assertEqual(sample, aln.sample)
    self.assertEqual(ref, aln.ref)

    # --- assembly ---
    asm = sample.assembly()
    self.assertTrue(bool(asm))
    self.assertEqual(asm.__class__.__name__, "TenxAssembly")
    expected_asm_path = os.path.join(sample.path, "assembly")
    self.assertEqual(expected_asm_path, asm.path)
    self.assertEqual(sample, asm.sample)
def test_run_pipeline(self, subprocess_check_call_p):
    # run_pipeline on the fixture assembly must invoke the (patched)
    # subprocess call exactly once and log the java command it runs.
    #
    # The former local `asm` (a second, never-used "blah" assembly) was dead
    # code and has been removed; the test has always exercised self.asm.
    captured = io.StringIO()
    saved_stderr = sys.stderr
    sys.stderr = captured
    try:
        run_pipeline(self.asm)
    finally:
        # Undo the redirection even if run_pipeline raises, so a failure here
        # cannot swallow the stderr output of later tests.
        sys.stderr = saved_stderr

    subprocess_check_call_p.assert_called_once()
    self.assertRegex(captured.getvalue(), "RUNNING: java -Dconfig.file")
def test0_sample(self):
    # A sample derives all of its paths from base_path and name, whether the
    # base is the local data path or the remote URL.
    sample_name = "TEST-001"
    for config_key in ("TENX_DATA_PATH", "TENX_REMOTE_URL"):
        base_path = TenxApp.config.get(config_key)
        s = TenxSample(name=sample_name, base_path=base_path)

        self.assertEqual(s.base_path, base_path)
        self.assertEqual(s.name, "TEST-001")

        expected_path = os.path.join(base_path, "TEST-001")
        self.assertEqual(s.path, expected_path)

        expected_reads = os.path.join(base_path, "TEST-001", "reads")
        self.assertEqual(s.reads_path, expected_reads)

        expected_pipeline = os.path.join(base_path, "TEST-001", "pipeline")
        self.assertEqual(s.pipeline_path, expected_pipeline)
def test32_run_mkoutput_fails_without_all_fasta_files(self, check_call_patch):
    # With outs/assembly present but the (patched) supernova producing no
    # files, run_mkoutput runs every mkoutput style and then fails because
    # it finds 0 of the 4 expected fasta.gz files.
    check_call_patch.return_value = '0'
    captured = io.StringIO()
    sys.stderr = captured

    data_root = TenxApp.config.get("TENX_DATA_PATH")
    asm = TenxSample(base_path=data_root, name="TESTER").assembly()
    os.makedirs(asm.outs_assembly_path)

    expected_failure = "Expected 4 assembly fasta.gz files in {} after running mkoutput, but found 0".format(
        asm.mkoutput_path)
    with self.assertRaisesRegex(Exception, expected_failure):
        assembly.run_mkoutput(asm)

    template = (
        "Running mkoutput for TESTER...\n"
        "Checking if supernova is in PATH...\n"
        "RUNNING: supernova --help\n"
        "Entering {ASM_D}/mkoutput\n"
        "RUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.raw --style=raw\n"
        "RUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.megabubbles --style=megabubbles\n"
        "RUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.pseudohap2 --style=pseudohap2\n"
    )
    expected_err = template.format(ASM_D=asm.path)
    self.assertEqual(captured.getvalue(), expected_err)
    sys.stderr = sys.__stderr__
def test2_run_assemble_fails_when_no_outs_assembly_dir(self, check_call_patch):
    # With the subprocess call patched, supernova "runs" but produces no
    # outs/assembly directory; run_assemble must report that. Also checks
    # that TENX_ASM_PARAMS is appended to the supernova command line.
    import re  # local import: only needed to escape the path in the regex

    check_call_patch.return_value = '0'
    err = io.StringIO()
    saved_stderr = sys.stderr
    sys.stderr = err
    try:
        # NOTE(review): this key is not removed again here - presumably the
        # fixture resets TenxApp.config between tests; confirm.
        TenxApp.config['TENX_ASM_PARAMS'] = "--maxreads='all'"
        sample = TenxSample(name="TESTER", base_path=TenxApp.config.get("TENX_DATA_PATH"))
        asm = sample.assembly()

        # The expected message embeds a filesystem path; escape it so any
        # regex metacharacters in the path are matched literally.
        expected_failure = re.escape(
            "Ran supernova script, but {} was not found".format(asm.outs_assembly_path))
        with self.assertRaisesRegex(Exception, expected_failure):
            run_assemble(asm)
    finally:
        # Undo the redirection even when the test fails part-way.
        sys.stderr = saved_stderr

    self.assertTrue(os.path.exists(sample.path))
    expected_err = (
        "Checking if supernova is in PATH...\n"
        "RUNNING: supernova --help\n"
        "RUNNING: supernova run --id=assembly --fastqs={} --uiport=18080 --nodebugmem --localcores=2 --localmem=2 --maxreads='all'\n"
    ).format(sample.reads_path)
    self.assertEqual(err.getvalue(), expected_err)
def asm_cleanup_cmd(sample_name):
    """
    Cleanup Assembly

    This command removes unneeded post assembly file including the "outs/assembly" and "ASSEMBLER_CS" directories. It first ensures the 4 mkoutput files are generated. Additionally, the "outs/assembly/stats" directory is relocated to "outs".
    """
    config = TenxApp.config
    assert config, "Must provide tenx yaml config file!"
    # Cleanup operates on the assembly rooted at the remote URL.
    remote_sample = TenxSample(base_path=config.get("TENX_REMOTE_URL"), name=sample_name)
    remote_asm = remote_sample.assembly()
    tenx.assembly.run_cleanup(remote_asm)
def test10_assembly(self):
    # Every assembly path attribute is derived from
    # <base_path>/TESTER/assembly, for both the local and the remote base.
    # Each entry: (attribute name, path components below the assembly dir).
    expected_layout = (
        ("path", ()),
        ("mkoutput_path", ('mkoutput',)),
        ("outs_path", ('outs',)),
        ("outs_assembly_path", ('outs', 'assembly')),
        ("outs_assembly_stats_path", ('outs', 'assembly', 'stats')),
    )
    base_paths = (TenxApp.config["TENX_DATA_PATH"], TenxApp.config["TENX_REMOTE_URL"])
    for base_path in base_paths:
        sample = TenxSample(base_path=base_path, name="TESTER")
        asm = sample.assembly()
        self.assertEqual(asm.path, os.path.join(sample.path, "assembly"))
        for attr_name, tail in expected_layout:
            self.assertEqual(
                getattr(asm, attr_name),
                os.path.join(base_path, 'TESTER', 'assembly', *tail))
def test33_run_mkoutput_success(self, check_call_patch):
    # With outs/assembly present and four fasta.gz files already sitting in
    # the mkoutput directory, run_mkoutput completes and logs each
    # supernova mkoutput command.
    check_call_patch.return_value = '0'
    captured = io.StringIO()
    sys.stderr = captured

    data_root = TenxApp.config.get("TENX_DATA_PATH")
    asm = TenxSample(base_path=data_root, name="TESTER").assembly()
    os.makedirs(asm.outs_assembly_path)

    mkoutput_dn = asm.mkoutput_path
    os.makedirs(mkoutput_dn)
    for idx in range(4):
        fasta_fn = os.path.join(mkoutput_dn, "{}.fasta.gz".format(idx))
        with open(fasta_fn, "w") as fasta_f:
            fasta_f.write(">SEQ1\nATGC")
            fasta_f.flush()

    assembly.run_mkoutput(asm)

    template = (
        "Running mkoutput for TESTER...\n"
        "Checking if supernova is in PATH...\n"
        "RUNNING: supernova --help\n"
        "Entering {ASM_D}/mkoutput\n"
        "RUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.raw --style=raw\n"
        "RUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.megabubbles --style=megabubbles\n"
        "RUNNING: supernova mkoutput --asmdir={ASM_D}/outs/assembly --outprefix=TESTER.pseudohap2 --style=pseudohap2\n"
    )
    expected_err = template.format(ASM_D=asm.path)
    self.assertEqual(captured.getvalue(), expected_err)
    sys.stderr = sys.__stderr__
def asm_pipeline_cmd(sample_name):
    """
    Run the Assembly Pipeline with Cromwell

    Process includes: downloading reads, running supernova, mkoutput, and then uploading the assembly.
    """
    assert TenxApp.config, "Must provide tenx yaml config file!"
    sys.stderr.write("Run assembly pipeline for {}\n".format(sample_name))

    # Announce the start, tagged with the host the pipeline runs on.
    hostname = socket.gethostname()
    notifications.slack("{} START {}".format(sample_name, hostname))

    data_root = TenxApp.config["TENX_DATA_PATH"]
    asm = TenxSample(name=sample_name, base_path=data_root).assembly()
    try:
        run_pipeline(asm)
    except BaseException as ex:
        # BaseException on purpose: interrupts and exits are reported too.
        # The failure is logged and notified, then re-raised unchanged.
        sys.stderr.write("Exception: {}\n".format(ex))
        sys.stderr.write(
            "Exception encountered, sending notifications if configured...\n")
        notifications.slack("{} FAILED {}".format(sample_name, socket.gethostname()))
        raise
    else:
        notifications.slack("{} SUCCESS {}".format(sample_name, hostname))
def test_ul_no_fastqs(self, check_call_p):
    # Uploading must fail first because the local reads directory does not
    # exist, and then - once it exists but is empty - because it contains no
    # fastqs.
    check_call_p.return_value = 1
    err = io.StringIO()
    saved_stderr = sys.stderr
    sys.stderr = err
    try:
        # A sample rooted at a directory that was never created.
        blah_dn = os.path.join(self.temp_d.name, "blah")
        lsample = TenxSample(base_path=blah_dn, name=self.lsample.name)

        with self.assertRaisesRegex(Exception, 'Sample reads path does not exist!'):
            tenx.reads_ul.ul(lsample, self.rsample)

        os.makedirs(lsample.reads_path)
        # The message text (including its wording) is the one raised by the
        # code under test.
        with self.assertRaisesRegex(Exception, "Did find any fastqs in reads path!"):
            tenx.reads_ul.ul(lsample, self.rsample)
    finally:
        # Previously stderr stayed redirected to the StringIO after this
        # test; always put the original stream back.
        sys.stderr = saved_stderr
def aln_upload_cmd(sample_name):
    """
    Upload an alignment from local disk to cloud storage.
    """
    # The summary above previously said "assembly" (copied from the assembly
    # upload command); this command builds and uploads alignments.
    config = TenxApp.config
    assert config, "Must provide tenx yaml config file!"
    # Same sample name, two roots: the local data path and the remote URL.
    local_sample = TenxSample(name=sample_name, base_path=config["TENX_DATA_PATH"])
    local_aln = local_sample.alignment()
    remote_sample = TenxSample(name=sample_name, base_path=config["TENX_REMOTE_URL"])
    remote_aln = remote_sample.alignment()
    alignment.run_upload(local_aln, remote_aln)
def setUp(self):
    # Fixture: a local assembly under a fresh temporary directory (which also
    # becomes the working directory) and a remote assembly of the same sample
    # under gs://data; the app config is pointed at both roots.
    self.temp_d = tempfile.TemporaryDirectory()
    self.rurl = "gs://data"
    os.chdir(self.temp_d.name)

    local_sample = TenxSample(base_path=self.temp_d.name, name='TESTER')
    self.asm = local_sample.assembly()
    remote_sample = TenxSample(base_path=self.rurl, name='TESTER')
    self.remote_asm = remote_sample.assembly()

    # Presumably instantiating TenxApp initialises TenxApp.config - the item
    # assignments below rely on it being subscriptable.
    if TenxApp.config is None:
        TenxApp()
    TenxApp.config['TENX_DATA_PATH'] = self.temp_d.name
    TenxApp.config['TENX_REMOTE_URL'] = self.rurl
def test11_is_successful(self):
    # An alignment with outs/summary.csv is successful; one with only its
    # own directory is not.
    data_root = TenxApp.config['TENX_DATA_PATH']

    # --- success case ---
    good_aln = TenxSample(base_path=data_root, name="TEST_SUCCESS").alignment()
    os.makedirs(good_aln.outs_path)
    summary_fn = os.path.join(good_aln.outs_path, "summary.csv")
    with open(summary_fn, "w") as summary_f:
        summary_f.write("SUCCESS!")
    self.assertTrue(good_aln.is_successful())

    # --- failure case ---
    data_root = TenxApp.config['TENX_DATA_PATH']
    bad_aln = TenxSample(base_path=data_root, name="TEST_FAIL").alignment()
    os.makedirs(bad_aln.path)
    self.assertFalse(bad_aln.is_successful())