def test_setup_samples(self):
    """Test setting up samples, changing genome build to rn4.

    Verifies that both the sample config (-bcbb-config.yaml) and the
    post-process config (-post_process.yaml) are rewritten with the new
    genome build, baits/targets and core count, and that an amplicon
    analysis disables duplicate marking.
    """
    flist = find_samples(j_doe_00_05)
    # First pass: standard seqcap analysis.
    for f in flist:
        setup_sample(f, **{'analysis': 'Align_standard_seqcap',
                           'genome_build': 'rn4',
                           'dry_run': False,
                           'baits': 'rat_baits.interval_list',
                           'targets': 'rat_targets.interval_list',
                           'num_cores': 8,
                           'distributed': False})
    for f in flist:
        with open(f, "r") as fh:
            # safe_load: configs are plain data; yaml.load without an
            # explicit Loader is deprecated (an error in PyYAML >= 6).
            config = yaml.safe_load(fh)
        if config["details"][0].get("multiplex", None):
            self.assertEqual(
                config["details"][0]["multiplex"][0]["genome_build"], "rn4")
        else:
            self.assertEqual(config["details"][0]["genome_build"], "rn4")
        with open(f.replace("-bcbb-config.yaml", "-post_process.yaml")) as fh:
            config = yaml.safe_load(fh)
        self.assertEqual(
            config["custom_algorithms"][ANALYSIS_TYPE]["hybrid_bait"],
            'rat_baits.interval_list')
        self.assertEqual(
            config["custom_algorithms"][ANALYSIS_TYPE]["hybrid_target"],
            'rat_targets.interval_list')
        self.assertEqual(config["algorithm"]["num_cores"], 8)
    # Second pass: amplicon analysis; mark_duplicates must be turned off.
    # NOTE: the original literal carried a duplicated 'dry_run' key, which
    # has been removed here (the second occurrence silently won anyway).
    for f in flist:
        setup_sample(f, **{'analysis': ANALYSIS_TYPE,
                           'genome_build': 'rn4',
                           'dry_run': False,
                           'no_only_run': True,
                           'google_report': True,
                           'baits': 'rat_baits.interval_list',
                           'targets': 'rat_targets.interval_list',
                           'amplicon': True,
                           'num_cores': 8,
                           'distributed': False})
        with open(f, "r") as fh:
            config = yaml.safe_load(fh)
        if config["details"][0].get("multiplex", None):
            self.assertEqual(
                config["details"][0]["multiplex"][0]["genome_build"], "rn4")
        else:
            self.assertEqual(config["details"][0]["genome_build"], "rn4")
        with open(f.replace("-bcbb-config.yaml", "-post_process.yaml")) as fh:
            config = yaml.safe_load(fh)
        self.assertEqual(config["algorithm"]["mark_duplicates"], False)
        self.assertEqual(
            config["custom_algorithms"][ANALYSIS_TYPE]["mark_duplicates"],
            False)
def run(self):
    """Set up sample analyses and launch the bcbb pipeline for every
    sample configuration file found under the project directory.

    Honors the parsed CLI arguments (``self.pargs``): optional samplesheet
    conversion, flowcell filtering, merged-sample setup, setup-only and
    failed-only modes, and restart (removal of old analysis files).
    """
    if not self._check_pargs(["project"]):
        return
    if self.pargs.post_process:
        self.pargs.post_process = os.path.abspath(self.pargs.post_process)
    basedir = os.path.abspath(
        os.path.join(self.app.controller._meta.root_path,
                     self.app.controller._meta.path_id))
    if self.pargs.from_ssheet:
        # Side-effect loop (was a discarded list comprehension).
        for fn in find_samples(basedir, pattern="SampleSheet.csv$",
                               **vars(self.pargs)):
            samplesheet_csv_to_yaml(fn)
    flist = find_samples(basedir, **vars(self.pargs))
    # Add filtering on flowcell if necessary
    self._meta.pattern = ".*"
    flist = [x for x in flist if self._filter_fn(x)]
    if self.pargs.merged:
        ## Setup merged samples and append to flist if new list longer
        flist = setup_merged_samples(flist, **vars(self.pargs))
    if not flist:
        self.log.info("No sample configuration files found")
        return
    # flist is guaranteed non-empty here, so the confirmation prompt
    # needs no extra length guard.
    if not query_yes_no(
            "Going to start {} jobs... Are you sure you want to continue?".format(len(flist)),
            force=self.pargs.force):
        return
    # Make absolutely sure analysis directory is a *subdirectory* of the working directory
    validate_sample_directories(flist, basedir)
    orig_dir = os.path.abspath(os.getcwd())
    for run_info in flist:
        os.chdir(os.path.abspath(os.path.dirname(run_info)))
        setup_sample(run_info, **vars(self.pargs))
        os.chdir(orig_dir)
    if self.pargs.only_setup:
        return
    if self.pargs.only_failed:
        # Compute each sample's status once and reuse it in the filter
        # (the original recomputed _sample_status per sample).
        status = {x: self._sample_status(x) for x in flist}
        flist = [x for x in flist if status[x] == "FAIL"]
    ## Here process files again, removing if requested, and running the pipeline
    for run_info in flist:
        self.app.log.info(
            "Running analysis defined by config file {}".format(run_info))
        os.chdir(os.path.abspath(os.path.dirname(run_info)))
        if self.app.cmd.monitor(work_dir=os.path.dirname(run_info)):
            self.app.log.warn("Not running job")
            continue
        if self.pargs.restart:
            self.app.log.info("Removing old analysis files in {}".format(
                os.path.dirname(run_info)))
            remove_files(run_info, **vars(self.pargs))
        (cl, platform_args) = run_bcbb_command(run_info, **vars(self.pargs))
        self.app.cmd.command(
            cl, **{"platform_args": platform_args,
                   "saveJobId": True,
                   "workingDirectory": os.path.dirname(run_info)})
    # Restore the original working directory after processing all samples.
    os.chdir(orig_dir)
def test_bcbb_command(self):
    """Test output from command, changing analysis to amplicon and setting
    targets and baits.

    A non-distributed setup must generate the standard pipeline command;
    a distributed setup must generate the distributed pipeline command.
    (A duplicated 'dry_run' key has been removed from both kwarg literals.)
    """
    flist = find_samples(j_doe_00_05)
    for f in flist:
        # Non-distributed: expect the standard analysis entry point.
        setup_sample(f, **{'analysis': ANALYSIS_TYPE,
                           'genome_build': 'rn4',
                           'dry_run': False,
                           'no_only_run': False,
                           'google_report': False,
                           'baits': 'rat_baits.interval_list',
                           'targets': 'rat_targets.interval_list',
                           'amplicon': True,
                           'num_cores': 8,
                           'distributed': False})
        with open(f.replace("-bcbb-config.yaml", "-bcbb-command.txt")) as fh:
            cl = fh.read().split()
        (cl, platform_args) = run_bcbb_command(f)
        self.assertIn("automated_initial_analysis.py", cl)
        # Distributed: expect the distributed pipeline entry point.
        setup_sample(f, **{'analysis': ANALYSIS_TYPE,
                           'genome_build': 'rn4',
                           'dry_run': False,
                           'no_only_run': False,
                           'google_report': False,
                           'baits': 'rat_baits.interval_list',
                           'targets': 'rat_targets.interval_list',
                           'amplicon': True,
                           'num_cores': 8,
                           'distributed': True})
        with open(f.replace("-bcbb-config.yaml", "-bcbb-command.txt")) as fh:
            cl = fh.read().split()
        (cl, platform_args) = run_bcbb_command(f)
        self.assertIn("distributed_nextgen_pipeline.py", cl)
def test_bcbb_command(self):
    """Test output from command, changing analysis to amplicon and setting
    targets and baits.

    Only the 'distributed' flag differs between the two setup calls, so
    the shared options are built once. (A duplicated 'dry_run' key has
    been removed from the original literals.)
    """
    flist = find_samples(j_doe_00_05)
    for f in flist:
        opts = {'analysis': ANALYSIS_TYPE,
                'genome_build': 'rn4',
                'dry_run': False,
                'no_only_run': False,
                'google_report': False,
                'baits': 'rat_baits.interval_list',
                'targets': 'rat_targets.interval_list',
                'amplicon': True,
                'num_cores': 8}
        # Non-distributed run uses the standard pipeline script.
        setup_sample(f, distributed=False, **opts)
        with open(f.replace("-bcbb-config.yaml", "-bcbb-command.txt")) as fh:
            cl = fh.read().split()
        (cl, platform_args) = run_bcbb_command(f)
        self.assertIn("automated_initial_analysis.py", cl)
        # Distributed run switches to the distributed pipeline script.
        setup_sample(f, distributed=True, **opts)
        with open(f.replace("-bcbb-config.yaml", "-bcbb-command.txt")) as fh:
            cl = fh.read().split()
        (cl, platform_args) = run_bcbb_command(f)
        self.assertIn("distributed_nextgen_pipeline.py", cl)
def run(self):
    """Set up and run the bcbb analysis pipeline for all sample
    configuration files found in the project directory.

    Respects the parsed CLI options (``self.pargs``): samplesheet
    conversion, flowcell filtering, merged samples, setup-only /
    failed-only modes and restart handling.
    """
    if not self._check_pargs(["project"]):
        return
    if self.pargs.post_process:
        self.pargs.post_process = os.path.abspath(self.pargs.post_process)
    basedir = os.path.abspath(
        os.path.join(self.app.controller._meta.root_path,
                     self.app.controller._meta.path_id))
    if self.pargs.from_ssheet:
        # Convert every SampleSheet.csv to yaml (side-effect loop; the
        # original built and discarded a list).
        for fn in find_samples(basedir, pattern="SampleSheet.csv$",
                               **vars(self.pargs)):
            samplesheet_csv_to_yaml(fn)
    flist = find_samples(basedir, **vars(self.pargs))
    # Add filtering on flowcell if necessary
    self._meta.pattern = ".*"
    flist = [x for x in flist if self._filter_fn(x)]
    if self.pargs.merged:
        ## Setup merged samples and append to flist if new list longer
        flist = setup_merged_samples(flist, **vars(self.pargs))
    if not flist:
        self.log.info("No sample configuration files found")
        return
    # flist is non-empty past this point; no extra length guard needed.
    if not query_yes_no(
            "Going to start {} jobs... Are you sure you want to continue?".format(len(flist)),
            force=self.pargs.force):
        return
    # Make absolutely sure analysis directory is a *subdirectory* of the working directory
    validate_sample_directories(flist, basedir)
    orig_dir = os.path.abspath(os.getcwd())
    for run_info in flist:
        os.chdir(os.path.abspath(os.path.dirname(run_info)))
        setup_sample(run_info, **vars(self.pargs))
        os.chdir(orig_dir)
    if self.pargs.only_setup:
        return
    if self.pargs.only_failed:
        # Reuse the precomputed status map instead of calling
        # _sample_status twice per sample as the original did.
        status = {x: self._sample_status(x) for x in flist}
        flist = [x for x in flist if status[x] == "FAIL"]
    ## Here process files again, removing if requested, and running the pipeline
    for run_info in flist:
        self.app.log.info(
            "Running analysis defined by config file {}".format(run_info))
        os.chdir(os.path.abspath(os.path.dirname(run_info)))
        if self.app.cmd.monitor(work_dir=os.path.dirname(run_info)):
            self.app.log.warn("Not running job")
            continue
        if self.pargs.restart:
            self.app.log.info("Removing old analysis files in {}".format(
                os.path.dirname(run_info)))
            remove_files(run_info, **vars(self.pargs))
        (cl, platform_args) = run_bcbb_command(run_info, **vars(self.pargs))
        self.app.cmd.command(
            cl, **{'platform_args': platform_args,
                   'saveJobId': True,
                   'workingDirectory': os.path.dirname(run_info)})
    # Restore the working directory once all jobs are dispatched.
    os.chdir(orig_dir)
def test_setup_samples(self):
    """Test setting up samples, changing genome to rn4.

    Checks that the sample config and the generated post-process config
    pick up the new genome build, baits/targets and core count, and that
    the amplicon analysis disables duplicate marking. (A duplicated
    'dry_run' key has been removed from the second kwargs literal.)
    """
    flist = find_samples(j_doe_00_05)
    for f in flist:
        setup_sample(
            f, **{
                'analysis': 'Align_standard_seqcap',
                'genome_build': 'rn4',
                'dry_run': False,
                'baits': 'rat_baits.interval_list',
                'targets': 'rat_targets.interval_list',
                'num_cores': 8,
                'distributed': False
            })
    for f in flist:
        with open(f, "r") as fh:
            # yaml.safe_load: plain-data configs; plain yaml.load without
            # a Loader is deprecated and fails under PyYAML >= 6.
            config = yaml.safe_load(fh)
        if config["details"][0].get("multiplex", None):
            self.assertEqual(
                config["details"][0]["multiplex"][0]["genome_build"],
                "rn4")
        else:
            self.assertEqual(config["details"][0]["genome_build"], "rn4")
        with open(f.replace("-bcbb-config.yaml",
                            "-post_process.yaml")) as fh:
            config = yaml.safe_load(fh)
        self.assertEqual(
            config["custom_algorithms"][ANALYSIS_TYPE]["hybrid_bait"],
            'rat_baits.interval_list')
        self.assertEqual(
            config["custom_algorithms"][ANALYSIS_TYPE]["hybrid_target"],
            'rat_targets.interval_list')
        self.assertEqual(config["algorithm"]["num_cores"], 8)
    for f in flist:
        setup_sample(
            f, **{
                'analysis': ANALYSIS_TYPE,
                'genome_build': 'rn4',
                'dry_run': False,
                'no_only_run': True,
                'google_report': True,
                'baits': 'rat_baits.interval_list',
                'targets': 'rat_targets.interval_list',
                'amplicon': True,
                'num_cores': 8,
                'distributed': False
            })
        with open(f, "r") as fh:
            config = yaml.safe_load(fh)
        if config["details"][0].get("multiplex", None):
            self.assertEqual(
                config["details"][0]["multiplex"][0]["genome_build"],
                "rn4")
        else:
            self.assertEqual(config["details"][0]["genome_build"], "rn4")
        with open(f.replace("-bcbb-config.yaml",
                            "-post_process.yaml")) as fh:
            config = yaml.safe_load(fh)
        self.assertEqual(config["algorithm"]["mark_duplicates"], False)
        self.assertEqual(
            config["custom_algorithms"][ANALYSIS_TYPE]["mark_duplicates"],
            False)