def demultiplex_flowcell(
    context: CGConfig,
    dry_run: bool,
    flowcell_id: str,
    bcl_converter: str,
):
    """Start demultiplexing of a single flowcell on slurm.

    flowcell-id is the name of the flowcell run directory,
    e.g. '201203_A00689_0200_AHVKJCDRXX'.

    The flowcell is looked up under the configured run directory. Before the
    job is started, DeleteDemuxAPI.delete_flow_cell is invoked for the
    flowcell, and the resulting slurm job is added to Trailblazer.

    Raises:
        click.Abort: if the flowcell can not be instantiated or if the sample
            sheet does not validate.
    """
    LOG.info("Running cg demultiplex flowcell, using %s.", bcl_converter)
    run_path: Path = Path(context.demultiplex.run_dir) / flowcell_id
    demux_api: DemultiplexingAPI = context.demultiplex_api
    demux_api.set_dry_run(dry_run=dry_run)
    LOG.info(f"SETTING FLOWCELL ID TO {flowcell_id}")
    LOG.info(f"SETTING OUT DIR TO {demux_api.out_dir}")

    try:
        flowcell = Flowcell(flowcell_path=run_path, bcl_converter=bcl_converter)
    except FlowcellError as error:
        raise click.Abort from error

    # Run the delete step first; the run directory and status db are left untouched
    cleaner: DeleteDemuxAPI = DeleteDemuxAPI(
        config=context,
        demultiplex_base=demux_api.out_dir,
        dry_run=dry_run,
        run_path=run_path,
    )
    cleaner.delete_flow_cell(
        cg_stats=True,
        demultiplexing_dir=True,
        run_dir=False,
        housekeeper=True,
        init_files=True,
        status_db=False,
    )

    # The check is always executed, but a dry run is allowed to continue regardless
    demultiplexing_blocked: bool = not demux_api.is_demultiplexing_possible(flowcell=flowcell)
    if demultiplexing_blocked and not dry_run:
        LOG.warning("Can not start demultiplexing!")
        return

    sample_sheet_is_valid = flowcell.validate_sample_sheet()
    if not sample_sheet_is_valid:
        LOG.warning(
            "Malformed sample sheet. Run cg demultiplex samplesheet validate %s",
            flowcell.sample_sheet_path,
        )
        raise click.Abort

    job_id: int = demux_api.start_demultiplexing(flowcell=flowcell)
    demux_api.add_to_trailblazer(
        tb_api=context.trailblazer_api,
        slurm_job_id=job_id,
        flowcell=flowcell,
    )
def demultiplex_all(
    context: CGConfig, bcl_converter: str, flowcells_directory: click.Path, dry_run: bool
):
    """Demultiplex all flowcells that are ready under the flowcells_directory.

    When no flowcells_directory is given, the run directory from the config is
    searched instead. Sub directories are skipped when they can not be
    instantiated as a flowcell, when demultiplexing is not possible (unless it
    is a dry run) or when the sample sheet does not validate. For each
    remaining flowcell, DeleteDemuxAPI.delete_flow_cell is invoked, the
    demultiplexing job is started and the job is added to Trailblazer.
    """
    LOG.info("Running cg demultiplex all, using %s.", bcl_converter)
    # Use a separate local instead of re-annotating the parameter with another type
    search_directory: Path = (
        Path(str(flowcells_directory))
        if flowcells_directory
        else Path(context.demultiplex.run_dir)
    )
    demultiplex_api: DemultiplexingAPI = context.demultiplex_api
    demultiplex_api.set_dry_run(dry_run=dry_run)
    tb_api: TrailblazerAPI = context.trailblazer_api
    LOG.info("Search for flowcells ready to demultiplex in %s", search_directory)
    for sub_dir in search_directory.iterdir():
        if not sub_dir.is_dir():
            continue
        LOG.info("Found directory %s", sub_dir)
        try:
            flowcell_obj = Flowcell(flowcell_path=sub_dir, bcl_converter=bcl_converter)
        except FlowcellError:
            continue

        if not demultiplex_api.is_demultiplexing_possible(flowcell=flowcell_obj) and not dry_run:
            continue

        if not flowcell_obj.validate_sample_sheet():
            LOG.warning(
                "Malformed sample sheet. Run cg demultiplex samplesheet validate %s",
                flowcell_obj.sample_sheet_path,
            )
            continue

        # iterdir() already yields paths that include the searched directory.
        # Joining the directory once more would duplicate the prefix whenever
        # the directory is relative, so the sub directory is passed as is.
        delete_demux_api: DeleteDemuxAPI = DeleteDemuxAPI(
            config=context,
            demultiplex_base=demultiplex_api.out_dir,
            dry_run=dry_run,
            run_path=sub_dir,
        )
        delete_demux_api.delete_flow_cell(
            cg_stats=False,
            demultiplexing_dir=True,
            run_dir=False,
            housekeeper=True,
            init_files=False,
            status_db=False,
        )

        slurm_job_id: int = demultiplex_api.start_demultiplexing(flowcell=flowcell_obj)
        demultiplex_api.add_to_trailblazer(
            tb_api=tb_api, slurm_job_id=slurm_job_id, flowcell=flowcell_obj
        )
def fixture_flowcell_working_directory(
    novaseq_dir: Path, flowcell_runs_working_directory: Path
) -> Path:
    """Return a flowcell working directory that holds only the run parameters.

    The directory is created under flowcell_runs_working_directory, named
    after novaseq_dir, and the run parameters file of novaseq_dir is copied
    into it.
    """
    working_dir: Path = flowcell_runs_working_directory / novaseq_dir.name
    working_dir.mkdir(parents=True)

    source_flowcell: Flowcell = Flowcell(flowcell_path=novaseq_dir)
    target_flowcell: Flowcell = Flowcell(flowcell_path=working_dir)
    source_file: str = str(source_flowcell.run_parameters_path)
    target_file: str = str(target_flowcell.run_parameters_path)
    shutil.copy(source_file, target_file)

    return working_dir
def test_demultiplex_flowcell_dry_run(
    cli_runner: testing.CliRunner,
    demultiplex_ready_flowcell: Path,
    demultiplex_context: CGConfig,
    caplog,
):
    """A dry run of the demultiplex command must not create any result directories."""
    caplog.set_level(logging.INFO)

    # GIVEN a flowcell that has everything in place for demultiplexing
    ready_flowcell: Flowcell = Flowcell(demultiplex_ready_flowcell)
    api: DemultiplexingAPI = demultiplex_context.demultiplex_api
    assert api.is_demultiplexing_possible(flowcell=ready_flowcell)

    # GIVEN that neither the out dir nor its Unaligned folder exists
    out_dir: Path = api.flowcell_out_dir_path(ready_flowcell)
    unaligned: Path = out_dir / "Unaligned"
    assert out_dir.exists() is False
    assert unaligned.exists() is False

    # WHEN invoking the CLI with the dry run flag
    cli_args = [str(demultiplex_ready_flowcell), "--dry-run"]
    result: testing.Result = cli_runner.invoke(
        demultiplex_flowcell, cli_args, obj=demultiplex_context
    )

    # THEN the command finishes successfully
    assert result.exit_code == 0

    # THEN nothing was written since this was a dry run
    assert out_dir.exists() is False
    assert unaligned.exists() is False
def finish_flowcell(self, flowcell_name: str, bcl_converter: str, force: bool = False) -> None:
    """Run the post processing steps for a demultiplexed flowcell.

    Returns without post processing when the flowcell can not be instantiated,
    when demultiplexing is not completed or when the results directory is
    missing. A flowcell whose files are already renamed is only post processed
    again when force is set.
    """
    LOG.info("Check demuxed flowcell %s", flowcell_name)
    try:
        flowcell: Flowcell = Flowcell(
            flowcell_path=self.demux_api.run_dir / flowcell_name,
            bcl_converter=bcl_converter,
        )
    except FlowcellError:
        return

    demultiplexing_done = self.demux_api.is_demultiplexing_completed(flowcell=flowcell)
    if not demultiplexing_done:
        LOG.warning("Demultiplex is not ready for %s", flowcell_name)
        return

    results: DemuxResults = DemuxResults(
        demux_dir=self.demux_api.out_dir / flowcell_name,
        flowcell=flowcell,
        bcl_converter=bcl_converter,
    )
    if not results.results_dir.exists():
        LOG.warning("Could not find results directory %s", results.results_dir)
        LOG.info("Can not finish flowcell %s", flowcell_name)
        return

    if results.files_renamed():
        LOG.warning("Flowcell is already finished!")
        if force:
            LOG.info("Post processing flowcell anyway")
        else:
            return

    self.post_process_flowcell(demux_results=results)
def test_create_sample_sheet_no_run_parameters(
    cli_runner: testing.CliRunner,
    flowcell_working_directory_no_run_parameters: Path,
    sample_sheet_context: CGConfig,
    caplog,
    mocker,
):
    """Creating a sample sheet must fail when the run parameters file is missing."""
    # GIVEN a flowcell directory without a run parameters file
    run_dir: Path = flowcell_working_directory_no_run_parameters
    flowcell: Flowcell = Flowcell(run_dir)
    assert flowcell.run_parameters_path.exists() is False

    # GIVEN that the lims lookup is patched to return one sample
    mocker.patch(
        "cg.cli.demultiplex.sample_sheet.flowcell_samples",
        return_value=[{"sample": 1}],
    )

    # GIVEN a demultiplexing api that searches the parent of the flowcell directory
    api: DemultiplexingAPI = sample_sheet_context.demultiplex_api
    api.run_dir = run_dir.parent
    sample_sheet_context.demultiplex_api_ = api

    # WHEN running the create sample sheet command
    cli_args = [flowcell.flowcell_full_name]
    result: testing.Result = cli_runner.invoke(create_sheet, cli_args, obj=sample_sheet_context)

    # THEN the command exits with a non zero exit code
    assert result.exit_code != 0

    # THEN the missing run parameters file is reported
    assert "Could not find run parameters file" in caplog.text
def test_start_demultiplexing_when_already_completed(
    cli_runner: testing.CliRunner,
    demultiplex_ready_flowcell: Path,
    demultiplex_context: CGConfig,
    caplog,
    mocker,
):
    """Starting demultiplexing for an already completed flowcell must be reported in the log."""
    caplog.set_level(logging.DEBUG)

    # GIVEN that all files are present for demultiplexing
    flowcell: Flowcell = Flowcell(demultiplex_ready_flowcell)
    demux_api: DemultiplexingAPI = demultiplex_context.demultiplex_api

    # GIVEN that demultiplexing has started
    flowcell.demultiplexing_started_path.touch()

    # GIVEN a out dir that exist
    demux_api.flowcell_out_dir_path(flowcell).mkdir(parents=True)

    # GIVEN that demultiplexing is completed
    demux_api.demultiplexing_completed_path(flowcell=flowcell).touch()

    # WHEN starting demultiplexing from the CLI
    result: testing.Result = cli_runner.invoke(
        demultiplex_flowcell,
        [str(demultiplex_ready_flowcell), "-b", "bcl2fastq"],
        obj=demultiplex_context,
    )

    # THEN assert the command exits without problems
    assert result.exit_code == 0

    # THEN assert it was communicated that demultiplexing was completed
    # The message has to be looked up in the captured log; asserting the bare
    # string is always truthy and would never fail.
    assert (
        f"Demultiplexing is already completed for flowcell {flowcell.flowcell_id}" in caplog.text
    )
def test_create_dragen_sample_sheet(
    cli_runner: testing.CliRunner,
    flowcell_working_directory: Path,
    sample_sheet_context: CGConfig,
    lims_novaseq_dragen_samples: List[LimsFlowcellSampleDragen],
    mocker,
):
    """The create sample sheet command writes a valid sample sheet for dragen."""
    # GIVEN a flowcell directory where the run parameters are present
    dragen_flowcell: Flowcell = Flowcell(flowcell_working_directory, bcl_converter="dragen")
    assert dragen_flowcell.run_parameters_path.exists()

    # GIVEN that the flowcell has no sample sheet yet
    assert not dragen_flowcell.sample_sheet_exists()

    # GIVEN that the lims lookup is patched to return the dragen samples
    mocker.patch(
        "cg.cli.demultiplex.sample_sheet.flowcell_samples",
        return_value=lims_novaseq_dragen_samples,
    )

    # WHEN creating a sample sheet with dragen as bcl converter
    cli_args = [str(flowcell_working_directory), "-b", "dragen"]
    result = cli_runner.invoke(create_sheet, cli_args, obj=sample_sheet_context)

    # THEN the command succeeds
    assert result.exit_code == 0

    # THEN the sample sheet exists
    assert dragen_flowcell.sample_sheet_exists()

    # THEN the sample sheet passes validation
    assert dragen_flowcell.validate_sample_sheet()
def test_demultiplex_all(
    cli_runner: testing.CliRunner,
    demultiplex_context: CGConfig,
    demultiplex_ready_flowcell: Path,
    caplog,
    mocker,
):
    """A dry run of demultiplex all finds the flowcell that is ready for demultiplexing."""
    caplog.set_level(logging.INFO)

    # GIVEN a context whose run dir holds at least one flowcell that is ready for demux
    api: DemultiplexingAPI = demultiplex_context.demultiplex_api
    ready_flowcell: Flowcell = Flowcell(flowcell_path=demultiplex_ready_flowcell)
    assert api.run_dir == demultiplex_ready_flowcell.parent

    # WHEN running the demultiplex all command as a dry run
    result: testing.Result = cli_runner.invoke(
        demultiplex_all,
        ["--dry-run"],
        obj=demultiplex_context,
    )

    # THEN the command exits without problems
    assert result.exit_code == 0

    # THEN the directory was found
    assert "Found directory" in caplog.text

    # THEN the flowcell was reported as ready for demultiplexing
    expected_message = f"Flowcell {ready_flowcell.flowcell_id} is ready for demultiplexing"
    assert expected_message in caplog.text
def fixture_demultiplex_ready_flowcell(flowcell_working_directory: Path, novaseq_dir: Path) -> Path:
    """Return a flowcell working directory that is ready for demultiplexing.

    The sample sheet and the stderr log file are copied over from novaseq_dir,
    and the copy complete and rta complete files are created.
    """
    source: Flowcell = Flowcell(flowcell_path=novaseq_dir)
    target: Flowcell = Flowcell(flowcell_path=flowcell_working_directory)

    # Each pair is (file of the existing flowcell, destination in the working flowcell)
    files_to_copy = (
        (source.sample_sheet_path, target.sample_sheet_path),
        (
            DemultiplexingAPI.get_stderr_logfile(source),
            DemultiplexingAPI.get_stderr_logfile(target),
        ),
    )
    for origin, destination in files_to_copy:
        shutil.copy(str(origin), str(destination))

    for marker_file in (target.copy_complete_path, target.rta_complete_path):
        marker_file.touch()

    return flowcell_working_directory
def create_all_sheets(context: CGConfig, bcl_converter: str, dry_run: bool):
    """Command to create sample sheets for all flowcells that lack a sample sheet.

    Search the flowcell directories under the run directory and create a sample
    sheet for each flowcell, based on the samples that lims returns for it.
    Directories are skipped when they can not be instantiated as a flowcell,
    when a sample sheet already exists, when lims has no samples or when the
    sample sheet can not be created. With dry_run the sample sheets are echoed
    instead of written to file.
    """
    demux_api: DemultiplexingAPI = context.demultiplex_api
    flowcells: Path = demux_api.run_dir
    for sub_dir in flowcells.iterdir():
        if not sub_dir.is_dir():
            continue
        LOG.info("Found directory %s", sub_dir)
        try:
            flowcell_object = Flowcell(flowcell_path=sub_dir, bcl_converter=bcl_converter)
        except FlowcellError:
            continue
        if flowcell_object.sample_sheet_exists():
            LOG.info("Sample sheet already exists")
            continue
        LOG.info("Creating sample sheet for flowcell %s", flowcell_object.flowcell_id)
        lims_samples: List[LimsFlowcellSample] = list(
            flowcell_samples(
                lims=context.lims_api,
                flowcell_id=flowcell_object.flowcell_id,
                bcl_converter=bcl_converter,
            )
        )
        if not lims_samples:
            LOG.warning("Could not find any samples in lims for %s", flowcell_object.flowcell_id)
            continue

        try:
            sample_sheet: str = create_sample_sheet(
                flowcell=flowcell_object, lims_samples=lims_samples, bcl_converter=bcl_converter
            )
        except (FileNotFoundError, FileExistsError):
            continue

        if dry_run:
            click.echo(sample_sheet)
            # Move on to the next flowcell; returning here would end the dry
            # run after the first flowcell instead of previewing all of them.
            continue
        LOG.info("Writing sample sheet to %s", flowcell_object.sample_sheet_path.resolve())
        with open(flowcell_object.sample_sheet_path, "w") as outfile:
            outfile.write(sample_sheet)
def test_flowcell_id(flowcell_path: Path):
    """The flowcell id is the last part of the directory name without its first character."""
    # GIVEN the path to a finished flowcell run
    # GIVEN the flowcell id derived from the directory name
    last_name_part: str = flowcell_path.name.split("_")[-1]
    expected_id: str = last_name_part[1:]

    # WHEN instantiating a flowcell object
    flowcell = Flowcell(flowcell_path)

    # THEN the flowcell id is parsed correctly
    assert flowcell.flowcell_id == expected_id
def test_flowcell_position(flowcell_path: Path):
    """The flowcell position of a flowcell is either A or B."""
    # GIVEN a flowcell object for the path to a finished flowcell
    flowcell = Flowcell(flowcell_path)

    # WHEN fetching the flowcell position
    # THEN it is A or B
    assert flowcell.flowcell_position in ["A", "B"]
def test_get_run_parameters_when_non_existing(fixtures_dir: Path):
    """Fetching the run parameters object fails when the run parameters file is missing."""
    # GIVEN a flowcell object for a directory without run parameters
    demultiplexed_runs: Path = fixtures_dir / "apps" / "demultiplexing" / "demultiplexed-runs"
    flowcell = Flowcell(flowcell_path=demultiplexed_runs / "201203_A00689_0200_AHVKJCDRXX")
    assert flowcell.run_parameters_path.exists() is False

    # WHEN fetching the run parameters object
    # THEN a FileNotFoundError is raised
    with pytest.raises(FileNotFoundError):
        flowcell.run_parameters_object
def create_report_cmd(context: CGConfig, flowcell_name: str):
    """Create a demux report for a flowcell and echo it to stdout.

    Raises:
        click.Abort: if the flowcell can not be instantiated or if the
            conversion stats file does not exist.
    """
    LOG.info("Check demuxed flowcell %s", flowcell_name)
    api: DemultiplexingAPI = context.demultiplex_api
    try:
        flowcell: Flowcell = Flowcell(flowcell_path=api.run_dir / flowcell_name)
    except FlowcellError:
        raise click.Abort

    results: DemuxResults = DemuxResults(
        demux_dir=api.out_dir / flowcell_name,
        flowcell=flowcell,
    )
    stats_file: Path = results.conversion_stats_path
    if not stats_file.exists():
        LOG.warning("Could not find conversion stats file %s", stats_file)
        raise click.Abort

    stats = ConversionStats(results.conversion_stats_path)
    report_lines = create_demux_report(conversion_stats=stats)
    click.echo("\n".join(report_lines))
def create_sheet(context: CGConfig, flowcell_name: str, bcl_converter: str, dry_run: bool):
    """Command to create a sample sheet for one flowcell.

    flowcell-name is the flowcell run directory name, e.g. '201203_A00689_0200_AHVKJCDRXX'.
    The flowcell is searched for in the run directory specified in the config.
    With dry_run the sample sheet is echoed instead of written to file.

    Raises:
        click.Abort: if the flowcell directory is missing, the flowcell can not
            be instantiated, lims returns no samples or the sample sheet can
            not be created.
    """
    LOG.info("Creating sample sheet for flowcell %s", flowcell_name)
    api: DemultiplexingAPI = context.demultiplex_api
    run_path: Path = api.run_dir / flowcell_name
    if not run_path.exists():
        LOG.warning("Could not find flowcell %s", run_path)
        raise click.Abort

    try:
        flowcell = Flowcell(flowcell_path=run_path, bcl_converter=bcl_converter)
    except FlowcellError:
        raise click.Abort

    samples_in_lims = flowcell_samples(
        lims=context.lims_api,
        flowcell_id=flowcell.flowcell_id,
        bcl_converter=bcl_converter,
    )
    lims_samples: List[Union[LimsFlowcellSampleBcl2Fastq, LimsFlowcellSampleDragen]] = list(
        samples_in_lims
    )
    if not lims_samples:
        LOG.warning("Could not find any samples in lims for %s", flowcell.flowcell_id)
        raise click.Abort

    try:
        sheet_content: str = create_sample_sheet(
            flowcell=flowcell,
            lims_samples=lims_samples,
            bcl_converter=bcl_converter,
        )
    except (FileNotFoundError, FileExistsError):
        raise click.Abort

    if dry_run:
        click.echo(sheet_content)
        return

    LOG.info("Writing sample sheet to %s", flowcell.sample_sheet_path.resolve())
    with open(flowcell.sample_sheet_path, "w") as sheet_file:
        sheet_file.write(sheet_content)
def test_demultiplex_dragen_flowcell(
    cli_runner: testing.CliRunner,
    demultiplex_ready_flowcell_dragen: Path,
    demultiplex_context: CGConfig,
    demultiplexed_flowcells_working_directory: Path,
    caplog,
    mocker,
):
    """Demultiplexing with dragen creates the result directories and the sbatch script."""
    caplog.set_level(logging.INFO)

    # GIVEN a flowcell that has everything in place for dragen demultiplexing
    dragen_flowcell: Flowcell = Flowcell(
        flowcell_path=demultiplex_ready_flowcell_dragen,
        bcl_converter="dragen",
    )

    # GIVEN that neither the out dir nor its Unaligned folder exists
    api: DemultiplexingAPI = demultiplex_context.demultiplex_api
    out_dir: Path = api.flowcell_out_dir_path(dragen_flowcell)
    unaligned: Path = out_dir / "Unaligned"
    assert api.is_demultiplexing_possible(flowcell=dragen_flowcell)
    assert out_dir.exists() is False
    assert unaligned.exists() is False

    # GIVEN that adding a pending analysis to Trailblazer is patched out
    mocker.patch("cg.apps.tb.TrailblazerAPI.add_pending_analysis")

    # WHEN starting demultiplexing from the CLI with dragen as bcl converter
    cli_args = [str(demultiplex_ready_flowcell_dragen), "-b", "dragen"]
    result: testing.Result = cli_runner.invoke(
        demultiplex_flowcell, cli_args, obj=demultiplex_context
    )

    # THEN the command exits without problems
    assert result.exit_code == 0

    # THEN the results folders were created
    assert out_dir.exists()
    assert unaligned.exists()

    # THEN the sbatch script was created
    assert api.demultiplex_sbatch_path(dragen_flowcell).exists()
def add_flowcell_cmd(context: CGConfig, flowcell_id: str, bcl_converter: str):
    """Add the demultiplexing results of a flowcell to the cgstats database.

    Raises:
        click.Abort: if the flowcell run path or the demultiplex result path
            is missing, or if the flowcell can not be instantiated.
    """
    stats_api: StatsAPI = context.cg_stats_api
    api: DemultiplexingAPI = context.demultiplex_api

    run_path: Path = api.run_dir / flowcell_id
    if not run_path.exists():
        LOG.warning("Could not find flowcell path %s", run_path)
        raise click.Abort

    results_path: Path = api.out_dir / flowcell_id
    if not results_path.exists():
        LOG.warning("Could not find demultiplex result path %s", results_path)
        raise click.Abort

    try:
        flowcell: Flowcell = Flowcell(flowcell_path=run_path, bcl_converter=bcl_converter)
    except FlowcellError:
        raise click.Abort

    results: DemuxResults = DemuxResults(
        demux_dir=results_path,
        flowcell=flowcell,
        bcl_converter=bcl_converter,
    )
    create_novaseq_flowcell(manager=stats_api, demux_results=results)
def mock_flowcell(flowcell_full_name: str) -> Flowcell:
    """Return a Flowcell whose path consists of only the given full flowcell name."""
    flowcell_path: Path = Path(flowcell_full_name)
    return Flowcell(flowcell_path=flowcell_path)
def fixture_flowcell_object(demux_run_dir: Path, flowcell_full_name: str) -> Flowcell:
    """Return a Flowcell for the named flowcell directory under the demux run dir."""
    flowcell_path: Path = demux_run_dir / flowcell_full_name
    return Flowcell(flowcell_path=flowcell_path)
def fixture_dragen_flow_cell_object(dragen_flow_cell_path: Path) -> Flowcell:
    """Return a Flowcell for the dragen flow cell path, with its name parsed."""
    flow_cell = Flowcell(dragen_flow_cell_path)
    flow_cell.parse_flowcell_name()
    # Return the instance that was parsed instead of discarding it and
    # constructing a second, identical Flowcell from the same path.
    return flow_cell