Пример #1
0
def demultiplex_flowcell(
    context: CGConfig,
    dry_run: bool,
    flowcell_id: str,
    bcl_converter: str,
):
    """Start demultiplexing of a single flowcell on slurm using CG

    flowcell-id is the flowcell run directory name, e.g. '201203_A00689_0200_AHVKJCDRXX'

    Steps, in order: build the flowcell object, call the delete API for the
    flowcell, check that demultiplexing may start, validate the sample sheet,
    start the demultiplexing job and add it to Trailblazer.

    Raises:
        click.Abort: if the flowcell object can not be created or if the
            sample sheet does not validate.
    """
    LOG.info("Running cg demultiplex flowcell, using %s.", bcl_converter)
    run_path: Path = Path(context.demultiplex.run_dir) / flowcell_id

    demux_api: DemultiplexingAPI = context.demultiplex_api
    demux_api.set_dry_run(dry_run=dry_run)
    LOG.info(f"SETTING FLOWCELL ID TO {flowcell_id}")
    LOG.info(f"SETTING OUT DIR TO {demux_api.out_dir}")

    try:
        flowcell = Flowcell(flowcell_path=run_path, bcl_converter=bcl_converter)
    except FlowcellError as error:
        raise click.Abort from error

    # The delete API is asked to handle cg_stats, the demultiplexing dir,
    # housekeeper and init files; run_dir and status_db are switched off.
    cleaner: DeleteDemuxAPI = DeleteDemuxAPI(
        config=context,
        demultiplex_base=demux_api.out_dir,
        dry_run=dry_run,
        run_path=run_path,
    )
    cleaner.delete_flow_cell(
        cg_stats=True,
        demultiplexing_dir=True,
        run_dir=False,
        housekeeper=True,
        init_files=True,
        status_db=False,
    )

    # A dry run continues even when demultiplexing is reported as not possible.
    may_start: bool = demux_api.is_demultiplexing_possible(flowcell=flowcell)
    if not (may_start or dry_run):
        LOG.warning("Can not start demultiplexing!")
        return

    sheet_is_valid: bool = flowcell.validate_sample_sheet()
    if not sheet_is_valid:
        LOG.warning(
            "Malformed sample sheet. Run cg demultiplex samplesheet validate %s",
            flowcell.sample_sheet_path,
        )
        raise click.Abort

    job_id: int = demux_api.start_demultiplexing(flowcell=flowcell)
    trailblazer: TrailblazerAPI = context.trailblazer_api
    demux_api.add_to_trailblazer(
        tb_api=trailblazer, slurm_job_id=job_id, flowcell=flowcell
    )
Пример #2
0
def demultiplex_all(context: CGConfig, bcl_converter: str,
                    flowcells_directory: click.Path, dry_run: bool):
    """Demultiplex all flowcells that are ready under the flowcells_directory

    When flowcells_directory is empty the run directory from the config
    (context.demultiplex.run_dir) is searched instead. Sub directories that can
    not be turned into a Flowcell, that are not ready for demultiplexing
    (unless dry_run is set) or that have a malformed sample sheet are skipped.
    For every remaining flowcell the delete API is called, demultiplexing is
    started and the job is added to Trailblazer.
    """
    LOG.info("Running cg demultiplex all, using %s.", bcl_converter)
    if flowcells_directory:
        search_dir: Path = Path(str(flowcells_directory))
    else:
        search_dir: Path = Path(context.demultiplex.run_dir)
    demultiplex_api: DemultiplexingAPI = context.demultiplex_api
    demultiplex_api.set_dry_run(dry_run=dry_run)
    tb_api: TrailblazerAPI = context.trailblazer_api
    LOG.info("Search for flowcells ready to demultiplex in %s", search_dir)
    for sub_dir in search_dir.iterdir():
        if not sub_dir.is_dir():
            continue
        LOG.info("Found directory %s", sub_dir)
        try:
            flowcell_obj = Flowcell(flowcell_path=sub_dir,
                                    bcl_converter=bcl_converter)
        except FlowcellError:
            # Not a flowcell directory; deliberately skipped.
            continue

        # A dry run continues even when demultiplexing is reported as not possible.
        if not demultiplex_api.is_demultiplexing_possible(
                flowcell=flowcell_obj) and not dry_run:
            continue

        if not flowcell_obj.validate_sample_sheet():
            LOG.warning(
                "Malformed sample sheet. Run cg demultiplex samplesheet validate %s",
                flowcell_obj.sample_sheet_path,
            )
            continue

        # iterdir() already yields paths prefixed with the searched directory,
        # so sub_dir is the flowcell run path. Joining it with the searched
        # directory again would duplicate the prefix for relative directories.
        delete_demux_api: DeleteDemuxAPI = DeleteDemuxAPI(
            config=context,
            demultiplex_base=demultiplex_api.out_dir,
            dry_run=dry_run,
            run_path=sub_dir,
        )

        delete_demux_api.delete_flow_cell(
            cg_stats=False,
            demultiplexing_dir=True,
            run_dir=False,
            housekeeper=True,
            init_files=False,
            status_db=False,
        )

        slurm_job_id: int = demultiplex_api.start_demultiplexing(
            flowcell=flowcell_obj)
        demultiplex_api.add_to_trailblazer(tb_api=tb_api,
                                           slurm_job_id=slurm_job_id,
                                           flowcell=flowcell_obj)
Пример #3
0
def fixture_flowcell_working_directory(
        novaseq_dir: Path, flowcell_runs_working_directory: Path) -> Path:
    """Create a flowcell working directory holding only the run parameters file.

    The directory is created under flowcell_runs_working_directory with the
    same name as novaseq_dir, and the run parameters file of the flowcell in
    novaseq_dir is copied into it. The path to the new directory is returned.
    """
    target_dir: Path = flowcell_runs_working_directory / novaseq_dir.name
    target_dir.mkdir(parents=True)

    source_flowcell: Flowcell = Flowcell(flowcell_path=novaseq_dir)
    target_flowcell: Flowcell = Flowcell(flowcell_path=target_dir)

    source_file: str = str(source_flowcell.run_parameters_path)
    target_file: str = str(target_flowcell.run_parameters_path)
    shutil.copy(source_file, target_file)
    return target_dir
def test_demultiplex_flowcell_dry_run(
    cli_runner: testing.CliRunner,
    demultiplex_ready_flowcell: Path,
    demultiplex_context: CGConfig,
    caplog,
):
    """Check that a dry run of demultiplex flowcell creates no output directories."""
    caplog.set_level(logging.INFO)

    # GIVEN a flowcell with all files needed for demultiplexing
    ready_flowcell: Flowcell = Flowcell(demultiplex_ready_flowcell)

    # GIVEN that neither the out dir nor its Unaligned sub directory exists
    api: DemultiplexingAPI = demultiplex_context.demultiplex_api
    assert api.is_demultiplexing_possible(flowcell=ready_flowcell)
    out_dir: Path = api.flowcell_out_dir_path(ready_flowcell)
    unaligned: Path = out_dir / "Unaligned"
    assert not out_dir.exists()
    assert not unaligned.exists()

    # WHEN invoking the command with the dry run flag
    cli_args = [str(demultiplex_ready_flowcell), "--dry-run"]
    result: testing.Result = cli_runner.invoke(
        demultiplex_flowcell, cli_args, obj=demultiplex_context
    )

    # THEN the command exits without problems
    assert result.exit_code == 0

    # THEN no result directories were created, since this was a dry run
    assert not out_dir.exists()
    assert not unaligned.exists()
Пример #5
0
    def finish_flowcell(self,
                        flowcell_name: str,
                        bcl_converter: str,
                        force: bool = False) -> None:
        """Run the post processing steps for a demultiplexed flowcell.

        Returns without post processing when the flowcell object can not be
        created, when demultiplexing is not completed, or when the results
        directory is missing. When the files are already renamed, post
        processing only runs if force is set.
        """
        LOG.info("Check demuxed flowcell %s", flowcell_name)
        try:
            run_flowcell: Flowcell = Flowcell(
                flowcell_path=self.demux_api.run_dir / flowcell_name,
                bcl_converter=bcl_converter,
            )
        except FlowcellError:
            return

        completed: bool = self.demux_api.is_demultiplexing_completed(
            flowcell=run_flowcell)
        if not completed:
            LOG.warning("Demultiplex is not ready for %s", flowcell_name)
            return

        results: DemuxResults = DemuxResults(
            demux_dir=self.demux_api.out_dir / flowcell_name,
            flowcell=run_flowcell,
            bcl_converter=bcl_converter,
        )
        if not results.results_dir.exists():
            LOG.warning("Could not find results directory %s",
                        results.results_dir)
            LOG.info("Can not finish flowcell %s", flowcell_name)
            return

        if results.files_renamed():
            LOG.warning("Flowcell is already finished!")
            if not force:
                return
            # Forced: continue although the files were already renamed.
            LOG.info("Post processing flowcell anyway")

        self.post_process_flowcell(demux_results=results)
def test_create_sample_sheet_no_run_parameters(
    cli_runner: testing.CliRunner,
    flowcell_working_directory_no_run_parameters: Path,
    sample_sheet_context: CGConfig,
    caplog,
    mocker,
):
    """Check that creating a sample sheet fails when run parameters are missing."""
    # GIVEN a flowcell directory without a run parameters file
    flowcell: Flowcell = Flowcell(flowcell_working_directory_no_run_parameters)
    assert not flowcell.run_parameters_path.exists()

    # GIVEN that fetching flowcell samples returns one sample
    mocker.patch(
        "cg.cli.demultiplex.sample_sheet.flowcell_samples",
        return_value=[{"sample": 1}],
    )

    # GIVEN a demultiplex api whose run dir is the parent of the flowcell directory
    api: DemultiplexingAPI = sample_sheet_context.demultiplex_api
    api.run_dir = flowcell_working_directory_no_run_parameters.parent
    sample_sheet_context.demultiplex_api_ = api

    # WHEN running the create sample sheet command
    cli_args = [flowcell.flowcell_full_name]
    result: testing.Result = cli_runner.invoke(
        create_sheet, cli_args, obj=sample_sheet_context
    )

    # THEN it exits with a non zero exit code
    assert result.exit_code != 0
    # THEN the missing run parameters file is reported
    assert "Could not find run parameters file" in caplog.text
def test_start_demultiplexing_when_already_completed(
    cli_runner: testing.CliRunner,
    demultiplex_ready_flowcell: Path,
    demultiplex_context: CGConfig,
    caplog,
    mocker,
):
    """Check the command's behavior for a flowcell that is already demultiplexed."""
    caplog.set_level(logging.DEBUG)

    # GIVEN that all files are present for demultiplexing
    flowcell: Flowcell = Flowcell(demultiplex_ready_flowcell)
    demux_api: DemultiplexingAPI = demultiplex_context.demultiplex_api

    # GIVEN that demultiplexing has started
    flowcell.demultiplexing_started_path.touch()

    # GIVEN a out dir that exist
    demux_api.flowcell_out_dir_path(flowcell).mkdir(parents=True)

    # GIVEN that demultiplexing is completed
    demux_api.demultiplexing_completed_path(flowcell=flowcell).touch()

    # WHEN starting demultiplexing from the CLI
    result: testing.Result = cli_runner.invoke(
        demultiplex_flowcell,
        [str(demultiplex_ready_flowcell), "-b", "bcl2fastq"],
        obj=demultiplex_context,
    )

    # THEN assert the command exits without problems
    assert result.exit_code == 0

    # THEN assert it was communicated that demultiplexing was completed
    # The message must be looked up in the captured log: asserting the bare
    # (non-empty) string is always true and verifies nothing.
    assert (
        f"Demultiplexing is already completed for flowcell {flowcell.flowcell_id}"
        in caplog.text
    )
def test_create_dragen_sample_sheet(
    cli_runner: testing.CliRunner,
    flowcell_working_directory: Path,
    sample_sheet_context: CGConfig,
    lims_novaseq_dragen_samples: List[LimsFlowcellSampleDragen],
    mocker,
):
    """Check that a valid dragen sample sheet is created by the CLI command."""
    # GIVEN a flowcell directory with some run parameters
    dragen_flowcell: Flowcell = Flowcell(
        flowcell_working_directory, bcl_converter="dragen"
    )
    assert dragen_flowcell.run_parameters_path.exists()

    # GIVEN that there is no sample sheet present
    assert not dragen_flowcell.sample_sheet_exists()

    # GIVEN that fetching flowcell samples returns some dragen samples
    mocker.patch(
        "cg.cli.demultiplex.sample_sheet.flowcell_samples",
        return_value=lims_novaseq_dragen_samples,
    )

    # WHEN creating a sample sheet
    cli_args = [str(flowcell_working_directory), "-b", "dragen"]
    result = cli_runner.invoke(create_sheet, cli_args, obj=sample_sheet_context)

    # THEN it exits with success
    assert result.exit_code == 0
    # THEN the sample sheet was created
    assert dragen_flowcell.sample_sheet_exists()
    # THEN the sample sheet is on the correct format
    assert dragen_flowcell.validate_sample_sheet()
def test_demultiplex_all(
    cli_runner: testing.CliRunner,
    demultiplex_context: CGConfig,
    demultiplex_ready_flowcell: Path,
    caplog,
    mocker,
):
    """Check that a dry run of demultiplex all finds the ready flowcell."""
    caplog.set_level(logging.INFO)

    # GIVEN a context whose run dir holds at least one flowcell ready for demux
    api: DemultiplexingAPI = demultiplex_context.demultiplex_api
    ready_flowcell: Flowcell = Flowcell(flowcell_path=demultiplex_ready_flowcell)
    assert api.run_dir == demultiplex_ready_flowcell.parent

    # WHEN running the demultiplex all command as a dry run
    cli_args = ["--dry-run"]
    result: testing.Result = cli_runner.invoke(
        demultiplex_all, cli_args, obj=demultiplex_context
    )

    # THEN it exits without problems
    assert result.exit_code == 0

    # THEN it found the directory
    assert "Found directory" in caplog.text

    # THEN it found a flowcell that is ready for demultiplexing
    expected_message = (
        f"Flowcell {ready_flowcell.flowcell_id} is ready for demultiplexing"
    )
    assert expected_message in caplog.text
Пример #10
0
def fixture_demultiplex_ready_flowcell(flowcell_working_directory: Path,
                                       novaseq_dir: Path) -> Path:
    """Populate the working directory with the files used to start demultiplexing.

    The sample sheet and the stderr log file of the flowcell in novaseq_dir are
    copied to the working flowcell, and the copy complete and RTA complete
    files are touched. The working directory path is returned.
    """
    source: Flowcell = Flowcell(flowcell_path=novaseq_dir)
    target: Flowcell = Flowcell(flowcell_path=flowcell_working_directory)

    sheet_from: str = str(source.sample_sheet_path)
    sheet_to: str = str(target.sample_sheet_path)
    shutil.copy(sheet_from, sheet_to)

    log_from: str = str(DemultiplexingAPI.get_stderr_logfile(source))
    log_to: str = str(DemultiplexingAPI.get_stderr_logfile(target))
    shutil.copy(log_from, log_to)

    target.copy_complete_path.touch()
    target.rta_complete_path.touch()
    return flowcell_working_directory
Пример #11
0
def create_all_sheets(context: CGConfig, bcl_converter: str, dry_run: bool):
    """Command to create sample sheets for all flowcells that lack a sample sheet

    Search flowcell directories for run parameters and create a sample sheets based on the
    information

    Sub directories are skipped when they can not be turned into a Flowcell,
    already have a sample sheet, have no samples in lims, or when creating the
    sample sheet raises FileNotFoundError/FileExistsError. With dry_run the
    sample sheet of every remaining flowcell is echoed instead of written.
    """
    demux_api: DemultiplexingAPI = context.demultiplex_api
    flowcells: Path = demux_api.run_dir
    for sub_dir in flowcells.iterdir():
        if not sub_dir.is_dir():
            continue
        LOG.info("Found directory %s", sub_dir)
        try:
            flowcell_object = Flowcell(flowcell_path=sub_dir, bcl_converter=bcl_converter)
        except FlowcellError:
            # Not a flowcell directory; deliberately skipped.
            continue
        if flowcell_object.sample_sheet_exists():
            LOG.info("Sample sheet already exists")
            continue
        LOG.info("Creating sample sheet for flowcell %s", flowcell_object.flowcell_id)
        lims_samples: List[LimsFlowcellSample] = list(
            flowcell_samples(
                lims=context.lims_api,
                flowcell_id=flowcell_object.flowcell_id,
                bcl_converter=bcl_converter,
            )
        )
        if not lims_samples:
            LOG.warning("Could not find any samples in lims for %s", flowcell_object.flowcell_id)
            continue

        try:
            sample_sheet: str = create_sample_sheet(
                flowcell=flowcell_object, lims_samples=lims_samples, bcl_converter=bcl_converter
            )
        except (FileNotFoundError, FileExistsError):
            continue

        if dry_run:
            click.echo(sample_sheet)
            # Move on to the next flowcell; returning here would end the
            # dry run after the first flowcell.
            continue
        LOG.info("Writing sample sheet to %s", flowcell_object.sample_sheet_path.resolve())
        with open(flowcell_object.sample_sheet_path, "w") as outfile:
            outfile.write(sample_sheet)
Пример #12
0
def test_flowcell_id(flowcell_path: Path):
    """Check that the flowcell id is parsed from the run directory name."""
    # GIVEN the path to a finished flowcell run
    # GIVEN the expected id: the last "_"-separated part of the directory
    # name without its first character
    last_part: str = flowcell_path.name.split("_")[-1]
    expected_id: str = last_part[1:]

    # WHEN instantiating a flowcell object
    flowcell = Flowcell(flowcell_path)

    # THEN the flowcell id is correctly parsed
    assert flowcell.flowcell_id == expected_id
Пример #13
0
def test_flowcell_position(flowcell_path: Path):
    """Check that the flowcell position is either A or B."""
    # GIVEN a flowcell object built from the path to a finished flowcell
    flowcell = Flowcell(flowcell_path)

    # WHEN fetching the flowcell position
    found_position = flowcell.flowcell_position

    # THEN it is A or B
    allowed_positions = ["A", "B"]
    assert found_position in allowed_positions
Пример #14
0
def test_get_run_parameters_when_non_existing(fixtures_dir: Path):
    """Check that reading run parameters fails when the file is missing."""
    # GIVEN a flowcell object for a directory without run parameters
    demultiplexed_runs: Path = (
        fixtures_dir / "apps" / "demultiplexing" / "demultiplexed-runs"
    )
    run_dir: Path = demultiplexed_runs / "201203_A00689_0200_AHVKJCDRXX"
    flowcell_without_parameters = Flowcell(flowcell_path=run_dir)
    assert not flowcell_without_parameters.run_parameters_path.exists()

    # WHEN fetching the run parameters object
    # THEN a FileNotFoundError is raised
    with pytest.raises(FileNotFoundError):
        flowcell_without_parameters.run_parameters_object
Пример #15
0
def create_report_cmd(context: CGConfig, flowcell_name: str):
    """Generate a demux report and print to stdout

    Raises:
        click.Abort: if the flowcell object can not be created or the
            conversion stats file does not exist.
    """
    LOG.info("Check demuxed flowcell %s", flowcell_name)
    api: DemultiplexingAPI = context.demultiplex_api
    try:
        run_flowcell: Flowcell = Flowcell(flowcell_path=api.run_dir / flowcell_name)
    except FlowcellError:
        raise click.Abort

    results: DemuxResults = DemuxResults(
        demux_dir=api.out_dir / flowcell_name, flowcell=run_flowcell
    )
    stats_file: Path = results.conversion_stats_path
    if not stats_file.exists():
        LOG.warning("Could not find conversion stats file %s", stats_file)
        raise click.Abort

    stats = ConversionStats(results.conversion_stats_path)
    report_lines = create_demux_report(conversion_stats=stats)
    click.echo("\n".join(report_lines))
Пример #16
0
def create_sheet(context: CGConfig, flowcell_name: str, bcl_converter: str, dry_run: bool):
    """Command to create a sample sheet
    flowcell-name is the flowcell run directory name, e.g. '201203_A00689_0200_AHVKJCDRXX'

    The flowcell is looked up in the run directory of the demultiplex api.
    With dry_run the sample sheet is echoed instead of written to file.

    Raises:
        click.Abort: if the flowcell path does not exist, the flowcell object
            can not be created, no samples are found in lims, or creating the
            sample sheet raises FileNotFoundError/FileExistsError.
    """

    LOG.info("Creating sample sheet for flowcell %s", flowcell_name)
    api: DemultiplexingAPI = context.demultiplex_api
    run_path: Path = api.run_dir / flowcell_name
    if not run_path.exists():
        LOG.warning("Could not find flowcell %s", run_path)
        raise click.Abort

    try:
        flowcell = Flowcell(flowcell_path=run_path, bcl_converter=bcl_converter)
    except FlowcellError:
        raise click.Abort

    fetched_samples = flowcell_samples(
        lims=context.lims_api,
        flowcell_id=flowcell.flowcell_id,
        bcl_converter=bcl_converter,
    )
    samples: List[Union[LimsFlowcellSampleBcl2Fastq, LimsFlowcellSampleDragen]] = list(
        fetched_samples
    )
    if not samples:
        LOG.warning("Could not find any samples in lims for %s", flowcell.flowcell_id)
        raise click.Abort

    try:
        sheet_content: str = create_sample_sheet(
            flowcell=flowcell, lims_samples=samples, bcl_converter=bcl_converter
        )
    except (FileNotFoundError, FileExistsError):
        raise click.Abort

    if dry_run:
        # Only show the result; nothing is written.
        click.echo(sheet_content)
        return

    LOG.info("Writing sample sheet to %s", flowcell.sample_sheet_path.resolve())
    with open(flowcell.sample_sheet_path, "w") as sheet_file:
        sheet_file.write(sheet_content)
def test_demultiplex_dragen_flowcell(
    cli_runner: testing.CliRunner,
    demultiplex_ready_flowcell_dragen: Path,
    demultiplex_context: CGConfig,
    demultiplexed_flowcells_working_directory: Path,
    caplog,
    mocker,
):
    """Check that demultiplexing a dragen flowcell creates results dirs and sbatch script."""
    caplog.set_level(logging.INFO)

    # GIVEN that all files are present for dragen demultiplexing
    dragen_flowcell: Flowcell = Flowcell(
        flowcell_path=demultiplex_ready_flowcell_dragen, bcl_converter="dragen"
    )

    # GIVEN a out dir that does not exist
    api: DemultiplexingAPI = demultiplex_context.demultiplex_api
    out_dir: Path = api.flowcell_out_dir_path(dragen_flowcell)
    unaligned: Path = out_dir / "Unaligned"
    assert api.is_demultiplexing_possible(flowcell=dragen_flowcell)
    assert not out_dir.exists()
    assert not unaligned.exists()

    # GIVEN that adding a pending analysis to trailblazer is patched out
    mocker.patch("cg.apps.tb.TrailblazerAPI.add_pending_analysis")

    # WHEN starting dragen demultiplexing from the CLI (no dry run flag)
    cli_args = [str(demultiplex_ready_flowcell_dragen), "-b", "dragen"]
    result: testing.Result = cli_runner.invoke(
        demultiplex_flowcell, cli_args, obj=demultiplex_context
    )

    # THEN the command exits without problems
    assert result.exit_code == 0

    # THEN the results folders were created
    assert out_dir.exists()
    assert unaligned.exists()

    # THEN the sbatch script was created
    sbatch_script = api.demultiplex_sbatch_path(dragen_flowcell)
    assert sbatch_script.exists()
Пример #18
0
def add_flowcell_cmd(context: CGConfig, flowcell_id: str, bcl_converter: str):
    """Add a flowcell to the cgstats database

    Raises:
        click.Abort: if the flowcell run path or the demultiplex result path
            does not exist, or if the flowcell object can not be created.
    """
    cgstats: StatsAPI = context.cg_stats_api
    api: DemultiplexingAPI = context.demultiplex_api

    run_path: Path = api.run_dir / flowcell_id
    if not run_path.exists():
        LOG.warning("Could not find flowcell path %s", run_path)
        raise click.Abort

    results_path: Path = api.out_dir / flowcell_id
    if not results_path.exists():
        LOG.warning("Could not find demultiplex result path %s", results_path)
        raise click.Abort

    try:
        run_flowcell: Flowcell = Flowcell(
            flowcell_path=run_path, bcl_converter=bcl_converter
        )
    except FlowcellError:
        raise click.Abort

    results: DemuxResults = DemuxResults(
        demux_dir=results_path,
        flowcell=run_flowcell,
        bcl_converter=bcl_converter,
    )
    create_novaseq_flowcell(manager=cgstats, demux_results=results)
Пример #19
0
 def mock_flowcell(flowcell_full_name: str) -> Flowcell:
     """Return a Flowcell whose path is built from flowcell_full_name."""
     flowcell_path = Path(flowcell_full_name)
     return Flowcell(flowcell_path=flowcell_path)
Пример #20
0
def fixture_flowcell_object(demux_run_dir: Path,
                            flowcell_full_name: str) -> Flowcell:
    """Return a Flowcell for the directory flowcell_full_name inside demux_run_dir."""
    flowcell_dir: Path = demux_run_dir / flowcell_full_name
    return Flowcell(flowcell_path=flowcell_dir)
Пример #21
0
def fixture_dragen_flow_cell_object(dragen_flow_cell_path: Path) -> Flowcell:
    """Return a Flowcell for dragen_flow_cell_path with its name parsed."""
    flow_cell = Flowcell(dragen_flow_cell_path)
    flow_cell.parse_flowcell_name()
    # Return the instance that was parsed instead of discarding it and
    # building a second, fresh Flowcell from the same path.
    return flow_cell