示例#1
0
def test_checkpoints_are_pipeline_unique(tmpdir):
    """ Checkpoint file names are specific to both stage and pipeline. """

    # This probes a mechanistic detail of the checkpointing system rather
    # than a user-facing feature.

    # Build two distinct pipelines from the same temporary folder path.
    align_reads = get_read_aligner(tmpdir.strpath)
    call_peaks = get_peak_caller(tmpdir.strpath)

    # Gather the stage names that each pipeline declares.
    alignment_stage_names = {stage.name for stage in align_reads.stages()}
    peak_call_stage_names = {stage.name for stage in call_peaks.stages()}

    # Exactly one stage name is common to both pipelines, and the two
    # pipelines report the same output folder.
    shared_stage_names = alignment_stage_names & peak_call_stage_names
    assert {"align_reads"} == shared_stage_names
    assert align_reads.outfolder == call_peaks.outfolder

    # Negative control: neither manager has a checkpoint file yet.
    for pipeline in (align_reads, call_peaks):
        assert [] == list(fetch_checkpoint_files(pipeline.manager))

    # Execute both pipelines.
    align_reads.run()
    call_peaks.run()

    # One checkpoint file path is anticipated per stage, per pipeline.
    align_reads_expected = set(
        checkpoint_filepath(stage.name, align_reads)
        for stage in align_reads.stages())
    call_peaks_expected = set(
        checkpoint_filepath(stage.name, call_peaks)
        for stage in call_peaks.stages())

    # Despite the shared stage name, the two pipelines' expected checkpoint
    # paths must not collide.
    assert set() == (align_reads_expected & call_peaks_expected)

    # With no start/stop restriction and no checkpoint files present
    # beforehand, the union of both pipelines' expected checkpoint paths
    # should be exactly what is found afterward.
    # NOTE(review): the pipelines themselves (not their .manager) are handed
    # to fetch_checkpoint_files here, unlike the negative control above;
    # confirm the helper accepts both.
    expected_checkpoints = align_reads_expected | call_peaks_expected
    observed_checkpoints = set(fetch_checkpoint_files(align_reads)).union(
        fetch_checkpoint_files(call_peaks))

    # Compare, and on mismatch print a breakdown before re-raising.
    try:
        assert expected_checkpoints == observed_checkpoints
    except AssertionError:
        missing = expected_checkpoints - observed_checkpoints
        matched = expected_checkpoints & observed_checkpoints
        unexpected = observed_checkpoints - expected_checkpoints
        print("Only in expected:\n{}".format("\n".join(missing)))
        print("Expected and observed:\n{}".format("\n".join(matched)))
        print("Only in observed:\n{}".format("\n".join(unexpected)))
        raise
示例#2
0
def test_pipeline_checkpoint_sensitivity_effect_perspective(pl_name, tmpdir):
    """ A stage with an existing checkpoint file is not executed again. """

    def _read_lines(path):
        # Return the file's content as a list of lines (endings retained).
        with open(path, 'r') as handle:
            return handle.readlines()

    # Build the pipeline; its output file must appear only after a run.
    pipeline = get_pipeline(pl_name, tmpdir.strpath)
    output_file = os.path.join(pipeline.outfolder, pipeline.name_output_file)
    assert not os.path.exists(output_file)
    pipeline.run()
    assert os.path.isfile(output_file)

    # After the first run, the output file lists every stage name in order.
    all_stage_lines = [stage.name + os.linesep for stage in pipeline.stages()]
    assert all_stage_lines == _read_lines(output_file)

    # Every stage should now have a checkpoint file. Confirm that, then
    # delete only the final stage's checkpoint file before the second run.
    exp_cp_fpaths = {
        checkpoint_filepath(stage.name, pipeline.manager)
        for stage in pipeline.stages()
    }
    assert exp_cp_fpaths == set(fetch_checkpoint_files(pipeline.manager))
    final_stage = pipeline.stages()[-1]
    os.remove(checkpoint_filepath(final_stage.name, pipeline.manager))

    # After the second run the output file holds only the final stage's name.
    pipeline.run()
    assert [final_stage.name + os.linesep] == _read_lines(output_file)
示例#3
0
    def test_two_retrospective_checkpointed_timestamps(self, test_type,
                                                       stage_pair, pm):
        """ Each retrospective timestamp yields its own checkpoint file. """

        first, second = stage_pair
        for stage in (first, second):
            pm.timestamp(checkpoint=stage, finished=True)

        if test_type == FILES_TEST:
            # Both stages should have a checkpoint file.
            observed = fetch_checkpoint_files(pm)
            expected = {checkpoint_filepath(s, pm) for s in (first, second)}
            assert expected == set(observed)
            return

        # Otherwise inspect the manager's checkpoint state: the second stage
        # is recorded as previous and there is no current checkpoint.
        assert second == pm.prev_checkpoint
        assert pm.curr_checkpoint is None
示例#4
0
    def test_two_prospective_checkpointed_timestamps(self, test_type,
                                                     stage_pair, pm):
        """ Two prospective timestamps leave a file only for the first. """

        first, second = stage_pair
        for stage in (first, second):
            pm.timestamp(checkpoint=stage, finished=False)

        if test_type == FILES_TEST:
            # Only the first stage should have a checkpoint file.
            observed = fetch_checkpoint_files(pm)
            assert {checkpoint_filepath(first, pm)} == set(observed)
            return

        # Otherwise inspect the manager's checkpoint state: the first stage
        # is recorded as previous and the second as current.
        assert first == pm.prev_checkpoint
        assert second == pm.curr_checkpoint
示例#5
0
    def test_retrospective_the_prospective_checkpointed_timestamps(
            self, test_type, stage_pair, pm):
        """ Check a retrospective timestamp followed by a prospective one. """

        first, second = stage_pair

        # After the retrospective call, the first stage is the previous
        # checkpoint and no checkpoint is current.
        pm.timestamp(checkpoint=first, finished=True)
        assert first == pm.prev_checkpoint
        assert pm.curr_checkpoint is None

        pm.timestamp(checkpoint=second, finished=False)

        if test_type == FILES_TEST:
            # Only the first stage should have a checkpoint file.
            expected = {checkpoint_filepath(first, pm)}
            assert expected == set(fetch_checkpoint_files(pm))
            return

        # Otherwise inspect the manager's checkpoint state after the
        # prospective call: no previous checkpoint, second stage is current.
        assert pm.prev_checkpoint is None
        assert second == pm.curr_checkpoint
示例#6
0
    def test_prospective_then_retrospective_checkpointed_timestamps(
            self, test_type, stage_pair, pm):
        """ When a prospective checkpointed timestamp precedes a
        retrospective one, only the retrospective one gets a file. """

        first, second = stage_pair

        # The prospective call marks the first stage as current.
        pm.timestamp(checkpoint=first, finished=False)
        assert first == pm.curr_checkpoint

        pm.timestamp(checkpoint=second, finished=True)

        if test_type == FILES_TEST:
            # Only the second stage should have a checkpoint file.
            observed = fetch_checkpoint_files(pm)
            assert {checkpoint_filepath(second, pm)} == set(observed)
            return

        # The retrospective call clears the current checkpoint and records
        # the second stage as the previous one.
        assert second == pm.prev_checkpoint
        assert pm.curr_checkpoint is None
示例#7
0
def test_pipeline_checkpoint_respect_sensitivity_checkpoint_perspective(
        pl_name, tmpdir):
    """ Stages that already have a checkpoint file are skipped on rerun. """

    def _current_checkfiles():
        # Checkpoint files presently associated with the pipeline's manager.
        return fetch_checkpoint_files(pipeline.manager)

    # Build the pipeline under test.
    pipeline = get_pipeline(pl_name, tmpdir.strpath)

    # Negative control: no checkpoint files exist before the first run.
    assert [] == _current_checkfiles()

    # First run, which should produce the checkpoint files.
    pipeline.run()

    # One checkpoint file is expected per pipeline function.
    expected = [
        checkpoint_filepath(func.__name__, pipeline.manager)
        for func in pipeline.functions
    ]
    observed = _current_checkfiles()
    assert set(expected) == set(observed)

    # Record modification times to compare against after the second run.
    timestamps = {path: os.path.getmtime(path) for path in observed}

    # Delete the checkpoint file of the final stage.
    # NOTE(review): the function object itself (not its __name__) is passed
    # to checkpoint_filepath here; confirm the helper accepts both forms.
    last_aligner_stage = pipeline.functions[-1]
    last_aligner_checkfile = checkpoint_filepath(last_aligner_stage,
                                                 pipeline.manager)
    os.unlink(last_aligner_checkfile)

    # Confirm that only the final stage's checkpoint file is now missing.
    retained_checkfiles = expected[:-1]
    assert all(os.path.isfile(path) for path in retained_checkfiles)
    assert not os.path.exists(last_aligner_checkfile)
    assert set(expected) != set(_current_checkfiles())

    # Pause briefly so that modification times from the second run can be
    # told apart from those of the first.
    time.sleep(0.05)

    # Second run, with checkpoint files in place for all but the last stage.
    pipeline.run()

    # The full set of checkpoint files should exist again.
    observed = _current_checkfiles()
    exp = set(expected)
    obs = set(observed)
    assert exp == obs, \
        "Expected only:\n{}\nExpected and observed:\n{}\nObserved only:\n{}".format(
            exp - obs, exp & obs, obs - exp)

    # Checkpoint files of the skipped stages must be untouched.
    for path in retained_checkfiles:
        assert timestamps[path] == os.path.getmtime(path)

    # The checkpoint file for the rerun stage must have been written anew.
    recreated_mtime = os.path.getmtime(last_aligner_checkfile)
    original_mtime = timestamps[last_aligner_checkfile]
    assert recreated_mtime > original_mtime, \
        "Recreated checkpoint file ('{}') should be newer than original".format(
            last_aligner_checkfile)