def test_checkpoints_are_pipeline_unique(tmpdir):
    """ Names of checkpoint files depend on both stage and pipeline. """

    # Note: conceptually, this tests an underlying mechanistic aspect of the
    # checkpointing system.

    # Create two different pipelines.
    align_reads = get_read_aligner(tmpdir.strpath)
    call_peaks = get_peak_caller(tmpdir.strpath)

    # Get the stage names associated with each pipeline.
    alignment_stage_names = set(map(lambda s: s.name, align_reads.stages()))
    peak_call_stage_names = set(map(lambda s: s.name, call_peaks.stages()))

    # Check that we have one specific stage name shared between the pipelines.
    assert {"align_reads"} == alignment_stage_names & peak_call_stage_names
    assert align_reads.outfolder == call_peaks.outfolder

    # We begin with no checkpoint files.
    assert [] == list(fetch_checkpoint_files(align_reads.manager))
    assert [] == list(fetch_checkpoint_files(call_peaks.manager))

    # Run each pipeline.
    align_reads.run()
    call_peaks.run()

    # We expect a different checkpoint file for each stage of each pipeline.
    align_reads_expected = {
        checkpoint_filepath(s.name, align_reads) for s in align_reads.stages()}
    call_peaks_expected = {
        checkpoint_filepath(s.name, call_peaks) for s in call_peaks.stages()}

    # The pipeline names here are unique, and each checkpoint filename includes
    # its pipeline's name for disambiguation, so even though the pipelines
    # share a stage name, their sets of checkpoint filenames are disjoint.
    assert set() == (align_reads_expected & call_peaks_expected)

    # When not setting start/stop parameters and beginning with no checkpoint
    # files in place, each pipeline generates its full set of checkpoint files.
    expected_checkpoints = align_reads_expected | call_peaks_expected
    observed_checkpoints = set(fetch_checkpoint_files(align_reads)) | \
        set(fetch_checkpoint_files(call_peaks))

    # Verify satisfaction of expectation.
    try:
        assert expected_checkpoints == observed_checkpoints
    except AssertionError:
        only_exp = expected_checkpoints - observed_checkpoints
        exp_and_obs = expected_checkpoints & observed_checkpoints
        only_obs = observed_checkpoints - expected_checkpoints
        print("Only in expected:\n{}".format("\n".join(only_exp)))
        print("Expected and observed:\n{}".format("\n".join(exp_and_obs)))
        print("Only in observed:\n{}".format("\n".join(only_obs)))
        raise

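# The test above relies on checkpoint filenames embedding both the pipeline
# name and the stage name. The sketch below illustrates one naming scheme that
# would satisfy those assertions; it is an assumption for illustration only,
# not the actual checkpoint_filepath implementation, and the "_illustrative"
# name is hypothetical.

def _illustrative_checkpoint_filepath(stage_name, pipeline_name, outfolder):
    """ Sketch: build a checkpoint path qualified by pipeline name. """
    # Prefixing with the pipeline name keeps checkpoints from two pipelines
    # that share a stage name (e.g., "align_reads") from colliding.
    return os.path.join(
        outfolder, "{}_{}.checkpoint".format(pipeline_name, stage_name))
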
def test_pipeline_checkpoint_sensitivity_effect_perspective(pl_name, tmpdir):
    """ The pipeline skips execution of stages with extant checkpoint. """

    # Create the pipeline, then check creation of output file.
    pipeline = get_pipeline(pl_name, tmpdir.strpath)
    output_file = os.path.join(pipeline.outfolder, pipeline.name_output_file)
    assert not os.path.exists(output_file)
    pipeline.run()
    assert os.path.isfile(output_file)

    # Validate pipeline effects (output file content).
    with open(output_file, 'r') as f:
        lines = f.readlines()
    assert [s.name + os.linesep for s in pipeline.stages()] == lines

    # Verify presence of checkpoint files to support our expectation about
    # which stages should be skipped and which should be run during the
    # pipeline's second execution.
    exp_cp_fpaths = set(checkpoint_filepath(s.name, pipeline.manager)
                        for s in pipeline.stages())
    assert exp_cp_fpaths == set(fetch_checkpoint_files(pipeline.manager))
    final_stage = pipeline.stages()[-1]
    final_stage_fpath = checkpoint_filepath(final_stage.name, pipeline.manager)
    os.unlink(final_stage_fpath)

    # Verify the effect of the second execution of the pipeline.
    pipeline.run()
    with open(output_file, 'r') as f:
        lines = f.readlines()
    assert [final_stage.name + os.linesep] == lines

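# A hypothetical sketch of the skip behavior exercised above: run a stage only
# when its checkpoint file is absent, and write the checkpoint once the stage
# completes. This models the control flow the test depends on; it is not the
# actual Pipeline.run implementation.

def _illustrative_run_stages(stages, has_checkpoint, run_stage,
                             write_checkpoint):
    """ Sketch: execute only stages whose checkpoint file does not exist. """
    for stage in stages:
        if has_checkpoint(stage):
            # An extant checkpoint marks the stage as already done; skip it.
            continue
        run_stage(stage)
        write_checkpoint(stage)
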
def test_two_retrospective_checkpointed_timestamps(
        self, test_type, stage_pair, pm):
    """ Retrospective timestamp generates file for current checkpoint. """
    stage1, stage2 = stage_pair
    pm.timestamp(checkpoint=stage1, finished=True)
    pm.timestamp(checkpoint=stage2, finished=True)
    if test_type == FILES_TEST:
        checkpoint_files = fetch_checkpoint_files(pm)
        expected = [checkpoint_filepath(s, pm) for s in [stage1, stage2]]
        assert set(expected) == set(checkpoint_files)
    else:
        assert stage2 == pm.prev_checkpoint
        assert pm.curr_checkpoint is None

def test_two_prospective_checkpointed_timestamps(
        self, test_type, stage_pair, pm):
    """ Prospective timestamp generates file for previous checkpoint. """
    stage1, stage2 = stage_pair
    pm.timestamp(checkpoint=stage1, finished=False)
    pm.timestamp(checkpoint=stage2, finished=False)
    if test_type == FILES_TEST:
        checkpoint_files = fetch_checkpoint_files(pm)
        expected = [checkpoint_filepath(stage1, pm)]
        assert set(expected) == set(checkpoint_files)
    else:
        assert stage1 == pm.prev_checkpoint
        assert stage2 == pm.curr_checkpoint

def test_retrospective_then_prospective_checkpointed_timestamps(
        self, test_type, stage_pair, pm):
    """ Test retrospective timestamp followed by prospective one. """
    stage1, stage2 = stage_pair
    pm.timestamp(checkpoint=stage1, finished=True)
    assert stage1 == pm.prev_checkpoint
    assert pm.curr_checkpoint is None
    pm.timestamp(checkpoint=stage2, finished=False)
    if test_type == FILES_TEST:
        expected = [checkpoint_filepath(stage1, pm)]
        assert set(expected) == set(fetch_checkpoint_files(pm))
    else:
        assert pm.prev_checkpoint is None
        assert stage2 == pm.curr_checkpoint

def test_prospective_then_retrospective_checkpointed_timestamps(
        self, test_type, stage_pair, pm):
    """ If a prospective checkpointed timestamp is followed by a
    retrospective one, there's only a file for the retrospective one. """
    stage1, stage2 = stage_pair
    pm.timestamp(checkpoint=stage1, finished=False)
    assert stage1 == pm.curr_checkpoint
    pm.timestamp(checkpoint=stage2, finished=True)
    if test_type == FILES_TEST:
        checkpoint_files = fetch_checkpoint_files(pm)
        expected = [checkpoint_filepath(stage2, pm)]
        assert set(expected) == set(checkpoint_files)
    else:
        # Current checkpoint will be reset by second (retrospective)
        # timestamp call.
        assert stage2 == pm.prev_checkpoint
        assert pm.curr_checkpoint is None

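# The four timestamp tests above are consistent with bookkeeping along these
# lines: a retrospective call (finished=True) writes a checkpoint file for the
# named stage, records it as prev_checkpoint, and clears curr_checkpoint; a
# prospective call (finished=False) writes a file only for a previously opened
# checkpoint, then records the named stage as curr_checkpoint. The class below
# is a hypothetical model for illustration, not the PipelineManager code.

class _IllustrativeCheckpointState(object):
    """ Sketch: prev/curr checkpoint bookkeeping implied by the tests. """

    def __init__(self):
        self.prev_checkpoint = None
        self.curr_checkpoint = None
        self.files_written = []

    def timestamp(self, checkpoint, finished):
        if finished:
            # Retrospective: checkpoint the named stage itself.
            self.files_written.append(checkpoint)
            self.prev_checkpoint = checkpoint
            self.curr_checkpoint = None
        else:
            # Prospective: close out any open checkpoint, then open this one.
            if self.curr_checkpoint is not None:
                self.files_written.append(self.curr_checkpoint)
                self.prev_checkpoint = self.curr_checkpoint
            else:
                self.prev_checkpoint = None
            self.curr_checkpoint = checkpoint
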
def test_pipeline_checkpoint_respect_sensitivity_checkpoint_perspective(
        pl_name, tmpdir):
    """ Pipeline can skip past its stage(s) for which checkpoint exists. """

    # Create the pipeline.
    pipeline = get_pipeline(pl_name, tmpdir.strpath)

    # Negative control to start the test: we have no checkpoint files.
    assert [] == fetch_checkpoint_files(pipeline.manager)

    # Generate some checkpoints.
    pipeline.run()

    # Verify that we created each of the checkpoints.
    expected = [checkpoint_filepath(f.__name__, pipeline.manager)
                for f in pipeline.functions]
    observed = fetch_checkpoint_files(pipeline.manager)
    assert set(expected) == set(observed)

    # Collect checkpoint file timestamps for comparison after second run.
    timestamps = {f: os.path.getmtime(f) for f in observed}

    # Remove the checkpoint for the final stage.
    last_aligner_stage = pipeline.functions[-1]
    last_aligner_checkfile = checkpoint_filepath(
        last_aligner_stage, pipeline.manager)
    os.unlink(last_aligner_checkfile)

    # Verify removal of the final stage's checkpoint file.
    assert all([os.path.isfile(f) for f in expected[:-1]])
    assert not os.path.exists(last_aligner_checkfile)
    assert set(expected) != set(fetch_checkpoint_files(pipeline.manager))

    # Delay briefly so that we can more reliably compare checkpoint file
    # timestamps after a second pipeline run.
    time.sleep(0.05)

    # Repeat the pipeline's execution, but now with checkpoint file(s) for a
    # subset of its stages in place.
    pipeline.run()

    # Verify that we've restored the full collection of the pipeline's
    # checkpoint files.
    observed = fetch_checkpoint_files(pipeline.manager)
    exp = set(expected)
    obs = set(observed)
    assert set(expected) == set(observed), \
        "Expected only:\n{}\nExpected and observed:\n{}\nObserved only:\n{}".\
        format(exp - obs, exp & obs, obs - exp)

    # Verify that we didn't recreate the checkpoint file for any skipped stage.
    for f in expected[:-1]:
        expected_timestamp = timestamps[f]
        observed_timestamp = os.path.getmtime(f)
        assert expected_timestamp == observed_timestamp

    # Verify that we did in fact recreate the checkpoint file for the stage
    # that was rerun.
    assert os.path.getmtime(last_aligner_checkfile) > \
        timestamps[last_aligner_checkfile], \
        "Recreated checkpoint file ('{}') should be newer than original".\
        format(last_aligner_checkfile)

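# fetch_checkpoint_files is a helper defined elsewhere in the test suite; a
# glob over the manager's output folder would behave the way the tests above
# expect. The helper below and the ".checkpoint" suffix are illustrative
# assumptions, not the actual definition.
import glob

def _illustrative_fetch_checkpoint_files(outfolder):
    """ Sketch: list all checkpoint files under an output folder. """
    return glob.glob(os.path.join(outfolder, "*.checkpoint"))
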