def test_orc_datalad_pair_new_submodule(job_spec, dataset, shell):
    """A subdataset added between runs doesn't break prepare_remote()."""
    def run_job():
        # One full job cycle with the current job_spec.
        o = orcs.DataladPairOrchestrator(shell, submission_type="local",
                                         job_spec=job_spec)
        o.prepare_remote()
        o.submit()
        o.follow()
        o.fetch()

    with chpwd(dataset.path):
        run_job()
        # prepare_remote() doesn't fail when a new subdataset is added after
        # the first run.
        sub = dataset.create("sub")
        dataset.save()
        job_spec["_resolved_command_str"] = "sh -c 'echo a >sub/a'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []
        run_job()
        assert sub.repo.is_under_annex("a")
def test_orc_datalad_pair_need_follow_parent(job_spec, dataset, shell):
    # An example of a scenario that fails without DataLad's --follow=parentds
    with chpwd(dataset.path):
        dataset.create("sub")
        dataset.save()

        # First job writes only to the top-level dataset.
        job_spec["_resolved_command_str"] = "sh -c 'echo baz >baz'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []
        first = orcs.DataladPairOrchestrator(shell, submission_type="local",
                                             job_spec=job_spec)
        first.prepare_remote()
        first.submit()
        first.follow()

        # Second job writes inside the subdataset.
        job_spec["_resolved_command_str"] = "sh -c 'echo bar >sub/bar'"
        subfile = op.join("sub", "bar")
        job_spec["outputs"] = [subfile]
        second = orcs.DataladPairOrchestrator(shell, submission_type="local",
                                              job_spec=job_spec)
        second.prepare_remote()
        second.submit()
        second.follow()
        second.fetch()
        assert op.exists(subfile)
def test_orc_datalad_abort_if_dirty(job_spec, dataset, shell):
    """prepare_remote() aborts when the remote working tree is dirty."""
    with chpwd(dataset.path):
        orc0 = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        # Run one job so that we create the remote repository.
        orc0.prepare_remote()
        orc0.submit()
        orc0.follow()

    with chpwd(dataset.path):
        orc1 = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        # Make the remote working tree dirty with an untracked file.
        create_tree(orc1.working_directory, {"dirty": ""})
        with pytest.raises(OrchestratorError) as exc:
            orc1.prepare_remote()
        # BUGFIX: str(exc) stringifies the ExceptionInfo wrapper (its repr /
        # location), not the raised exception; check the exception's message
        # via exc.value instead.
        assert "dirty" in str(exc.value)
def test_orc_datalad_pair_submodule_conflict(caplog, job_spec, dataset, shell):
    # In this scenario, one job modifies a submodule, and before that change,
    # another job is launched that modifies the same submodule. This creates a
    # change that can't be brought in with `datalad update` because, even with
    # --follow=parentds, the top-level repo still brings in changes from the
    # remote, whose branch points to the first job. In a diagram, the remote
    # state is:
    #
    #         ---- job 1 (branch)
    # base --|
    #         ---- job 2 (detached)
    #
    # On fetch of job 2, we merge the job 2 ref. The `datalad update` call
    # fails trying to merge in branch.
    #
    # If this scenario ends up being common enough, we could consider modifying
    # `datalad update` to optionally not try to merge the remote state of the
    # top-level repo.
    with chpwd(dataset.path):
        dataset.create("sub")
        dataset.save()

        job_spec["inputs"] = []
        job_spec["outputs"] = []

        # Job 1 touches the submodule.
        job_spec["_resolved_command_str"] = "sh -c 'echo baz >sub/baz'"
        first = orcs.DataladPairOrchestrator(shell, submission_type="local",
                                             job_spec=job_spec)
        first.prepare_remote()
        first.submit()
        first.follow()

        # Job 2 touches the same submodule before job 1 is fetched.
        job_spec["_resolved_command_str"] = "sh -c 'echo bar >sub/bar'"
        second = orcs.DataladPairOrchestrator(shell, submission_type="local",
                                              job_spec=job_spec)
        second.prepare_remote()
        second.submit()
        second.follow()

        # swallow_logs() won't work here because it hard codes the logger and
        # the log message being checked is bubbled up by DataLad.
        caplog.clear()
        with caplog.at_level(logging.ERROR):
            second.fetch()
        assert "CONFLICT" in caplog.text
        assert dataset.repo.call_git(["ls-files", "--unmerged"]).strip()
def test_orc_datalad_abort_if_detached(job_spec, dataset, shell):
    """prepare_remote() refuses to run from a detached HEAD."""
    # Detach HEAD at the current commit.
    dataset.repo.checkout("HEAD^{}")

    with chpwd(dataset.path):
        orchestrator = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        with pytest.raises(OrchestratorError):
            orchestrator.prepare_remote()
def test_orc_datalad_resurrect(job_spec, dataset, shell):
    """A resurrected orchestrator takes its head from the job spec."""
    # These values are required by the constructor but irrelevant here.
    job_spec.update({k: "doesn't matter"
                     for k in ["jobid", "working_directory",
                               "root_directory", "local_directory"]})
    job_spec["head"] = "deadbee"

    with chpwd(dataset.path):
        orchestrator = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec,
            resurrection=True)
    assert orchestrator.head == "deadbee"
def test_orc_datalad_pair(job_spec, dataset, shell):
    """Smoke test of a full local pair-orchestrator job cycle."""
    with chpwd(dataset.path):
        orchestrator = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        for step in (orchestrator.prepare_remote, orchestrator.submit,
                     orchestrator.follow, orchestrator.fetch):
            step()
        # The local fetch variant doesn't currently get the content, so just
        # check that the file is under annex.
        assert dataset.repo.is_under_annex("out")
def test_orc_datalad_pair_existing_remote(job_spec, dataset, shell):
    """An existing remote with the resource name must match the target URL."""
    root_directory = job_spec["root_directory"]
    # Pre-create a remote under the resource's name with a bogus URL.
    dataset.repo.add_remote("localshell", "i-dont-match")

    with chpwd(dataset.path):
        orchestrator = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        # If a remote with the resource name exists, we abort if the
        # URL doesn't match the expected target...
        with pytest.raises(OrchestratorError):
            orchestrator.prepare_remote()
        # ... and continue if it does.
        dataset.repo.set_remote_url("localshell",
                                    orchestrator.working_directory)
        orchestrator.prepare_remote()
def test_orc_datalad_pair_submodule(job_spec, dataset, shell):
    # Smoke test that triggers the failure from gh-499
    with chpwd(dataset.path):
        dataset.create("sub")
        dataset.save()

        job_spec["_resolved_command_str"] = "sh -c 'echo foo >sub/foo'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []
        orchestrator = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        for step in (orchestrator.prepare_remote, orchestrator.submit,
                     orchestrator.follow, orchestrator.fetch):
            step()
def test_orc_datalad_pair_merge_conflict(job_spec, dataset, shell):
    """A local commit conflicting with the job result surfaces as a warning."""
    with chpwd(dataset.path):
        job_spec["_resolved_command_str"] = "sh -c 'echo baz >baz'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []
        orchestrator = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        orchestrator.prepare_remote()
        orchestrator.submit()
        orchestrator.follow()

        # Introduce a conflict.
        (dataset.pathobj / "baz").write_text("different")
        dataset.save()

        with swallow_logs(new_level=logging.WARNING) as logs:
            orchestrator.fetch()
            assert "Failed to merge in changes" in logs.out
        assert dataset.repo.call_git(["ls-files", "--unmerged"]).strip()