def __init__(self, resource, submission_type, job_spec=None,
             resurrection=False):
    external_versions.check("datalad", min_version="0.13")
    super(DataladOrchestrator, self).__init__(
        resource, submission_type, job_spec, resurrection=resurrection)

    from datalad.api import Dataset
    self.ds = Dataset(".")
    if not self.ds.id:
        raise OrchestratorError("orchestrator {} requires a local dataset"
                                .format(self.name))

    if self._resurrection:
        self.head = self.job_spec.get("_head")
    else:
        if self.ds.repo.dirty:
            raise OrchestratorError("Local dataset {} is dirty. "
                                    "Save or discard uncommitted changes"
                                    .format(self.ds.path))
        self._configure_repo()
        self.head = self.ds.repo.get_hexsha()
        _datalad_check_container(self.ds, self.job_spec)
        _datalad_format_command(self.ds, self.job_spec)

    if isinstance(self.session, SSHSession) and resource.key_filename:
        # Make the identity file available to 'datalad sshrun' even
        # if it is not configured in .ssh/config. This is
        # particularly important for AWS keys.
        os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
        from datalad import cfg
        cfg.reload(force=True)
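The snippet above ends with the pattern that recurs throughout these examples: set a DATALAD_* environment variable, then force the process-wide configuration to be re-read so that already-imported DataLad code sees the change. A minimal, standalone sketch of that pattern, assuming only that datalad is installed (the identity-file path is a hypothetical placeholder):

import os

from datalad import cfg

# Hypothetical key path, used only for illustration.
os.environ["DATALAD_SSH_IDENTITYFILE"] = "/path/to/identity-key"

# force=True re-reads all configuration sources unconditionally rather than
# only when a config file on disk has changed.
cfg.reload(force=True)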
def __call__(module=None, verbose=False, nocapture=False, pdb=False, stop=False):
    if not module:
        from pkg_resources import iter_entry_points
        module = ['datalad']
        module.extend(ep.module_name for ep in iter_entry_points('datalad.tests'))

    module = ensure_list(module)
    lgr.info('Starting test run for module(s): %s', module)

    # Exception (traceback) logging is disabled by default. However, as of
    # now we do test logging output in (too) great detail. Therefore enable
    # it here, so `datalad-test` doesn't fail by default.
    # Can be removed whenever the tests don't require it.
    from datalad import cfg as dlcfg
    from datalad.tests.utils import patch
    try:
        with patch.dict('os.environ', {'DATALAD_LOG_EXC': '1'}):
            dlcfg.reload()
            for mod in module:
                datalad.test(module=mod, verbose=verbose,
                             nocapture=nocapture, pdb=pdb, stop=stop)
    finally:
        dlcfg.reload()
def test_plugin_config(path):
    # baseline behavior, empty datasets on create
    ds = create(dataset=opj(path, 'ds1'))
    eq_(sorted(os.listdir(ds.path)), ['.datalad', '.git', '.gitattributes'])
    # now we configure a plugin to run twice after `create`
    cfg.add('datalad.create.run-after',
            'add_readme filename=after1.txt',
            where='global')
    cfg.add('datalad.create.run-after',
            'add_readme filename=after2.txt',
            where='global')
    # force reload to pick up newly populated .gitconfig
    cfg.reload(force=True)
    assert_in('datalad.create.run-after', cfg)
    # and now we create a dataset and expect the two readme files
    # to be part of it
    ds = create(dataset=opj(path, 'ds'))
    ok_clean_git(ds.path)
    assert(exists(opj(ds.path, 'after1.txt')))
    assert(exists(opj(ds.path, 'after2.txt')))
    # cleanup
    cfg.unset(
        'datalad.create.run-after',
        where='global')
    assert_not_in('datalad.create.run-after', cfg)
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    str(manager.socket_dir),
                    get_connection_hash("localhost", identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    text_type(manager.socket_dir),
                    get_connection_hash("localhost", identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
def check_integration1(login, keyring,
                       path,
                       organization=None,
                       kwargs={},
                       oauthtokens=None):
    kwargs = kwargs.copy()
    if organization:
        kwargs['github_organization'] = organization

    ds = Dataset(path).create()
    if oauthtokens:
        for oauthtoken in assure_list(oauthtokens):
            ds.config.add('hub.oauthtoken', oauthtoken, where='local')

    # so we do not pick up local repo configuration/token
    repo_name = 'test_integration1'
    with chpwd(path):
        # ATM all the github goodness does not care about "this dataset"
        # so force "process wide" cfg to pick up our defined above oauthtoken
        cfg.reload(force=True)
        # everything works just nice, no conflicts etc
        res = ds.create_sibling_github(repo_name, **kwargs)

        if organization:
            url_fmt = 'https://{login}@github.com/{organization}/{repo_name}.git'
        else:
            url_fmt = 'https://github.com/{login}/{repo_name}.git'
        eq_(res, [(ds, url_fmt.format(**locals()), False)])

        # but if we rerun - should kaboom since already has this sibling:
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, **kwargs)
        assert_in("already has a configured sibling", str(cme.exception))

        # but we can give it a new name, but it should kaboom since the remote one
        # exists already
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, name="github2", **kwargs)
        assert_in("already exists on", str(cme.exception))
        # we should not leave the broken sibling behind
        assert_not_in('github2', ds.repo.get_remotes())

        # If we ask to reconfigure - should proceed normally
        ds.create_sibling_github(repo_name, existing='reconfigure', **kwargs)
        cfg.reload(force=True)
def test_CapturedException():

    try:
        raise Exception("BOOM")
    except Exception as e:
        captured_exc = CapturedException(e)

    assert_re_in(
        r"BOOM \[test_captured_exception.py:test_CapturedException:[0-9]+\]",
        captured_exc.format_oneline_tb())
    assert_re_in(
        r"^\[.*\]",
        captured_exc.format_oneline_tb(include_str=False))  # only traceback

    try:
        raise NotImplementedError
    except Exception as e:
        captured_exc = CapturedException(e)
        assert_re_in(
            r"NotImplementedError \[test_captured_exception.py:test_CapturedException:[0-9]+\]",
            captured_exc.format_oneline_tb())

    def f():
        def f2():
            raise Exception("my bad again")
        try:
            f2()
        except Exception as e:
            # exception chain
            raise RuntimeError("new message") from e

    try:
        f()
    except Exception as e:
        captured_exc = CapturedException(e)

    # default limit: one level:
    estr1 = captured_exc.format_oneline_tb(limit=1)
    estr2 = captured_exc.format_oneline_tb(limit=2)
    # and we can control it via environ/config by default
    try:
        with patch.dict('os.environ', {'DATALAD_EXC_STR_TBLIMIT': '3'}):
            cfg.reload()
            estr3 = captured_exc.format_oneline_tb()
        with patch.dict('os.environ', {}, clear=True):
            cfg.reload()
            estr_ = captured_exc.format_oneline_tb()
    finally:
        cfg.reload()  # make sure we don't have a side effect on other tests

    estr_full = captured_exc.format_oneline_tb(10)
    assert_re_in(
        r"new message \[test_captured_exception.py:test_CapturedException:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr_full)
    assert_re_in(
        r"new message \[test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr3)
    assert_re_in(
        r"new message \[test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr2)
    assert_re_in(r"new message \[test_captured_exception.py:f2:[0-9]+\]", estr1)

    # default: no limit:
    assert_equal(estr_, estr_full)

    # standard output
    full_display = captured_exc.format_standard().splitlines()

    assert_equal(full_display[0], "Traceback (most recent call last):")
    # points in f and f2 for first exception with two lines each
    # (where is the line and what reads the line):
    assert_true(full_display[1].lstrip().startswith("File"))
    assert_equal(full_display[2].strip(), "f2()")
    assert_true(full_display[3].lstrip().startswith("File"))
    assert_equal(full_display[4].strip(), "raise Exception(\"my bad again\")")
    assert_equal(full_display[5].strip(), "Exception: my bad again")
    assert_equal(
        full_display[7].strip(),
        "The above exception was the direct cause of the following exception:")
    assert_equal(full_display[9], "Traceback (most recent call last):")
    # ...
    assert_equal(full_display[-1].strip(), "RuntimeError: new message")

    # CapturedException.__repr__:
    assert_re_in(r".*test_captured_exception.py:f2:[0-9]+\]$",
                 captured_exc.__repr__())
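For reference, the override-and-restore idiom the test above relies on can be reduced to a few lines. This is a sketch only: the environment variable name is taken from the test, unittest.mock.patch stands in for the patch helper imported there, and the body of the with block is a placeholder.

from unittest.mock import patch

from datalad import cfg

try:
    with patch.dict('os.environ', {'DATALAD_EXC_STR_TBLIMIT': '3'}):
        cfg.reload()  # re-read configuration so the patched environment takes effect
        # ... exercise code that consults the configuration here ...
finally:
    cfg.reload()  # drop the override so later code sees the original configuration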
def prepare_remote(self):
    """Prepare dataset sibling on remote.
    """
    if not self.ds.repo.get_active_branch():
        # publish() fails when HEAD is detached.
        raise OrchestratorError(
            "You must be on a branch to use the {} orchestrator".format(
                self.name))
    if not self.session.exists(self.root_directory):
        self.session.mkdir(self.root_directory, parents=True)

    resource = self.resource
    session = self.session

    inputs = list(self.get_inputs())
    if isinstance(session, SSHSession):
        if resource.key_filename:
            dl_version = external_versions["datalad"]
            if dl_version < "0.11.3":
                # Connecting will probably fail because `key_filename` is
                # set, but we have no way to tell DataLad about it.
                lgr.warning(
                    "DataLad version %s detected. "
                    "0.11.3 or greater is required to use an "
                    "identity file not specified in ~/.ssh/config",
                    dl_version)
            # Make the identity file available to 'datalad sshrun' even if
            # it is not configured in .ssh/config. This is particularly
            # important for AWS keys.
            os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
            from datalad import cfg
            cfg.reload(force=True)
        sshurl = _format_ssh_url(
            resource.user,
            # AWS resource does not have host attribute.
            getattr(resource, "host", None) or session.connection.host,
            getattr(resource, "port", None),
            self.working_directory)

        # TODO: Add one level deeper with reckless clone per job to deal
        # with concurrent jobs?
        if not session.exists(self.working_directory):
            remotes = self.ds.repo.get_remotes()
            if resource.name in remotes:
                raise OrchestratorError(
                    "Remote '{}' unexpectedly exists. "
                    "Either delete remote or rename resource.".format(
                        resource.name))

            self.ds.create_sibling(sshurl, name=resource.name,
                                   recursive=True)
            since = None  # Avoid since="" for non-existing repo.
        else:
            remote_branch = "{}/{}".format(
                resource.name,
                self.ds.repo.get_active_branch())
            if self.ds.repo.commit_exists(remote_branch):
                since = ""
            else:
                # If the remote branch doesn't exist yet, publish will fail
                # with since="".
                since = None

        from datalad.support.exceptions import IncompleteResultsError
        try:
            self.ds.publish(to=resource.name, since=since, recursive=True)
        except IncompleteResultsError:
            raise OrchestratorError(
                "'datalad publish' failed. Try running "
                "'datalad update -s {} --merge --recursive' first".format(
                    resource.name))

        self._fix_up_dataset()

        if inputs:
            lgr.info("Making inputs available")
            try:
                # TODO: Whether we try this `get` should be configurable.
                self._execute_in_wdir("datalad get {}".format(
                    # FIXME: This should use something like
                    # execute_command_batch.
                    " ".join(map(shlex_quote, inputs))))
            except OrchestratorError:
                # Should use --since for existing repo, but it doesn't seem
                # to sync wrt content.
                self.ds.publish(to=resource.name, path=inputs,
                                recursive=True)
    elif resource.type == "shell":
        import datalad.api as dl
        if not session.exists(self.working_directory):
            dl.install(self.working_directory, source=self.ds.path)

        self.session.execute_command(
            "git push '{}' HEAD:{}-base".format(
                self.working_directory, self.job_refname))
        self._checkout_target()

        if inputs:
            installed_ds = dl.Dataset(self.working_directory)
            installed_ds.get(inputs)
    else:
        # TODO: Handle more types?
        raise OrchestratorError("Unsupported resource type {}".format(
            resource.type))

    if not session.exists(self.meta_directory):
        session.mkdir(self.meta_directory, parents=True)
def prepare_remote(self):
    """Prepare dataset sibling on remote.
    """
    repo = self.ds.repo
    if not repo.get_active_branch():
        # publish() fails when HEAD is detached.
        raise OrchestratorError(
            "You must be on a branch to use the {} orchestrator"
            .format(self.name))
    if not self.session.exists(self.root_directory):
        self.session.mkdir(self.root_directory, parents=True)

    resource = self.resource
    session = self.session

    inputs = list(self.get_inputs())
    if isinstance(session, (SSHSession, ShellSession)):
        if isinstance(session, SSHSession):
            if resource.key_filename:
                # Make the identity file available to 'datalad sshrun' even
                # if it is not configured in .ssh/config. This is
                # particularly important for AWS keys.
                os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
                from datalad import cfg
                cfg.reload(force=True)
            target_path = _format_ssh_url(
                resource.user,
                # AWS resource does not have host attribute.
                getattr(resource, "host", None) or session.connection.host,
                getattr(resource, "port", None),
                self.working_directory)
        else:
            target_path = self.working_directory

        # TODO: Add one level deeper with reckless clone per job to deal
        # with concurrent jobs?
        target_exists = session.exists(self.working_directory)
        if not target_exists:
            since = None  # Avoid since="" for non-existing repo.
        else:
            remote_branch = "{}/{}".format(
                resource.name,
                repo.get_active_branch())
            if repo.commit_exists(remote_branch):
                since = ""
            else:
                # If the remote branch doesn't exist yet, publish will fail
                # with since="".
                since = None

        remotes = repo.get_remotes()
        if resource.name in remotes:
            if repo.get_remote_url(resource.name) != target_path:
                raise OrchestratorError(
                    "Remote '{}' already exists with another URL. "
                    "Either delete remote or rename resource."
                    .format(resource.name))
            elif not target_exists:
                lgr.debug(
                    "Remote '%s' matches resource name "
                    "and points to the expected target, "
                    "which doesn't exist. "
                    "Removing remote and recreating",
                    resource.name)
                repo.remove_remote(resource.name)

        self.ds.create_sibling(target_path, name=resource.name,
                               recursive=True, existing="skip")

        call_check_dl_results(
            self.ds.publish, "'datalad publish' failed",
            to=resource.name, since=since, recursive=True,
            on_failure="ignore")

        self._fix_up_dataset()

        if inputs:
            lgr.info("Making inputs available")
            try:
                # TODO: Whether we try this `get` should be configurable.
                self._execute_in_wdir("datalad get {}".format(
                    # FIXME: This should use something like
                    # execute_command_batch.
                    " ".join(map(shlex_quote, inputs))))
            except OrchestratorError:
                # Should use --since for existing repo, but it doesn't seem
                # to sync wrt content.
                self.ds.publish(to=resource.name, path=inputs,
                                recursive=True)
    else:
        # TODO: Handle more types?
        raise OrchestratorError("Unsupported resource type {}"
                                .format(resource.type))

    if not session.exists(self.meta_directory):
        session.mkdir(self.meta_directory, parents=True)