def __init__(self, ctrl_path, sshri):
    """Create a connection handler

    The connection itself is opened lazily, on first use.

    Parameters
    ----------
    ctrl_path: str
      path to SSH controlmaster
    sshri: SSHRI
      SSH resource identifier (contains all connection-relevant info),
      or another resource identifier that can be converted into an SSHRI.
    """
    from datalad.support.network import SSHRI, is_ssh

    self._runner = None

    if not is_ssh(sshri):
        raise ValueError(
            "Non-SSH resource identifiers are not supported for SSH "
            "connections: {}".format(sshri))

    # retain only the fields that matter for establishing the connection
    relevant = ('username', 'hostname', 'port')
    self.sshri = SSHRI(
        **{field: value
           for field, value in sshri.fields.items()
           if field in relevant})
    self.ctrl_path = ctrl_path
    # -o ControlPath=... routes every command through the shared master
    self._ctrl_options = ["-o", "ControlPath=\"%s\"" % self.ctrl_path]
    if self.sshri.port:
        self._ctrl_options += ['-p', '{}'.format(self.sshri.port)]

    # essential properties of the remote system
    self._remote_props = {}
    self._opened_by_us = False
def _check_and_update_remote_server_info(ds, remote):
    """Trigger `git update-server-info` on an http(s)-published remote.

    If we managed to copy to an "http" url we should try to trigger the git
    update-server-info hook on the remote, if an ssh annexurl is defined
    for it (plain http gives no way to run commands remotely). Apparently
    we do that already in create_sibling, but here we need more checks and
    preparation.

    Returns
    -------
    bool
      True if update-server-info was executed on the remote, False
      otherwise.
    """
    remote_url = ds.repo.config.get('remote.%s.url' % remote, None)
    if remote_url:
        remote_url = RI(remote_url)
        if isinstance(remote_url, URL) and remote_url.scheme in ('http', 'https'):
            remote_annexurl = ds.repo.config.get('remote.%s.annexurl' % remote,
                                                 None)
            if remote_annexurl:
                remote_annexurl_ri = RI(remote_annexurl)
                if is_ssh(remote_annexurl_ri):
                    ssh = ssh_manager.get_connection(remote_annexurl_ri)
                    ssh('git -C {} update-server-info'.format(
                        sh_quote(remote_annexurl_ri.path)))
                    return True
                else:
                    # fix: original message claimed "There is no annexurl
                    # defined", although this branch is reached only when
                    # one IS defined -- it is just not an ssh URL
                    lgr.debug(
                        "annexurl %s is defined but is not ssh, "
                        "dunno if we could/should do anything",
                        remote_annexurl)
    return False
def get_connection(self, url):
    """Get a singleton, representing a shared ssh connection to `url`

    Parameters
    ----------
    url: str
      ssh url

    Returns
    -------
    SSHConnection
    """
    from datalad.support.network import RI, is_ssh

    # parse url:
    sshri = RI(url)
    if not is_ssh(sshri):
        raise ValueError(
            "Unsupported SSH URL: '{0}', use ssh://host/path or host:path syntax"
            .format(url))

    # one control socket per host (and port), so each target gets its own
    ctrl_path = "%s/%s" % (self.socket_dir, sshri.hostname)
    if sshri.port:
        ctrl_path += ":%s" % sshri.port

    # reuse a known connection, or create and register a fresh one
    known = self._connections.get(ctrl_path)
    if known is not None:
        return known
    conn = SSHConnection(ctrl_path, sshri.hostname)
    self._connections[ctrl_path] = conn
    return conn
def _check_and_update_remote_server_info(ds, remote):
    """Trigger `git update-server-info` on an http(s)-published remote.

    If we managed to copy to an "http" url we should try to trigger the git
    update-server-info hook on the remote, if an ssh annexurl is defined
    for it (plain http gives no way to run commands remotely). Apparently
    we do that already in create_sibling, but here we need more checks and
    preparation.

    Returns
    -------
    bool
      True if update-server-info was executed on the remote, False
      otherwise.
    """
    remote_url = ds.repo.config.get('remote.%s.url' % remote, None)
    if remote_url:
        remote_url = RI(remote_url)
        if isinstance(remote_url, URL) and remote_url.scheme in (
                'http', 'https'):
            remote_annexurl = ds.repo.config.get('remote.%s.annexurl' % remote,
                                                 None)
            if remote_annexurl:
                remote_annexurl_ri = RI(remote_annexurl)
                if is_ssh(remote_annexurl_ri):
                    ssh = ssh_manager.get_connection(remote_annexurl_ri)
                    ssh('git -C {} update-server-info'.format(
                        sh_quote(remote_annexurl_ri.path)))
                    return True
                else:
                    # fix: original message claimed "There is no annexurl
                    # defined", although this branch is reached only when
                    # one IS defined -- it is just not an ssh URL
                    lgr.debug(
                        "annexurl %s is defined but is not ssh, "
                        "dunno if we could/should do anything",
                        remote_annexurl
                    )
    return False
def get_connection(self, url):
    """Return the shared SSHConnection singleton for `url`.

    Parameters
    ----------
    url: str
      ssh url

    Returns
    -------
    SSHConnection
    """
    from datalad.support.network import RI, is_ssh

    sshri = RI(url)
    if not is_ssh(sshri):
        raise ValueError("Unsupported SSH URL: '{0}', use ssh://host/path or host:path syntax".format(url))

    # control master socket is keyed on host(:port)
    ctrl_path = "%s/%s" % (self.socket_dir, sshri.hostname)
    if sshri.port:
        ctrl_path += ":%s" % sshri.port

    # create on first request, then hand out the cached instance
    if ctrl_path not in self._connections:
        self._connections[ctrl_path] = SSHConnection(
            ctrl_path, sshri.hostname)
    return self._connections[ctrl_path]
def __init__(self, sshri, identity_file=None,
             use_remote_annex_bundle=None, force_ip=False):
    """Create a connection handler

    The actual opening of the connection is performed on-demand.

    Parameters
    ----------
    sshri: SSHRI
      SSH resource identifier (contains all connection-relevant info),
      or another resource identifier that can be converted into an SSHRI.
    identity_file : str or None
      Value to pass to ssh's -i option.
    use_remote_annex_bundle : bool, optional
      If enabled, look for a git-annex installation on the remote and
      prefer its Git binaries in the search path (i.e. prefer a bundled
      Git over a system package). See also the configuration setting
      datalad.ssh.try-use-annex-bundled-git
    force_ip : {False, 4, 6}
      Force the use of IPv4 or IPv6 addresses with -4 or -6.

    .. versionchanged:: 0.16
       The default for `use_remote_annex_bundle` changed from `True` to
       `None`. Instead of attempting to use a potentially available
       git-annex bundle on the remote host by default, this behavior is
       now conditional on the `datalad.ssh.try-use-annex-bundled-git`
       (off by default).
    """
    from datalad.support.network import SSHRI, is_ssh

    self._runner = None
    self._ssh_executable = None

    if not is_ssh(sshri):
        raise ValueError(
            "Non-SSH resource identifiers are not supported for SSH "
            "connections: {}".format(sshri))
    # keep only the connection-relevant pieces of the RI
    wanted = ('username', 'hostname', 'port')
    self.sshri = SSHRI(
        **{name: val for name, val in sshri.fields.items()
           if name in wanted})

    # arguments only used for opening a connection
    self._ssh_open_args = []
    # arguments for annex ssh invocation
    self._ssh_args = []
    if self.sshri.port:
        self._ssh_open_args += ['-p', '{}'.format(self.sshri.port)]
    if force_ip:
        self._ssh_open_args.append("-{}".format(force_ip))
    if identity_file:
        self._ssh_open_args += ["-i", identity_file]

    self._use_remote_annex_bundle = use_remote_annex_bundle

    # essential properties of the remote system
    self._remote_props = {}
def get_connection(self, url, use_remote_annex_bundle=True, force_ip=False):
    """Get a singleton, representing a shared ssh connection to `url`

    Parameters
    ----------
    url: str
      ssh url
    force_ip : {False, 4, 6}
      Force the use of IPv4 or IPv6 addresses.

    Returns
    -------
    SSHConnection
    """
    from datalad.support.network import RI, is_ssh
    from datalad import cfg

    # parse url, unless an RI was handed in directly
    if isinstance(url, RI):
        sshri = url
    else:
        if ':' not in url and '/' not in url:
            # it is just a hostname
            lgr.debug("Assuming %r is just a hostname for ssh connection",
                      url)
            url += ':'
        sshri = RI(url)

    if not is_ssh(sshri):
        raise ValueError("Unsupported SSH URL: '{0}', use "
                         "ssh://host/path or host:path syntax".format(url))

    identity_file = cfg.get("datalad.ssh.identityfile")

    # the socket name must capture everything that distinguishes
    # otherwise identical-looking connections
    conhash = get_connection_hash(
        sshri.hostname,
        port=sshri.port,
        identity_file=identity_file or "",
        username=sshri.username,
        bundled=use_remote_annex_bundle,
        force_ip=force_ip,
    )
    # determine control master:
    ctrl_path = self.socket_dir / conhash

    # hand out the cached connection, or create and register one
    conn = self._connections.get(ctrl_path)
    if conn is None:
        conn = SSHConnection(
            ctrl_path, sshri,
            identity_file=identity_file,
            use_remote_annex_bundle=use_remote_annex_bundle,
            force_ip=force_ip)
        self._connections[ctrl_path] = conn
    return conn
def __init__(self, ctrl_path, sshri, identity_file=None,
             use_remote_annex_bundle=True, force_ip=False):
    """Create a connection handler

    The actual opening of the connection is performed on-demand.

    Parameters
    ----------
    ctrl_path: str
      path to SSH controlmaster
    sshri: SSHRI
      SSH resource identifier (contains all connection-relevant info),
      or another resource identifier that can be converted into an SSHRI.
    identity_file : str or None
      Value to pass to ssh's -i option.
    use_remote_annex_bundle : bool
      If set, look for a git-annex installation on the remote and prefer
      its binaries in the search path (i.e. prefer a bundled Git over a
      system package).
    force_ip : {False, 4, 6}
      Force the use of IPv4 or IPv6 addresses with -4 or -6.
    """
    from datalad.support.network import SSHRI, is_ssh

    self._runner = None

    if not is_ssh(sshri):
        raise ValueError(
            "Non-SSH resource identifiers are not supported for SSH "
            "connections: {}".format(sshri))
    keep = ('username', 'hostname', 'port')
    self.sshri = SSHRI(
        **{f: v for f, v in sshri.fields.items() if f in keep})

    # on windows cmd args lists are always converted into a string using
    # appropriate quoting rules, on other platforms args lists are passed
    # directly and we need to take care of quoting ourselves
    if on_windows:
        ctrlpath_arg = "ControlPath={}".format(ctrl_path)
    else:
        ctrlpath_arg = "ControlPath={}".format(sh_quote(str(ctrl_path)))
    self._ssh_args = ["-o", ctrlpath_arg]
    self.ctrl_path = Path(ctrl_path)
    if self.sshri.port:
        self._ssh_args += ['-p', '{}'.format(self.sshri.port)]
    if force_ip:
        self._ssh_args.append("-{}".format(force_ip))
    self._identity_file = identity_file
    self._use_remote_annex_bundle = use_remote_annex_bundle

    # essential properties of the remote system
    self._remote_props = {}
    self._opened_by_us = False
def test_is_ssh():
    """Check is_ssh() classification of locators, both as str and as RI."""
    ssh_locators = (
        "ssh://host",
        "ssh://host/some/where",
        "user@host:path/sp1",
        "user@host:/absolute/path/sp1",
        "host:path/sp1",
        "host:/absolute/path/sp1",
        "user@host",
    )
    non_ssh_locators = (
        "file://path/to",
        "/abs/path",
        "../rel/path",
        "http://example.com",
        "git://host/user/proj",
        "s3://bucket/save/?key=891",
    )

    for ri in ssh_locators:
        ok_(is_ssh(ri), "not considered ssh (string): %s" % ri)
        ok_(is_ssh(RI(ri)), "not considered ssh (RI): %s" % ri)

    for ri in non_ssh_locators:
        ok_(not is_ssh(ri), "considered ssh (string): %s" % ri)
        ok_(not is_ssh(RI(ri)), "considered ssh (RI): %s" % ri)
def __init__(self, sshri, identity_file=None,
             use_remote_annex_bundle=True, force_ip=False):
    """Create a connection handler

    The actual opening of the connection is performed on-demand.

    Parameters
    ----------
    sshri: SSHRI
      SSH resource identifier (contains all connection-relevant info),
      or another resource identifier that can be converted into an SSHRI.
    identity_file : str or None
      Value to pass to ssh's -i option.
    use_remote_annex_bundle : bool
      If set, look for a git-annex installation on the remote and prefer
      its binaries in the search path (i.e. prefer a bundled Git over a
      system package).
    force_ip : {False, 4, 6}
      Force the use of IPv4 or IPv6 addresses with -4 or -6.
    """
    from datalad.support.network import SSHRI, is_ssh

    self._runner = None

    if not is_ssh(sshri):
        raise ValueError(
            "Non-SSH resource identifiers are not supported for SSH "
            "connections: {}".format(sshri))
    connection_fields = ('username', 'hostname', 'port')
    self.sshri = SSHRI(
        **{k: v for k, v in sshri.fields.items()
           if k in connection_fields})

    # arguments only used for opening a connection
    self._ssh_open_args = []
    # arguments for annex ssh invocation
    self._ssh_args = []
    if self.sshri.port:
        self._ssh_open_args += ['-p', '{}'.format(self.sshri.port)]
    if force_ip:
        self._ssh_open_args.append("-{}".format(force_ip))
    if identity_file:
        self._ssh_open_args += ["-i", identity_file]

    self._use_remote_annex_bundle = use_remote_annex_bundle

    # essential properties of the remote system
    self._remote_props = {}
def __init__(self, ctrl_path, sshri, identity_file=None,
             use_remote_annex_bundle=True):
    """Create a connection handler

    The actual opening of the connection is performed on-demand.

    Parameters
    ----------
    ctrl_path: str
      path to SSH controlmaster
    sshri: SSHRI
      SSH resource identifier (contains all connection-relevant info),
      or another resource identifier that can be converted into an SSHRI.
    identity_file : str or None
      Value to pass to ssh's -i option.
    use_remote_annex_bundle : bool
      If set, look for a git-annex installation on the remote and prefer
      its binaries in the search path (i.e. prefer a bundled Git over a
      system package).
    """
    from datalad.support.network import SSHRI, is_ssh

    self._runner = None

    if not is_ssh(sshri):
        raise ValueError(
            "Non-SSH resource identifiers are not supported for SSH "
            "connections: {}".format(sshri))
    fields_of_interest = ('username', 'hostname', 'port')
    self.sshri = SSHRI(
        **{k: v for k, v in sshri.fields.items()
           if k in fields_of_interest})

    # every invocation goes through the shared control master socket
    self._ctrl_options = ["-o", "ControlPath=\"%s\"" % ctrl_path]
    self.ctrl_path = Path(ctrl_path)
    if self.sshri.port:
        self._ctrl_options += ['-p', '{}'.format(self.sshri.port)]
    self._identity_file = identity_file
    self._use_remote_annex_bundle = use_remote_annex_bundle

    # essential properties of the remote system
    self._remote_props = {}
    self._opened_by_us = False
def get_connection(self, url):
    """Get a singleton, representing a shared ssh connection to `url`

    Parameters
    ----------
    url: str
      ssh url

    Returns
    -------
    SSHConnection
    """
    from datalad.support.network import RI, is_ssh

    # accept a pre-parsed RI, otherwise parse the url
    if isinstance(url, RI):
        sshri = url
    else:
        if ':' not in url and '/' not in url:
            # it is just a hostname
            lgr.debug("Assuming %r is just a hostname for ssh connection",
                      url)
            url += ':'
        sshri = RI(url)
    if not is_ssh(sshri):
        raise ValueError("Unsupported SSH URL: '{0}', use "
                         "ssh://host/path or host:path syntax".format(url))

    conhash = get_connection_hash(
        sshri.hostname, port=sshri.port, username=sshri.username)
    # determine control master:
    ctrl_path = "%s/%s" % (self.socket_dir, conhash)

    # hand out the cached connection, or create and register one
    try:
        return self._connections[ctrl_path]
    except KeyError:
        conn = SSHConnection(ctrl_path, sshri)
        self._connections[ctrl_path] = conn
        return conn
def _prep_connection_args(self, url):
    """Normalize `url` into an SSHRI and look up the configured identity file.

    Returns
    -------
    tuple
      (sshri, identity_file) where identity_file comes from the
      `datalad.ssh.identityfile` configuration setting.
    """
    from datalad.support.network import RI, is_ssh
    from datalad import cfg

    # accept a pre-parsed RI, otherwise parse the url
    if isinstance(url, RI):
        sshri = url
    else:
        if ':' not in url and '/' not in url:
            # it is just a hostname
            lgr.debug("Assuming %r is just a hostname for ssh connection",
                      url)
            url += ':'
        sshri = RI(url)
    if not is_ssh(sshri):
        raise ValueError("Unsupported SSH URL: '{0}', use "
                         "ssh://host/path or host:path syntax".format(url))

    identity_file = cfg.get("datalad.ssh.identityfile")
    return sshri, identity_file
def get_connection(self, url):
    """Get a singleton, representing a shared ssh connection to `url`

    Parameters
    ----------
    url: str
      ssh url

    Returns
    -------
    SSHConnection
    """
    from datalad.support.network import RI, is_ssh

    sshri = url
    if not isinstance(url, RI):
        if ':' not in url and '/' not in url:
            # it is just a hostname
            lgr.debug("Assuming %r is just a hostname for ssh connection",
                      url)
            url += ':'
        sshri = RI(url)
    if not is_ssh(sshri):
        raise ValueError("Unsupported SSH URL: '{0}', use "
                         "ssh://host/path or host:path syntax".format(url))

    conhash = get_connection_hash(sshri.hostname,
                                  port=sshri.port,
                                  username=sshri.username)
    # determine control master:
    ctrl_path = "%s/%s" % (self.socket_dir, conhash)

    # do we know it already?
    known = self._connections.get(ctrl_path)
    if known is not None:
        return known
    fresh = SSHConnection(ctrl_path, sshri)
    self._connections[ctrl_path] = fresh
    return fresh
def __call__(sshurl,
             *,
             name=None,
             target_dir=None,
             target_url=None,
             target_pushurl=None,
             dataset=None,
             recursive=False,
             recursion_limit=None,
             existing='error',
             shared=None,
             group=None,
             ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None,
             annex_wanted=None,
             annex_group=None,
             annex_groupwanted=None,
             inherit=False,
             since=None):
    """Create sibling repositories for the dataset (and optionally its
    subdatasets) on a target machine, then configure and finalize them.

    Yields result dicts per processed dataset.

    NOTE(review): reformatted from a whitespace-collapsed source; code
    tokens are unchanged.
    """
    if ui:
        # the webui has been moved to the deprecated extension
        try:
            from datalad_deprecated.sibling_webui \
                import upload_web_interface
        except Exception as e:
            # we could just test for ModuleNotFoundError (which should be
            # all that would happen with PY3.6+, but be a little more robust
            # and use the pattern from duecredit
            if type(e).__name__ not in ('ImportError', 'ModuleNotFoundError'):
                lgr.error(
                    "Failed to import datalad_deprecated.sibling_webui "
                    "due to %s", str(e))
            raise RuntimeError(
                "The DataLad web UI has been moved to an extension "
                "package. Please install the Python package "
                "`datalad_deprecated` to be able to deploy it.")

    # push uses '^' to annotate the previous pushed committish, and None for default
    # behavior. '' was/is (to be deprecated) used in `publish` and 'create-sibling'.
    # Alert user about the mistake
    if since == '':
        # deprecation was added prior 0.16.0
        import warnings
        warnings.warn("'since' should point to commitish or use '^'.",
                      DeprecationWarning)
        since = '^'
    #
    # nothing without a base dataset
    #
    ds = require_dataset(dataset, check_installed=True,
                         purpose='create sibling(s)')
    refds_path = ds.path
    #
    # all checks that are possible before we start parsing the dataset
    #
    if since and not recursive:
        raise ValueError("The use of 'since' requires 'recursive'")

    # possibly use sshurl to get the name in case if not specified
    if not sshurl:
        if not inherit:
            raise InsufficientArgumentsError(
                "needs at least an SSH URL, if no inherit option")
        if name is None:
            raise ValueError(
                "Neither SSH URL, nor the name of sibling to inherit from "
                "was specified")
        # It might well be that we already have this remote setup
        try:
            sshurl = CreateSibling._get_remote_url(ds, name)
        except Exception as exc:
            ce = CapturedException(exc)
            lgr.debug('%s does not know about url for %s: %s', ds, name, ce)
    elif inherit:
        raise ValueError(
            "For now, for clarity not allowing specifying a custom sshurl "
            "while inheriting settings")
        # may be could be safely dropped -- still WiP

    if not sshurl:
        # TODO: may be more back up before _prep?
        super_ds = ds.get_superdataset()
        if not super_ds:
            raise ValueError(
                "Could not determine super dataset for %s to inherit URL"
                % ds)
        super_url = CreateSibling._get_remote_url(super_ds, name)
        # for now assuming hierarchical setup
        # (TODO: to be able to distinguish between the two, probably
        # needs storing datalad.*.target_dir to have %RELNAME in there)
        sshurl = slash_join(super_url, relpath(refds_path, super_ds.path))

    # check the login URL
    sibling_ri = RI(sshurl)
    ssh_sibling = is_ssh(sibling_ri)
    if not (ssh_sibling or isinstance(sibling_ri, PathRI)):
        raise ValueError(
            "Unsupported SSH URL or path: '{0}', "
            "use ssh://host/path, host:path or path syntax".format(sshurl))

    if not name:
        name = sibling_ri.hostname if ssh_sibling else "local"
        lgr.info(
            "No sibling name given. Using %s'%s' as sibling name",
            "URL hostname " if ssh_sibling else "",
            name)
    if since == '^':
        # consider creating siblings only since the point of
        # the last update
        # XXX here we assume one to one mapping of names from local branches
        # to the remote
        active_branch = ds.repo.get_active_branch()
        since = '%s/%s' % (name, active_branch)

    to_process = []
    if recursive:
        #
        # parse the base dataset to find all subdatasets that need processing
        #
        cand_ds = [
            Dataset(r['path'])
            for r in diff_dataset(
                ds,
                fr=since,
                to='HEAD',
                # w/o False we might not follow into new subdatasets
                # which do not have that remote yet setup,
                # see https://github.com/datalad/datalad/issues/6596
                constant_refs=False,
                # save cycles, we are only looking for datasets
                annex=None,
                untracked='no',
                recursive=True,
                datasets_only=True,
            )
            # not installed subdatasets would be 'clean' so we would skip them
            if r.get('type') == 'dataset' and r.get('state', None) != 'clean'
        ]
        if not since:
            # not only subdatasets
            cand_ds = [ds] + cand_ds
    else:
        # only the current ds
        cand_ds = [ds]
    # check remotes setup()
    for d in cand_ds:
        d_repo = d.repo
        if d_repo is None:
            continue
        checkds_remotes = d.repo.get_remotes()
        res = dict(
            action='create_sibling',
            path=d.path,
            type='dataset',
        )

        if publish_depends:
            # make sure dependencies are valid
            # TODO: inherit -- we might want to automagically create
            # those dependents as well???
            unknown_deps = set(
                ensure_list(publish_depends)).difference(checkds_remotes)
            if unknown_deps:
                yield dict(
                    res,
                    status='error',
                    message=('unknown sibling(s) specified as publication '
                             'dependency: %s', unknown_deps),
                )
                continue
        if name in checkds_remotes and existing in ('error', 'skip'):
            yield dict(
                res,
                sibling_name=name,
                status='error' if existing == 'error' else 'notneeded',
                message=(
                    "sibling '%s' already configured (specify alternative "
                    "name, or force reconfiguration via --existing",
                    name),
            )
            continue
        to_process.append(res)

    if not to_process:
        # we ruled out all possibilities
        # TODO wait for gh-1218 and make better return values
        lgr.info("No datasets qualify for sibling creation. "
                 "Consider different settings for --existing "
                 "or --since if this is unexpected")
        return

    if ssh_sibling:
        # request ssh connection:
        lgr.info("Connecting ...")
        shell = ssh_manager.get_connection(sshurl)
    else:
        shell = _RunnerAdapter()
        sibling_ri.path = str(resolve_path(sibling_ri.path, dataset))
        if target_dir:
            target_dir = opj(sibling_ri.path, target_dir)

    if target_dir is None:
        if sibling_ri.path:
            target_dir = sibling_ri.path
        else:
            target_dir = '.'
    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%RELNAME" not in target_dir

    if not shell.get_annex_version():
        raise MissingExternalDependency(
            'git-annex',
            msg="It's required on the {} machine to create a sibling".
            format('remote' if ssh_sibling else 'local'))

    #
    # all checks done and we have a connection, now do something
    #

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    yielded = set()
    remote_repos_to_run_hook_for = []
    for currentds_ap in \
            sorted(to_process, key=lambda x: x['path'].count('/')):
        current_ds = Dataset(currentds_ap['path'])

        path = _create_dataset_sibling(
            name,
            current_ds,
            refds_path,
            shell,
            replicate_local_structure,
            sibling_ri,
            target_dir,
            target_url,
            target_pushurl,
            existing,
            shared,
            group,
            publish_depends,
            publish_by_default,
            ui,
            as_common_datasrc,
            annex_wanted,
            annex_group,
            annex_groupwanted,
            inherit)
        currentds_ap["sibling_name"] = name
        if not path:
            # nothing new was created
            # TODO is 'notneeded' appropriate in this case?
            currentds_ap['status'] = 'notneeded'
            # TODO explain status in 'message'
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        remote_repos_to_run_hook_for.append((path, currentds_ap))

        # publish web-interface to root dataset on publication server
        if current_ds.path == refds_path and ui:
            from datalad_deprecated.sibling_webui import upload_web_interface
            lgr.info("Uploading web interface to %s", path)
            try:
                upload_web_interface(path, shell, shared, ui)
            except CommandError as e:
                ce = CapturedException(e)
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to push web interface to the remote datalad repository (%s)",
                    ce)
                currentds_ap['exception'] = ce
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue

    # in reverse order would be depth first
    lgr.info("Running post-update hooks in all created siblings")
    # TODO: add progressbar
    for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        lgr.debug("Running hook for %s (if exists and executable)", path)
        try:
            shell(
                "cd {} "
                "&& ( [ -x hooks/post-update ] && hooks/post-update || true )"
                "".format(sh_quote(_path_(path, ".git"))))
        except CommandError as e:
            ce = CapturedException(e)
            currentds_ap['status'] = 'error'
            currentds_ap['message'] = (
                "failed to run post-update hook under remote path %s (%s)",
                path, ce)
            currentds_ap['exception'] = ce
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        if not currentds_ap['path'] in yielded:
            # if we were silent until now everything is just splendid
            currentds_ap['status'] = 'ok'
            yield currentds_ap
def _create_dataset_sibling(
        name,
        ds,
        hierarchy_basepath,
        ssh,
        replicate_local_structure,
        ssh_url,
        target_dir,
        target_url,
        target_pushurl,
        existing,
        shared,
        group,
        publish_depends,
        publish_by_default,
        install_postupdate_hook,
        as_common_datasrc,
        annex_wanted,
        annex_group,
        annex_groupwanted,
        inherit
):
    """Create and configure a single sibling repository on the target.

    Resolves the remote path for `ds` (honoring %RELNAME templating or
    hierarchy replication), deals with a pre-existing target path
    according to `existing` ('error'|'skip'|'replace'|'reconfigure'),
    initializes the remote (annex) repo, registers it as a sibling of
    `ds`, and adjusts remote git configuration / post-update hook.

    Returns
    -------
    None if nothing was created (e.g. skipped), otherwise the remote
    path of the created/configured sibling.

    NOTE(review): reformatted from a whitespace-collapsed source; code
    tokens are unchanged.
    """
    localds_path = ds.path
    ds_name = relpath(localds_path, start=hierarchy_basepath)
    if not replicate_local_structure:
        ds_name = '' if ds_name == curdir \
            else '-{}'.format(ds_name.replace("/", "-"))
        remoteds_path = target_dir.replace(
            "%RELNAME",
            ds_name)
    else:
        # TODO: opj depends on local platform, not the remote one.
        # check how to deal with it. Does windows ssh server accept
        # posix paths? vice versa? Should planned SSH class provide
        # tools for this issue?
        # see gh-1188
        remoteds_path = normpath(opj(target_dir, ds_name))

    # construct a would-be ssh url based on the current dataset's path
    ssh_url.path = remoteds_path
    ds_sshurl = ssh_url.as_str()
    # configure dataset's git-access urls
    ds_target_url = target_url.replace('%RELNAME', ds_name) \
        if target_url else ds_sshurl
    # push, configure only if needed
    ds_target_pushurl = None
    if ds_target_url != ds_sshurl:
        # not guaranteed that we can push via the primary URL
        ds_target_pushurl = target_pushurl.replace('%RELNAME', ds_name) \
            if target_pushurl else ds_sshurl

    lgr.info("Considering to create a target dataset {0} at {1} of {2}".format(
        localds_path, remoteds_path, ssh_url.hostname))

    # Must be set to True only if exists and existing='reconfigure'
    # otherwise we might skip actions if we say existing='reconfigure'
    # but it did not even exist before
    only_reconfigure = False
    if remoteds_path != '.':
        # check if target exists
        # TODO: Is this condition valid for != '.' only?
        path_exists = True
        try:
            out, err = ssh("ls {}".format(sh_quote(remoteds_path)))
        except CommandError as e:
            if "No such file or directory" in e.stderr and \
                    remoteds_path in e.stderr:
                path_exists = False
            else:
                raise  # It's an unexpected failure here

        if path_exists:
            _msg = "Target path %s already exists." % remoteds_path
            # path might be existing but be an empty directory, which should be
            # ok to remove
            try:
                lgr.debug(
                    "Trying to rmdir %s on remote since might be an empty dir",
                    remoteds_path)
                # should be safe since should not remove anything unless an empty dir
                ssh("rmdir {}".format(sh_quote(remoteds_path)))
                path_exists = False
            except CommandError as e:
                # If fails to rmdir -- either contains stuff no permissions
                # TODO: fixup encode/decode dance again :-/ we should have got
                # unicode/str here by now. I guess it is the same as
                # https://github.com/ReproNim/niceman/issues/83
                # where I have reused this Runner thing
                try:
                    # ds_name is unicode which makes _msg unicode so we must be
                    # unicode-ready
                    err_str = text_type(e.stderr)
                except UnicodeDecodeError:
                    err_str = e.stderr.decode(errors='replace')
                _msg += " And it fails to rmdir (%s)." % (err_str.strip(),)

        if path_exists:
            if existing == 'error':
                raise RuntimeError(_msg)
            elif existing == 'skip':
                lgr.info(_msg + " Skipping")
                return
            elif existing == 'replace':
                lgr.info(_msg + " Replacing")
                # enable write permissions to allow removing dir
                ssh("chmod +r+w -R {}".format(sh_quote(remoteds_path)))
                # remove target at path
                ssh("rm -rf {}".format(sh_quote(remoteds_path)))
                # if we succeeded in removing it
                path_exists = False
                # Since it is gone now, git-annex also should forget about it
                remotes = ds.repo.get_remotes()
                if name in remotes:
                    # so we had this remote already, we should announce it dead
                    # XXX what if there was some kind of mismatch and this name
                    # isn't matching the actual remote UUID? should have we
                    # checked more carefully?
                    lgr.info(
                        "Announcing existing remote %s dead to annex and removing",
                        name)
                    if isinstance(ds.repo, AnnexRepo):
                        ds.repo.set_remote_dead(name)
                    ds.repo.remove_remote(name)
            elif existing == 'reconfigure':
                lgr.info(_msg + " Will only reconfigure")
                only_reconfigure = True
            else:
                raise ValueError(
                    "Do not know how to handle existing={}".format(
                        repr(existing)))

        if not path_exists:
            ssh("mkdir -p {}".format(sh_quote(remoteds_path)))

    delayed_super = _DelayedSuper(ds)
    if inherit and delayed_super.super:
        if shared is None:
            # here we must analyze current_ds's super, not the super_ds
            # inherit from the setting on remote end
            shared = CreateSibling._get_ds_remote_shared_setting(
                delayed_super, name, ssh)

        if not install_postupdate_hook:
            # Even though directive from above was False due to no UI explicitly
            # requested, we were asked to inherit the setup, so we might need
            # to install the hook, if super has it on remote
            install_postupdate_hook = CreateSibling._has_active_postupdate(
                delayed_super, name, ssh)

    if group:
        # Either repository existed before or a new directory was created for it,
        # set its group to a desired one if was provided with the same chgrp
        ssh("chgrp -R {} {}".format(
            sh_quote(text_type(group)),
            sh_quote(remoteds_path)))

    # don't (re-)initialize dataset if existing == reconfigure
    if not only_reconfigure:
        # init git and possibly annex repo
        if not CreateSibling.init_remote_repo(
                remoteds_path, ssh, shared, ds,
                description=target_url):
            return

        if target_url and not is_ssh(target_url):
            # we are not coming in via SSH, hence cannot assume proper
            # setup for webserver access -> fix
            ssh('git -C {} update-server-info'.format(sh_quote(remoteds_path)))
    else:
        # TODO -- we might still want to reconfigure 'shared' setting!
        pass

    # at this point we have a remote sibling in some shape or form
    # -> add as remote
    lgr.debug("Adding the siblings")
    # TODO generator, yield the now swallowed results
    Siblings.__call__(
        'configure',
        dataset=ds,
        name=name,
        url=ds_target_url,
        pushurl=ds_target_pushurl,
        recursive=False,
        fetch=True,
        as_common_datasrc=as_common_datasrc,
        publish_by_default=publish_by_default,
        publish_depends=publish_depends,
        annex_wanted=annex_wanted,
        annex_group=annex_group,
        annex_groupwanted=annex_groupwanted,
        inherit=inherit
    )

    # check git version on remote end
    lgr.info("Adjusting remote git configuration")
    if ssh.get_git_version() and ssh.get_git_version() >= LooseVersion("2.4"):
        # allow for pushing to checked out branch
        try:
            ssh("git -C {} config receive.denyCurrentBranch updateInstead".format(
                sh_quote(remoteds_path)))
        except CommandError as e:
            lgr.error("git config failed at remote location %s.\n"
                      "You will not be able to push to checked out "
                      "branch. Error: %s", remoteds_path, exc_str(e))
    else:
        lgr.error("Git version >= 2.4 needed to configure remote."
                  " Version detected on server: %s\nSkipping configuration"
                  " of receive.denyCurrentBranch - you will not be able to"
                  " publish updates to this repository. Upgrade your git"
                  " and run with --existing=reconfigure",
                  ssh.get_git_version())

    if install_postupdate_hook:
        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(
                remoteds_path, ssh, ds)
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

    return remoteds_path
def __call__(sshurl, name=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None,
             recursive=False,
             recursion_limit=None,
             existing='error',
             shared=None,
             group=None,
             ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None,
             annex_wanted=None, annex_group=None, annex_groupwanted=None,
             inherit=False,
             since=None):
    """Create a dataset sibling on an SSH-reachable machine.

    Generator: yields one result record (annotated-path dict with at
    least ``path``, ``status`` and usually ``message`` keys) per
    (sub)dataset processed.

    Overall flow:

    1. resolve/validate ``sshurl`` (possibly inheriting it from an
       existing remote or the superdataset when ``inherit`` is set),
    2. enumerate the datasets to process (honoring ``recursive`` /
       ``since`` and the ``existing`` policy),
    3. open a shared SSH connection and create/reconfigure the sibling
       repository for each dataset via ``_create_dataset_sibling``,
    4. run the remote ``post-update`` hooks depth-first at the end.

    Parameter semantics mirror the ``create_sibling`` command-line
    options; see the command's parameter declarations elsewhere in this
    class for authoritative documentation.
    """
    #
    # nothing without a base dataset
    #
    ds = require_dataset(dataset, check_installed=True,
                         purpose='creating a sibling')
    refds_path = ds.path

    #
    # all checks that are possible before we start parsing the dataset
    #

    # possibly use sshurl to get the name in case if not specified
    if not sshurl:
        if not inherit:
            raise InsufficientArgumentsError(
                "needs at least an SSH URL, if no inherit option")
        if name is None:
            raise ValueError(
                "Neither SSH URL, nor the name of sibling to inherit from "
                "was specified")
        # It might well be that we already have this remote setup
        try:
            sshurl = CreateSibling._get_remote_url(ds, name)
        except Exception as exc:
            # best-effort lookup: a missing remote URL here is not fatal,
            # the superdataset fallback below may still supply one
            lgr.debug('%s does not know about url for %s: %s',
                      ds, name, exc_str(exc))
    elif inherit:
        raise ValueError(
            "For now, for clarity not allowing specifying a custom sshurl "
            "while inheriting settings")
        # may be could be safely dropped -- still WiP

    if not sshurl:
        # TODO: may be more back up before _prep?
        # derive the URL from the superdataset's sibling of the same name
        super_ds = ds.get_superdataset()
        if not super_ds:
            raise ValueError(
                "Could not determine super dataset for %s to inherit URL"
                % ds)
        super_url = CreateSibling._get_remote_url(super_ds, name)
        # for now assuming hierarchical setup
        # (TODO: to be able to destinguish between the two, probably
        # needs storing datalad.*.target_dir to have %RELNAME in there)
        sshurl = slash_join(super_url, relpath(ds.path, super_ds.path))

    # check the login URL
    sshri = RI(sshurl)
    if not is_ssh(sshri):
        raise ValueError(
            "Unsupported SSH URL: '{0}', "
            "use ssh://host/path or host:path syntax".format(sshurl))

    if not name:
        # use the hostname as default remote name
        name = sshri.hostname
        lgr.debug(
            "No sibling name given, use URL hostname '%s' as sibling name",
            name)

    if since == '':
        # consider creating siblings only since the point of
        # the last update
        # XXX here we assume one to one mapping of names from local branches
        # to the remote
        active_branch = ds.repo.get_active_branch()
        since = '%s/%s' % (name, active_branch)

    #
    # parse the base dataset to find all subdatasets that need processing
    #
    to_process = []
    for ap in AnnotatePaths.__call__(
            dataset=refds_path,
            # only a single path!
            path=refds_path,
            recursive=recursive,
            recursion_limit=recursion_limit,
            action='create_sibling',
            # both next should not happen anyways
            unavailable_path_status='impossible',
            nondataset_path_status='error',
            modified=since,
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is done
            # (annotation already produced a final status, pass through)
            yield ap
            continue
        if ap.get('type', None) != 'dataset' \
                or ap.get('state', None) == 'absent':
            # this can happen when there is `since`, but we have no
            # use for anything but datasets here
            continue
        checkds_remotes = Dataset(ap['path']).repo.get_remotes() \
            if ap.get('state', None) != 'absent' \
            else []
        if publish_depends:
            # make sure dependencies are valid
            # TODO: inherit -- we might want to automagically create
            # those dependents as well???
            unknown_deps = set(assure_list(publish_depends)).difference(
                checkds_remotes)
            if unknown_deps:
                ap['status'] = 'error'
                ap['message'] = (
                    'unknown sibling(s) specified as publication dependency: %s',
                    unknown_deps)
                yield ap
                continue
        if name in checkds_remotes and existing in ('error', 'skip'):
            # sibling with this name is already configured; honor the
            # requested `existing` policy ('reconfigure' falls through)
            ap['status'] = 'error' if existing == 'error' else 'notneeded'
            ap['message'] = (
                "sibling '%s' already configured (specify alternative name, or force "
                "reconfiguration via --existing",
                name)
            yield ap
            continue
        to_process.append(ap)

    if not to_process:
        # we ruled out all possibilities
        # TODO wait for gh-1218 and make better return values
        lgr.info("No datasets qualify for sibling creation. "
                 "Consider different settings for --existing "
                 "or --since if this is unexpected")
        return

    if target_dir is None:
        # fall back to the path component of the SSH URL, or the remote
        # login directory when the URL has no path
        if sshri.path:
            target_dir = sshri.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    # %RELNAME in target_dir switches from mirroring the local dataset
    # layout to template-based remote paths
    replicate_local_structure = "%RELNAME" not in target_dir

    # request ssh connection:
    lgr.info("Connecting ...")
    # delayed sanity check: all branches above must have produced an URL
    assert(sshurl is not None)  # delayed anal verification
    ssh = ssh_manager.get_connection(sshurl)
    if not ssh.get_annex_version():
        raise MissingExternalDependency(
            'git-annex',
            msg='on the remote system')

    #
    # all checks done and we have a connection, now do something
    #

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    yielded = set()
    remote_repos_to_run_hook_for = []
    # NOTE(review): ordering by '/'-count approximates top-down depth
    # ordering; it assumes all paths share a common root — looks safe
    # here since they come from a single recursive traversal
    for currentds_ap in \
            sorted(to_process, key=lambda x: x['path'].count('/')):
        current_ds = Dataset(currentds_ap['path'])

        path = _create_dataset_sibling(
            name,
            current_ds,
            ds.path,
            ssh,
            replicate_local_structure,
            sshri,
            target_dir,
            target_url,
            target_pushurl,
            existing,
            shared,
            group,
            publish_depends,
            publish_by_default,
            ui,
            as_common_datasrc,
            annex_wanted,
            annex_group,
            annex_groupwanted,
            inherit
        )
        if not path:
            # nothing new was created
            # TODO is 'notneeded' appropriate in this case?
            currentds_ap['status'] = 'notneeded'
            # TODO explain status in 'message'
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        remote_repos_to_run_hook_for.append((path, currentds_ap))

        # publish web-interface to root dataset on publication server
        if current_ds.path == ds.path and ui:
            lgr.info("Uploading web interface to %s" % path)
            try:
                CreateSibling.upload_web_interface(path, ssh, shared, ui)
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to push web interface to the remote datalad repository (%s)",
                    exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue

    # in reverse order would be depth first
    lgr.info("Running post-update hooks in all created siblings")
    # TODO: add progressbar
    for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        lgr.debug("Running hook for %s (if exists and executable)", path)
        try:
            # run the repo's own post-update hook if present/executable;
            # the trailing `|| :` keeps a missing hook from failing the cmd
            ssh("cd {} "
                "&& ( [ -x hooks/post-update ] && hooks/post-update || : )"
                "".format(sh_quote(_path_(path, ".git"))))
        except CommandError as e:
            currentds_ap['status'] = 'error'
            currentds_ap['message'] = (
                "failed to run post-update hook under remote path %s (%s)",
                path, exc_str(e))
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        if not currentds_ap['path'] in yielded:
            # if we were silent until now everything is just splendid
            currentds_ap['status'] = 'ok'
            yield currentds_ap