def __call__(url, dataset=None, recursive=False):
    """Rewrite the submodule URLs recorded in a dataset's .gitmodules.

    Every submodule section's "url" entry is set to `url`, with the
    template symbol "%NAME" replaced by the submodule name (slashes
    turned into dashes).  With `recursive`, all known subdatasets are
    processed as well.
    """
    # Normalize whatever was passed into a Dataset instance:
    target = dataset
    if target is not None and not isinstance(target, Dataset):
        target = Dataset(target)
    if target is None:
        # No dataset given; look for one at or above the current
        # working directory of the process.
        top = GitRepo.get_toppath(getpwd())
        if top is None:
            raise ValueError("No dataset found at %s." % getpwd())
        target = Dataset(top)
    assert target is not None
    if not target.is_installed():
        raise ValueError("No installed dataset found at "
                         "{0}.".format(target.path))
    assert target.repo is not None

    repos = [target.repo]
    if recursive:
        # Also cover the repositories of all known subdatasets.
        repos.extend(
            GitRepo(opj(target.path, sub))
            for sub in target.get_dataset_handles(recursive=True))

    for repo in repos:
        parser = get_module_parser(repo)
        for section in parser.sections():
            # Section header reads 'submodule "<name>"'; strip the
            # wrapper to get the bare name.
            name = section[11:-1]
            parser.set_value(
                section,
                "url",
                url.replace("%NAME", name.replace("/", "-")))
    return  # TODO: return value?
def __call__(url, dataset=None, recursive=False):
    """Rewrite submodule URLs in a dataset's .gitmodules.

    Each submodule's "url" entry is set to `url`, with the template
    symbol "%NAME" replaced by the submodule name (slashes mapped to
    dashes).  With `recursive`, subdatasets are processed as well.
    """
    # shortcut
    ds = dataset
    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    # if we have no dataset given, figure out which one we need to operate
    # on, based on the current working directory of the process:
    if ds is None:
        # try to find a dataset at or above PWD:
        dspath = GitRepo.get_toppath(getpwd())
        if dspath is None:
            raise ValueError("No dataset found at %s." % getpwd())
        ds = Dataset(dspath)
    assert(ds is not None)

    if not ds.is_installed():
        raise ValueError("No installed dataset found at "
                         "{0}.".format(ds.path))
    assert(ds.repo is not None)

    repos_to_update = [ds.repo]
    if recursive:
        # include the repositories of all known subdatasets
        repos_to_update += [GitRepo(opj(ds.path, sub_path))
                            for sub_path in
                            ds.get_dataset_handles(recursive=True)]

    for handle_repo in repos_to_update:
        parser = get_module_parser(handle_repo)
        for submodule_section in parser.sections():
            # section header reads 'submodule "<name>"'; extract <name>
            submodule_name = submodule_section[11:-1]
            parser.set_value(submodule_section,
                            "url",
                            url.replace("%NAME",
                                        submodule_name.replace("/", "-")))

    return  # TODO: return value?
def __call__(dataset=None, path=None, data_only=True, recursive=False):
    """Uninstall a component of a dataset, or drop its data.

    Resolves `dataset`/`path` the same way install does, then either
    drops the data of an annexed file (`data_only=True`) or removes a
    file tracked directly in git.  Several cases (whole datasets,
    directories, submodules, full annex removal) are not implemented
    yet and raise NotImplementedError.

    Fixes applied in review:
    - ``_file_in_git`` / ``_untracked_or_within_submodule`` are now
      initialized, so the classification checks at the bottom can no
      longer raise NameError when only one of them was set.
    - the annexed-file handling is now in a ``try``/``else`` clause, so
      it only runs when ``get_file_key`` succeeded; previously it also
      ran after ``FileInGitError``/``FileNotInAnnexError``.
    - closed the unbalanced paren in the "insufficient information"
      error message.
    """
    # Note: copy logic from install to resolve dataset and path:
    # shortcut
    ds = dataset
    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    if not path:
        if ds is None:
            # no dataset, no target location, nothing to do
            raise ValueError(
                "insufficient information for uninstallation (needs at "
                "least a dataset or a path)")
    elif isinstance(path, list):
        # TODO: not sure. might be possible to deal with that list directly
        return [Uninstall.__call__(
                dataset=ds,
                path=p,
                data_only=data_only,
                recursive=recursive) for p in path]

    # resolve the target location against the provided dataset
    if path is not None:
        path = resolve_path(path, ds)

    lgr.debug("Resolved uninstallation target: {0}".format(path))

    # if we have no dataset given, figure out which one we need to operate
    # on, based on the resolved target location (that is now guaranteed to
    # be specified)
    if ds is None:
        # try to find a dataset at or above the installation target
        dspath = GitRepo.get_toppath(abspath(path))
        if dspath is None:
            # no top-level dataset found, use path as such
            dspath = path
        ds = Dataset(dspath)
    assert ds is not None

    lgr.debug("Resolved target dataset for uninstallation: {0}".format(ds))

    if not ds.is_installed():
        if not path or path == ds.path:
            # we want to uninstall the dataset itself, which is not
            # installed => nothing to do
            # TODO: consider `data` option! is_installed currently only
            # checks for a repository
            lgr.info("Dataset {0} not installed. Nothing to "
                     "do.".format(ds.path))
            return
        else:
            # we want to uninstall something from a not installed dataset
            # Doesn't make sense, does it? => fail
            raise ValueError("Dataset {0} is not installed.".format(ds.path))

    assert ds.repo is not None

    if not path or path == ds.path:
        # uninstall the dataset `ds`
        # TODO: what to consider?
        # - whether it is a submodule of another dataset
        # - `data_only` ?
        # - `recursive`
        # - what to return in what case (data_only)?
        raise NotImplementedError("TODO: Uninstall dataset %s" % ds.path)

    # needed by the logic below
    assert isabs(path)

    # express the destination path relative to the root of this dataset
    relativepath = relpath(path, start=ds.path)
    if path.startswith(pardir):
        raise ValueError("uninstallation path outside dataset")

    lgr.debug("Resolved uninstallation target relative to dataset "
              "{0}: {1}".format(ds, relativepath))

    # figure out, what path actually is pointing to:
    if not exists(path):
        # nothing there, nothing to uninstall
        lgr.info("Nothing found to uninstall at %s" % path)
        return

    if relativepath in ds.get_dataset_handles(recursive=True):
        # it's a submodule
        # --recursive required or implied?
        raise NotImplementedError("TODO: uninstall submodule %s from "
                                  "dataset %s" % (relativepath, ds.path))

    if isdir(path):
        # don't know what to do yet
        # in git vs. untracked?
        # recursive?
        raise NotImplementedError("TODO: uninstall directory %s from "
                                  "dataset %s" % (path, ds.path))

    # we know, it's an existing file
    # classification flags; always bound so the checks below can't
    # raise NameError
    _file_in_git = False
    _untracked_or_within_submodule = False

    if isinstance(ds.repo, AnnexRepo):
        try:
            ds.repo.get_file_key(relativepath)
        except FileInGitError:
            # file directly in git
            _file_in_git = True
        except FileNotInAnnexError:
            # either an untracked file in this dataset, or something that
            # also actually exists in the file system but could be part of
            # a subdataset
            _untracked_or_within_submodule = True
        else:
            # get_file_key succeeded => it's an annexed file
            if data_only:
                ds.repo.annex_drop([path])
                return path
            else:
                raise NotImplementedError("TODO: fully uninstall file %s "
                                          "(annex) from dataset %s" %
                                          (path, ds.path))
    else:
        # plain git repo
        if relativepath in ds.repo.get_indexed_files():
            # file directly in git
            _file_in_git = True
        else:
            # either an untracked file in this dataset, or something that
            # also actually exists in the file system but could be part of
            # a subdataset
            _untracked_or_within_submodule = True

    if _file_in_git:
        if data_only:
            raise ValueError("%s is not a file handle. Removing its "
                             "data only doesn't make sense." % path)
        else:
            return ds.repo.git_remove([relativepath])
    elif _untracked_or_within_submodule:
        subds = get_containing_subdataset(ds, relativepath)
        if ds.path != subds.path:
            # target path belongs to a subdataset, hand uninstallation
            # over to it
            return subds.uninstall(path=relpath(path, start=subds.path),
                                   data_only=data_only,
                                   recursive=recursive)

        # this must be an untracked/existing something
        # it wasn't installed, so we cannot uninstall it
        raise ValueError("Cannot uninstall %s" % path)
def __call__(dataset=None, name=None, url=None,
             pushurl=None, recursive=False, force=False):
    """Register a sibling (git remote) `name` with `url`/`pushurl` in a
    dataset and, with `recursive`, in all of its subdatasets.

    The template symbol "%NAME" in `url`/`pushurl` is replaced per
    repository; without it, the local directory structure is replicated
    by joining the subdataset's relative name onto the URL.  Conflicting
    pre-existing remotes raise unless `force` is given.  Returns the
    list of repository names for which the sibling was (re)configured.
    """
    # TODO: Detect malformed URL and fail?

    if name is None or (url is None and pushurl is None):
        raise ValueError("""insufficient information to add a sibling (needs at least a dataset, a name and an URL).""")
    if url is None:
        # only a pushurl was given; use it for fetching too
        url = pushurl

    # shortcut
    ds = dataset
    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    if ds is None:
        # try to find a dataset at or above CWD
        dspath = GitRepo.get_toppath(abspath(getpwd()))
        if dspath is None:
            raise ValueError("No dataset found at or above {0}.".format(getpwd()))
        ds = Dataset(dspath)
        lgr.debug("Resolved dataset for target creation: {0}".format(ds))
    assert (ds is not None and name is not None and url is not None)

    if not ds.is_installed():
        raise ValueError("Dataset {0} is not installed yet.".format(ds))
    assert (ds.repo is not None)

    # map "name" of each involved repository to its repo instance
    ds_basename = basename(ds.path)
    repos = {ds_basename: {'repo': ds.repo}}
    if recursive:
        for subds in ds.get_dataset_handles(recursive=True):
            sub_path = opj(ds.path, subds)
            repos[ds_basename + '/' + subds] = {
                # repos[subds] = {
                'repo': GitRepo(sub_path, create=False)
            }

    # Note: This is copied from create_publication_target_sshwebserver
    # as it is the same logic as for its target_dir.
    # TODO: centralize and generalize template symbol handling
    # TODO: Check pushurl for template symbols too. Probably raise if only
    # one of them uses such symbols

    replicate_local_structure = False
    if "%NAME" not in url:
        replicate_local_structure = True

    # compute the effective url/pushurl for every repository
    for repo in repos:
        if not replicate_local_structure:
            # fill in the template symbol per repository
            repos[repo]['url'] = url.replace("%NAME",
                                             repo.replace("/", "-"))
            if pushurl:
                repos[repo]['pushurl'] = pushurl.replace(
                    "%NAME", repo.replace("/", "-"))
        else:
            repos[repo]['url'] = url
            if pushurl:
                repos[repo]['pushurl'] = pushurl

            if repo != ds_basename:
                # subdataset: append its relative name to the base URL
                repos[repo]['url'] = _urljoin(repos[repo]['url'],
                                              repo[len(ds_basename) + 1:])
                if pushurl:
                    repos[repo]['pushurl'] = _urljoin(
                        repos[repo]['pushurl'],
                        repo[len(ds_basename) + 1:])

    # collect existing remotes:
    already_existing = list()
    conflicting = list()
    for repo in repos:
        if name in repos[repo]['repo'].git_get_remotes():
            already_existing.append(repo)
            lgr.debug("""Remote '{0}' already exists in '{1}'.""".format(name, repo))

            existing_url = repos[repo]['repo'].git_get_remote_url(name)
            existing_pushurl = \
                repos[repo]['repo'].git_get_remote_url(name, push=True)

            # a remote of the same name counts as conflicting if any of
            # its URLs differs (modulo trailing slash) from what we
            # would configure
            if repos[repo]['url'].rstrip('/') != existing_url.rstrip('/') \
                    or (pushurl and existing_pushurl and
                        repos[repo]['pushurl'].rstrip('/') !=
                        existing_pushurl.rstrip('/')) \
                    or (pushurl and not existing_pushurl):
                conflicting.append(repo)

    if not force and conflicting:
        raise RuntimeError("Sibling '{0}' already exists with conflicting"
                           " URL for {1} dataset(s). {2}".format(
                               name, len(conflicting), conflicting))

    runner = Runner()
    successfully_added = list()
    for repo in repos:
        if repo in already_existing:
            if repo not in conflicting:
                # identical remote already configured; nothing to change
                lgr.debug("Skipping {0}. Nothing to do.".format(repo))
                continue
            # rewrite url
            cmd = ["git", "remote", "set-url", name, repos[repo]['url']]
            runner.run(cmd, cwd=repos[repo]['repo'].path)
        else:
            # add the remote
            cmd = ["git", "remote", "add", name, repos[repo]['url']]
            runner.run(cmd, cwd=repos[repo]['repo'].path)
        if pushurl:
            cmd = ["git", "remote", "set-url", "--push", name,
                   repos[repo]['pushurl']]
            runner.run(cmd, cwd=repos[repo]['repo'].path)
        successfully_added.append(repo)

    return successfully_added
def __call__(sshurl, target=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None, recursive=False,
             existing='raise', shared=False):
    """Create dataset target(s) on an SSH-reachable web server.

    For the dataset (and, with `recursive`, its subdatasets) a bare-ish
    git repository is initialized at `target_dir` on the host given by
    `sshurl`, configured for pushing to a checked-out branch, and the
    post-update hook is enabled so the repo is browsable via plain
    HTTP.  If `target` is given, the created repositories are finally
    registered as siblings via AddSibling.

    Review fix: the remote git version check previously used a plain
    string comparison (``git_version < "2.4"``), which misorders
    multi-digit components (e.g. "2.10" < "2.4").  It now compares
    numeric version tuples.
    """

    def _version_tuple(version_string):
        # Parse the leading numeric dotted components of a version
        # string into a tuple of ints; stops at the first non-numeric
        # component.  An unparseable string yields () and is treated
        # as "too old" below (conservative).
        parts = []
        for tok in version_string.split('.'):
            if tok.isdigit():
                parts.append(int(tok))
            else:
                break
        return tuple(parts)

    if sshurl is None:
        raise ValueError("""insufficient information for target creation (needs at least a dataset and a SSH URL).""")

    if target is None and (target_url is not None or
                           target_pushurl is not None):
        raise ValueError("""insufficient information for adding the target as a sibling (needs at least a name)""")

    # shortcut
    ds = dataset
    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    if ds is None:
        # try to find a dataset at or above CWD
        dspath = GitRepo.get_toppath(abspath(getpwd()))
        if dspath is None:
            raise ValueError("""No dataset found at or above {0}.""".format(getpwd()))
        ds = Dataset(dspath)
        lgr.debug("Resolved dataset for target creation: {0}".format(ds))
    assert (ds is not None and sshurl is not None)

    if not ds.is_installed():
        raise ValueError("""Dataset {0} is not installed yet.""".format(ds))
    assert (ds.repo is not None)

    # determine target parameters:
    parsed_target = urlparse(sshurl)
    host_name = parsed_target.netloc

    # TODO: Sufficient to fail on this condition?
    if not parsed_target.netloc:
        raise ValueError("Malformed URL: {0}".format(sshurl))

    if target_dir is None:
        # default to the URL's path component, or the remote home dir
        if parsed_target.path:
            target_dir = parsed_target.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = False
    if "%NAME" not in target_dir:
        replicate_local_structure = True

    # collect datasets to use:
    datasets = dict()
    datasets[basename(ds.path)] = ds
    if recursive:
        for subds in ds.get_dataset_handles(recursive=True):
            sub_path = opj(ds.path, subds)
            # TODO: when enhancing Dataset/*Repo classes and therefore
            # adapt to moved code, make proper distinction between name and
            # path of a submodule, which are technically different. This
            # probably will become important on windows as well as whenever
            # we want to allow for moved worktrees.
            datasets[basename(ds.path) + '/' + subds] = \
                Dataset(sub_path)

    # setup SSH Connection:
    # TODO: Make the entire setup a helper to use it when pushing via
    # publish?

    # - build control master:
    from datalad.utils import assure_dir
    not_supported_on_windows("TODO")
    from os import geteuid  # Linux specific import
    var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
    assure_dir(var_run_user_datalad)
    control_path = "%s/%s" % (var_run_user_datalad, host_name)
    control_path += ":%s" % parsed_target.port if parsed_target.port else ""

    # - start control master:
    cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \
          "-o ControlPersist=yes %s exit" % (control_path, host_name)
    lgr.debug("Try starting control master by calling:\n%s" % cmd)
    import subprocess
    proc = subprocess.Popen(cmd, shell=True)
    # NOTE(review): stdin is not a PIPE here, so the `input` is not
    # actually delivered to the child; on Python 3 this would raise.
    # Left as-is pending verification.  why the f.. this is necessary?
    proc.communicate(input="\n")

    runner = Runner()
    ssh_cmd = ["ssh", "-S", control_path, host_name]

    lgr.info("Creating target datasets ...")
    for current_dataset in datasets:
        if not replicate_local_structure:
            path = target_dir.replace("%NAME",
                                      current_dataset.replace("/", "-"))
        else:
            # TODO: opj depends on local platform, not the remote one.
            # check how to deal with it. Does windows ssh server accept
            # posix paths? vice versa? Should planned SSH class provide
            # tools for this issue?
            path = normpath(opj(target_dir,
                                relpath(datasets[current_dataset].path,
                                        start=ds.path)))

        if path != '.':
            # check if target exists
            # TODO: Is this condition valid for != '.' only?
            path_exists = True
            cmd = ssh_cmd + ["ls", path]
            try:
                out, err = runner.run(cmd, expect_fail=True,
                                      expect_stderr=True)
            except CommandError as e:
                if "No such file or directory" in e.stderr and \
                        path in e.stderr:
                    path_exists = False
                else:
                    raise  # It's an unexpected failure here

            if path_exists:
                if existing == 'raise':
                    raise RuntimeError(
                        "Target directory %s already exists." % path)
                elif existing == 'skip':
                    continue
                elif existing == 'replace':
                    pass
                else:
                    raise ValueError("Do not know how to hand existing=%s" % repr(existing))

            cmd = ssh_cmd + ["mkdir", "-p", path]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Remotely creating target directory failed at "
                          "%s.\nError: %s" % (path, str(e)))
                continue

        # init git repo
        cmd = ssh_cmd + ["git", "-C", path, "init"]
        if shared:
            cmd.append("--shared=%s" % shared)
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.error("Remotely initializing git repository failed at %s."
                      "\nError: %s\nSkipping ..." % (path, str(e)))
            continue

        # check git version on remote end:
        cmd = ssh_cmd + ["git", "version"]
        try:
            out, err = runner.run(cmd)
            git_version = out.lstrip("git version").strip()
            lgr.debug("Detected git version on server: %s" % git_version)
            # compare numerically; a plain string comparison would claim
            # e.g. "2.10" < "2.4"
            if _version_tuple(git_version) < (2, 4):
                lgr.error("Git version >= 2.4 needed to configure remote."
                          " Version detected on server: %s\nSkipping ..."
                          % git_version)
                continue
        except CommandError as e:
            lgr.warning("Failed to determine git version on remote.\n"
                        "Error: {0}\nTrying to configure anyway "
                        "...".format(e.message))

        # allow for pushing to checked out branch
        cmd = ssh_cmd + ["git", "-C", path, "config",
                         "receive.denyCurrentBranch",
                         "updateInstead"]
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.warning("git config failed at remote location %s.\n"
                        "You will not be able to push to checked out "
                        "branch." % path)

        # enable post-update hook:
        cmd = ssh_cmd + ["mv",
                         opj(path, ".git/hooks/post-update.sample"),
                         opj(path, ".git/hooks/post-update")]
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.error("Failed to enable post update hook.\n"
                      "Error: %s" % e.message)

        # initially update server info "manually":
        cmd = ssh_cmd + ["git", "-C", path, "update-server-info"]
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.error("Failed to update server info.\n"
                      "Error: %s" % e.message)

    # stop controlmaster (close ssh connection):
    cmd = ["ssh", "-O", "stop", "-S", control_path, host_name]
    out, err = runner.run(cmd, expect_stderr=True)

    if target:
        # add the sibling(s):
        if target_url is None:
            target_url = sshurl
        if target_pushurl is None:
            target_pushurl = sshurl
        result_adding = AddSibling()(dataset=ds,
                                     name=target,
                                     url=target_url,
                                     pushurl=target_pushurl,
                                     recursive=recursive,
                                     force=existing in {'replace'})
def __call__(dataset=None, name=None, url=None,
             pushurl=None, recursive=False, force=False):
    """Add a sibling remote `name` pointing at `url`/`pushurl` to a
    dataset and, with `recursive`, to each of its subdatasets.

    "%NAME" in `url`/`pushurl` is substituted per repository; without
    the template symbol, subdataset URLs mirror the local layout.
    Pre-existing remotes with differing URLs raise unless `force`.
    Returns the names of repositories successfully configured.
    """
    # TODO: Detect malformed URL and fail?

    if name is None or (url is None and pushurl is None):
        raise ValueError("""insufficient information to add a sibling (needs at least a dataset, a name and an URL).""")
    if url is None:
        # fall back to the push URL for fetching as well
        url = pushurl

    # shortcut
    ds = dataset
    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    if ds is None:
        # try to find a dataset at or above CWD
        dspath = GitRepo.get_toppath(abspath(getpwd()))
        if dspath is None:
            raise ValueError(
                "No dataset found at or above {0}.".format(getpwd()))
        ds = Dataset(dspath)
        lgr.debug("Resolved dataset for target creation: {0}".format(ds))
    assert(ds is not None and name is not None and url is not None)

    if not ds.is_installed():
        raise ValueError("Dataset {0} is not installed yet.".format(ds))
    assert(ds.repo is not None)

    # repository name -> repo instance of everything to configure
    ds_basename = basename(ds.path)
    repos = {
        ds_basename: {'repo': ds.repo}
    }
    if recursive:
        for subds in ds.get_dataset_handles(recursive=True):
            sub_path = opj(ds.path, subds)
            repos[ds_basename + '/' + subds] = {
                # repos[subds] = {
                'repo': GitRepo(sub_path, create=False)
            }

    # Note: This is copied from create_publication_target_sshwebserver
    # as it is the same logic as for its target_dir.
    # TODO: centralize and generalize template symbol handling
    # TODO: Check pushurl for template symbols too. Probably raise if only
    # one of them uses such symbols

    replicate_local_structure = False
    if "%NAME" not in url:
        replicate_local_structure = True

    # derive the per-repository URLs
    for repo in repos:
        if not replicate_local_structure:
            # substitute the template symbol with the repo name
            repos[repo]['url'] = url.replace("%NAME",
                                             repo.replace("/", "-"))
            if pushurl:
                repos[repo]['pushurl'] = pushurl.replace("%NAME",
                                                         repo.replace("/", "-"))
        else:
            repos[repo]['url'] = url
            if pushurl:
                repos[repo]['pushurl'] = pushurl

            if repo != ds_basename:
                # subdataset: extend the base URL by its relative name
                repos[repo]['url'] = _urljoin(repos[repo]['url'],
                                              repo[len(ds_basename)+1:])
                if pushurl:
                    repos[repo]['pushurl'] = _urljoin(repos[repo]['pushurl'],
                                                      repo[len(ds_basename)+1:])

    # collect existing remotes:
    already_existing = list()
    conflicting = list()
    for repo in repos:
        if name in repos[repo]['repo'].git_get_remotes():
            already_existing.append(repo)
            lgr.debug("""Remote '{0}' already exists in '{1}'.""".format(name, repo))

            existing_url = repos[repo]['repo'].git_get_remote_url(name)
            existing_pushurl = \
                repos[repo]['repo'].git_get_remote_url(name, push=True)

            # a same-named remote conflicts if any of its URLs differs
            # (ignoring a trailing slash) from the one to configure
            if repos[repo]['url'].rstrip('/') != existing_url.rstrip('/') \
                    or (pushurl and existing_pushurl and
                        repos[repo]['pushurl'].rstrip('/') !=
                        existing_pushurl.rstrip('/')) \
                    or (pushurl and not existing_pushurl):
                conflicting.append(repo)

    if not force and conflicting:
        raise RuntimeError("Sibling '{0}' already exists with conflicting"
                           " URL for {1} dataset(s). {2}".format(
                               name, len(conflicting), conflicting))

    runner = Runner()
    successfully_added = list()
    for repo in repos:
        if repo in already_existing:
            if repo not in conflicting:
                # remote already configured identically; skip
                lgr.debug("Skipping {0}. Nothing to do.".format(repo))
                continue
            # rewrite url
            cmd = ["git", "remote", "set-url", name, repos[repo]['url']]
            runner.run(cmd, cwd=repos[repo]['repo'].path)
        else:
            # add the remote
            cmd = ["git", "remote", "add", name, repos[repo]['url']]
            runner.run(cmd, cwd=repos[repo]['repo'].path)
        if pushurl:
            cmd = ["git", "remote", "set-url", "--push", name,
                   repos[repo]['pushurl']]
            runner.run(cmd, cwd=repos[repo]['repo'].path)
        successfully_added.append(repo)

    return successfully_added
def __call__(dataset=None, dest=None, path=None,
             # Note: add remote currently disabled in publish
             # dest_url=None, dest_pushurl=None,
             with_data=None, recursive=False):
    """Publish a dataset (or a single annexed file) to sibling `dest`.

    Resolves `dataset`/`path`, possibly hands the job over to a
    containing subdataset, pushes the active branch (and the git-annex
    branch for annex repos), optionally copies annexed data listed in
    `with_data`, and recurses into subdatasets when requested.
    Returns the dataset (or list of datasets/paths) published, or None
    if nothing could be published.
    """
    # Note: add remote currently disabled in publish
    # if dest is None and (dest_url is not None
    #                      or dest_pushurl is not None):
    #     raise ValueError("""insufficient information for adding the
    #                      destination as a sibling (needs at least a name)""")

    # shortcut
    ds = dataset
    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    if not path:
        path = curdir
    elif isinstance(path, list):
        # publish each given path independently
        return [Publish.__call__(
                dataset=ds, dest=dest, path=p,
                # Note: add remote currently disabled in publish
                # dest_url=dest_url,
                # dest_pushurl=dest_pushurl,
                with_data=with_data,
                recursive=recursive) for p in path]

    # resolve the location against the provided dataset
    if path is not None:
        path = resolve_path(path, ds)

    lgr.info("Publishing {0}".format(path))

    # if we have no dataset given, figure out which one we need to operate
    # on, based on the resolved location (that is now guaranteed to
    # be specified)
    if ds is None:
        # try to find a dataset at or above the location
        dspath = GitRepo.get_toppath(abspath(path))
        if dspath is None:
            # no top-level dataset found, use path as such
            dspath = path
        ds = Dataset(dspath)
    lgr.debug("Resolved dataset for publication: {0}".format(ds))
    assert (ds is not None)

    # it might still be about a subdataset of ds:
    if path is not None:
        relativepath = relpath(path, start=ds.path)
        subds = get_containing_subdataset(ds, relativepath)
        if subds.path != ds.path:
            # path belongs to a subdataset; hand it over
            lgr.debug("Hand over to submodule %s" % subds.path)
            return subds.publish(dest=dest,
                                 path=relpath(path, start=subds.path),
                                 # Note: add remote currently disabled in publish
                                 # dest_url=dest_url,
                                 # dest_pushurl=dest_pushurl,
                                 with_data=with_data,
                                 recursive=recursive)

    # now, we know, we have to operate on ds. So, ds needs to be installed,
    # since we cannot publish anything from a not installed dataset,
    # can we?
    # (But may be just the existence of ds.repo is important here.)
    if not ds.is_installed():
        raise ValueError("No installed dataset found at "
                         "{0}.".format(ds.path))
    assert (ds.repo is not None)

    # TODO: For now we can deal with a sibling(remote) name given by `dest`
    # only. Figure out, when to allow for passing a local path or URL
    # directly and what to do in that case.

    # Note: we need an upstream remote, if there's none given. We could
    # wait for git push to complain, but we need to explicitly figure it
    # out for pushing annex branch anyway and we might as well fail right
    # here.

    # keep original dest in case it's None for passing to recursive calls:
    dest_resolved = dest
    if dest is None:
        # check for tracking branch's remote:
        try:
            std_out, std_err = \
                ds.repo._git_custom_command('',
                                            ["git", "config", "--get",
                                             "branch.{active_branch}.remote".format(active_branch=ds.repo.git_get_active_branch())],
                                            expect_fail=True)
        except CommandError as e:
            # exit code 1 with empty stdout means the config key is
            # simply unset
            if e.code == 1 and e.stdout == "":
                std_out = None
            else:
                raise
        if std_out:
            dest_resolved = std_out.strip()
        else:
            # we have no remote given and no upstream => fail
            raise RuntimeError("No known default target for "
                               "publication and none given.")

    # upstream branch needed for update (merge) and subsequent push,
    # in case there is no.
    set_upstream = False
    try:
        # Note: tracking branch actually defined bei entry "merge"
        # PLUS entry "remote"
        std_out, std_err = \
            ds.repo._git_custom_command('',
                                        ["git", "config", "--get",
                                         "branch.{active_branch}.merge".format(active_branch=ds.repo.git_get_active_branch())],
                                        expect_fail=True)
    except CommandError as e:
        if e.code == 1 and e.stdout == "":
            # no tracking branch yet:
            set_upstream = True
        else:
            raise

    # is `dest` an already known remote?
    if dest_resolved not in ds.repo.git_get_remotes():
        # unknown remote
        raise ValueError("No sibling '%s' found." % dest_resolved)

        # Note: add remote currently disabled in publish
        # if dest_url is None:
        #     raise ValueError("No sibling '%s' found. Provide `dest-url`"
        #                      " to register it." % dest_resolved)
        # lgr.info("Sibling %s unknown. Registering ...")
        #
        # # Fill in URL-Template:
        # remote_url = dest_url.replace("%NAME", basename(ds.path))
        # # TODO: handle_name.replace("/", "-")) instead of basename()
        # #       - figure it out ;)
        # #       - either a datasets needs to discover superdatasets in
        # #         order to get it's relative path to provide a name
        # #       - or: We need a different approach on the templates
        #
        # # Add the remote
        # ds.repo.git_remote_add(dest_resolved, remote_url)
        # if dest_pushurl:
        #     # Fill in template:
        #     remote_url_push = \
        #         dest_pushurl.replace("%NAME", basename(ds.path))
        #     # TODO: Different way of replacing %NAME; See above
        #
        #     # Modify push url:
        #     ds.repo._git_custom_command('',
        #                                 ["git", "remote",
        #                                  "set-url",
        #                                  "--push", dest_resolved,
        #                                  remote_url_push])
        # lgr.info("Added sibling '%s'." % dest)
        # lgr.debug("Added remote '%s':\n %s (fetch)\n%s (push)." %
        #           (dest_resolved, remote_url,
        #            remote_url_push if dest_pushurl else remote_url))

    # Note: add remote currently disabled in publish
    # else:
    #     # known remote: parameters dest-url-* currently invalid.
    #     # This may change to adapt the existing remote.
    #     if dest_url:
    #         lgr.warning("Sibling '%s' already exists for dataset '%s'. "
    #                     "Ignoring dest-url %s." %
    #                     (dest_resolved, ds.path, dest_url))
    #     if dest_pushurl:
    #         lgr.warning("Sibling '%s' already exists for dataset '%s'. "
    #                     "Ignoring dest-pushurl %s." %
    #                     (dest_resolved, ds.path, dest_pushurl))

    # Figure out, what to publish
    if path is None or path == ds.path:
        # => publish the dataset itself
        # push local state:
        # TODO: Rework git_push in GitRepo
        cmd = ['git', 'push']
        if set_upstream:
            # no upstream branch yet
            cmd.append("--set-upstream")
        cmd += [dest_resolved, ds.repo.git_get_active_branch()]
        ds.repo._git_custom_command('', cmd)

        # push annex branch:
        if isinstance(ds.repo, AnnexRepo):
            ds.repo.git_push("%s +git-annex:git-annex" % dest_resolved)

        # TODO: if with_data is a shell pattern, we get a list, when called
        # from shell, right?
        # => adapt the following and check constraints to allow for that
        if with_data:
            ds.repo._git_custom_command('', ["git", "annex", "copy"] +
                                        with_data + ["--to", dest_resolved])

        if recursive and ds.get_dataset_handles() != []:
            results = [ds]
            # Note: add remote currently disabled in publish
            # modify URL templates:
            # if dest_url:
            #     dest_url = dest_url.replace('%NAME', basename(ds.path) + '-%NAME')
            # if dest_pushurl:
            #     dest_pushurl = dest_pushurl.replace('%NAME', basename(ds.path) + '-%NAME')
            for subds in ds.get_dataset_handles():
                results.append(Dataset(opj(ds.path, subds)).publish(
                    dest=dest,
                    # Note: use `dest` instead of `dest_resolved` in case
                    # dest was None, so subdatasets would use their default
                    # as well
                    # Note: add remote currently disabled in publish
                    # dest_url=dest_url,
                    # dest_pushurl=dest_pushurl,
                    with_data=with_data,
                    recursive=recursive))
            return results

        return ds

    elif exists(path):
        # At this point `path` is not referencing a (sub)dataset.
        # An annexed file is the only thing left, that `path` might be
        # validly pointing to. Anything else we can't handle currently.
        if isinstance(ds.repo, AnnexRepo):
            try:
                if ds.repo.get_file_key(relativepath):
                    # file is in annex, publish it
                    ds.repo._run_annex_command('copy',
                                               annex_options=[path,
                                                              '--to=%s' % dest_resolved])
                    return path
            except (FileInGitError, FileNotInAnnexError):
                pass
        # `path` can't be published
        lgr.warning("Don't know how to publish %s." % path)
        return None

    else:
        # nothing to publish found
        lgr.warning("Nothing to publish found at %s." % path)
        return None
def __call__(name=None, dataset=None,
             merge=False, recursive=False, fetch_all=False,
             reobtain_data=False):
    """Update a dataset (and optionally its subdatasets) from a sibling.

    Fetches from remote `name` (or all remotes with `fetch_all`), also
    fetching the git-annex branch from the tracking remote where
    applicable, and with `merge` pulls the changes into the active
    branch followed by a `git annex merge`.
    """
    # TODO: Is there an 'update filehandle' similar to install and publish?
    # What does it mean?

    if reobtain_data:
        # TODO: properly define, what to do
        raise NotImplementedError("TODO: Option '--reobtain-data' not "
                                  "implemented yet.")

    # shortcut
    ds = dataset
    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)

    # if we have no dataset given, figure out which one we need to operate
    # on, based on the current working directory of the process:
    if ds is None:
        # try to find a dataset at or above PWD:
        dspath = GitRepo.get_toppath(getpwd())
        if dspath is None:
            raise ValueError("No dataset found at %s." % getpwd())
        ds = Dataset(dspath)
    assert(ds is not None)

    if not ds.is_installed():
        raise ValueError("No installed dataset found at "
                         "{0}.".format(ds.path))
    assert(ds.repo is not None)

    repos_to_update = [ds.repo]
    if recursive:
        # also update the repositories of all known subdatasets
        repos_to_update += [GitRepo(opj(ds.path, sub_path))
                            for sub_path in
                            ds.get_dataset_handles(recursive=True)]

    for repo in repos_to_update:
        # get all remotes:
        remotes = repo.git_get_remotes()
        if name and name not in remotes:
            lgr.warning("'%s' not known to dataset %s.\nSkipping" %
                        (name, repo.path))
            continue

        # Currently '--merge' works for single remote only:
        # TODO: - condition still incomplete
        #       - We can merge if a remote was given or there is a
        #         tracking branch
        #       - we also can fetch all remotes independently on whether or
        #         not we merge a certain remote
        if not name and len(remotes) > 1 and merge:
            lgr.debug("Found multiple remotes:\n%s" % remotes)
            raise NotImplementedError("No merge strategy for multiple "
                                      "remotes implemented yet.")

        lgr.info("Updating handle '%s' ..." % repo.path)

        # fetch remote(s):
        repo.git_fetch(name if name else '',
                       "--all" if fetch_all else '')

        # if it is an annex and there is a tracking branch, and we didn't
        # fetch the entire remote anyway, explicitly fetch git-annex
        # branch:
        # TODO: Is this logic correct? Shouldn't we fetch git-annex from
        # `name` if there is any (or if there is no tracking branch but we
        # have a `name`?
        if knows_annex(repo.path) and not fetch_all:
            # check for tracking branch's remote:
            try:
                std_out, std_err = \
                    repo._git_custom_command('',
                                             ["git", "config", "--get",
                                              "branch.{active_branch}.remote".format(
                                                  active_branch=repo.git_get_active_branch())])
            except CommandError as e:
                # exit code 1 with empty stdout: config key is unset,
                # i.e. no tracking remote
                if e.code == 1 and e.stdout == "":
                    std_out = None
                else:
                    raise
            if std_out:
                # we have a "tracking remote"
                repo.git_fetch("%s git-annex" % std_out.strip())

        # merge:
        if merge:
            lgr.info("Applying changes from tracking branch...")
            cmd_list = ["git", "pull"]
            if name:
                cmd_list.append(name)
                # branch needed, if not default remote
                # => TODO: use default remote/tracking branch to compare
                #    (see above, where git-annex is fetched)
                # => TODO: allow for passing a branch
                #    (or more general refspec?)
                # For now, just use the same name
                cmd_list.append(repo.git_get_active_branch())

            out, err = repo._git_custom_command('', cmd_list)
            lgr.info(out)
            if knows_annex(repo.path):
                # annex-apply:
                lgr.info("Updating annex ...")
                out, err = repo._git_custom_command('',
                                                    ["git", "annex", "merge"])
                lgr.info(out)
def __call__(dataset=None, path=None, data_only=True, recursive=False): # Note: copy logic from install to resolve dataset and path: # shortcut ds = dataset if ds is not None and not isinstance(ds, Dataset): ds = Dataset(ds) if not path: if ds is None: # no dataset, no target location, nothing to do raise ValueError( "insufficient information for uninstallation (needs at " "least a dataset or a path") elif isinstance(path, list): # TODO: not sure. might be possible to deal with that list directly return [Uninstall.__call__( dataset=ds, path=p, data_only=data_only, recursive=recursive) for p in path] # resolve the target location against the provided dataset if path is not None: path = resolve_path(path, ds) lgr.debug("Resolved uninstallation target: {0}".format(path)) # if we have no dataset given, figure out which one we need to operate # on, based on the resolved target location (that is now guaranteed to # be specified if ds is None: # try to find a dataset at or above the installation target dspath = GitRepo.get_toppath(abspath(path)) if dspath is None: # no top-level dataset found, use path as such dspath = path ds = Dataset(dspath) assert(ds is not None) lgr.debug("Resolved target dataset for uninstallation: {0}".format(ds)) if not ds.is_installed(): if not path or path == ds.path: # we want to uninstall the dataset itself, which is not # installed => nothing to do # TODO: consider `data` option! is_installed currently only # checks for a repository lgr.info("Dataset {0} not installed. Nothing to " "do.".format(ds.path)) return else: # we want to uninstall something from a not installed dataset # Doesn't make sense, does it? => fail raise ValueError("Dataset {0} is not installed.".format(ds.path)) assert(ds.repo is not None) if not path or path == ds.path: # uninstall the dataset `ds` # TODO: what to consider? # - whether it is a submodule of another dataset # - `data_only` ? # - `recursive` # - what to return in what case (data_only)? 
raise NotImplementedError("TODO: Uninstall dataset %s" % ds.path) # needed by the logic below assert(isabs(path)) # express the destination path relative to the root of this dataset relativepath = relpath(path, start=ds.path) if path.startswith(pardir): raise ValueError("uninstallation path outside dataset") lgr.debug( "Resolved uninstallation target relative to dataset {0}: {1}".format( ds, relativepath)) # figure out, what path actually is pointing to: if not exists(path): # nothing there, nothing to uninstall lgr.info("Nothing found to uninstall at %s" % path) return if relativepath in ds.get_dataset_handles(recursive=True): # it's a submodule # --recursive required or implied? raise NotImplementedError("TODO: uninstall submodule %s from " "dataset %s" % (relativepath, ds.path)) if isdir(path): # don't know what to do yet # in git vs. untracked? # recursive? raise NotImplementedError("TODO: uninstall directory %s from " "dataset %s" % (path, ds.path)) # we know, it's an existing file if isinstance(ds.repo, AnnexRepo): try: ds.repo.get_file_key(relativepath) except FileInGitError: # file directly in git _file_in_git = True except FileNotInAnnexError: # either an untracked file in this dataset, or something that # also actually exists in the file system but could be part of # a subdataset _untracked_or_within_submodule = True # it's an annexed file if data_only: ds.repo.annex_drop([path]) return path else: raise NotImplementedError("TODO: fully uninstall file %s " "(annex) from dataset %s" % (path, ds.path)) else: # plain git repo if relativepath in ds.repo.get_indexed_files(): # file directly in git _file_in_git = True else: # either an untracked file in this dataset, or something that # also actually exists in the file system but could be part of # a subdataset _untracked_or_within_submodule = True if _file_in_git: if data_only: raise ValueError("%s is not a file handle. Removing its " "data only doesn't make sense." 
% path) else: return ds.repo.git_remove([relativepath]) elif _untracked_or_within_submodule: subds = get_containing_subdataset(ds, relativepath) if ds.path != subds.path: # target path belongs to a subdataset, hand uninstallation # over to it return subds.uninstall( path=relpath(path, start=subds.path), data_only=data_only, recursive=recursive) # this must be an untracked/existing something # it wasn't installed, so we cannot uninstall it raise ValueError("Cannot uninstall %s" % path)
def __call__(sshurl, target=None, target_dir=None, target_url=None, target_pushurl=None, dataset=None, recursive=False, existing='raise', shared=False): if sshurl is None: raise ValueError("""insufficient information for target creation (needs at least a dataset and a SSH URL).""") if target is None and (target_url is not None or target_pushurl is not None): raise ValueError("""insufficient information for adding the target as a sibling (needs at least a name)""") # shortcut ds = dataset if ds is not None and not isinstance(ds, Dataset): ds = Dataset(ds) if ds is None: # try to find a dataset at or above CWD dspath = GitRepo.get_toppath(abspath(getpwd())) if dspath is None: raise ValueError("""No dataset found at or above {0}.""".format(getpwd())) ds = Dataset(dspath) lgr.debug("Resolved dataset for target creation: {0}".format(ds)) assert(ds is not None and sshurl is not None) if not ds.is_installed(): raise ValueError("""Dataset {0} is not installed yet.""".format(ds)) assert(ds.repo is not None) # determine target parameters: parsed_target = urlparse(sshurl) host_name = parsed_target.netloc # TODO: Sufficient to fail on this condition? if not parsed_target.netloc: raise ValueError("Malformed URL: {0}".format(sshurl)) if target_dir is None: if parsed_target.path: target_dir = parsed_target.path else: target_dir = '.' # TODO: centralize and generalize template symbol handling replicate_local_structure = False if "%NAME" not in target_dir: replicate_local_structure = True # collect datasets to use: datasets = dict() datasets[basename(ds.path)] = ds if recursive: for subds in ds.get_dataset_handles(recursive=True): sub_path = opj(ds.path, subds) # TODO: when enhancing Dataset/*Repo classes and therefore # adapt to moved code, make proper distinction between name and # path of a submodule, which are technically different. This # probably will become important on windows as well as whenever # we want to allow for moved worktrees. 
datasets[basename(ds.path) + '/' + subds] = \ Dataset(sub_path) # setup SSH Connection: # TODO: Make the entire setup a helper to use it when pushing via # publish? # - build control master: from datalad.utils import assure_dir not_supported_on_windows("TODO") from os import geteuid # Linux specific import var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid() assure_dir(var_run_user_datalad) control_path = "%s/%s" % (var_run_user_datalad, host_name) control_path += ":%s" % parsed_target.port if parsed_target.port else "" # - start control master: cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \ "-o ControlPersist=yes %s exit" % (control_path, host_name) lgr.debug("Try starting control master by calling:\n%s" % cmd) import subprocess proc = subprocess.Popen(cmd, shell=True) proc.communicate(input="\n") # why the f.. this is necessary? runner = Runner() ssh_cmd = ["ssh", "-S", control_path, host_name] lgr.info("Creating target datasets ...") for current_dataset in datasets: if not replicate_local_structure: path = target_dir.replace("%NAME", current_dataset.replace("/", "-")) else: # TODO: opj depends on local platform, not the remote one. # check how to deal with it. Does windows ssh server accept # posix paths? vice versa? Should planned SSH class provide # tools for this issue? path = normpath(opj(target_dir, relpath(datasets[current_dataset].path, start=ds.path))) if path != '.': # check if target exists # TODO: Is this condition valid for != '.' only? path_exists = True cmd = ssh_cmd + ["ls", path] try: out, err = runner.run(cmd, expect_fail=True, expect_stderr=True) except CommandError as e: if "No such file or directory" in e.stderr and \ path in e.stderr: path_exists = False else: raise # It's an unexpected failure here if path_exists: if existing == 'raise': raise RuntimeError( "Target directory %s already exists." 
% path) elif existing == 'skip': continue elif existing == 'replace': pass else: raise ValueError("Do not know how to hand existing=%s" % repr(existing)) cmd = ssh_cmd + ["mkdir", "-p", path] try: runner.run(cmd) except CommandError as e: lgr.error("Remotely creating target directory failed at " "%s.\nError: %s" % (path, str(e))) continue # init git repo cmd = ssh_cmd + ["git", "-C", path, "init"] if shared: cmd.append("--shared=%s" % shared) try: runner.run(cmd) except CommandError as e: lgr.error("Remotely initializing git repository failed at %s." "\nError: %s\nSkipping ..." % (path, str(e))) continue # check git version on remote end: cmd = ssh_cmd + ["git", "version"] try: out, err = runner.run(cmd) git_version = out.lstrip("git version").strip() lgr.debug("Detected git version on server: %s" % git_version) if git_version < "2.4": lgr.error("Git version >= 2.4 needed to configure remote." " Version detected on server: %s\nSkipping ..." % git_version) continue except CommandError as e: lgr.warning( "Failed to determine git version on remote.\n" "Error: {0}\nTrying to configure anyway " "...".format(e.message)) # allow for pushing to checked out branch cmd = ssh_cmd + ["git", "-C", path, "config", "receive.denyCurrentBranch", "updateInstead"] try: runner.run(cmd) except CommandError as e: lgr.warning("git config failed at remote location %s.\n" "You will not be able to push to checked out " "branch." 
% path) # enable post-update hook: cmd = ssh_cmd + ["mv", opj(path, ".git/hooks/post-update.sample"), opj(path, ".git/hooks/post-update")] try: runner.run(cmd) except CommandError as e: lgr.error("Failed to enable post update hook.\n" "Error: %s" % e.message) # initially update server info "manually": cmd = ssh_cmd + ["git", "-C", path, "update-server-info"] try: runner.run(cmd) except CommandError as e: lgr.error("Failed to update server info.\n" "Error: %s" % e.message) # stop controlmaster (close ssh connection): cmd = ["ssh", "-O", "stop", "-S", control_path, host_name] out, err = runner.run(cmd, expect_stderr=True) if target: # add the sibling(s): if target_url is None: target_url = sshurl if target_pushurl is None: target_pushurl = sshurl result_adding = AddSibling()(dataset=ds, name=target, url=target_url, pushurl=target_pushurl, recursive=recursive, force=existing in {'replace'})
def __call__(dataset=None, dest=None, path=None, # Note: add remote currently disabled in publish # dest_url=None, dest_pushurl=None, with_data=None, recursive=False): # Note: add remote currently disabled in publish # if dest is None and (dest_url is not None # or dest_pushurl is not None): # raise ValueError("""insufficient information for adding the # destination as a sibling (needs at least a name)""") # shortcut ds = dataset if ds is not None and not isinstance(ds, Dataset): ds = Dataset(ds) if not path: path = curdir elif isinstance(path, list): return [Publish.__call__( dataset=ds, dest=dest, path=p, # Note: add remote currently disabled in publish # dest_url=dest_url, # dest_pushurl=dest_pushurl, with_data=with_data, recursive=recursive) for p in path] # resolve the location against the provided dataset if path is not None: path = resolve_path(path, ds) lgr.info("Publishing {0}".format(path)) # if we have no dataset given, figure out which one we need to operate # on, based on the resolved location (that is now guaranteed to # be specified if ds is None: # try to find a dataset at or above the location dspath = GitRepo.get_toppath(abspath(path)) if dspath is None: # no top-level dataset found, use path as such dspath = path ds = Dataset(dspath) lgr.debug("Resolved dataset for publication: {0}".format(ds)) assert(ds is not None) # it might still be about a subdataset of ds: if path is not None: relativepath = relpath(path, start=ds.path) subds = get_containing_subdataset(ds, relativepath) if subds.path != ds.path: # path belongs to a subdataset; hand it over lgr.debug("Hand over to submodule %s" % subds.path) return subds.publish(dest=dest, path=relpath(path, start=subds.path), # Note: add remote currently disabled in publish # dest_url=dest_url, # dest_pushurl=dest_pushurl, with_data=with_data, recursive=recursive) # now, we know, we have to operate on ds. 
So, ds needs to be installed, # since we cannot publish anything from a not installed dataset, # can we? # (But may be just the existence of ds.repo is important here.) if not ds.is_installed(): raise ValueError("No installed dataset found at " "{0}.".format(ds.path)) assert(ds.repo is not None) # TODO: For now we can deal with a sibling(remote) name given by `dest` # only. Figure out, when to allow for passing a local path or URL # directly and what to do in that case. # Note: we need an upstream remote, if there's none given. We could # wait for git push to complain, but we need to explicitly figure it # out for pushing annex branch anyway and we might as well fail right # here. # keep original dest in case it's None for passing to recursive calls: dest_resolved = dest if dest is None: # check for tracking branch's remote: try: std_out, std_err = \ ds.repo._git_custom_command('', ["git", "config", "--get", "branch.{active_branch}.remote".format(active_branch=ds.repo.git_get_active_branch())], expect_fail=True) except CommandError as e: if e.code == 1 and e.stdout == "": std_out = None else: raise if std_out: dest_resolved = std_out.strip() else: # we have no remote given and no upstream => fail raise RuntimeError("No known default target for " "publication and none given.") # upstream branch needed for update (merge) and subsequent push, # in case there is no. set_upstream = False try: # Note: tracking branch actually defined bei entry "merge" # PLUS entry "remote" std_out, std_err = \ ds.repo._git_custom_command('', ["git", "config", "--get", "branch.{active_branch}.merge".format(active_branch=ds.repo.git_get_active_branch())], expect_fail=True) except CommandError as e: if e.code == 1 and e.stdout == "": # no tracking branch yet: set_upstream = True else: raise # is `dest` an already known remote? if dest_resolved not in ds.repo.git_get_remotes(): # unknown remote raise ValueError("No sibling '%s' found." 
% dest_resolved) # Note: add remote currently disabled in publish # if dest_url is None: # raise ValueError("No sibling '%s' found. Provide `dest-url`" # " to register it." % dest_resolved) # lgr.info("Sibling %s unknown. Registering ...") # # # Fill in URL-Template: # remote_url = dest_url.replace("%NAME", basename(ds.path)) # # TODO: handle_name.replace("/", "-")) instead of basename() # # - figure it out ;) # # - either a datasets needs to discover superdatasets in # # order to get it's relative path to provide a name # # - or: We need a different approach on the templates # # # Add the remote # ds.repo.git_remote_add(dest_resolved, remote_url) # if dest_pushurl: # # Fill in template: # remote_url_push = \ # dest_pushurl.replace("%NAME", basename(ds.path)) # # TODO: Different way of replacing %NAME; See above # # # Modify push url: # ds.repo._git_custom_command('', # ["git", "remote", # "set-url", # "--push", dest_resolved, # remote_url_push]) # lgr.info("Added sibling '%s'." % dest) # lgr.debug("Added remote '%s':\n %s (fetch)\n%s (push)." % # (dest_resolved, remote_url, # remote_url_push if dest_pushurl else remote_url)) # Note: add remote currently disabled in publish # else: # # known remote: parameters dest-url-* currently invalid. # # This may change to adapt the existing remote. # if dest_url: # lgr.warning("Sibling '%s' already exists for dataset '%s'. " # "Ignoring dest-url %s." % # (dest_resolved, ds.path, dest_url)) # if dest_pushurl: # lgr.warning("Sibling '%s' already exists for dataset '%s'. " # "Ignoring dest-pushurl %s." 
% # (dest_resolved, ds.path, dest_pushurl)) # Figure out, what to publish if path is None or path == ds.path: # => publish the dataset itself # push local state: # TODO: Rework git_push in GitRepo cmd = ['git', 'push'] if set_upstream: # no upstream branch yet cmd.append("--set-upstream") cmd += [dest_resolved, ds.repo.git_get_active_branch()] ds.repo._git_custom_command('', cmd) # push annex branch: if isinstance(ds.repo, AnnexRepo): ds.repo.git_push("%s +git-annex:git-annex" % dest_resolved) # TODO: if with_data is a shell pattern, we get a list, when called # from shell, right? # => adapt the following and check constraints to allow for that if with_data: ds.repo._git_custom_command('', ["git", "annex", "copy"] + with_data + ["--to", dest_resolved]) if recursive and ds.get_dataset_handles() != []: results = [ds] # Note: add remote currently disabled in publish # modify URL templates: # if dest_url: # dest_url = dest_url.replace('%NAME', basename(ds.path) + '-%NAME') # if dest_pushurl: # dest_pushurl = dest_pushurl.replace('%NAME', basename(ds.path) + '-%NAME') for subds in ds.get_dataset_handles(): results.append(Dataset(opj(ds.path, subds)).publish( dest=dest, # Note: use `dest` instead of `dest_resolved` in case # dest was None, so subdatasets would use their default # as well # Note: add remote currently disabled in publish # dest_url=dest_url, # dest_pushurl=dest_pushurl, with_data=with_data, recursive=recursive)) return results return ds elif exists(path): # At this point `path` is not referencing a (sub)dataset. # An annexed file is the only thing left, that `path` might be # validly pointing to. Anything else we can't handle currently. 
if isinstance(ds.repo, AnnexRepo): try: if ds.repo.get_file_key(relativepath): # file is in annex, publish it ds.repo._run_annex_command('copy', annex_options=[path, '--to=%s' % dest_resolved]) return path except (FileInGitError, FileNotInAnnexError): pass # `path` can't be published lgr.warning("Don't know how to publish %s." % path) return None else: # nothing to publish found lgr.warning("Nothing to publish found at %s." % path) return None