def assure_initialized(self):
    """Assures that the manager is initialized: knows socket_dir and previous connections
    """
    if self._socket_dir is not None:
        return
    from ..config import ConfigManager
    from os import chmod
    cfg = ConfigManager()
    self._socket_dir = opj(cfg.obtain('datalad.locations.cache'),
                           'sockets')
    assure_dir(self._socket_dir)
    try:
        chmod(self._socket_dir, 0o700)
    except OSError as exc:
        lgr.warning(
            "Failed to (re)set permissions on %s. "
            "Future communication will most likely be impaired or fail. "
            "Original exception: %s",
            self._socket_dir, exc_str(exc))

    from os import listdir
    from os.path import isdir
    try:
        self._prev_connections = [
            opj(self.socket_dir, p)
            for p in listdir(self.socket_dir)
            if not isdir(opj(self.socket_dir, p))
        ]
    except OSError as exc:
        self._prev_connections = []
        lgr.warning(
            "Failed to list %s for existing sockets. "
            "Future communication will most likely be impaired or fail. "
            "Original exception: %s",
            self._socket_dir, exc_str(exc))

    lgr.log(5, "Found %d previous connections", len(self._prev_connections))
def socket_dir(self):
    """Return the socket directory, establishing it on first access"""
    if self._socket_dir is None:
        from ..config import ConfigManager
        from os import chmod
        cfg = ConfigManager()
        self._socket_dir = opj(cfg.obtain('datalad.locations.cache'),
                               'sockets')
        assure_dir(self._socket_dir)
        chmod(self._socket_dir, 0o700)
    return self._socket_dir
def get_cached_url_content(url, name=None, fetcher=None, maxage=None):
    """Load a document from a url, caching the loaded content on disk

    Doesn't do anything smart about HTTP headers etc, which could provide
    information for cache/proxy servers on how long to retain the content.

    TODO: theoretically it is not network specific at all -- just a memoize
    pattern -- but at some point we may make it treat headers etc correctly.
    ATM it supports any URL we support via providers/downloaders.

    Parameters
    ----------
    name: str, optional
      Distinguishing name for the cache file (passed to
      get_url_cache_filename)
    fetcher: callable, optional
      Function to call with url if it needs to be refetched
    maxage: float, optional
      Age in days for which the cache remains valid. <0 retains forever,
      None consults the config, and 0 forces a reload.
    """
    doc_fname = get_url_cache_filename(url, name)
    if maxage is None:
        maxage = float(cfg.get('datalad.locations.cache-maxage'))

    doc = None
    if os.path.exists(doc_fname) and maxage != 0:
        fage = (time.time() - os.stat(doc_fname).st_mtime) / (24. * 3600)
        if maxage < 0 or fage < maxage:
            try:
                lgr.debug("use cached request result to '%s' from %s",
                          url, doc_fname)
                with open(doc_fname, 'rb') as f:
                    doc = pickle.load(f)
            except Exception as e:
                # it is OK to ignore any error and fall back on the true source
                lgr.warning(
                    "cannot load cache from '%s', fall back to download: %s",
                    doc_fname, exc_str(e))

    if doc is None:
        if fetcher is None:
            from datalad.downloaders.providers import Providers
            providers = Providers.from_config_files()
            fetcher = providers.fetch

        doc = fetcher(url)
        assure_dir(dirname(doc_fname))
        # use pickle to store the entire request result dict
        with open(doc_fname, 'wb') as f:
            pickle.dump(doc, f)
        lgr.debug("stored result of request to '%s' in %s", url, doc_fname)
    return doc
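# A minimal usage sketch (hypothetical URL and cache name, merely to
# illustrate the maxage semantics documented above):
#
#   # fetch once, then serve from the on-disk pickle for up to 7 days
#   doc = get_cached_url_content('http://example.com/meta.json',
#                                name='example-meta', maxage=7)
#   # maxage=0 forces a re-download; maxage<0 trusts the cache forever
#   doc = get_cached_url_content('http://example.com/meta.json',
#                                name='example-meta', maxage=0)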
def _cached_load_document(url):
    """Load a pyld document from a url, caching the loaded instance on disk
    """
    doc_fname = _get_schema_url_cache_filename(url)

    doc = None
    if os.path.exists(doc_fname):
        try:
            lgr.debug("use cached request result to '%s' from %s",
                      url, doc_fname)
            with open(doc_fname, 'rb') as f:
                doc = pickle.load(f)
        except Exception as e:
            # it is OK to ignore any error and fall back on the true source
            lgr.warning(
                "cannot load cache from '%s', fall back on schema download: %s",
                doc_fname, exc_str(e))

    if doc is None:
        from pyld.jsonld import load_document
        doc = load_document(url)
        assure_dir(dirname(doc_fname))
        # use pickle to store the entire request result dict
        with open(doc_fname, 'wb') as f:
            pickle.dump(doc, f)
        lgr.debug("stored result of request to '%s' in %s", url, doc_fname)
    return doc
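# Sketch of how this loader could be hooked into pyld (an assumption about
# the intended wiring: pyld accepts a custom loader via the
# 'documentLoader' entry of its options dict; jsonld_doc is hypothetical):
#
#   from pyld import jsonld
#   expanded = jsonld.expand(
#       jsonld_doc,
#       options={'documentLoader': _cached_load_document})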
def _fixup_submodule_dotgit_setup(ds, relativepath):
    """Implementation of our current handling of .git in a subdataset"""
    # move .git to superrepo's .git/modules, remove .git, create
    # .git-file
    path = opj(ds.path, relativepath)
    subds_git_dir = opj(path, ".git")
    ds_git_dir = get_git_dir(ds.path)
    moved_git_dir = opj(ds.path, ds_git_dir, "modules", relativepath)
    # safety net
    if islink(subds_git_dir) \
            and realpath(subds_git_dir) == moved_git_dir:
        # .git dir is already moved and linked
        # remove link to enable .git replacement logic below
        os.remove(subds_git_dir)
    else:
        # move .git
        from os import rename, listdir, rmdir
        assure_dir(moved_git_dir)
        for dot_git_entry in listdir(subds_git_dir):
            rename(opj(subds_git_dir, dot_git_entry),
                   opj(moved_git_dir, dot_git_entry))
        assert not listdir(subds_git_dir)
        rmdir(subds_git_dir)

    # TODO: symlink or whatever annex does, since annexes beneath
    #       might break
    #       - figure out, what annex does in direct mode
    #         and/or on windows
    #       - for now use .git file on windows and symlink otherwise
    if not on_windows:
        os.symlink(relpath(moved_git_dir, start=path),
                   opj(path, ".git"))
    else:
        with open(opj(path, ".git"), "w") as f:
            f.write("gitdir: {moved}\n".format(
                moved=relpath(moved_git_dir, start=path)))
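# For illustration (a sketch, not verified output): for a subdataset at
# relativepath 'sub', the function leaves roughly this layout behind:
#
#   <ds.path>/.git/modules/sub/  <- the subdataset's actual git directory
#   <ds.path>/sub/.git           <- symlink to '../.git/modules/sub' on
#                                   POSIX, or a file containing
#                                   'gitdir: ../.git/modules/sub' on windows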
def __call__(self, target, collection=curdir, baseurl=None,
             remote_name=None):
    """
    Returns
    -------
    Collection
    """
    # TODO: Note: Yarik's git mtheirs for publishing branches!

    local_master = get_datalad_master()

    if isdir(abspath(expandvars(expanduser(collection)))):
        c_path = abspath(expandvars(expanduser(collection)))
    elif collection in local_master.git_get_remotes():
        c_path = urlparse(local_master.git_get_remote_url(collection)).path
        if not isdir(c_path):
            raise RuntimeError("Invalid path to collection '%s':\n%s" %
                               (collection, c_path))
    else:
        raise RuntimeError("Unknown collection '%s'." % collection)

    local_collection_repo = get_repo_instance(
        abspath(expandvars(expanduser(c_path))), CollectionRepo)

    available_handles = [key for key in
                         local_collection_repo.get_handle_list()
                         if exists(urlparse(CollectionRepoHandleBackend(
                             local_collection_repo, key).url).path)]

    parsed_target = urlparse(target)  # => scheme, path

    from pkg_resources import resource_filename
    prepare_script_path = \
        resource_filename('datalad',
                          'resources/sshserver_prepare_for_publish.sh')
    cleanup_script_path = \
        resource_filename('datalad',
                          'resources/sshserver_cleanup_after_publish.sh')

    from ..cmd import Runner
    runner = Runner()
    if parsed_target.scheme == 'ssh':
        if parsed_target.netloc == '':
            raise RuntimeError("Invalid ssh address: %s" % target)

        if baseurl is None:
            baseurl = target
        collection_url = baseurl + '/' + local_collection_repo.name + \
                         ".datalad-collection"

        # build control master:
        from datalad.utils import assure_dir
        var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
        assure_dir(var_run_user_datalad)
        control_path = "%s/%s" % (var_run_user_datalad, parsed_target.netloc)
        control_path += ":%s" % parsed_target.port if parsed_target.port else ""

        # start controlmaster:
        cmd_str = "ssh -o \"ControlMaster=yes\" -o \"ControlPath=%s\" " \
                  "-o \"ControlPersist=yes\" %s exit" % \
                  (control_path, parsed_target.netloc)
        lgr.debug("Starting ssh control master: %s" % cmd_str)
        import subprocess
        proc = subprocess.Popen(cmd_str, shell=True)
        proc.communicate(input="\n")  # why the f.. this is necessary?

        # prepare target repositories:
        script_options = "%s %s.datalad-collection" % \
                         (parsed_target.path, local_collection_repo.name)
        for key in available_handles:
            # prepare repos for locally available handles only
            script_options += " %s" % key
        cmd_str = "ssh -S %s %s \'cat | sh /dev/stdin\' %s" % \
                  (control_path, parsed_target.netloc, script_options)
        cmd_str += " < %s" % prepare_script_path
        try:
            out, err = runner.run(cmd_str)
        except CommandError as e:
            lgr.error("Preparation script failed: %s" % str(e))
            out = e.stdout
            err = e.stderr

        # set GIT-SSH:
        environ['GIT_SSH'] = resource_filename('datalad',
                                               'resources/git_ssh.sh')

    elif parsed_target.scheme == 'file' or parsed_target.scheme == '':
        # we should have a local target path
        if not isdir(abspath(expandvars(expanduser(parsed_target.path)))):
            raise RuntimeError("%s doesn't exist." % parsed_target.path)

        target_path = abspath(expandvars(expanduser(parsed_target.path)))
        if baseurl is None:
            baseurl = target_path
        collection_url = baseurl + '/' + local_collection_repo.name + \
                         ".datalad-collection"

        try:
            out, err = runner.run(["sh", prepare_script_path,
                                   target_path,
                                   local_collection_repo.name +
                                   ".datalad-collection"] + available_handles)
        except CommandError as e:
            lgr.error("Preparation script failed: %s" % str(e))
            out = e.stdout
            err = e.stderr

    else:
        raise RuntimeError("Don't know scheme '%s'." %
                           parsed_target.scheme)
    # check output:
    results = parse_script_output(out, err)

    script_failed = False
    for name in available_handles + \
            [local_collection_repo.name + ".datalad-collection"]:
        if not results[name]['init']:
            lgr.error("Server setup for %s failed." % name)
            script_failed = True
    # exit here, if something went wrong:
    if script_failed:
        raise RuntimeError("Server setup failed.")

    # Now, all the handles:
    from .publish_handle import PublishHandle
    handle_publisher = PublishHandle()
    for handle_name in available_handles:

        # get location:
        handle_loc = urlparse(CollectionRepoHandleBackend(
            local_collection_repo, handle_name).url).path
        # raise exception if there's no handle at that location:
        try:
            handle_repo = get_repo_instance(handle_loc, HandleRepo)
        except RuntimeError as e:
            lgr.error("'%s': No handle available at %s. Skip." %
                      (handle_name, handle_loc))
            continue

        annex_ssh = "-S %s" % control_path \
            if parsed_target.scheme == 'ssh' else None
        handle_publisher(None, handle=handle_loc,
                         url=baseurl + '/' + handle_name,
                         ssh_options=annex_ssh)

    # TODO: check success => go on with collection

    # prepare publish branch in local collection:
    # check for existing publish branches:
    from random import choice
    from string import ascii_letters
    from six.moves import xrange
    p_branch = "publish_" + ''.join(choice(ascii_letters)
                                    for i in xrange(6))
    local_collection_repo.git_checkout(p_branch, '-b')

    importer = CustomImporter('Collection', 'Collection', DLNS.this)
    importer.import_data(local_collection_repo.path)
    graphs = importer.get_graphs()
    orig_uri = graphs[REPO_STD_META_FILE[0:-4]].value(
        predicate=RDF.type, object=DLNS.Collection)

    # correct collection uri
    new_uri = URIRef(collection_url)
    for graph_name in graphs:
        for p, o in graphs[graph_name].predicate_objects(subject=orig_uri):
            graphs[graph_name].remove((orig_uri, p, o))
            graphs[graph_name].add((new_uri, p, o))

    # correct handle uris in hasPart statements:
    replacements = []
    from datalad.support.collection import Collection
    from datalad.support.collectionrepo import CollectionRepoBackend
    col_meta = Collection(CollectionRepoBackend(local_collection_repo))
    for o in graphs[REPO_STD_META_FILE[0:-4]].objects(
            subject=new_uri, predicate=DCTERMS.hasPart):
        from os.path import basename
        path = urlparse(o).path
        if exists(path):
            # local handle
            # retrieve name for that uri:
            # Note: That's an experimental implementation
            hdl_name = None
            for key in col_meta:
                if urlparse(col_meta[key].url).path == path:
                    hdl_name = col_meta[key].name
            if hdl_name is None:
                raise RuntimeError("No handle found for path '%s'." % path)
            o_new = URIRef(baseurl + '/' + hdl_name)
            # replacements for collection level:
            replacements.append((o, o_new))

            # replace in collection's handle storage:
            hdl_dir = opj(local_collection_repo.path,
                          local_collection_repo._key2filename(hdl_name))
            hdl_importer = CustomImporter('Collection', 'Handle', o)
            hdl_importer.import_data(hdl_dir)
            hdl_graphs = hdl_importer.get_graphs()
            for g in hdl_graphs:
                for pre, obj in hdl_graphs[g].predicate_objects(o):
                    hdl_graphs[g].remove((o, pre, obj))
                    hdl_graphs[g].add((o_new, pre, obj))

            hdl_importer.store_data(hdl_dir)
            local_collection_repo.git_add(hdl_dir)
        else:
            # this collection contains a handle that is not available
            # locally and therefore can't be published;
            # just skip it for now and assume its uri simply doesn't change
            continue

    for o, o_new in replacements:
        graphs[REPO_STD_META_FILE[0:-4]].remove((new_uri,
                                                 DCTERMS.hasPart, o))
        graphs[REPO_STD_META_FILE[0:-4]].add((new_uri,
                                              DCTERMS.hasPart, o_new))

    # TODO: add commit reference?

    importer.store_data(local_collection_repo.path)
    for graph_name in graphs:
        local_collection_repo.git_add(graph_name + '.ttl')
    local_collection_repo.git_commit("metadata prepared for publishing")

    # add as remote to local:
    # TODO: Better remote name?
    if remote_name is None:
        remote_name = p_branch
    local_collection_repo.git_remote_add(remote_name, collection_url)

    # push local branch "publish" to remote branch "master"
    # we want to push to master, so a different branch has to be checked
    # out in target; in general we can't explicitly allow for the local
    # repo to push
    local_collection_repo.git_push("%s +%s:master" % (remote_name, p_branch))

    # checkout master in local collection:
    local_collection_repo.git_checkout("master")

    # checkout master in published collection:
    if parsed_target.scheme == 'ssh':
        cmd_str = "ssh -S %s %s \'cat | sh /dev/stdin\' %s" % \
                  (control_path, parsed_target.netloc, script_options)
        cmd_str += " < %s" % cleanup_script_path
        try:
            out, err = runner.run(cmd_str)
        except CommandError as e:
            lgr.error("Clean-up script failed: %s" % str(e))

        # stop controlmaster:
        cmd_str = "ssh -O stop -S %s %s" % (control_path,
                                            parsed_target.netloc)
        try:
            out, err = runner.run(cmd_str)
        except CommandError as e:
            lgr.error("Stopping ssh control master failed: %s" % str(e))
    else:
        try:
            out, err = runner.run(["sh", cleanup_script_path,
                                   target_path,
                                   local_collection_repo.name +
                                   ".datalad-collection"] + available_handles)
        except CommandError as e:
            lgr.error("Clean-up script failed: %s" % str(e))

    # TODO: final check, whether everything is fine
    # Delete publish branch:
    local_collection_repo._git_custom_command('', 'git branch -D %s'
                                              % p_branch)

    return Collection(CollectionRepoBackend(local_collection_repo,
                                            remote_name + "/master"))
def __call__(sshurl, target=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None, recursive=False,
             existing='raise', shared=False):

    if sshurl is None:
        raise ValueError("insufficient information for target creation "
                         "(needs at least a dataset and an SSH URL)")

    if target is None and (target_url is not None
                           or target_pushurl is not None):
        raise ValueError("insufficient information for adding the target "
                         "as a sibling (needs at least a name)")

    # shortcut
    ds = dataset

    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    if ds is None:
        # try to find a dataset at or above CWD
        dspath = GitRepo.get_toppath(abspath(getpwd()))
        if dspath is None:
            raise ValueError("No dataset found at or above {0}.".format(
                getpwd()))
        ds = Dataset(dspath)
    lgr.debug("Resolved dataset for target creation: {0}".format(ds))
    assert(ds is not None and sshurl is not None)

    if not ds.is_installed():
        raise ValueError("Dataset {0} is not installed yet.".format(ds))
    assert(ds.repo is not None)

    # determine target parameters:
    parsed_target = urlparse(sshurl)
    host_name = parsed_target.netloc

    # TODO: Sufficient to fail on this condition?
    if not parsed_target.netloc:
        raise ValueError("Malformed URL: {0}".format(sshurl))

    if target_dir is None:
        if parsed_target.path:
            target_dir = parsed_target.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = False
    if "%NAME" not in target_dir:
        replicate_local_structure = True

    # collect datasets to use:
    datasets = dict()
    datasets[basename(ds.path)] = ds
    if recursive:
        for subds in ds.get_dataset_handles(recursive=True):
            sub_path = opj(ds.path, subds)
            # TODO: when enhancing Dataset/*Repo classes and therefore
            # adapt to moved code, make proper distinction between name
            # and path of a submodule, which are technically different.
            # This probably will become important on windows as well as
            # whenever we want to allow for moved worktrees.
            datasets[basename(ds.path) + '/' + subds] = \
                Dataset(sub_path)

    # setup SSH Connection:
    # TODO: Make the entire setup a helper to use it when pushing via
    # publish?

    # - build control master:
    from datalad.utils import assure_dir
    not_supported_on_windows("TODO")
    from os import geteuid  # Linux specific import
    var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
    assure_dir(var_run_user_datalad)
    control_path = "%s/%s" % (var_run_user_datalad, host_name)
    control_path += ":%s" % parsed_target.port if parsed_target.port else ""

    # - start control master:
    cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \
          "-o ControlPersist=yes %s exit" % (control_path, host_name)
    lgr.debug("Try starting control master by calling:\n%s" % cmd)
    import subprocess
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate(input="\n")  # why the f.. this is necessary?

    runner = Runner()
    ssh_cmd = ["ssh", "-S", control_path, host_name]

    lgr.info("Creating target datasets ...")
    for current_dataset in datasets:
        if not replicate_local_structure:
            path = target_dir.replace("%NAME",
                                      current_dataset.replace("/", "-"))
        else:
            # TODO: opj depends on local platform, not the remote one.
            # check how to deal with it. Does windows ssh server accept
            # posix paths? vice versa? Should planned SSH class provide
            # tools for this issue?
            path = normpath(opj(target_dir,
                                relpath(datasets[current_dataset].path,
                                        start=ds.path)))

        if path != '.':
            # check if target exists
            # TODO: Is this condition valid for != '.' only?
            path_exists = True
            cmd = ssh_cmd + ["ls", path]
            try:
                out, err = runner.run(cmd, expect_fail=True,
                                      expect_stderr=True)
            except CommandError as e:
                if "No such file or directory" in e.stderr and \
                        path in e.stderr:
                    path_exists = False
                else:
                    raise  # It's an unexpected failure here

            if path_exists:
                if existing == 'raise':
                    raise RuntimeError("Target directory %s already exists."
                                       % path)
                elif existing == 'skip':
                    continue
                elif existing == 'replace':
                    pass
                else:
                    raise ValueError("Do not know how to handle existing=%s"
                                     % repr(existing))

        cmd = ssh_cmd + ["mkdir", "-p", path]
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.error("Remotely creating target directory failed at "
                      "%s.\nError: %s" % (path, str(e)))
            continue

        # init git repo
        cmd = ssh_cmd + ["git", "-C", path, "init"]
        if shared:
            cmd.append("--shared=%s" % shared)
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.error("Remotely initializing git repository failed at %s."
                      "\nError: %s\nSkipping ..." % (path, str(e)))
            continue

        # check git version on remote end:
        cmd = ssh_cmd + ["git", "version"]
        try:
            out, err = runner.run(cmd)
            # output looks like "git version 2.7.4"
            git_version = out.strip().split()[-1]
            lgr.debug("Detected git version on server: %s" % git_version)
            # compare versions component-wise; a plain string comparison
            # would consider "2.10" older than "2.4"
            from distutils.version import LooseVersion
            if LooseVersion(git_version) < LooseVersion("2.4"):
                lgr.error("Git version >= 2.4 needed to configure remote."
                          " Version detected on server: %s\nSkipping ..."
                          % git_version)
                continue
        except CommandError as e:
            lgr.warning("Failed to determine git version on remote.\n"
                        "Error: {0}\nTrying to configure anyway "
                        "...".format(e.message))

        # allow for pushing to checked out branch
        cmd = ssh_cmd + ["git", "-C", path, "config",
                         "receive.denyCurrentBranch", "updateInstead"]
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.warning("git config failed at remote location %s.\n"
                        "You will not be able to push to checked out "
                        "branch." % path)

        # enable post-update hook:
        cmd = ssh_cmd + ["mv",
                         opj(path, ".git/hooks/post-update.sample"),
                         opj(path, ".git/hooks/post-update")]
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.error("Failed to enable post update hook.\n"
                      "Error: %s" % e.message)

        # initially update server info "manually":
        cmd = ssh_cmd + ["git", "-C", path, "update-server-info"]
        try:
            runner.run(cmd)
        except CommandError as e:
            lgr.error("Failed to update server info.\n"
                      "Error: %s" % e.message)

    # stop controlmaster (close ssh connection):
    cmd = ["ssh", "-O", "stop", "-S", control_path, host_name]
    out, err = runner.run(cmd, expect_stderr=True)

    if target:
        # add the sibling(s):
        if target_url is None:
            target_url = sshurl
        if target_pushurl is None:
            target_pushurl = sshurl
        result_adding = AddSibling()(dataset=ds,
                                     name=target,
                                     url=target_url,
                                     pushurl=target_pushurl,
                                     recursive=recursive,
                                     force=existing in {'replace'})
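# Note: the ControlMaster socket built above can be probed with standard
# OpenSSH -O subcommands; a sketch (same control_path/host_name as above):
#
#   # exit code 0 and "Master running" on stderr if the master is alive
#   runner.run(["ssh", "-O", "check", "-S", control_path, host_name],
#              expect_stderr=True)
#   # "-O stop" (used above) asks the master to close the shared connection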