def compress_files(files, archive, path=None, overwrite=True):
    """Compress `files` into an `archive` file

    Parameters
    ----------
    files : list of str
    archive : str
    path : str
      Alternative directory under which compressor will be invoked, to e.g.
      take into account relative paths of files and/or archive
    overwrite : bool
      Whether to allow overwriting the target archive file if one already exists
    """
    runner = Runner(cwd=path)
    apath = Path(archive)
    if apath.exists():
        if overwrite:
            apath.unlink()
        else:
            raise ValueError(
                'Target archive {} already exists and overwrite is forbidden'.
                format(apath))
    if len(apath.suffixes) > 1 and apath.suffixes[-2] == '.tar':
        cmd = '7z u .tar -so -- {} | 7z u -si -- {}'.format(
            ' '.join(quote_cmdlinearg(f) for f in files),
            quote_cmdlinearg(str(apath)),
        )
    else:
        cmd = ['7z', 'u', str(apath), '--'] + files
    runner.run(cmd)

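# A minimal usage sketch, not part of the original module. It assumes the
# helpers the function above relies on (Runner, Path, and quote_cmdlinearg
# from datalad.utils) are importable and that 7z is on PATH; the file names
# below are illustrative only.
def _compress_files_example():
    # plain archive: the list form is handed to the runner, no shell involved
    compress_files(['data file.csv', 'notes.txt'], 'backup.zip')
    # compressed tarball: the two 7z invocations are joined into one shell
    # pipeline, so each file name is passed through quote_cmdlinearg() first
    compress_files(['data file.csv', 'notes.txt'], 'backup.tar.gz')
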
def test_configs(path=None):

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')

    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        scope='branch')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution config
    # for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} {{mysub}} {args}' % quote_cmdlinearg(sys.executable),
        scope='branch')
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        scope='branch')
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    #       (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} local {args}' % quote_cmdlinearg(sys.executable),
        scope='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        scope='branch')

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')

def to_str(self, include_output=True):
    from datalad.utils import (
        ensure_unicode,
        ensure_list,
        quote_cmdlinearg,
    )
    to_str = "{}: ".format(self.__class__.__name__)
    if self.cmd:
        to_str += "'{}'".format(
            # go for a compact, normal looking, properly quoted
            # command rendering
            ' '.join(
                quote_cmdlinearg(c)
                for c in ensure_list(self.cmd)))
    if self.code:
        to_str += " failed with exitcode {}".format(self.code)
    if self.cwd:
        # only if not under standard PWD
        to_str += " under {}".format(self.cwd)
    if self.msg:
        # typically a command error has no specific idea
        to_str += " [{}]".format(ensure_unicode(self.msg))

    if not include_output:
        return to_str

    if self.stdout:
        to_str += " [out: '{}']".format(ensure_unicode(self.stdout).strip())
    if self.stderr:
        to_str += " [err: '{}']".format(ensure_unicode(self.stderr).strip())
    if self.kwargs:
        to_str += " [info keys: {}]".format(', '.join(self.kwargs.keys()))
    return to_str

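# Illustrative sketch, not from the original module: how the rendering above
# comes out for a typical failure. It assumes this method belongs to DataLad's
# CommandError; the import path may differ between DataLad versions, and the
# command/values are made up.
def _command_error_to_str_example():
    from datalad.support.exceptions import CommandError  # assumed location
    e = CommandError(
        cmd=['git', 'annex', 'get', 'my file.dat'],
        code=1,
        stderr='get: 1 failed',
    )
    # prints something like (POSIX quoting assumed):
    # CommandError: 'git annex get 'my file.dat'' failed with exitcode 1 [err: 'get: 1 failed']
    print(e.to_str())
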
def normalize_command(command):
    """Convert `command` to the string representation.
    """
    if isinstance(command, list):
        command = list(map(assure_unicode, command))
        if len(command) == 1 and command[0] != "--":
            # This is either a quoted compound shell command or a simple
            # one-item command. Pass it as is.
            #
            # FIXME: This covers the predominant command-line case, but, for
            # Python API callers, it means values like ["./script with spaces"]
            # requires additional string-like escaping, which is inconsistent
            # with the handling of multi-item lists (and subprocess's
            # handling). Once we have a way to detect "running from Python API"
            # (discussed in gh-2986), update this.
            command = command[0]
        else:
            if command and command[0] == "--":
                # Strip disambiguation marker. Note: "running from Python API"
                # FIXME from below applies to this too.
                command = command[1:]
            command = " ".join(quote_cmdlinearg(c) for c in command)
    else:
        command = assure_unicode(command)
    return command

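# Illustrative sketch, not part of the original module: what the normalization
# above produces for the three input shapes it distinguishes. It assumes the
# module-level imports the function relies on (assure_unicode,
# quote_cmdlinearg); POSIX-style quoting is assumed for the expected values.
def _normalize_command_example():
    # a single-item list is passed through untouched (quoted compound command)
    assert normalize_command(["./script with spaces --flag"]) == \
        "./script with spaces --flag"
    # a multi-item list is joined with each element shell-quoted
    assert normalize_command(["echo", "two words"]) == "echo 'two words'"
    # a leading "--" disambiguation marker is stripped before joining
    assert normalize_command(["--", "echo", "ok"]) == "echo ok"
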
def _guess_exec(script_file):
    state = None
    try:
        is_exec = os.stat(script_file).st_mode & stat.S_IEXEC
    except OSError as e:
        from errno import ENOENT
        if e.errno == ENOENT and op.islink(script_file):
            # broken symlink
            # does not exist; there's nothing to detect at all
            return {'type': None, 'template': None, 'state': 'absent'}
        else:
            raise e

    # TODO check for exec permission and rely on interpreter
    if is_exec and not os.path.isdir(script_file):
        return {'type': u'executable',
                'template': u'{script} {ds} {args}',
                'state': 'executable'}
    elif script_file.endswith('.sh'):
        return {'type': u'bash_script',
                'template': u'bash {script} {ds} {args}',
                'state': 'executable'}
    elif script_file.endswith('.py'):
        ex = quote_cmdlinearg(sys.executable)
        return {'type': u'python_script',
                'template': u'%s {script} {ds} {args}' % ex,
                'state': 'executable'}
    else:
        return {'type': None, 'template': None, 'state': None}

def put(self, source, destination, recursive=False, preserve_attrs=False):
    import shutil
    copy_fn = shutil.copy2 if preserve_attrs else shutil.copy
    if recursive:
        args = source, destination
        kwargs = {"copy_function": copy_fn}
        try:
            shutil.copytree(*args, **kwargs)
        except FileExistsError:
            # SSHConnection.put() is okay with copying a tree if the
            # destination directory already exists. With Python 3.8, we can
            # make copytree() do the same with dirs_exist_ok=True. But for
            # now, just rely on `cp`.
            cmd = ["cp", "--recursive"]
            if preserve_attrs:
                cmd.append("--preserve")
            self(cmd + [quote_cmdlinearg(a) for a in args])
    else:
        copy_fn(source, destination)

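# Minimal usage sketch, not from the original module: exercising the recursive
# branch above with local paths. It assumes `conn` is an instance of the
# connection-like class this method belongs to (callable with a command list,
# as the fallback branch does); the paths are illustrative only.
#
#   conn.put('incoming/', 'backup/incoming/', recursive=True)
#   # -> shutil.copytree() on a fresh destination; if 'backup/incoming/'
#   #    already exists, falls back to:
#   #    cp --recursive 'incoming/' 'backup/incoming/'
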
def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    This is an alternative implementation without patool, but directly calling 7z.

    Parameters
    ----------
    archive: str
    dir_: str
    """
    apath = Path(archive)
    runner = Runner(cwd=dir_)
    suffixes = _normalize_fname_suffixes(apath.suffixes)
    if len(suffixes) > 1 and suffixes[-2] == '.tar':
        # we have a compressed tar file that needs to be fed through the
        # decompressor first
        cmd = '7z x {} -so | 7z x -si -ttar'.format(quote_cmdlinearg(archive))
    else:
        # fire and forget
        cmd = ['7z', 'x', archive]
    runner.run(cmd, protocol=KillOutput)

def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    This is an alternative implementation without patool, but directly calling 7z.

    Parameters
    ----------
    archive: str
    dir_: str
    """
    apath = Path(archive)
    runner = Runner(cwd=dir_)
    if len(apath.suffixes) > 1 and apath.suffixes[-2] == '.tar':
        # we have a compressed tar file that needs to be fed through the
        # decompressor first
        # hangs somehow, do via single string arg
        #cmd = ['7z', 'x', archive, '-so', '|', '7z', 'x', '-si', '-ttar']
        cmd = '7z x {} -so | 7z x -si -ttar'.format(quote_cmdlinearg(archive))
    else:
        # fire and forget
        cmd = ['7z', 'x', archive]
    runner.run(cmd)

def test_runner(tempfile):
    # test non-dry command call
    runner = Runner()
    content = 'Testing äöü東 real run'
    cmd = 'echo %s > %s' % (content, quote_cmdlinearg(tempfile))
    ret = runner.run(cmd)
    assert_equal(ret, ('', ''))  # no out or err
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)

    # Run with shell
    ret = runner.run(cmd, shell=True)
    assert_equal(ret, ('', ''))  # no out or err
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)

    # Pass as a list and with shell - "not exactly what we expect"
    # Initial suspicion came from incorrect behavior of Runner as a runner
    # for patool.  Apparently (docs for 2.7):
    #   If args is a sequence, the first item specifies the command string,
    #   and any additional items will be treated as additional arguments to
    #   the shell itself.
    # which is what ruins it for us!  So, for now we are not testing/using
    # this form
    # ret = runner.run(split_cmdline(cmd), shell=True)
    # # ?! for some reason there is an empty line in stdout
    # # TODO: figure out.  It shouldn't though be of critical effect
    # ret = (ret[0].rstrip(), ret[1])
    # assert_equal(ret, ('', ''))  # no out or err
    # # And here we get kaboom ATM!
    # ok_file_has_content(tempfile, content, strip=True)

    # test non-dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_equal(os.path.join('foo', 'bar'), output,
                 "Call of: os.path.join, 'foo', 'bar' returned %s" % output)

def _guess_exec(script_file):
    try:
        is_exec = os.stat(script_file).st_mode & stat.S_IEXEC
    except OSError as e:
        from errno import ENOENT
        if e.errno == ENOENT and op.islink(script_file):
            # broken symlink
            # does not exist; there's nothing to detect at all
            return {'type': None, 'template': None, 'state': 'absent'}
        else:
            raise e

    # on some FS the executable bit might not be all that reliable
    # but a procedure might nevertheless be supported.
    # go by extension with "known" interpreters first, and only then
    # try to execute something that looks executable
    if script_file.endswith('.sh'):
        return {
            'type': u'bash_script',
            'template': u'bash {script} {ds} {args}',
            'state': 'executable',
        }
    elif script_file.endswith('.py'):
        ex = quote_cmdlinearg(sys.executable)
        return {
            'type': u'python_script',
            'template': u'%s {script} {ds} {args}' % ex,
            'state': 'executable',
        }
    elif is_exec and not os.path.isdir(script_file):
        return {
            'type': u'executable',
            'template': u'{script} {ds} {args}',
            'state': 'executable',
        }
    else:
        return {'type': None, 'template': None, 'state': None}

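# Illustrative sketch, not from the original module: the shape of the record
# the detection above returns for a Python procedure. A throwaway temp file is
# used so the stat() call succeeds; only the dictionary shape is of interest.
def _guess_exec_example():
    import sys
    import tempfile
    from datalad.utils import quote_cmdlinearg
    with tempfile.NamedTemporaryFile(suffix='.py') as f:
        props = _guess_exec(f.name)
    # for a '.py' file the call template embeds the current interpreter, e.g.
    # {'type': 'python_script',
    #  'template': "/usr/bin/python3 {script} {ds} {args}",
    #  'state': 'executable'}
    assert props['type'] == 'python_script'
    assert props['template'].startswith(quote_cmdlinearg(sys.executable))
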
def __call__(keyfile=None, merge=False, force_update=False,
             bids=False, non_bids_dir='non-bids', dataset=None):
    ds = require_dataset(dataset, check_installed=True, purpose='update')

    repo = ds.repo
    if not keyfile:
        # will error out, if no config was given
        keyfile = repo.config.obtain('datalad.ukbiobank.keyfile')

    # prep for yield
    res = dict(
        action='ukb_update',
        path=ds.path,
        type='dataset',
        logger=lgr,
        refds=ds.path,
    )

    if repo.dirty:
        yield dict(
            res,
            status='error',
            message="Refuse to operate on dirty dataset",
        )
        return

    # check if we have 'ukbfetch' before we start fiddling with the dataset
    # and leave it in a mess for no reason
    try:
        subprocess.run(
            # pull version info
            ['ukbfetch', '-i'],
            capture_output=True,
        )
    except Exception as e:
        raise RuntimeError(
            "Cannot execute 'ukbfetch'. Original error: {}".format(e))

    # just to be nice, and to be able to check it out again,
    # when we are done
    initial_branch = repo.get_active_branch()
    initial_incoming = repo.get_hexsha('incoming')

    # make sure we are in incoming
    repo.call_git(['checkout', 'incoming'])

    # first wipe out all prev. downloaded zip files so we can detect
    # when some files are no longer available
    for fp in repo.pathobj.glob('[0-9]*_[0-9]*_[0-9]_[0-9].*'):
        fp.unlink()

    # a place to put the download logs
    # better be semi-persistent to ease inspection
    tmpdir = repo.pathobj / repo.get_git_dir(repo) / 'tmp' / 'ukb'
    tmpdir.mkdir(parents=True, exist_ok=True)

    # redownload, run with explicit mode, because we just deleted the
    # ZIP files and that is OK
    ds.run(
        cmd='ukbfetch -v -a{} -b.ukbbatch -o{}'.format(
            quote_cmdlinearg(keyfile),
            quote_cmdlinearg(str(tmpdir)),
        ),
        explicit=True,
        outputs=['.'],
        message="Update from UKbiobank",
    )

    # TODO what if something broke before? needs force switch
    if not force_update and repo.get_hexsha() == initial_incoming:
        yield dict(
            res,
            status='notneeded',
            message='No new content available',
        )
        repo.call_git(['checkout', initial_branch])
        # TODO drop?
        return

    # onto extraction and transformation of downloaded content
    repo.call_git(['checkout', 'incoming-processed'])

    # mark the incoming change as merged
    # (but we do not actually want any branch content)
    repo.call_git(['merge', 'incoming', '--strategy=ours', 'incoming'])

    for fp in repo.get_content_info(
            ref='incoming-processed', eval_file_type=False):
        fp.unlink()

    subid = None
    if bids:
        from datalad_ukbiobank.ukb2bids import restructure_ukb2bids
        # get participant ID from batch file
        subid = list(repo.call_git_items_(
            ["cat-file", "-p", "incoming:.ukbbatch"]))[0].split(maxsplit=1)[0]

    # discover all zip files present in the last commit in 'incoming'
    for fp, props in repo.get_content_annexinfo(
            ref='incoming', eval_availability=False).items():
        if fp.name.startswith('.'):
            # skip internals
            continue
        # we have to extract into per-instance directories, otherwise files
        # would conflict
        ids = fp.stem.split('_')
        if not len(ids) >= 3:
            raise RuntimeError(
                'Unrecognized filename structure: {}'.format(fp))
        extract_dir = repo.pathobj / 'instance-{}'.format(ids[2])
        extract_dir.mkdir(exist_ok=True)

        if fp.suffix == '.zip':
            with chpwd(extract_dir):
                # extract and add their content
                AddArchiveContent.__call__(
                    props['key'],
                    key=True,
                    annex=repo,
                    # --use-current-dir due to
                    # https://github.com/datalad/datalad/issues/3995
                    use_current_dir=True,
                    allow_dirty=True,
                    commit=False,
                )
        else:
            # move into instance dir, and strip participant ID, and instance ID
            # but keep array index
            # e.g. -> 25747_3_0.adv -> instance-3/25747_0
            repo.call_git([
                'annex', 'fromkey', props['key'],
                str(extract_dir / ('_'.join(ids[1::2]) + ''.join(fp.suffixes)))
            ])
        if bids:
            yield from restructure_ukb2bids(
                ds,
                subid=subid,
                unrecognized_dir=Path('ses-{}'.format(ids[2])) / non_bids_dir,
                base_path=extract_dir,
                session=ids[2],
            )

    # save whatever the state is now, `save` will discover deletions
    # automatically and also commit them -- wonderful!
    ds.save(message="Track ZIP file content")

    yield dict(
        res,
        status='ok',
    )

    if not merge:
        return

    # and update active branch
    repo.call_git(['checkout', initial_branch])

    if initial_branch in ('incoming', 'incoming-processed'):
        yield dict(
            res,
            action='ukb_merge_update',
            status='impossible',
            message='Refuse to merge into incoming* branch',
        )
        return

    repo.call_git([
        'merge', '-m', "Merge update from UKbiobank", 'incoming-processed'])

    yield dict(
        res,
        action='ukb_merge_update',
        status='ok',
    )
    return

def __str__(self):
    s = self._str
    if s is None:
        s = self._str = \
            '%s(%s)' % (self.__class__.__name__,
                        ut.quote_cmdlinearg(self.path))
    return s

def __call__(subjects='list', dataset=None, ifexists=None, force=False):
    from pyxnat import Interface as XNATInterface

    ds = require_dataset(
        dataset, check_installed=True, purpose='update')

    subjects = ensure_list(subjects)

    # require a clean dataset
    if ds.repo.dirty:
        yield get_status_dict(
            'update',
            ds=ds,
            status='impossible',
            message=(
                'Clean dataset required; use `datalad status` to inspect '
                'unsaved changes'))
        return

    # prep for yield
    res = dict(
        action='xnat_update',
        path=ds.path,
        type='dataset',
        logger=lgr,
        refds=ds.path,
    )

    # obtain configured XNAT url and project name
    xnat_cfg_name = ds.config.get('datalad.xnat.default-name', 'default')
    cfg_section = 'datalad.xnat.{}'.format(xnat_cfg_name)
    xnat_url = ds.config.get('{}.url'.format(cfg_section))
    xnat_project = ds.config.get('{}.project'.format(cfg_section))
    file_path = ds.config.get('{}.path'.format(cfg_section))

    # obtain user credentials
    parsed_url = urlparse(xnat_url)
    no_proto_url = '{}{}'.format(
        parsed_url.netloc, parsed_url.path).replace(' ', '')
    cred = UserPassword(name=no_proto_url, url=None)()
    xn = XNATInterface(server=xnat_url, **cred)

    # provide subject list
    if 'list' in subjects:
        from datalad.ui import ui
        subs = xn.select.project(xnat_project).subjects().get()
        ui.message('The following subjects are available for XNAT '
                   'project {}:'.format(xnat_project))
        for s in sorted(subs):
            ui.message(" {}".format(quote_cmdlinearg(s)))
        ui.message(
            'Specify a specific subject(s) or "all" to download associated '
            'files for.')
        return

    # query the specified subject(s) to make sure it exists and is accessible
    if 'all' not in subjects:
        from datalad.ui import ui
        subs = []
        for s in subjects:
            sub = xn.select.project(xnat_project).subject(s)
            nexp = len(sub.experiments().get())
            if nexp > 0:
                subs.append(s)
            else:
                ui.message(
                    'Failed to obtain information on subject {} from XNAT '
                    'project {}:'.format(s, xnat_project))
                return
    else:
        # if all, get list of all subjects
        subs = xn.select.project(xnat_project).subjects().get()

    # parse and download one subject at a time
    from datalad_xnat.parser import parse_xnat
    addurl_dir = ds.pathobj / 'code' / 'addurl_files'
    for sub in subs:
        yield from parse_xnat(
            ds,
            sub=sub,
            force=force,
            xn=xn,
            xnat_url=xnat_url,
            xnat_project=xnat_project,
        )

        # add file urls for subject
        lgr.info('Downloading files for subject %s', sub)
        table = f"{addurl_dir}/{sub}_table.csv"
        # this corresponds to the header field 'filename' in the csv table
        filename = '{filename}'
        filenameformat = f"{file_path}{filename}"
        ds.addurls(
            table, '{url}', filenameformat,
            ifexists=ifexists,
            save=False,
            cfg_proc='xnat_dataset',
            result_renderer='default',
        )

        ds.save(message=f"Update files for subject {sub}", recursive=True)

    lgr.info(
        'Files were updated for the following subjects in XNAT project %s:',
        xnat_project)
    for s in sorted(subs):
        lgr.info(" {}".format(quote_cmdlinearg(s)))

    yield dict(res, status='ok')
    return

def __call__(
        spec=None,
        dataset=None,
        discover=False,
        help_proc=False):
    if not spec and not discover:
        raise InsufficientArgumentsError('requires at least a procedure name')
    if help_proc and not spec:
        raise InsufficientArgumentsError('requires a procedure name')

    try:
        ds = require_dataset(
            dataset, check_installed=False,
            purpose='run a procedure')
    except NoDatasetFound:
        ds = None

    if discover:
        # specific path of procedures that were already reported
        reported = set()
        # specific names of procedure for which an active one has been
        # found
        active = set()
        for m, cmd_name, cmd_tmpl, cmd_help in \
                _get_procedure_implementation('*', ds=ds):
            if m in reported:
                continue
            ex = _guess_exec(m)
            # configured template (call-format string) takes precedence:
            if cmd_tmpl:
                ex['template'] = cmd_tmpl
            if ex['state'] is None:
                # doesn't seem like a match
                lgr.debug("%s does not look like a procedure, ignored.", m)
                continue
            state = 'overridden' if cmd_name in active else ex['state']
            message = ex['type'] if ex['type'] else 'unknown type'
            message += ' ({})'.format(state) if state != 'executable' else ''
            res = get_status_dict(
                action='discover_procedure',
                path=m,
                type='file',
                logger=lgr,
                refds=ds.path if ds else None,
                status='ok',
                state=state,
                procedure_name=cmd_name,
                procedure_type=ex['type'],
                procedure_callfmt=ex['template'],
                procedure_help=cmd_help,
                message=message)
            reported.add(m)
            if state == 'executable':
                active.add(cmd_name)
            yield res
        return

    if not isinstance(spec, (tuple, list)):
        # maybe coming from config
        spec = split_cmdline(spec)
    name = spec[0]
    args = spec[1:]

    try:
        # get the first match and run with it
        procedure_file, cmd_name, cmd_tmpl, cmd_help = \
            next(_get_procedure_implementation(name, ds=ds))
    except StopIteration:
        res = get_status_dict(
            action='run_procedure',
            # TODO: Default renderer requires a key "path" to exist.
            # Doesn't make a lot of sense in this case
            path=name,
            logger=lgr,
            refds=ds.path if ds else None,
            status='impossible',
            message="Cannot find procedure with name '%s'" % name)
        yield res
        return

    ex = _guess_exec(procedure_file)
    # configured template (call-format string) takes precedence:
    if cmd_tmpl:
        ex['template'] = cmd_tmpl

    if help_proc:
        if cmd_help:
            res = get_status_dict(
                action='procedure_help',
                path=procedure_file,
                type='file',
                logger=lgr,
                refds=ds.path if ds else None,
                status='ok',
                state=ex['state'],
                procedure_name=cmd_name,
                procedure_type=ex['type'],
                procedure_callfmt=ex['template'],
                message=cmd_help)
        else:
            res = get_status_dict(
                action='procedure_help',
                path=procedure_file,
                type='file',
                logger=lgr,
                refds=ds.path if ds else None,
                status='impossible',
                state=ex['state'],
                procedure_name=cmd_name,
                procedure_type=ex['type'],
                procedure_callfmt=ex['template'],
                message="No help available for '%s'" % name)
        yield res
        return

    if not ex['template']:
        raise ValueError("No idea how to execute procedure %s. "
                         "Missing 'execute' permissions?" % procedure_file)

    cmd = ex['template'].format(
        script=quote_cmdlinearg(procedure_file),
        ds=quote_cmdlinearg(ds.path) if ds else '',
        args=(u' '.join(quote_cmdlinearg(a) for a in args) if args else ''))
    lgr.info(u"Running procedure %s", name)
    lgr.debug(u'Full procedure command: %r', cmd)
    for r in Run.__call__(
            cmd=cmd,
            dataset=ds,
            explicit=True,
            inputs=None,
            outputs=None,
            # pass through here
            on_failure='ignore',
            return_type='generator'):
        yield r

def __call__(url, path="{subject}/{session}/{scan}/", project=None,
             force=False, dataset=None):
    from pyxnat import Interface as XNATInterface

    ds = require_dataset(
        dataset, check_installed=True, purpose='initialization')

    config = ds.config
    path = with_pathsep(path)

    # prep for yield
    res = dict(
        action='xnat_init',
        path=ds.path,
        type='dataset',
        logger=lgr,
        refds=ds.path,
    )

    # obtain user credentials, use simplified/stripped URL as identifier
    # given we don't have more knowledge than the user, do not
    # give a `url` to provide hints on how to obtain credentials
    parsed_url = urlparse(url)
    no_proto_url = '{}{}'.format(
        parsed_url.netloc, parsed_url.path).replace(' ', '')
    cred = UserPassword(name=no_proto_url, url=None)()

    xn = XNATInterface(server=url, **cred)

    # now we make a simple request to obtain the server version
    # we don't care much, but if the URL or the credentials are wrong
    # we will not get to see one
    try:
        xnat_version = xn.version()
        lgr.debug("XNAT server version is %s", xnat_version)
    except Exception as e:
        yield dict(
            res,
            status='error',
            message=('Failed to access the XNAT server. Full error:\n%s', e),
        )
        return

    if project is None:
        from datalad.ui import ui
        projects = xn.select.projects().get()
        ui.message('No project name specified. The following projects are '
                   'available on {} for user {}:'.format(url, cred['user']))
        for p in sorted(projects):
            # list and prep for C&P
            # TODO multi-column formatting?
            ui.message(" {}".format(quote_cmdlinearg(p)))
        return

    # query the specified project to make sure it exists and is accessible
    proj = xn.select.project(project)

    try:
        nsubj = len(proj.subjects().get())
    except Exception as e:
        yield dict(
            res,
            status='error',
            message=(
                'Failed to obtain information on project %s from XNAT. '
                'Full error:\n%s', project, e),
        )
        return

    lgr.info('XNAT reports %i subjects currently on-record for project %s',
             nsubj, project)

    # check if dataset already initialized
    auth_dir = ds.pathobj / '.datalad' / 'providers'
    if auth_dir.exists() and not force:
        yield dict(
            res,
            status='error',
            message='Dataset found already initialized, '
                    'use `force` to reinitialize',
        )
        return

    # put essential configuration into the dataset
    config.set('datalad.xnat.default.url', url, where='dataset', reload=False)
    config.set('datalad.xnat.default.project', project, where='dataset')
    config.set('datalad.xnat.default.path', path, where='dataset')

    ds.save(
        path=ds.pathobj / '.datalad' / 'config',
        to_git=True,
        message="Configure default XNAT url and project",
    )

    # Configure XNAT access authentication
    ds.run_procedure(spec='cfg_xnat_dataset')

    yield dict(
        res,
        status='ok',
    )
    return

def _create_sibling_ria(
        ds,
        url,
        name,
        storage_sibling,
        storage_name,
        existing,
        shared,
        group,
        post_update_hook,
        trust_level,
        res_kwargs):
    # be safe across datasets
    res_kwargs = res_kwargs.copy()
    # update dataset
    res_kwargs['ds'] = ds

    if not isinstance(ds.repo, AnnexRepo):
        # No point in dealing with a special remote when there's no annex.
        # Note, that in recursive invocations this might only apply to some of
        # the datasets. Therefore dealing with it here rather than one level up.
        lgr.debug("No annex at %s. Ignoring special remote options.", ds.path)
        storage_sibling = False
        storage_name = None

    # parse target URL
    try:
        ssh_host, base_path, rewritten_url = verify_ria_url(url, ds.config)
    except ValueError as e:
        yield get_status_dict(
            status='error',
            message=str(e),
            **res_kwargs
        )
        return

    base_path = Path(base_path)

    git_url = decode_source_spec(
        # append dataset id to url and use magic from clone-helper:
        url + '#{}'.format(ds.id),
        cfg=ds.config
    )['giturl']
    # determine layout locations; go for a v1 layout
    repo_path, _, _ = get_layout_locations(1, base_path, ds.id)

    ds_siblings = [r['name'] for r in ds.siblings(result_renderer=None)]
    # Figure whether we are supposed to skip this very dataset
    if existing == 'skip' and (
            name in ds_siblings or (
                storage_name and storage_name in ds_siblings)):
        yield get_status_dict(
            status='notneeded',
            message="Skipped on existing sibling",
            **res_kwargs
        )
        # if we skip here, nothing else can change that decision further
        # down
        return

    # figure whether we need to skip or error due to an existing target repo
    # before we try to init a special remote.
    if ssh_host:
        from datalad import ssh_manager
        ssh = ssh_manager.get_connection(
            ssh_host,
            use_remote_annex_bundle=False)
        ssh.open()

    if existing in ['skip', 'error']:
        config_path = repo_path / 'config'
        # No .git -- if it's an existing repo in a RIA store it should be a
        # bare repo.
        # Theoretically we could have additional checks for whether we have
        # an empty repo dir or a non-bare repo or whatever else.
        if ssh_host:
            try:
                ssh('[ -e {p} ]'.format(p=quote_cmdlinearg(str(config_path))))
                exists = True
            except CommandError:
                exists = False
        else:
            exists = config_path.exists()

        if exists:
            if existing == 'skip':
                # 1. not rendered by default
                # 2. message doesn't show up in ultimate result
                #    record as shown by -f json_pp
                yield get_status_dict(
                    status='notneeded',
                    message="Skipped on existing remote "
                            "directory {}".format(repo_path),
                    **res_kwargs
                )
                return
            else:  # existing == 'error'
                yield get_status_dict(
                    status='error',
                    message="remote directory {} already "
                            "exists.".format(repo_path),
                    **res_kwargs
                )
                return

    if storage_sibling == 'only':
        lgr.info("create storage sibling '{}' ...".format(name))
    else:
        lgr.info("create sibling{} '{}'{} ...".format(
            's' if storage_name else '',
            name,
            " and '{}'".format(storage_name) if storage_name else '',
        ))
    create_ds_in_store(SSHRemoteIO(ssh_host) if ssh_host else LocalIO(),
                       base_path, ds.id, '2', '1')
    if storage_sibling:
        # we are using the main `name`, if the only thing we are creating
        # is the storage sibling
        srname = name if storage_sibling == 'only' else storage_name

        lgr.debug('init special remote {}'.format(srname))
        special_remote_options = [
            'type=external',
            'externaltype=ora',
            'encryption=none',
            'autoenable=true',
            'url={}'.format(url)]
        try:
            ds.repo.init_remote(
                srname,
                options=special_remote_options)
        except CommandError as e:
            if existing == 'reconfigure' \
                    and 'git-annex: There is already a special remote' \
                    in e.stderr:
                # run enableremote instead
                lgr.debug(
                    "special remote '%s' already exists. "
                    "Run enableremote instead.",
                    srname)
                # TODO: Use AnnexRepo.enable_remote (which needs to get
                #       `options` first)
                ds.repo.call_annex([
                    'enableremote',
                    srname] + special_remote_options)
            else:
                yield get_status_dict(
                    status='error',
                    message="initremote failed.\nstdout: %s\nstderr: %s"
                    % (e.stdout, e.stderr),
                    **res_kwargs
                )
                return

        if trust_level:
            ds.repo.call_annex([trust_level, srname])
        # get uuid for use in bare repo's config
        uuid = ds.config.get("remote.{}.annex-uuid".format(srname))

    if storage_sibling == 'only':
        # we can stop here, the rest of the function is about setting up
        # the git remote part of the sibling
        yield get_status_dict(
            status='ok',
            **res_kwargs,
        )
        return

    # 2. create a bare repository in-store:

    lgr.debug("init bare repository")
    # TODO: we should prob. check whether it's there already. How?
    # Note: like the special remote itself, we assume local FS if no
    # SSH host is specified
    disabled_hook = repo_path / 'hooks' / 'post-update.sample'
    enabled_hook = repo_path / 'hooks' / 'post-update'
    if group:
        chgrp_cmd = "chgrp -R {} {}".format(
            quote_cmdlinearg(str(group)),
            quote_cmdlinearg(str(repo_path)))
    if ssh_host:
        ssh('cd {rootdir} && git init --bare{shared}'.format(
            rootdir=quote_cmdlinearg(str(repo_path)),
            shared=" --shared='{}'".format(
                quote_cmdlinearg(shared)) if shared else ''
        ))

        if storage_sibling:
            # write special remote's uuid into git-config, so clone can tell
            # which one it is supposed to be and enable it even with a
            # fallback URL
            ssh("cd {rootdir} && git config datalad.ora-remote.uuid {uuid}"
                "".format(rootdir=quote_cmdlinearg(str(repo_path)),
                          uuid=uuid))

        if post_update_hook:
            ssh('mv {} {}'.format(quote_cmdlinearg(str(disabled_hook)),
                                  quote_cmdlinearg(str(enabled_hook))))

        if group:
            # Either the repository existed before or a new directory was
            # created for it; set its group to the desired one, if one was
            # provided, with the same chgrp
            ssh(chgrp_cmd)
    else:
        gr = GitRepo(repo_path, create=True, bare=True,
                     shared=shared if shared else None)
        if storage_sibling:
            # write special remote's uuid into git-config, so clone can tell
            # which one it is supposed to be and enable it even with a
            # fallback URL
            gr.config.add("datalad.ora-remote.uuid", uuid, where='local')

        if post_update_hook:
            disabled_hook.rename(enabled_hook)
        if group:
            # TODO; do we need a cwd here?
            subprocess.run(chgrp_cmd, cwd=quote_cmdlinearg(ds.path))

    # add a git remote to the bare repository
    # Note: needs annex-ignore! Otherwise we might push into dirhash
    # lower annex/object tree instead of mixed, since it's a bare
    # repo. This in turn would be an issue, if we want to pack the
    # entire thing into an archive. Special remote will then not be
    # able to access content in the "wrong" place within the archive
    lgr.debug("set up git remote")
    if name in ds_siblings:
        # otherwise we should have skipped or failed before
        assert existing == 'reconfigure'
    ds.config.set(
        "remote.{}.annex-ignore".format(name),
        value="true",
        where="local")
    ds.siblings(
        'configure',
        name=name,
        url=git_url if ssh_host else str(repo_path),
        recursive=False,
        # Note, that this should be None if storage_sibling was not set
        publish_depends=storage_name,
        result_renderer=None,
        # Note, that otherwise a subsequent publish will report
        # "notneeded".
        fetch=True
    )

    yield get_status_dict(
        status='ok',
        **res_kwargs,
    )

def _create_sibling_ria(ds, url, name, ria_remote, ria_remote_name, existing,
                        shared, group, post_update_hook, res_kwargs):
    # be safe across datasets
    res_kwargs = res_kwargs.copy()

    # parse target URL
    try:
        ssh_host, base_path = verify_ria_url(url, ds.config)
    except ValueError as e:
        yield get_status_dict(
            status='error',
            message=str(e),
            **res_kwargs)
        return

    base_path = Path(base_path)

    git_url = decode_source_spec(
        # append dataset id to url and use magic from clone-helper:
        url + '#{}'.format(ds.id),
        cfg=ds.config)['giturl']
    # go for a v1 layout
    repo_path, _, _ = get_layout_locations(1, base_path, ds.id)

    ds_siblings = [r['name'] for r in ds.siblings(result_renderer=None)]
    # Figure whether we are supposed to skip this very dataset
    if existing == 'skip' and (
            name in ds_siblings or
            (ria_remote_name and ria_remote_name in ds_siblings)):
        yield get_status_dict(
            status='notneeded',
            message="Skipped on existing sibling",
            **res_kwargs)
        # if we skip here, nothing else can change that decision further
        # down
        return

    # we might learn that some processing (remote repo creation) is
    # not desired
    skip = False

    lgr.info("create sibling{} '{}'{} ...".format(
        's' if ria_remote_name else '',
        name,
        " and '{}'".format(ria_remote_name) if ria_remote_name else '',
    ))
    if ssh_host:
        from datalad import ssh_manager
        ssh = ssh_manager.get_connection(
            ssh_host,
            use_remote_annex_bundle=False)
        ssh.open()

    # determine layout locations
    if ria_remote:
        lgr.debug('init special remote {}'.format(ria_remote_name))
        ria_remote_options = ['type=external',
                              'externaltype=ria',
                              'encryption=none',
                              'autoenable=true',
                              'url={}'.format(url)]
        try:
            ds.repo.init_remote(
                ria_remote_name,
                options=ria_remote_options)
        except CommandError as e:
            if existing in ['replace', 'reconfigure'] \
                    and 'git-annex: There is already a special remote' \
                    in e.stderr:
                # run enableremote instead
                lgr.debug(
                    "special remote '%s' already exists. "
                    "Run enableremote instead.",
                    ria_remote_name)
                # TODO: Use AnnexRepo.enable_remote (which needs to get
                #       `options` first)
                cmd = [
                    'git', 'annex', 'enableremote',
                    ria_remote_name] + ria_remote_options
                subprocess.run(cmd, cwd=quote_cmdlinearg(ds.repo.path))
            else:
                yield get_status_dict(
                    status='error',
                    message="initremote failed.\nstdout: %s\nstderr: %s"
                    % (e.stdout, e.stderr),
                    **res_kwargs)
                return

        # 1. create remote object store:
        # Note: All it actually takes is to trigger the special
        # remote's `prepare` method once.
        # ATM trying to achieve that by invoking a minimal fsck.
        # TODO: - It's probably faster to actually talk to the special
        #         remote (i.e. pretending to be annex and use
        #         the protocol to send PREPARE)
        #       - Alternatively we can create the remote directory and
        #         ria version file directly, but this means
        #         code duplication that then needs to be kept in sync
        #         with ria-remote implementation.
        #       - this leads to the third option: Have that creation
        #         routine importable and callable from
        #         ria-remote package without the need to actually
        #         instantiate a RIARemote object
        lgr.debug("initializing object store")
        ds.repo.fsck(
            remote=ria_remote_name,
            fast=True,
            annex_options=['--exclude=*/*'])
    else:
        # with no special remote we currently need to create the
        # required directories
        # TODO: This should be cleaner once we have access to the
        #       special remote's RemoteIO classes without
        #       talking via annex
        if ssh_host:
            try:
                stdout, stderr = ssh('test -e {repo}'.format(
                    repo=quote_cmdlinearg(str(repo_path))))
                exists = True
            except CommandError as e:
                exists = False
            if exists:
                if existing == 'skip':
                    # 1. not rendered by default
                    # 2. message doesn't show up in ultimate result
                    #    record as shown by -f json_pp
                    yield get_status_dict(
                        status='notneeded',
                        message="Skipped on existing remote "
                                "directory {}".format(repo_path),
                        **res_kwargs)
                    skip = True
                elif existing in ['error', 'reconfigure']:
                    yield get_status_dict(
                        status='error',
                        message="remote directory {} already "
                                "exists.".format(repo_path),
                        **res_kwargs)
                    return
                elif existing == 'replace':
                    ssh('chmod u+w -R {}'.format(
                        quote_cmdlinearg(str(repo_path))))
                    ssh('rm -rf {}'.format(quote_cmdlinearg(str(repo_path))))
            if not skip:
                ssh('mkdir -p {}'.format(quote_cmdlinearg(str(repo_path))))
        else:
            if repo_path.exists():
                if existing == 'skip':
                    skip = True
                elif existing in ['error', 'reconfigure']:
                    yield get_status_dict(
                        status='error',
                        message="remote directory {} already "
                                "exists.".format(repo_path),
                        **res_kwargs)
                    return
                elif existing == 'replace':
                    rmtree(repo_path)
            if not skip:
                repo_path.mkdir(parents=True)

    # Note, that this could have changed since last tested due to existing
    # remote dir
    if skip:
        return

    # 2. create a bare repository in-store:

    lgr.debug("init bare repository")
    # TODO: we should prob. check whether it's there already. How?
    # Note: like the special remote itself, we assume local FS if no
    # SSH host is specified
    disabled_hook = repo_path / 'hooks' / 'post-update.sample'
    enabled_hook = repo_path / 'hooks' / 'post-update'
    if group:
        chgrp_cmd = "chgrp -R {} {}".format(
            quote_cmdlinearg(str(group)),
            quote_cmdlinearg(str(repo_path)))
    if ssh_host:
        ssh('cd {rootdir} && git init --bare{shared}'.format(
            rootdir=quote_cmdlinearg(str(repo_path)),
            shared=" --shared='{}'".format(
                quote_cmdlinearg(shared)) if shared else ''))
        if post_update_hook:
            ssh('mv {} {}'.format(quote_cmdlinearg(str(disabled_hook)),
                                  quote_cmdlinearg(str(enabled_hook))))

        if group:
            # Either the repository existed before or a new directory was
            # created for it; set its group to the desired one, if one was
            # provided, with the same chgrp
            ssh(chgrp_cmd)
    else:
        GitRepo(
            repo_path, create=True, bare=True,
            shared=" --shared='{}'".format(
                quote_cmdlinearg(shared)) if shared else None)
        if post_update_hook:
            disabled_hook.rename(enabled_hook)
        if group:
            # TODO; do we need a cwd here?
            subprocess.run(chgrp_cmd, cwd=quote_cmdlinearg(ds.path))

    # add a git remote to the bare repository
    # Note: needs annex-ignore! Otherwise we might push into default
    # annex/object tree instead of directory type tree with dirhash
    # lower. This in turn would be an issue, if we want to pack the
    # entire thing into an archive. Special remote will then not be
    # able to access content in the "wrong" place within the archive
    lgr.debug("set up git remote")
    # TODO:
    # - This siblings call results in "[WARNING] Failed to determine
    #   if datastore carries annex."
    #   (see https://github.com/datalad/datalad/issues/4028)
    #   => for now have annex-ignore configured before. Evtl. Allow
    #      configure/add to include that option
    # - additionally there's
    #   https://github.com/datalad/datalad/issues/3989,
    #   where datalad-siblings might hang forever
    if name in ds_siblings:
        # otherwise we should have skipped or failed before
        assert existing in ['replace', 'reconfigure']
    ds.config.set(
        "remote.{}.annex-ignore".format(name),
        value="true",
        where="local")
    ds.siblings(
        'configure',
        name=name,
        url=git_url if ssh_host else str(repo_path),
        recursive=False,
        # Note, that this should be None if ria_remote was not set
        publish_depends=ria_remote_name,
        result_renderer=None,
        # Note, that otherwise a subsequent publish will report
        # "notneeded".
        fetch=True)

    yield get_status_dict(
        status='ok',
        **res_kwargs,
    )

def __call__(keyfile=None, merge=False, force=False, drop=None,
             dataset=None):
    ds = require_dataset(dataset, check_installed=True, purpose='update')

    if drop and drop not in ('extracted', 'archives'):
        raise ValueError(
            "Unrecognized value for 'drop' option: {}".format(drop))

    repo = ds.repo
    if not keyfile:
        # will error out, if no config was given
        keyfile = repo.config.obtain(
            'datalad.ukbiobank.keyfile',
            dialog_type='question',
            title='Key file location',
            text='Where is the location of the file with the UKB access key?',
        )

    # prep for yield
    res = dict(
        action='ukb_update',
        path=ds.path,
        type='dataset',
        logger=lgr,
        refds=ds.path,
    )

    if repo.dirty:
        yield dict(
            res,
            status='error',
            message="Refuse to operate on dirty dataset",
        )
        return

    # check if we have 'ukbfetch' before we start fiddling with the dataset
    # and leave it in a mess for no reason
    try:
        subprocess.run(
            # pull version info
            ['ukbfetch', '-i'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except Exception as e:
        raise RuntimeError(
            "Cannot execute 'ukbfetch'. Original error: {}".format(e))

    # just to be nice, and to be able to check it out again,
    # when we are done
    initial_branch = repo.get_active_branch()
    initial_incoming = repo.get_hexsha('incoming')

    # make sure we are in incoming
    repo.call_git(['checkout', 'incoming'])

    # first wipe out all prev. downloaded zip files so we can detect
    # when some files are no longer available
    for fp in repo.pathobj.glob('[0-9]*_[0-9]*_[0-9]_[0-9].*'):
        fp.unlink()

    # a place to put the download logs
    # better be semi-persistent to ease inspection
    tmpdir = repo.pathobj / repo.get_git_dir(repo) / 'tmp' / 'ukb'
    tmpdir.mkdir(parents=True, exist_ok=True)

    # redownload, run with explicit mode, because we just deleted the
    # ZIP files and that is OK
    ds.run(
        cmd='ukbfetch -v -a{} -b.ukbbatch -o{}'.format(
            quote_cmdlinearg(keyfile),
            # use relative path to tmpdir to avoid leakage
            # of system-specific information into the run record
            quote_cmdlinearg(str(tmpdir.relative_to(repo.pathobj))),
        ),
        explicit=True,
        outputs=['.'],
        message="Update from UKBiobank",
    )

    # TODO what if something broke before? needs force switch
    if not force and repo.get_hexsha() == initial_incoming:
        yield dict(
            res,
            status='notneeded',
            message='No new content available',
        )
        repo.call_git(['checkout', initial_branch])
        # TODO drop?
        return

    # onto extraction and transformation of downloaded content
    repo.call_git(['checkout', 'incoming-native'])

    # mark the incoming change as merged
    # (but we do not actually want any branch content)
    repo.call_git(['merge', 'incoming', '--strategy=ours'])

    for fp in repo.get_content_info(
            ref='incoming-native', eval_file_type=False):
        fp.unlink()

    # discover all files present in the last commit in 'incoming'
    for fp, props in repo.get_content_annexinfo(
            ref='incoming', eval_availability=False).items():
        if fp.name.startswith('.'):
            # skip internals
            continue
        # we have to extract into per-instance directories, otherwise files
        # would conflict
        ids = fp.stem.split('_')
        if not len(ids) >= 3:
            raise RuntimeError(
                'Unrecognized filename structure: {}'.format(fp))
        # build an ID from the data record and the array index
        rec_id = '_'.join(ids[1:])
        if fp.suffix == '.zip':
            extract_dir = repo.pathobj / rec_id
            extract_dir.mkdir(exist_ok=True)
            with chpwd(extract_dir):
                # extract and add their content
                AddArchiveContent.__call__(
                    props['key'],
                    key=True,
                    annex=repo,
                    # --use-current-dir due to
                    # https://github.com/datalad/datalad/issues/3995
                    use_current_dir=True,
                    allow_dirty=True,
                    commit=False,
                )
        else:
            # move into instance dir, and strip participant ID, and instance ID
            # but keep array index
            # e.g. -> 25747_3_0.adv -> instance-3/25747_0
            repo.call_git([
                'annex', 'fromkey', props['key'],
                str(repo.pathobj / (rec_id + ''.join(fp.suffixes)))
            ])

    # save whatever the state is now, `save` will discover deletions
    # automatically and also commit them -- wonderful!
    ds.save(message="Update native layout")

    yield dict(
        res,
        status='ok',
    )

    want_bids = 'incoming-bids' in repo.get_branches()
    if want_bids:
        repo.call_git(['checkout', 'incoming-bids'])
        # mark the incoming change as merged
        # (but we do not actually want any branch content)
        repo.call_git(['merge', 'incoming', '--strategy=ours'])
        # prepare the worktree to match the latest state
        # of incoming-native but keep histories separate
        # (i.e. no merge), because we cannot handle partial
        # changes
        repo.call_git(['read-tree', '-u', '--reset', 'incoming-native'])
        # unstage changes to present a later `datalad save` with a single
        # changeset to be saved (otherwise it might try to keep staged
        # content staged and only save additional modifications)
        #repo.call_git(['restore', '--staged', '.'])
        repo.call_git(['reset', 'HEAD', '.'])

        # and now do the BIDSification
        from datalad_ukbiobank.ukb2bids import restructure_ukb2bids
        # get participant ID from batch file
        subid = list(repo.call_git_items_(
            ["cat-file", "-p", "incoming:.ukbbatch"]))[0].split(maxsplit=1)[0]
        yield from restructure_ukb2bids(
            ds,
            subid=subid,
            unrecognized_dir='non-bids',
            base_path=repo.pathobj,
        )
        ds.save(message="Update BIDS layout")

    if drop:
        if drop == 'archives':
            # we need to force the drop, because the download is the
            # only copy we have in general
            drop_opts = ['--force', '--branch', 'incoming', '-I', '*.zip']
        else:
            # drop == 'extracted':
            drop_opts = [
                '--in', 'datalad-archives', '--branch', 'incoming-native']
        for rec in repo.call_annex_records(['drop'] + drop_opts):
            if not rec.get('success', False):
                yield dict(
                    action='drop',
                    status='error',
                    message=rec.get('note', 'could not drop key'),
                    key=rec.get('key', None),
                    type='key',
                    path=ds.path,
                )

    if not merge:
        return

    # and update active branch
    repo.call_git(['checkout', initial_branch])

    if initial_branch in ('incoming', 'incoming-native', 'incoming-bids'):
        yield dict(
            res,
            action='ukb_merge_update',
            status='impossible',
            message='Refuse to merge into incoming* branch',
        )
        return

    repo.call_git([
        'merge', '-m', "Merge update from UKbiobank",
        'incoming-bids' if want_bids else 'incoming-native'])

    yield dict(
        res,
        action='ukb_merge_update',
        status='ok',
    )
    return

def _test_bare_git_version_2(host, dspath, store):
    # Similarly to test_bare_git_version_1, this should ensure a bare git repo
    # at the store location for a dataset doesn't conflict with the ORA remote.
    #
    # Note: Usability of git remote by annex depends on dataset layout version
    #       (dirhashlower vs. -mixed).
    #       For version 2 (mixed) upload via ORA and consumption via git should
    #       work. But not the other way around, since git-annex uses
    #       dirhashlower with bare repos.

    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone of it
    # should look like
    subprocess.run(['git', 'clone', '--bare',
                    quote_cmdlinearg(str(dspath)),
                    quote_cmdlinearg(str(bare_repo_path))
                    ])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 2 (dirhash mixed):
    create_ds_in_store(io, store, ds.id, '2', '1')

    # Now, let's have the bare repo as a git remote
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')
    # and the ORA remote in addition:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # upload keys via ORA:
    ds.repo.copy_to('.', 'ora-remote')
    # bare-git doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # fsck to make availability known
    assert_status(
        'ok',
        [annexjson2result(r, ds)
         for r in ds.repo.fsck(remote='bare-git', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)
    ds.drop('.')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # actually consumable via git remote:
    ds.repo.call_annex(['move', 'one.txt', '--from', 'bare-git'])
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now, move back via git - shouldn't be consumable via ORA
    ds.repo.call_annex(['move', 'one.txt', '--to', 'bare-git'])
    # fsck to make availability known, but there's nothing from POV of ORA:
    fsck_res = [annexjson2result(r, ds)
                for r in ds.repo.fsck(remote='ora-remote', fast=True)]
    assert_result_count(fsck_res, 1,
                        status='error',
                        message='** Based on the location log, one.txt\n'
                                '** was expected to be present, '
                                'but its content is missing.')
    assert_result_count(fsck_res, 1, status='ok')
    eq_(len(fsck_res), 2)
    eq_(len(ds.repo.whereis('one.txt')), 1)

def _test_bare_git_version_1(host, dspath, store):
    # This test should take a dataset and create a bare repository at the remote
    # end from it.
    # Given, that it is placed correctly within a tree of dataset, that remote
    # thing should then be usable as an ora-remote as well as as a git-type
    # remote.
    # Note: Usability of git remote by annex depends on dataset layout version
    #       (dirhashlower vs. -mixed).
    #       For version 1 (lower) upload and consumption should be
    #       interchangeable. It doesn't matter which remote is used for what
    #       direction.

    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone of it
    # should look like
    subprocess.run(['git', 'clone', '--bare',
                    quote_cmdlinearg(str(dspath)),
                    quote_cmdlinearg(str(bare_repo_path))
                    ])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 1 (dirhash lower):
    create_ds_in_store(io, store, ds.id, '1', '1')

    # Now, let's have the bare repo as a git remote and use it with annex
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')
    # copy files to the remote
    ds.repo.copy_to('.', 'bare-git')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])

    # Now, add the ora remote:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # fsck to make availability known
    assert_status(
        'ok',
        [annexjson2result(r, ds)
         for r in ds.repo.fsck(remote='ora-remote', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)

    # Now move content from git-remote to local and see it not being available
    # via bare-git anymore.
    ds.repo.call_annex(['move', '--all', '--from=bare-git'])
    # ora-remote doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # But after fsck it does:
    fsck_res = [annexjson2result(r, ds)
                for r in ds.repo.fsck(remote='ora-remote', fast=True)]
    assert_result_count(fsck_res, 1,
                        status='error',
                        message='** Based on the location log, one.txt\n'
                                '** was expected to be present, '
                                'but its content is missing.')
    assert_result_count(fsck_res, 1,
                        status='error',
                        message='** Based on the location log, subdir/two\n'
                                '** was expected to be present, '
                                'but its content is missing.')
    eq_(len(ds.repo.whereis('one.txt')), 1)
    # and the other way around: upload via ora-remote and have it available via
    # git-remote:
    ds.repo.copy_to('.', 'ora-remote')
    # fsck to make availability known
    assert_status(
        'ok',
        [annexjson2result(r, ds)
         for r in ds.repo.fsck(remote='bare-git', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)

def __call__(spec=None, *, dataset=None, discover=False, help_proc=False):
    if not spec and not discover:
        raise InsufficientArgumentsError('requires at least a procedure name')
    if help_proc and not spec:
        raise InsufficientArgumentsError('requires a procedure name')

    try:
        ds = require_dataset(
            dataset, check_installed=False,
            purpose='run a procedure')
    except NoDatasetFound:
        ds = None

    if discover:
        # specific path of procedures that were already reported
        reported = set()
        # specific names of procedure for which an active one has been
        # found
        active = set()
        for m, cmd_name, cmd_tmpl, cmd_help in \
                _get_procedure_implementation('*', ds=ds):
            if m in reported:
                continue
            ex = _guess_exec(m)
            # configured template (call-format string) takes precedence:
            if cmd_tmpl:
                ex['template'] = cmd_tmpl
            if ex['state'] is None:
                # doesn't seem like a match
                lgr.debug("%s does not look like a procedure, ignored.", m)
                continue
            state = 'overridden' if cmd_name in active else ex['state']
            message = ex['type'] if ex['type'] else 'unknown type'
            message += ' ({})'.format(state) if state != 'executable' else ''
            res = get_status_dict(
                action='discover_procedure',
                path=m,
                type='file',
                logger=lgr,
                refds=ds.path if ds else None,
                status='ok',
                state=state,
                procedure_name=cmd_name,
                procedure_type=ex['type'],
                procedure_callfmt=ex['template'],
                procedure_help=cmd_help,
                message=message)
            reported.add(m)
            if state == 'executable':
                active.add(cmd_name)
            yield res
        return

    if isinstance(spec, dict):
        # Skip getting procedure implementation if called with a
        # dictionary (presumably coming from --discover)
        procedure_file = spec['path']
        cmd_name = spec['procedure_name']
        cmd_tmpl = spec['procedure_callfmt']
        cmd_help = spec['procedure_help']
        name = cmd_name
        args = []
    else:
        if not isinstance(spec, (tuple, list)):
            # maybe coming from config
            spec = split_cmdline(spec)
        name = spec[0]
        args = spec[1:]
        try:
            # get the first match and run with it
            procedure_file, cmd_name, cmd_tmpl, cmd_help = \
                next(_get_procedure_implementation(name, ds=ds))
        except StopIteration:
            raise ValueError("Cannot find procedure with name '%s'" % name)

    ex = _guess_exec(procedure_file)
    # configured template (call-format string) takes precedence:
    if cmd_tmpl:
        ex['template'] = cmd_tmpl

    if help_proc:
        if cmd_help:
            res = get_status_dict(
                action='procedure_help',
                path=procedure_file,
                type='file',
                logger=lgr,
                refds=ds.path if ds else None,
                status='ok',
                state=ex['state'],
                procedure_name=cmd_name,
                procedure_type=ex['type'],
                procedure_callfmt=ex['template'],
                message=cmd_help)
        else:
            res = get_status_dict(
                action='procedure_help',
                path=procedure_file,
                type='file',
                logger=lgr,
                refds=ds.path if ds else None,
                status='impossible',
                state=ex['state'],
                procedure_name=cmd_name,
                procedure_type=ex['type'],
                procedure_callfmt=ex['template'],
                message="No help available for '%s'" % name)
        yield res
        return

    if not ex['template']:
        raise ValueError("No idea how to execute procedure %s. "
                         "Missing 'execute' permissions?" % procedure_file)

    cmd = ex['template'].format(
        script=guard_for_format(quote_cmdlinearg(procedure_file)),
        ds=guard_for_format(quote_cmdlinearg(ds.path)) if ds else '',
        args=join_cmdline(args) if args else '')
    lgr.info(u"Running procedure %s", name)
    lgr.debug(u'Full procedure command: %r', cmd)
    for r in Run.__call__(
            cmd=cmd,
            dataset=ds,
            explicit=True,
            inputs=None,
            outputs=None,
            # pass through here
            on_failure='ignore',
            return_type='generator',
            result_renderer='disabled'):
        yield r

    if ds:
        # the procedure ran and we have to anticipate that it might have
        # changed the dataset config, so we need to trigger an unforced
        # reload.
        # we have to do this despite "being done here", because
        # run_procedure() runs in the same process and reuses dataset (config
        # manager) instances, and the next interaction with a dataset should
        # be able to count on an up-to-date config
        ds.config.reload()