def test_runner_failure(dir_):
    runner = Runner()
    with assert_raises(CommandError) as cme:
        runner.run(
            py2cmd('import sys; sys.exit(53)')
        )
    eq_(53, cme.exception.code)

def get_7z(self):
    from datalad.cmd import CommandError, StdOutErrCapture, WitlessRunner
    # from datalad.utils import on_windows

    runner = WitlessRunner()
    # TODO: To not rely on availability in PATH we might want to use `which`
    #       (`where` on windows) and get the actual path to 7z to re-use in
    #       in_archive() and get().
    #       Note: `command -v XXX` or `type` might be a cross-platform
    #       solution!
    #       However, for availability probing only, it would be sufficient
    #       to just call 7z and see whether it returns zero.

    # cmd = 'where' if on_windows else 'which'
    # try:
    #     out = runner.run([cmd, '7z'], protocol=StdOutErrCapture)
    #     return out['stdout']
    # except CommandError:
    #     return None

    try:
        runner.run('7z', protocol=StdOutErrCapture)
        return True
    except (FileNotFoundError, CommandError):
        return False

def compress_files(files, archive, path=None, overwrite=True):
    """Compress `files` into an `archive` file

    Parameters
    ----------
    files : list of str
    archive : str
    path : str
      Alternative directory under which the compressor will be invoked, to
      e.g. take into account relative paths of files and/or archive
    overwrite : bool
      Whether to allow overwriting the target archive file if one already
      exists
    """
    runner = Runner(cwd=path)
    apath = Path(archive)
    if apath.exists():
        if overwrite:
            apath.unlink()
        else:
            raise ValueError(
                'Target archive {} already exists and overwrite is forbidden'.
                format(apath))
    suffixes = _normalize_fname_suffixes(apath.suffixes)
    if len(suffixes) > 1 and suffixes[-2] == '.tar':
        cmd = '7z u .tar -so -- {} | 7z u -si -- {}'.format(
            join_cmdline(files),
            quote_cmdlinearg(str(apath)),
        )
    else:
        cmd = ['7z', 'u', str(apath), '--'] + files
    runner.run(cmd, protocol=KillOutput)

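# A minimal usage sketch for compress_files() above. The import path and the
# file names are illustrative assumptions, not taken from the original source.
def _example_compress_files():
    from datalad.support.archives import compress_files  # assumed location

    # file names are resolved relative to `path`; an existing out.tar.gz would
    # be replaced, because `overwrite` defaults to True
    compress_files(['a.txt', 'b.txt'], 'out.tar.gz', path='/tmp/work')
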
def _execute_command(command, pwd, expected_exit=None):
    from datalad.cmd import WitlessRunner

    exc = None
    cmd_exitcode = None
    runner = WitlessRunner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            # command is always a string
            command)
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to pass on the exit code in the
        # same way
        exc = e
        cmd_exitcode = e.code

        if expected_exit is not None and expected_exit != cmd_exitcode:
            # we failed in a different way during a rerun. This can easily
            # happen if we try to alter a locked file
            #
            # TODO add the ability to `git reset --hard` the dataset tree on
            # failure. We know that we started clean, so we could easily go
            # back; needs gh-1424 to be able to do it recursively
            raise exc

    lgr.info("== Command exit (modification check follows) =====")
    return cmd_exitcode or 0, exc

def test_addurls_stdin_input_command_line(self=None, path=None):
    # The previous test checks all the cases, but it overrides sys.stdin.
    # Do a simple check that's closer to a command line call.
    Dataset(path).create(force=True)
    runner = WitlessRunner(cwd=path)
    with open(self.json_file) as jfh:
        runner.run(["datalad", "addurls", '-', '{url}', '{name}'],
                   stdin=jfh)
    for fname in ["a", "b", "c"]:
        ok_exists(op.join(path, fname))

class witlessrunner(SuprocBenchmarks):
    """Some rudimentary tests to see if there are no major slowdowns
    of WitlessRunner
    """

    def setup(self):
        self.runner = WitlessRunner()
        self.git_runner = GitWitlessRunner()

    def time_echo(self):
        self.runner.run(["echo"])

    def time_echo_gitrunner(self):
        self.git_runner.run(["echo"])

    def time_echo_gitrunner_fullcapture(self):
        self.git_runner.run(["echo"], protocol=StdOutErrCapture)

def _execute_command(command, pwd):
    from datalad.cmd import WitlessRunner

    exc = None
    cmd_exitcode = None
    runner = WitlessRunner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            # command is always a string
            command)
    except CommandError as e:
        exc = e
        cmd_exitcode = e.code
    lgr.info("== Command exit (modification check follows) =====")
    return cmd_exitcode or 0, exc

def test_annex_get_from_subdir(topdir=None):
    ds = Dataset(topdir)
    ds.create(force=True)
    ds.save('a.tar.gz')
    ds.add_archive_content('a.tar.gz', delete=True)
    fpath = op.join(topdir, 'a', 'd', fn_in_archive_obscure)

    with chpwd(op.join(topdir, 'a', 'd')):
        runner = WitlessRunner()
        # run git annex drop
        runner.run(['git', 'annex', 'drop', '--', fn_in_archive_obscure],
                   protocol=KillOutput)
        # and verify that the file content was dropped
        assert_false(ds.repo.file_has_content(fpath))
        # run git annex get
        runner.run(['git', 'annex', 'get', '--', fn_in_archive_obscure],
                   protocol=KillOutput)
        # and verify that the file content got back into the directory
        assert_true(ds.repo.file_has_content(fpath))

def test_cfg_passthrough(path=None):
    runner = Runner()
    _ = runner.run(
        ['datalad',
         '-c', 'annex.tune.objecthash1=true',
         '-c', 'annex.tune.objecthashlower=true',
         'create', path])
    ds = Dataset(path)
    eq_(ds.config.get('annex.tune.objecthash1', None), 'true')
    eq_(ds.config.get('annex.tune.objecthashlower', None), 'true')

def test_annex_get_from_subdir(topdir):
    from datalad.api import add_archive_content

    annex = AnnexRepo(topdir, backend='MD5E', init=True)
    annex.add('a.tar.gz')
    annex.commit()
    add_archive_content('a.tar.gz', annex=annex, delete=True)
    fpath = op.join(topdir, 'a', 'd', fn_in_archive_obscure)

    with chpwd(op.join(topdir, 'a', 'd')):
        runner = WitlessRunner()
        # run git annex drop
        runner.run(['git', 'annex', 'drop', '--', fn_in_archive_obscure],
                   protocol=KillOutput)
        # and verify that the file content was dropped
        assert_false(annex.file_has_content(fpath))
        # run git annex get
        runner.run(['git', 'annex', 'get', '--', fn_in_archive_obscure],
                   protocol=KillOutput)
        # and verify that the file content got back into the directory
        assert_true(annex.file_has_content(fpath))

def test_runner_stdout_capture():
    runner = Runner()
    test_msg = "stdout-Message"
    res = runner.run(
        py2cmd('import sys; print(%r, file=sys.stdout)' % test_msg),
        protocol=StdOutErrCapture,
    )
    eq_(res['stdout'].rstrip(), test_msg)
    ok_(not res['stderr'])

def test_runner(tempfile):
    runner = Runner()
    content = 'Testing real run' if on_windows else 'Testing äöü東 real run'
    cmd = 'echo %s > %s' % (content, tempfile)
    res = runner.run(cmd)
    # no capture of any kind, by default
    ok_(not res['stdout'])
    ok_(not res['stderr'])
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)

def test_runner_fix_PWD(path):
    env = os.environ.copy()
    env['PWD'] = orig_cwd = os.getcwd()
    runner = Runner(cwd=path, env=env)
    res = runner.run(
        py2cmd('import os; print(os.environ["PWD"])'),
        protocol=StdOutCapture,
    )
    # was fixed up to point to cwd's path
    eq_(res['stdout'].strip(), path)
    # no side-effect
    eq_(env['PWD'], orig_cwd)

def test_quoting(path):
    ds = Dataset(op.join(path, OBSCURE_FILENAME)).create(force=True)
    # Our custom procedure fails if it receives anything other than two
    # procedure arguments (so the script itself receives 3). Check a few cases
    # from the Python API and CLI.
    ds.config.add("datalad.locations.dataset-procedures", "code",
                  where="dataset")
    with swallow_outputs():
        ds.run_procedure(spec=["just2args", "with ' sing", 'with " doub'])
    with assert_raises(CommandError):
        ds.run_procedure(spec=["just2args", "still-one arg"])

    runner = WitlessRunner(cwd=ds.path)
    runner.run(
        "datalad run-procedure just2args \"with ' sing\" 'with \" doub'",
        protocol=KillOutput)
    with assert_raises(CommandError):
        runner.run(
            "datalad run-procedure just2args 'still-one arg'",
            protocol=KillOutput)

def test_runner_stdin(path):
    runner = Runner()
    fakestdin = Path(path) / 'io'
    # go for difficult content
    fakestdin.write_text(OBSCURE_FILENAME)

    res = runner.run(
        py2cmd('import fileinput; print(fileinput.input().readline())'),
        stdin=fakestdin.open(),
        protocol=StdOutCapture,
    )
    assert_in(OBSCURE_FILENAME, res['stdout'])

    # we can do the same without a tempfile, too
    res = runner.run(
        py2cmd('import fileinput; print(fileinput.input().readline())'),
        stdin=OBSCURE_FILENAME.encode('utf-8'),
        protocol=StdOutCapture,
    )
    assert_in(OBSCURE_FILENAME, res['stdout'])

def test_script_shims():
    runner = Runner()
    # The EASY-INSTALL checks below aren't valid for editable installs. Use the
    # existence of setup.py as an indication that the install is _probably_
    # editable. The file should always exist for editable installs, but it can
    # also exist for non-editable installs when the tests are being executed
    # from the top of the source tree.
    setup_exists = (Path(datalad.__file__).parent.parent / "setup.py").exists()
    for script in [
            'datalad',
            'git-annex-remote-datalad-archives',
            'git-annex-remote-datalad']:
        if not on_windows:
            # those must be available for execution, and should not contain
            which = runner.run(['which', script],
                               protocol=StdOutErrCapture)['stdout']
            # test if there is no easy install shim in there
            with open(which.rstrip()) as f:
                content = f.read()
        else:
            from distutils.spawn import find_executable
            content = find_executable(script)
        if not setup_exists:
            assert_not_in('EASY', content)  # NOTHING easy should be there
            assert_not_in('pkg_resources', content)

        # and let's check that it is our script
        out = runner.run([script, '--version'], protocol=StdOutErrCapture)
        version = out['stdout'].rstrip()
        mod, version = version.split(' ', 1)
        assert_equal(mod, 'datalad')
        # we can get git and non git .dev version... so for now
        # relax
        get_numeric_portion = lambda v: [
            x for x in v.split('.') if x.isdigit()]
        # extract numeric portion
        assert get_numeric_portion(version)  # that my lambda is correctish
        assert_equal(get_numeric_portion(__version__),
                     get_numeric_portion(version))

def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    This is an alternative implementation without patool, but directly
    calling 7z.

    Parameters
    ----------
    archive: str
    dir_: str
    """
    apath = Path(archive)
    runner = Runner(cwd=dir_)
    suffixes = _normalize_fname_suffixes(apath.suffixes)
    if len(suffixes) > 1 and suffixes[-2] == '.tar':
        # we have a compressed tar file that needs to be fed through the
        # decompressor first
        cmd = '7z x {} -so | 7z x -si -ttar'.format(quote_cmdlinearg(archive))
    else:
        # fire and forget
        cmd = ['7z', 'x', archive]
    runner.run(cmd, protocol=KillOutput)

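# A minimal usage sketch for decompress_file() above. The import path and the
# archive/directory names are illustrative assumptions, not taken from the
# original source.
def _example_decompress_file():
    from datalad.support.archives import decompress_file  # assumed location

    # a *.tar.* archive is piped through 7z twice (decompress, then untar);
    # anything else is extracted with a single `7z x` call into the target dir
    decompress_file('data.tar.gz', '/tmp/extracted')
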
def check_run_and_get_output(cmd):
    runner = WitlessRunner()
    try:
        # suppress log output in case it was set to high values
        with patch.dict('os.environ', {'DATALAD_LOG_LEVEL': 'WARN'}):
            output = runner.run(["datalad", "--help"],
                                protocol=StdOutErrCapture)
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s"
                             % (e.code, (e.stdout, e.stderr)))
    return output['stdout'], output['stderr']

def test_runner_stdin(path):
    runner = Runner()
    fakestdin = Path(path) / 'io'
    # go for difficult content
    fakestdin.write_text(OBSCURE_FILENAME)

    res = runner.run(
        py2cmd('import fileinput; print(fileinput.input().readline())'),
        stdin=fakestdin.open(),
        protocol=StdOutCapture,
    )
    assert_in(OBSCURE_FILENAME, res['stdout'])

def test_no_rdflib_loaded():
    # rely on rdflib polluting stdout to see that it is not loaded whenever
    # we load this remote, since that adds 300ms delay for no immediate use
    runner = WitlessRunner()
    out = runner.run(
        [sys.executable,
         '-c',
         'import datalad.customremotes.archives, sys; '
         'print([k for k in sys.modules if k.startswith("rdflib")])'],
        protocol=StdOutErrCapture)
    # print cmo.out
    assert_not_in("rdflib", out['stdout'])
    assert_not_in("rdflib", out['stderr'])

def test_asyncio_forked(temp):
    # temp will be used to communicate from the child whether it succeeded or not
    temp = Path(temp)
    runner = Runner()
    import os
    try:
        pid = os.fork()
    except BaseException as exc:
        # .fork availability is "Unix", and there are cases where it is
        # "not supported", so we will just skip if no forking is possible
        raise SkipTest(f"Cannot fork: {exc}")
    # if it does not fail (in the original or in a fork) -- we are good
    if sys.version_info < (3, 8) and pid != 0:
        # for some reason it is crucial to sleep a little (but 0.001 is not
        # enough) in the master process with older pythons or it takes
        # forever to make the child run
        sleep(0.1)
    try:
        runner.run([sys.executable, '--version'], protocol=StdOutCapture)
        if pid == 0:
            temp.write_text("I rule")
    except:
        if pid == 0:
            temp.write_text("I suck")
    if pid != 0:
        # parent: look after the child
        t0 = time()
        try:
            while not temp.exists() or temp.stat().st_size < 6:
                if time() - t0 > 5:
                    raise AssertionError(
                        "Child process did not create a file we expected!")
        finally:
            # kill the child
            os.kill(pid, signal.SIGTERM)
        # see if it was a good one
        eq_(temp.read_text(), "I rule")
    else:
        # sleep long enough so the parent just kills me (the kid) before I
        # continue doing bad deeds
        sleep(10)

def test_external_modification(path=None):
    from datalad.cmd import WitlessRunner as Runner
    runner = Runner(cwd=path)

    repo = GitRepo(path, create=True)
    config = repo.config
    key = 'sec.sub.key'
    assert_not_in(key, config)
    config.set(key, '1', scope='local')
    assert_equal(config[key], '1')

    # we pick up the case where we modified, so the size changed
    runner.run(['git', 'config', '--local', '--replace-all', key, '10'])
    # unfortunately we do not react to .get unless we reload. But here
    # we will test if reload correctly decides to reload without force
    config.reload()
    assert_equal(config[key], '10')

    # and no size change
    runner.run(['git', 'config', '--local', '--replace-all', key, '11'])
    config.reload()
    assert_equal(config[key], '11')

def in_archive(self, archive_path, file_path):
    if not archive_path.exists():
        # no archive, no file
        return False
    loc = str(file_path)
    from datalad.cmd import (
        StdOutErrCapture,
        WitlessRunner,
    )
    runner = WitlessRunner()
    # query 7z for the specific object location, keeps the output
    # lean, even for big archives
    out = runner.run(
        ['7z', 'l', str(archive_path), loc],
        protocol=StdOutErrCapture,
    )
    return loc in out['stdout']

def test_runner_parametrized_protocol():
    runner = Runner()

    # protocol returns a given value whatever it receives
    class ProtocolInt(StdOutCapture):
        def __init__(self, value):
            self.value = value
            super().__init__()

        def pipe_data_received(self, fd, data):
            super().pipe_data_received(fd, self.value)

    res = runner.run(
        py2cmd('print(1)'),
        protocol=ProtocolInt,
        # value passed to protocol constructor
        value=b'5',
    )
    eq_(res['stdout'], '5')

def test_script_shims(script):
    runner = Runner()
    if not on_windows:
        from shutil import which
        which(script)

    # and let's check that it is our script
    out = runner.run([script, '--version'], protocol=StdOutErrCapture)
    version = out['stdout'].rstrip()
    mod, version = version.split(' ', 1)
    assert_equal(mod, 'datalad')
    # we can get git and non git .dev version... so for now
    # relax
    get_numeric_portion = lambda v: [
        x for x in re.split('[+.]', v) if x.isdigit()]
    # extract numeric portion
    assert get_numeric_portion(version), \
        f"Got no numeric portion from {version}"
    assert_equal(get_numeric_portion(__version__),
                 get_numeric_portion(version))

def test_create(probe, path):
    # only as a probe whether this FS is a crippled one
    ar = AnnexRepo(probe, create=True)
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world') if not ar.is_managed_branch() else None)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    if not ar.is_managed_branch():
        eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path, protocol=StdOutErrCapture)['stdout']
    assert_in('funny [here]', cmlout)
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'), ds.id)

def test_runner_empty_stdin():
    # Ensure a runner with empty stdin data and no output capture progresses
    runner = Runner()
    runner.run(["cat"], stdin=b"", protocol=None)

def test_runner_no_stdin_no_capture():
    # Ensure a runner without stdin data and output capture progresses
    runner = Runner()
    runner.run(["echo", "a", "b", "c"], stdin=None, protocol=None)

def postclonecfg_ria(ds, props):
    """Configure a dataset freshly cloned from a RIA store"""
    repo = ds.repo
    # RIA uses hashdir mixed, copying data to it via git-annex (if cloned via
    # ssh) would make it see a bare repo and establish a hashdir lower annex
    # object tree.
    # Moreover, we want the ORA remote to receive all data for the store, so
    # its objects could be moved into archives (the main point of a RIA store).
    RIA_REMOTE_NAME = 'origin'  # don't hardcode everywhere
    ds.config.set(
        'remote.{}.annex-ignore'.format(RIA_REMOTE_NAME), 'true',
        where='local')

    # chances are that if this dataset came from a RIA store, its subdatasets
    # may live there too. Place a subdataset source candidate config that makes
    # get probe this RIA store when obtaining subdatasets
    ds.config.set(
        # we use the label 'origin' for this candidate in order to not have to
        # generate a complicated name from the actual source specification.
        # we pick a cost of 200 to sort it before datalad's default candidates
        # for non-RIA URLs, because they prioritize hierarchical layouts that
        # cannot be found in a RIA store
        'datalad.get.subdataset-source-candidate-200origin',
        # use the entire original URL, up to the fragment, plus dataset ID
        # placeholder, this should make things work with any store setup we
        # support (paths, ports, ...)
        props['source'].split('#', maxsplit=1)[0] + '#{id}',
        where='local')

    # setup publication dependency, if a corresponding special remote exists
    # and was enabled (there could be RIA stores that actually only have repos)

    # make this function be a generator
    ora_remotes = [s for s in ds.siblings('query', result_renderer='disabled')
                   if s.get('annex-externaltype') == 'ora']
    if not ora_remotes and any(
            r.get('externaltype') == 'ora'
            for r in (repo.get_special_remotes().values()
                      if hasattr(repo, 'get_special_remotes')
                      else [])):
        # no ORA remote autoenabled, but configuration knows about at least
        # one. Let's check origin's config for datalad.ora-remote.uuid as
        # stored by create-sibling-ria and try enabling that one.
        lgr.debug("Found no autoenabled ORA special remote. Trying to look it "
                  "up in source config ...")

        # First figure whether we cloned via SSH, HTTP or local path and then
        # get that config file the same way:
        config_content = None
        scheme = props['giturl'].split(':', 1)[0]
        if scheme == 'http':
            try:
                response = requests.get("{}{}config".format(
                    props['giturl'],
                    '/' if not props['giturl'].endswith('/') else ''))
                config_content = response.text
            except requests.RequestException as e:
                lgr.debug("Failed to get config file from source:\n%s",
                          exc_str(e))
        elif scheme == 'ssh':
            # TODO: switch the following to proper command abstraction:
            # SSHRemoteIO ignores the path part ATM. No remote CWD! (To be
            # changed with command abstractions). So we need to get that part
            # to have a valid path to origin's config file:
            cfg_path = PurePosixPath(URL(props['giturl']).path) / 'config'
            op = SSHRemoteIO(props['giturl'])
            try:
                config_content = op.read_file(cfg_path)
            except RIARemoteError as e:
                lgr.debug("Failed to get config file from source: %s",
                          exc_str(e))
        elif scheme == 'file':
            # TODO: switch the following to proper command abstraction:
            op = LocalIO()
            cfg_path = Path(URL(props['giturl']).localpath) / 'config'
            try:
                config_content = op.read_file(cfg_path)
            except (RIARemoteError, OSError) as e:
                lgr.debug("Failed to get config file from source: %s",
                          exc_str(e))
        else:
            lgr.debug("Unknown URL-Scheme in %s. Can handle SSH, HTTP or "
                      "FILE scheme URLs.", props['source'])

        # 3. And read it
        org_uuid = None
        if config_content:
            # TODO: We might be able to spare the saving to a file.
            #       "git config -f -" is not explicitly documented but happens
            #       to work and would read from stdin. Make sure we know this
            #       works for required git versions and on all platforms.
            with make_tempfile(content=config_content) as cfg_file:
                runner = WitlessRunner(
                    env=GitRunner.get_git_environ_adjusted())
                try:
                    result = runner.run(
                        ['git', 'config', '-f', cfg_file,
                         'datalad.ora-remote.uuid'],
                        protocol=StdOutCapture)
                    org_uuid = result['stdout'].strip()
                except CommandError as e:
                    # doesn't contain what we are looking for
                    lgr.debug("Found no UUID for ORA special remote at "
                              "'%s' (%s)", RIA_REMOTE_NAME, exc_str(e))

        # Now, enable it. If annex-init didn't fail to enable it as stored, we
        # wouldn't end up here, so enable with store URL as suggested by the
        # URL we cloned from.
        if org_uuid:
            srs = repo.get_special_remotes()
            if org_uuid in srs.keys():
                # TODO: - Double-check autoenable value and only do this when
                #         true?
                #       - What if it still fails? -> Annex shouldn't change
                #         config in that case

                # we only need the store:
                new_url = props['source'].split('#')[0]
                try:
                    repo.enable_remote(srs[org_uuid]['name'],
                                       options=['url={}'.format(new_url)])
                    lgr.info("Reconfigured %s for %s",
                             srs[org_uuid]['name'], new_url)
                    # update ora_remotes for considering publication
                    # dependency below
                    ora_remotes = [
                        s for s in ds.siblings('query',
                                               result_renderer='disabled')
                        if s.get('annex-externaltype', None) == 'ora']
                except CommandError as e:
                    lgr.debug("Failed to reconfigure ORA special remote: %s",
                              exc_str(e))
            else:
                lgr.debug("Unknown ORA special remote uuid at '%s': %s",
                          RIA_REMOTE_NAME, org_uuid)

    if ora_remotes:
        if len(ora_remotes) == 1:
            yield from ds.siblings('configure',
                                   name=RIA_REMOTE_NAME,
                                   publish_depends=ora_remotes[0]['name'],
                                   result_filter=None,
                                   result_renderer='disabled')
        else:
            lgr.warning("Found multiple ORA remotes. Couldn't decide which "
                        "publishing to 'origin' should depend on: %s. "
                        "Consider running 'datalad siblings configure -s "
                        "origin --publish-depends ORAREMOTENAME' to set the "
                        "publication dependency manually.",
                        [r['name'] for r in ora_remotes])

def test_clone(src, tempdir):
    # Verify that all our repos are clonable
    r = WitlessRunner()
    output = r.run(["git", "clone", src, tempdir], protocol=StdOutErrCapture)
    #status, output = getstatusoutput("git clone %(src)s %(tempdir)s" % locals())
    ok_(os.path.exists(os.path.join(tempdir, ".git")))