def test_our_metadataset_search(tdir):
    # smoke test for basic search operations on our super-megadataset
    # expensive operation but ok
    ds = install(path=tdir, source=DATASETS_TOPURL,
                 result_xfm='datasets', return_type='item-or-list')
    res_haxby = list(ds.search('haxby'))
    assert_greater(len(res_haxby), 10)
    # default search should be case insensitive
    # but somehow it is not fully -- we get 12 here
    #res_Haxby = list(ds.search('Haxby'))
    #eq_(len(res_haxby), len(res_Haxby))
    assert_result_count(
        ds.search('id:873a6eae-7ae6-11e6-a6c8-002590f97d84', mode='textblob'),
        1,
        type='dataset',
        path=op.join(ds.path, 'crcns', 'pfc-2'))
    # there is a problem with argparse not decoding into utf8 in PY2
    from datalad.cmdline.tests.test_main import run_main
    # TODO: make it into an independent lean test
    from datalad.cmd import Runner
    out, err = Runner(cwd=ds.path)('datalad search Buzsáki')
    assert_in('crcns/pfc-2 ', out)  # has it in description
    # and then another aspect: this entry is among multiple authors, need to
    # check if aggregating them into a searchable entity was done correctly
    assert_in('crcns/hc-1 ', out)
def _test_external(ev, modname):
    try:
        exec("import %s" % modname, globals(), locals())
    except ImportError:
        raise SkipTest("External %s not present" % modname)
    except Exception as e:
        raise SkipTest("External %s fails to import: %s" % (modname, exc_str(e)))
    assert (ev[modname] is not ev.UNKNOWN)
    assert_greater(ev[modname], '0.0.1')
    assert_greater('1000000.0', ev[modname])  # unlikely in our lifetimes
def test__version__():
    # in released stage, version in the last CHANGELOG entry
    # should correspond to the one in datalad
    CHANGELOG_filename = op.join(
        op.dirname(__file__), op.pardir, op.pardir, 'CHANGELOG.md')
    if not op.exists(CHANGELOG_filename):
        raise SkipTest("no %s found" % CHANGELOG_filename)
    regex = re.compile(r'^## '
                       r'(?P<version>[0-9]+\.[0-9.abcrc~]+)\s+'
                       r'\((?P<date>.*)\)'
                       r'\s+--\s+'
                       r'(?P<codename>.+)')
    with open(CHANGELOG_filename, 'rb') as f:
        for line in f:
            line = line.rstrip()
            if not line.startswith(b'## '):
                # The first section header we hit, must be our changelog entry
                continue
            reg = regex.match(assure_unicode(line))
            if not reg:  # first one at that level is the one
                raise AssertionError(
                    "Following line must have matched our regex: %r" % line)
            regd = reg.groupdict()
            changelog_version = regd['version']
            lv_changelog_version = LooseVersion(changelog_version)
            # we might have a suffix - sanitize
            san__version__ = __version__.rstrip('.devdirty')
            lv__version__ = LooseVersion(san__version__)
            if '???' in regd['date'] and 'will be better than ever' in regd['codename']:
                # we only have our template
                # we can only assert that its version should be higher than
                # the one we have now
                assert_greater(lv_changelog_version, lv__version__)
            else:
                # should be a "release" record
                assert_not_in('???', regd['date'])
                assert_not_in('will be better than ever', regd['codename'])
                assert_equal(__hardcoded_version__, changelog_version)
                if __hardcoded_version__ != san__version__:
                    # It was not tagged yet and Changelog should have its
                    # template record for the next release
                    assert_greater(lv_changelog_version, lv__version__)
                    assert_in('.dev', san__version__)
                else:
                    # all is good, tagged etc
                    assert_equal(lv_changelog_version, lv__version__)
                    assert_equal(changelog_version, san__version__)
                    assert_equal(__hardcoded_version__, san__version__)
            return
    raise AssertionError(
        "No log line matching our regex found in %s" % CHANGELOG_filename
    )
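# Illustration (a hedged sketch, not part of the test suite above): the CHANGELOG
# heading shape that test__version__'s regex is written for is
# "## <version> (<date>) -- <codename>".  The sample heading below is hypothetical
# and only demonstrates the expected format.
def _demo_changelog_header_regex():
    regex = re.compile(r'^## '
                       r'(?P<version>[0-9]+\.[0-9.abcrc~]+)\s+'
                       r'\((?P<date>.*)\)'
                       r'\s+--\s+'
                       r'(?P<codename>.+)')
    sample = u'## 0.12.0 (January 11, 2020) -- some codename'  # hypothetical entry
    reg = regex.match(sample)
    assert reg
    assert reg.groupdict()['version'] == '0.12.0'
    assert reg.groupdict()['codename'] == 'some codename'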
def test__gen_github_entity_organization():
    # to test effectiveness of the fix, we need to provide some
    # token which would not work
    with patch_config(
            {CONFIG_HUB_TOKEN_FIELD: "ed51111111111111111111111111111111111111"}):
        org_cred = next(_gen_github_entity(None, 'datalad-collection-1'))
        assert len(org_cred) == 2, "we return organization and credential"
        org, _ = org_cred
        assert org
        repos = list(org.get_repos())
        repos_names = [r.name for r in repos]
        assert_greater(len(repos), 3)  # we have a number of those
        assert_in('datasets.datalad.org', repos_names)
def check_runner_heavy_output(log_online):
    # TODO: again, no automatic detection of this resulting in getting
    # stuck yet.
    runner = Runner()
    cmd = '%s %s' % (sys.executable,
                     op.join(op.dirname(__file__), "heavyoutput.py"))
    with swallow_outputs() as cm, swallow_logs():
        ret = runner.run(cmd, log_online=log_online,
                         log_stderr=False, log_stdout=False,
                         expect_stderr=True)
        eq_(cm.err, cm.out)  # they are identical in that script
        eq_(cm.out[:10], "0 [0, 1, 2")
        eq_(cm.out[-15:], "997, 998, 999]\n")

    # for some reason swallow_logs is not effective, so we just skip altogether
    # if too heavy debug output
    if lgr.getEffectiveLevel() <= logging.DEBUG:
        raise SkipTest(
            "Skipping due to too heavy impact on logs complicating debugging")

    # do it again with capturing:
    with swallow_logs():
        ret = runner.run(cmd, log_online=log_online,
                         log_stderr=True, log_stdout=True,
                         expect_stderr=True)

    if log_online:
        # halting case of datalad add and other batch commands #2116
        logged = []
        with swallow_logs():
            def process_stdout(l):
                assert l
                logged.append(l)
            ret = runner.run(cmd,
                             log_online=log_online,
                             log_stdout=process_stdout,
                             log_stderr='offline',
                             expect_stderr=True)
        assert_equal(len(logged), 100)
        assert_greater(len(ret[1]), 1000)  # stderr all here
        assert not ret[0], "all messages went into `logged`"
def test_install_top(tdir):
    # installs one level of subdatasets only
    ds = install(
        path=tdir,
        source=DATASETS_TOPURL,
        recursive=DATASETS_FULL_INSTALL,
        recursion_limit=1,
    )
    subdss = ds.subdatasets(fulfilled=True, result_xfm='datasets')
    if DATASETS_FULL_INSTALL:
        assert_greater(len(subdss), 25)  # we have a good number on top
        assert_equal(ds.subdatasets(fulfilled=False), [])  # and none is left behind
    detached = [s.path for s in subdss if not s.repo.get_active_branch()]
    assert_equal(detached, [])
def test_crawl(tempd):
    if not _get_github_cred().is_known:
        raise SkipTest("no github credential")
    ds = create(tempd)
    with chpwd(tempd):
        crawl_init(
            template='gh',
            save=True,
            args={'org': 'datalad-collection-1', 'include': 'kaggle'}
        )
        crawl()
    subdss = ds.subdatasets(fulfilled=True, result_xfm='datasets')
    assert all('kaggle' in d.path for d in subdss)
    assert_greater(len(subdss), 1)
    assert_false(ds.repo.dirty)
def test_external_versions_basic():
    ev = ExternalVersions()
    our_module = 'datalad'
    assert_equal(ev.versions, {})
    assert_equal(ev[our_module], __version__)
    # and it could be compared
    assert_greater_equal(ev[our_module], __version__)
    assert_greater(ev[our_module], '0.1')
    assert_equal(list(ev.keys()), [our_module])
    assert_true(our_module in ev)
    assert_false('unknown' in ev)

    # all are LooseVersions now
    assert_true(isinstance(ev[our_module], LooseVersion))
    version_str = __version__
    assert_equal(ev.dumps(), "Versions: %s=%s" % (our_module, version_str))

    # For non-existing one we get None
    assert_equal(ev['custom__nonexisting'], None)
    # and nothing gets added to _versions for nonexisting
    assert_equal(set(ev.versions.keys()), {our_module})

    # but if it is a module without version, we get it set to UNKNOWN
    assert_equal(ev['os'], ev.UNKNOWN)
    # And get a record on that inside
    assert_equal(ev.versions.get('os'), ev.UNKNOWN)
    # And that thing is "True", i.e. present
    assert (ev['os'])
    # but not comparable with anything besides itself (was above)
    assert_raises(TypeError, cmp, ev['os'], '0')
    assert_raises(TypeError, assert_greater, ev['os'], '0')
    return
    # Code below is from original duecredit, and we don't care about
    # testing this one
    # And we can get versions based on modules themselves
    from datalad.tests import mod
    assert_equal(ev[mod], mod.__version__)

    # Check that we can get a copy of the versions
    versions_dict = ev.versions
    versions_dict[our_module] = "0.0.1"
    assert_equal(versions_dict[our_module], "0.0.1")
    assert_equal(ev[our_module], __version__)
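# A minimal sketch (not exercised by the test above) of how ExternalVersions is
# typically consulted outside of tests, via the module-level singleton.  The import
# path `datalad.support.external_versions` is assumed here and not stated in the
# original tests.
def _demo_external_versions_singleton():
    from datalad.support.external_versions import external_versions
    # 'datalad' itself must be importable, so a truthy version is expected back
    assert external_versions['datalad']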
def test_wtf(topdir):
    path = opj(topdir, OBSCURE_FILENAME)
    # smoke test for now
    with swallow_outputs() as cmo:
        wtf(dataset=path, on_failure="ignore")
        assert_not_in('## dataset', cmo.out)
        assert_in('## configuration', cmo.out)
        # Those sections get censored out by default now
        assert_not_in('user.name: ', cmo.out)
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf()
            assert_not_in('## dataset', cmo.out)
            assert_in('## configuration', cmo.out)

    # now with a dataset
    ds = create(path)
    with swallow_outputs() as cmo:
        wtf(dataset=ds.path)
        assert_in('## configuration', cmo.out)
        assert_in('## dataset', cmo.out)
        assert_in(u'path: {}'.format(ds.path), ensure_unicode(cmo.out))

    # and if we run with all sensitive
    for sensitive in ('some', True):
        with swallow_outputs() as cmo:
            wtf(dataset=ds.path, sensitive=sensitive)
            # we fake those for tests anyways, but we do show cfg in this mode
            # and explicitly not showing them
            assert_in('user.name: %s' % _HIDDEN, cmo.out)

    with swallow_outputs() as cmo:
        wtf(dataset=ds.path, sensitive='all')
        assert_not_in(_HIDDEN, cmo.out)  # all is shown
        assert_in('user.name: ', cmo.out)

    # Sections selection
    #
    # If we ask for no sections and there is no dataset
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=[])
            assert_not_in('## dataset', cmo.out)
            for s in SECTION_CALLABLES:
                assert_not_in('## %s' % s.lower(), cmo.out.lower())

    # ask for a selected set
    secs = ['git-annex', 'configuration']
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=secs)
            for s in SECTION_CALLABLES:
                (assert_in if s in secs else assert_not_in)(
                    '## %s' % s.lower(), cmo.out.lower()
                )
            # order should match our desired one, not alphabetical
            # but because of https://github.com/datalad/datalad/issues/3915
            # alphanum is now desired
            assert cmo.out.index('## git-annex') > cmo.out.index('## configuration')

    # not achievable from the cmdline: passing an empty list of sections.
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=[])
            eq_(cmo.out.rstrip(), '# WTF')

    # and we could decorate it nicely for embedding e.g. into github issues
    with swallow_outputs() as cmo:
        wtf(sections=['dependencies'], decor='html_details')
        ok_startswith(cmo.out, '<details><summary>DataLad %s WTF' % __version__)
        assert_in('## dependencies', cmo.out)

    # short flavor
    with swallow_outputs() as cmo:
        wtf(flavor='short')
        assert_in("- datalad: version=%s" % __version__, cmo.out)
        assert_in("- dependencies: ", cmo.out)
        eq_(len(cmo.out.splitlines()), 4)  # #WTF, datalad, dependencies, trailing new line

    with swallow_outputs() as cmo:
        wtf(flavor='short', sections='*')
        assert_greater(len(cmo.out.splitlines()), 10)  # many more

    # should result only in '# WTF'
    skip_if_no_module('pyperclip')

    # verify that it works correctly in the env/platform
    import pyperclip
    with swallow_outputs() as cmo:
        try:
            pyperclip.copy("xxx")
            pyperclip_works = pyperclip.paste().strip() == "xxx"
            wtf(dataset=ds.path, clipboard=True)
        except (AttributeError, pyperclip.PyperclipException) as exc:
            # AttributeError could come from pyperclip if no DISPLAY
            raise SkipTest(exc_str(exc))
        assert_in("WTF information of length", cmo.out)
        assert_not_in('user.name', cmo.out)
        if not pyperclip_works:
            # Sometimes it does not throw but just fails to work
            raise SkipTest(
                "Pyperclip seems to be not functioning here correctly")
        assert_not_in('user.name', pyperclip.paste())
        assert_in(_HIDDEN, pyperclip.paste())  # by default no sensitive info
        assert_in("cmd:annex:", pyperclip.paste())  # but the content is there
def _test_list_tuple(thing):
    version = ExternalVersions._deduce_version(thing)
    assert_greater(version, '0.0.1')
    assert_greater('0.2', version)
    assert_equal('0.1', version)
    assert_equal(version, '0.1')
def test_gracefull_death():

    def assert_provides_and_raises(pc, exception, target=None):
        """Helper to get all results before exception is raised"""
        results = []
        with assert_raises(exception):
            for r in pc:
                results.append(r)
        # results should be sorted since we do not guarantee order
        results = sorted(results)
        if target is not None:
            assert_equal(results, target)
        return results

    def interrupted_producer():
        yield 1
        raise ValueError()

    def consumer(i):
        sleep(0.001)
        yield i

    assert_provides_and_raises(
        ProducerConsumer(interrupted_producer(), consumer, jobs=3),
        ValueError, [1])

    def faulty_consumer(i):
        sleep(0.001)
        if i == 1:
            raise ValueError()
        return i

    # so we do not get failed, but other parallel ones finish their job
    results = assert_provides_and_raises(
        ProducerConsumer(range(1000), faulty_consumer, jobs=5), ValueError)
    # and analysis of futures to raise an exception can take some time etc, so
    # we could get more, but for sure we should not get all 999 and not even 100
    if info_log_level:
        assert_greater(100, len(results))
    assert_equal(results[:4], [0, 2, 3, 4])

    def producer():
        for i in range(10):
            sleep(0.0001)
            yield i
        raise ValueError()

    # by default we do not stop upon producer failing
    assert_provides_and_raises(
        ProducerConsumer(producer(), consumer, jobs=2), ValueError, list(range(10)))
    # if the producer produces more than we can consume as quickly, but then fails,
    # ATM we do not proceed to consume other items, but fail when we finish
    # consuming until the time point when producer has failed
    results = assert_provides_and_raises(
        ProducerConsumer(producer(), consumer, reraise_immediately=True, jobs=2),
        ValueError)
    # we will get some results, seems around 4, and they should be "sequential"
    assert_equal(results, list(range(len(results))))
    assert_greater_equal(len(results), 2)
    if info_log_level:
        assert_greater_equal(6, len(results))

    # Simulate a situation close to what we have when outside code consumes
    # some yielded results and then "loses interest" (on_failure="error").
    # In this case we should still exit gracefully (no GeneratorExit warnings),
    # not over-produce, and also not kill already running consumers
    consumed = []

    def inner():
        def consumer(i):
            sleep(0.01)
            consumed.append(i)
            return i
        pc = iter(ProducerConsumer(range(1000), consumer, jobs=2))
        yield next(pc)
        yield next(pc)

    assert_equal(sorted(inner()), [0, 1])
    consumed = sorted(consumed)
    assert_equal(consumed, list(range(len(consumed))))
    assert_greater_equal(len(consumed), 4)  # we should wait for that 2nd batch to finish
    if info_log_level:
        assert_greater_equal(20, len(consumed))
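# A minimal sketch (not exercised above) of the ProducerConsumer pattern the test
# relies on: iterating over the helper consumes producer items across parallel jobs
# and yields whatever the consumer returns.  The echo consumer below is hypothetical.
def _demo_producer_consumer():
    def echo(i):
        # trivial consumer: just return the produced item
        return i
    # order of completion is not guaranteed, hence the sort before comparing
    assert sorted(ProducerConsumer(range(5), echo, jobs=2)) == [0, 1, 2, 3, 4]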