def _search_from_virgin_install(dataset, query):
    #
    # this is to be nice to newbies
    #
    exc_info = sys.exc_info()
    if dataset is None:
        if not ui.is_interactive:
            raise NoDatasetArgumentFound(
                "No DataLad dataset found. Specify a dataset to be "
                "searched, or run interactively to get assistance "
                "installing a queryable superdataset."
            )
        # none was provided, so we could ask the user whether they might want
        # to install our beautiful mega-duper-super-dataset?
        # TODO: following logic could possibly benefit other actions.
        DEFAULT_DATASET_PATH = cfg.obtain('datalad.locations.default-dataset')
        if os.path.exists(DEFAULT_DATASET_PATH):
            default_ds = Dataset(DEFAULT_DATASET_PATH)
            if default_ds.is_installed():
                if ui.yesno(
                        title="No DataLad dataset found at current location",
                        text="Would you like to search the DataLad "
                             "superdataset at %r?" % DEFAULT_DATASET_PATH):
                    pass
                else:
                    raise exc_info[1]
            else:
                raise NoDatasetArgumentFound(
                    "No DataLad dataset found at current location. "
                    "The DataLad superdataset location %r exists, "
                    "but does not contain a dataset." % DEFAULT_DATASET_PATH)
        elif ui.yesno(
                title="No DataLad dataset found at current location",
                text="Would you like to install the DataLad "
                     "superdataset at %r?" % DEFAULT_DATASET_PATH):
            from datalad.api import install
            default_ds = install(DEFAULT_DATASET_PATH, source='///')
            ui.message(
                "From now on you can refer to this dataset using the "
                "label '///'"
            )
        else:
            raise exc_info[1]

        lgr.info(
            "Performing search using DataLad superdataset %r",
            default_ds.path
        )
        for res in default_ds.search(query):
            yield res
        return
    else:
        raise  # this function is called within an exception handling block

def _search_from_virgin_install(dataset, query):
    #
    # this is to be nice to newbies
    #
    exc_info = sys.exc_info()
    if dataset is None:
        if not ui.is_interactive:
            raise NoDatasetArgumentFound(
                "No DataLad dataset found. Specify a dataset to be "
                "searched, or run interactively to get assistance "
                "installing a queryable superdataset.")
        # none was provided, so we could ask the user whether they might want
        # to install our beautiful mega-duper-super-dataset?
        # TODO: following logic could possibly benefit other actions.
        if os.path.exists(LOCAL_CENTRAL_PATH):
            central_ds = Dataset(LOCAL_CENTRAL_PATH)
            if central_ds.is_installed():
                if ui.yesno(
                        title="No DataLad dataset found at current location",
                        text="Would you like to search the DataLad "
                             "superdataset at %r?" % LOCAL_CENTRAL_PATH):
                    pass
                else:
                    reraise(*exc_info)
            else:
                raise NoDatasetArgumentFound(
                    "No DataLad dataset found at current location. "
                    "The DataLad superdataset location %r exists, "
                    "but does not contain a dataset." % LOCAL_CENTRAL_PATH)
        elif ui.yesno(
                title="No DataLad dataset found at current location",
                text="Would you like to install the DataLad "
                     "superdataset at %r?" % LOCAL_CENTRAL_PATH):
            from datalad.api import install
            central_ds = install(LOCAL_CENTRAL_PATH, source='///')
            ui.message("From now on you can refer to this dataset using the "
                       "label '///'")
        else:
            reraise(*exc_info)

        lgr.info("Performing search using DataLad superdataset %r",
                 central_ds.path)
        for res in central_ds.search(query):
            yield res
        return
    else:
        raise

def __call__(match,
             dataset=None,
             search=None,
             report=None,
             report_matched=False,
             format='custom',
             regex=False):
    """
    Yields
    ------
    location : str
        (relative) path to the dataset
    report : dict
        fields which were requested by `report` option
    """
    lgr.debug("Initiating search for match=%r and dataset %r",
              match, dataset)
    try:
        ds = require_dataset(dataset, check_installed=True,
                             purpose='dataset search')
        if ds.id is None:
            raise NoDatasetArgumentFound(
                "This does not seem to be a dataset (no DataLad dataset ID "
                "found). 'datalad create --force %s' can initialize "
                "this repository as a DataLad dataset" % ds.path)
    except NoDatasetArgumentFound:
        exc_info = sys.exc_info()
        if dataset is None:
            if not ui.is_interactive:
                raise NoDatasetArgumentFound(
                    "No DataLad dataset found. Specify a dataset to be "
                    "searched, or run interactively to get assistance "
                    "installing a queryable superdataset."
                )
            # none was provided, so we could ask the user whether they might
            # want to install our beautiful mega-duper-super-dataset?
            # TODO: following logic could possibly benefit other actions.
            if os.path.exists(LOCAL_CENTRAL_PATH):
                central_ds = Dataset(LOCAL_CENTRAL_PATH)
                if central_ds.is_installed():
                    if ui.yesno(
                            title="No DataLad dataset found at current location",
                            text="Would you like to search the DataLad "
                                 "superdataset at %r?" % LOCAL_CENTRAL_PATH):
                        pass
                    else:
                        reraise(*exc_info)
                else:
                    raise NoDatasetArgumentFound(
                        "No DataLad dataset found at current location. "
                        "The DataLad superdataset location %r exists, "
                        "but does not contain a dataset." % LOCAL_CENTRAL_PATH)
            elif ui.yesno(
                    title="No DataLad dataset found at current location",
                    text="Would you like to install the DataLad "
                         "superdataset at %r?" % LOCAL_CENTRAL_PATH):
                from datalad.api import install
                central_ds = install(LOCAL_CENTRAL_PATH, source='///')
                ui.message(
                    "From now on you can refer to this dataset using the "
                    "label '///'"
                )
            else:
                reraise(*exc_info)

            lgr.info(
                "Performing search using DataLad superdataset %r",
                central_ds.path
            )
            for res in central_ds.search(
                    match,
                    search=search,
                    report=report,
                    report_matched=report_matched,
                    format=format,
                    regex=regex):
                yield res
            return
        else:
            raise

    cache_dir = opj(opj(ds.path, get_git_dir(ds.path)), 'datalad', 'cache')
    mcache_fname = opj(cache_dir, 'metadata.p%d' % pickle.HIGHEST_PROTOCOL)

    meta = None
    if os.path.exists(mcache_fname):
        lgr.debug("use cached metadata of '{}' from {}".format(ds, mcache_fname))
        meta, checksum = pickle.load(open(mcache_fname, 'rb'))
        # TODO add more sophisticated tests to decide when the cache is no longer valid
        if checksum != ds.repo.get_hexsha():
            # errrr, try again below
            meta = None

    # don't put in 'else', as yet-to-be-written tests above might fail and require
    # regenerating meta data
    if meta is None:
        lgr.info("Loading and caching local meta-data... might take a few seconds")
        if not exists(cache_dir):
            os.makedirs(cache_dir)

        meta = get_metadata(ds, guess_type=False, ignore_subdatasets=False,
                            ignore_cache=False)
        # merge all info on datasets into a single dict per dataset
        meta = flatten_metadata_graph(meta)
        # extract graph, if any
        meta = meta.get('@graph', meta)
        # build simple queryable representation
        if not isinstance(meta, list):
            meta = [meta]

        # sort entries by location (if present)
        sort_keys = ('location', 'description', 'id')
        # note: with str() instead of '%' we were getting encoding issues...
        meta = sorted(meta, key=lambda m: tuple("%s" % (m.get(x, ""),)
                                                for x in sort_keys))

        # use pickle to store the optimized graph in the cache
        pickle.dump(
            # graph plus checksum from what it was built
            (meta, ds.repo.get_hexsha()),
            open(mcache_fname, 'wb'))
        lgr.debug("cached meta data graph of '{}' in {}".format(ds, mcache_fname))

    if report in ('', ['']):
        report = []
    elif report and not isinstance(report, list):
        report = [report]

    match = assure_list(match)
    search = assure_list(search)
    # convert all to lower case for case-insensitive matching
    search = {x.lower() for x in search}

    def get_in_matcher(m):
        """Function generator to provide closure for a specific value of m"""
        mlower = m.lower()

        def matcher(s):
            return mlower in s.lower()
        return matcher

    matchers = [
        re.compile(match_).search
        if regex
        else get_in_matcher(match_)
        for match_ in match
    ]

    # location should be reported relative to current location
    # We will assume that no one chpwd's while we are yielding
    ds_path_prefix = get_path_prefix(ds.path)

    # So we could provide a useful message whenever there was not a single
    # dataset with the specified `--search` properties
    observed_properties = set()

    # for every meta data set
    for mds in meta:
        hit = False
        hits = [False] * len(matchers)
        matched_fields = set()
        if not mds.get('type', mds.get('schema:type', None)) == 'Dataset':
            # we are presently only dealing with datasets
            continue
        # TODO consider the possibility of nested and context/graph dicts
        # but so far we were trying to build simple lists of dicts, as much
        # as possible
        if not isinstance(mds, dict):
            raise NotImplementedError("nested meta data is not yet supported")

        # manual loop for now
        for k, v in iteritems(mds):
            if search:
                k_lower = k.lower()
                if k_lower not in search:
                    if observed_properties is not None:
                        # record for providing a hint later
                        observed_properties.add(k_lower)
                    continue
                # so we have a hit, no need to track
                observed_properties = None
            if isinstance(v, dict) or isinstance(v, list):
                v = text_type(v)
            for imatcher, matcher in enumerate(matchers):
                if matcher(v):
                    hits[imatcher] = True
                    matched_fields.add(k)
            if all(hits):
                hit = True
                # no need to do it longer than necessary
                if not report_matched:
                    break

        if hit:
            location = mds.get('location', '.')
            report_ = matched_fields.union(report if report else {}) \
                if report_matched else report
            if report_ == ['*']:
                report_dict = mds
            elif report_:
                report_dict = {k: mds[k] for k in report_ if k in mds}
                if report_ and not report_dict:
                    lgr.debug(
                        'meta data match for %s, but no to-be-reported '
                        'properties (%s) found. Present properties: %s',
                        location, ", ".join(report_), ", ".join(sorted(mds))
                    )
            else:
                report_dict = {}  # it was empty but not None -- asked to
                # not report any specific field
            if isinstance(location, (list, tuple)):
                # could be that the same dataset is installed into multiple
                # locations. For now report them separately
                for l in location:
                    yield opj(ds_path_prefix, l), report_dict
            else:
                yield opj(ds_path_prefix, location), report_dict

    if search and observed_properties is not None:
        import difflib
        suggestions = {
            s: difflib.get_close_matches(s, observed_properties)
            for s in search
        }
        suggestions_str = "\n ".join(
            "%s for %s" % (", ".join(choices), s)
            for s, choices in iteritems(suggestions) if choices
        )
        lgr.warning(
            "Found no properties which matched any of the ones you "
            "specified (%s). Maybe you meant one among: %s.\n"
            "Suggestions:\n"
            " %s",
            ", ".join(search),
            ", ".join(observed_properties),
            suggestions_str if suggestions_str.strip() else "none"
        )

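def _example_consume_search_results(results):
    """A minimal usage sketch, not part of the original module: it shows how
    the (location, report_dict) pairs yielded by the search generator above
    could be consumed.  ``results`` stands for that generator (however it was
    obtained), and 'description' is just an example of a reported property.
    """
    for location, report in results:
        # location is a (relative) path; report holds the requested fields
        print("%s\t%s" % (location, report.get('description', '')))
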
def func2(x):
    assert x == 1
    eq_(ui.yesno("title"), True)
    eq_(ui.question("title2"), "maybe so")
    assert_raises(AssertionError, ui.question, "asking more than we know")
    return x * 2

def __call__(dataset, filename=None, missing_content='error', no_annex=False,
             # TODO: support working with projects and articles within them
             # project_id=None,
             article_id=None):
    import os
    import logging
    lgr = logging.getLogger('datalad.plugin.export_to_figshare')

    from datalad.ui import ui
    from datalad.api import add_archive_content
    from datalad.api import export_archive
    from datalad.distribution.dataset import require_dataset
    from datalad.support.annexrepo import AnnexRepo

    dataset = require_dataset(dataset, check_installed=True,
                              purpose='export to figshare')

    if not isinstance(dataset.repo, AnnexRepo):
        raise ValueError(
            "%s is not an annex repo, so annexification cannot be done"
            % dataset
        )

    if dataset.repo.is_dirty():
        raise RuntimeError(
            "Paranoid authors of DataLad refuse to proceed in a dirty repository"
        )
    if filename is None:
        filename = dataset.path
    lgr.info(
        "Exporting current tree as an archive under %s since figshare "
        "does not support directories",
        filename
    )
    archive_out = next(
        export_archive(
            dataset,
            filename=filename,
            archivetype='zip',
            missing_content=missing_content,
            return_type="generator"
        )
    )
    assert archive_out['status'] == 'ok'
    fname = archive_out['path']

    lgr.info("Uploading %s to figshare", fname)
    figshare = FigshareRESTLaison()

    if not article_id:
        # TODO: ask if it should be an article within a project
        if ui.is_interactive:
            # or should we just upload to a new article?
            if ui.yesno(
                    "Would you like to create a new article to upload to? "
                    "If not - we will list existing articles",
                    title="Article"
            ):
                article = figshare.create_article(
                    title=os.path.basename(dataset.path)
                )
                lgr.info(
                    "Created a new (private) article %(id)s at %(url_private_html)s. "
                    "Please visit it, enter additional meta-data and make public",
                    article
                )
                article_id = article['id']
            else:
                article_id = int(ui.question(
                    "Which of the articles should we upload to?",
                    choices=list(map(str, figshare.get_article_ids()))
                ))
        if not article_id:
            raise ValueError("We need an article to upload to.")

    file_info = figshare.upload_file(
        fname,
        files_url='account/articles/%s/files' % article_id
    )

    if no_annex:
        lgr.info("Removing generated tarball")
        unlink(fname)
    else:
        # I will leave all the complaining etc to the dataset add if path
        # is outside etc
        lgr.info("'Registering' %s within annex", fname)
        repo = dataset.repo
        repo.add(fname, git=False)
        key = repo.get_file_key(fname)
        lgr.info("Adding URL %(download_url)s for it", file_info)
        repo._annex_custom_command(
            [],
            [
                "git", "annex", "registerurl",
                '-c', 'annex.alwayscommit=false',
                key, file_info['download_url']
            ]
        )

        lgr.info("Registering links back for the content of the archive")
        add_archive_content(
            fname,
            annex=dataset.repo,
            delete_after=True,  # just remove extracted into a temp dir
            allow_dirty=True,  # since we have a tarball
            commit=False  # we do not want to commit anything we have done here
        )

        lgr.info("Removing generated and now registered-in-annex archive")
        repo.drop(key, key=True, options=['--force'])
        repo.remove(fname, force=True)  # remove the tarball

        # if annex in {'delete'}:
        #     dataset.repo.remove(fname)
        # else:
        #     # kinda makes little sense I guess.
        #     # Made more sense if export_archive could export an arbitrary treeish
        #     # so we could create a branch where to dump and export to figshare
        #     # (kinda closer to my idea)
        #     dataset.save(fname, message="Added the entire dataset into a zip file")

    # TODO: add to downloader knowledge about figshare token so it could download-url
    # those zipballs before they go public
    yield dict(
        status='ok',
        # TODO: add article url (which needs to be queried if only ID is known)
        message="Published archive {}".format(
            file_info['download_url']),
        file_info=file_info,
        path=dataset,
        action='export_to_figshare',
        logger=lgr
    )

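def _example_check_figshare_result(res):
    """A small sketch, not part of the original module, of inspecting one
    result record yielded by the export function above.  ``res`` stands for
    one yielded dict; only fields that are actually set in the yield
    statement above are accessed.
    """
    assert res['action'] == 'export_to_figshare'
    if res['status'] == 'ok':
        # the download URL comes from the figshare file_info record
        return res['file_info']['download_url']
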
def _create_dataset_sibling(
        name,
        ds,
        hierarchy_basepath,
        shell,
        replicate_local_structure,
        ri,
        target_dir,
        target_url,
        target_pushurl,
        existing,
        shared,
        group,
        publish_depends,
        publish_by_default,
        install_postupdate_hook,
        as_common_datasrc,
        annex_wanted,
        annex_group,
        annex_groupwanted,
        inherit):
    """Everyone is very smart here and could figure out the combinatorial
    affluence among the provided tiny (just slightly over a dozen) number of
    options and only a few pages of code
    """
    localds_path = ds.path
    ds_name = relpath(localds_path, start=hierarchy_basepath)
    if not replicate_local_structure:
        ds_name = '' if ds_name == curdir \
            else '-{}'.format(ds_name.replace("/", "-"))
        remoteds_path = target_dir.replace("%RELNAME", ds_name)
    else:
        # TODO: opj depends on local platform, not the remote one.
        # check how to deal with it. Does windows ssh server accept
        # posix paths? vice versa? Should planned SSH class provide
        # tools for this issue?
        # see gh-1188
        remoteds_path = normpath(opj(target_dir, ds_name))

    ds_repo = ds.repo

    # construct a would-be ssh url based on the current dataset's path
    ri.path = remoteds_path
    ds_url = ri.as_str()

    # configure dataset's git-access urls
    ds_target_url = target_url.replace('%RELNAME', ds_name) \
        if target_url else ds_url
    # push, configure only if needed
    ds_target_pushurl = None
    if ds_target_url != ds_url:
        # not guaranteed that we can push via the primary URL
        ds_target_pushurl = target_pushurl.replace('%RELNAME', ds_name) \
            if target_pushurl else ds_url

    lgr.info("Considering to create a target dataset {0} at {1} of {2}".format(
        localds_path, remoteds_path,
        "localhost" if isinstance(ri, PathRI) else ri.hostname))

    # Must be set to True only if exists and existing='reconfigure',
    # otherwise we might skip actions if we say existing='reconfigure'
    # but it did not even exist before
    only_reconfigure = False
    if remoteds_path != '.':
        # check if target exists
        # TODO: Is this condition valid for != '.' only?
        path_children = _ls_remote_path(shell, remoteds_path)
        path_exists = path_children is not None

        if path_exists:
            _msg = "Target path %s already exists." % remoteds_path
            if path_exists and not path_children:
                # path should be an empty directory, which should be ok to remove
                try:
                    lgr.debug(
                        "Trying to rmdir %s on remote since it seems to be an empty dir",
                        remoteds_path)
                    # should be safe since it should not remove anything unless an empty dir
                    shell("rmdir {}".format(sh_quote(remoteds_path)))
                    path_exists = False
                except CommandError as e:
                    # If rmdir fails -- either it contains stuff or we have no permissions
                    # TODO: fixup encode/decode dance again :-/ we should have got
                    # unicode/str here by now.  I guess it is the same as
                    # https://github.com/ReproNim/niceman/issues/83
                    # where I have reused this Runner thing
                    try:
                        # ds_name is unicode which makes _msg unicode so we must be
                        # unicode-ready
                        err_str = str(e.stderr)
                    except UnicodeDecodeError:
                        err_str = e.stderr.decode(errors='replace')
                    _msg += " And it fails to rmdir (%s)." % (err_str.strip(),)

        if path_exists:
            if existing == 'error':
                raise RuntimeError(_msg)
            elif existing == 'skip':
                lgr.info(_msg + " Skipping")
                return
            elif existing == 'replace':
                remove = False
                if path_children:
                    has_git = '.git' in path_children
                    _msg_stats = _msg + (
                        " It is %sa git repository and has %d files/dirs."
                        % ("" if has_git else "not ",
                           len(path_children)))
                    if ui.is_interactive:
                        remove = ui.yesno(
                            "Do you really want to remove it?",
                            title=_msg_stats,
                            default=False)
                    else:
                        raise RuntimeError(
                            _msg_stats +
                            " Remove it manually first or rerun datalad in "
                            "an interactive shell to confirm this action.")
                    if not remove:
                        raise RuntimeError(_msg)
                # Remote location might already contain a git repository or be
                # just a directory.
                lgr.info(_msg + " Replacing")
                # enable write permissions to allow removing dir
                shell("chmod +r+w -R {}".format(sh_quote(remoteds_path)))
                # remove target at path
                shell("rm -rf {}".format(sh_quote(remoteds_path)))
                # if we succeeded in removing it
                path_exists = False
                # Since it is gone now, git-annex also should forget about it
                remotes = ds_repo.get_remotes()
                if name in remotes:
                    # so we had this remote already, we should announce it dead
                    # XXX what if there was some kind of mismatch and this name
                    # isn't matching the actual remote UUID?  should we have
                    # checked more carefully?
                    lgr.info(
                        "Announcing existing remote %s dead to annex and removing",
                        name)
                    if isinstance(ds_repo, AnnexRepo):
                        ds_repo.set_remote_dead(name)
                    ds_repo.remove_remote(name)
            elif existing == 'reconfigure':
                lgr.info(_msg + " Will only reconfigure")
                only_reconfigure = True
            else:
                raise ValueError(
                    "Do not know how to handle existing={}".format(
                        repr(existing)))

        if not path_exists:
            shell("mkdir -p {}".format(sh_quote(remoteds_path)))

    delayed_super = _DelayedSuper(ds)
    if inherit and delayed_super.super:
        if shared is None:
            # here we must analyze current_ds's super, not the super_ds
            # inherit from the setting on the remote end
            shared = CreateSibling._get_ds_remote_shared_setting(
                delayed_super, name, shell)

        if not install_postupdate_hook:
            # Even though the directive from above was False because no UI was
            # explicitly requested, we were asked to inherit the setup, so we
            # might need to install the hook, if super has it on the remote
            install_postupdate_hook = CreateSibling._has_active_postupdate(
                delayed_super, name, shell)

    if group:
        # Either the repository existed before or a new directory was created
        # for it; set its group to the desired one (if provided) using chgrp
        shell("chgrp -R {} {}".format(
            sh_quote(str(group)),
            sh_quote(remoteds_path)))

    # don't (re-)initialize dataset if existing == reconfigure
    if not only_reconfigure:
        # init git and possibly annex repo
        if not CreateSibling.init_remote_repo(
                remoteds_path, shell, shared, ds,
                description=target_url):
            return

        if target_url and not is_ssh(target_url):
            # we are not coming in via SSH, hence cannot assume proper
            # setup for webserver access -> fix
            shell('git -C {} update-server-info'.format(
                sh_quote(remoteds_path)))
    else:
        # TODO -- we might still want to reconfigure 'shared' setting!
        pass

    # at this point we have a remote sibling in some shape or form
    # -> add as remote
    lgr.debug("Adding the siblings")
    # TODO generator, yield the now swallowed results
    Siblings.__call__(
        'configure',
        dataset=ds,
        name=name,
        url=ds_target_url,
        pushurl=ds_target_pushurl,
        recursive=False,
        fetch=True,
        as_common_datasrc=as_common_datasrc,
        publish_by_default=publish_by_default,
        publish_depends=publish_depends,
        annex_wanted=annex_wanted,
        annex_group=annex_group,
        annex_groupwanted=annex_groupwanted,
        inherit=inherit,
        result_renderer=None,
    )

    # check git version on remote end
    lgr.info("Adjusting remote git configuration")
    if shell.get_git_version() and shell.get_git_version() >= LooseVersion("2.4"):
        # allow for pushing to checked out branch
        try:
            shell("git -C {} config receive.denyCurrentBranch updateInstead".format(
                sh_quote(remoteds_path)))
        except CommandError as e:
            lgr.error(
                "git config failed at remote location %s.\n"
                "You will not be able to push to checked out "
                "branch. Error: %s", remoteds_path, exc_str(e))
    else:
        lgr.error(
            "Git version >= 2.4 needed to configure remote."
            " Version detected on server: %s\nSkipping configuration"
            " of receive.denyCurrentBranch - you will not be able to"
            " publish updates to this repository. Upgrade your git"
            " and run with --existing=reconfigure",
            shell.get_git_version())

    branch = ds_repo.get_active_branch()
    if branch is not None:
        branch = ds_repo.get_corresponding_branch(branch) or branch
        # Setting the HEAD for the created sibling to the original repo's
        # current branch should be unsurprising, and it helps with consumers
        # that don't properly handle the default branch with no commits. See
        # gh-4349.
        shell("git -C {} symbolic-ref HEAD refs/heads/{}".format(
            sh_quote(remoteds_path), branch))

    if install_postupdate_hook:
        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(remoteds_path, shell, ds)
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

    return remoteds_path

def enter_new(self, url=None, auth_types=[], url_re=None, name=None,
              credential_name=None, credential_type=None):
    # TODO: level/location!
    """Create new provider and credential config

    If interactive, this will ask the user to enter the details (or confirm
    default choices).  A dedicated config file is written at
    <user_config_dir>/providers/<name>.cfg

    Parameters
    ----------
    url: str or RI
      URL this config is created for
    auth_types: list
      List of authentication types to choose from. First entry becomes
      default. See datalad.downloaders.providers.AUTHENTICATION_TYPES
    url_re: str
      regular expression; once created, this config will be used for any
      matching URL; defaults to `url`
    name: str
      name for the provider; needs to be unique per user
    credential_name: str
      name for the credential; defaults to the provider's name
    credential_type: str
      credential type to use (key for datalad.downloaders.CREDENTIAL_TYPES)
    """
    from datalad.ui import ui
    if url and not name:
        ri = RI(url)
        for f in ('hostname', 'name'):
            try:
                # might need sanitization
                name = str(getattr(ri, f))
            except AttributeError:
                pass
    known_providers_by_name = {p.name: p for p in self._providers}
    providers_user_dir = self._get_providers_dirs()['user']
    while True:
        name = ui.question(
            title="New provider name",
            text="Unique name to identify 'provider' for %s" % url,
            default=name)
        filename = pathjoin(providers_user_dir, '%s.cfg' % name)
        if name in known_providers_by_name:
            if ui.yesno(
                    title="Known provider %s" % name,
                    text="Provider with name %s already known. Do you want to "
                         "use it for this session?" % name,
                    default=True):
                return known_providers_by_name[name]
        elif path.lexists(filename):
            ui.error("File %s already exists, choose another name" % filename)
        else:
            break

    if not credential_name:
        credential_name = name
    if not url_re:
        url_re = re.escape(url) if url else None
    while True:
        url_re = ui.question(
            title="New provider regular expression",
            text="A (Python) regular expression to specify for which URLs "
                 "this provider should be used",
            default=url_re)
        if not re.match(url_re, url):
            ui.error("Provided regular expression doesn't match the original "
                     "url. Please re-enter")
            # TODO: url_re of another provider might match it as well.
            #  I am not sure if we have any kind of "priority" setting ATM
            #  to differentiate or to try multiple types :-/
        else:
            break

    authentication_type = None
    if auth_types:
        auth_types = [t for t in auth_types if t in AUTHENTICATION_TYPES]
        if auth_types:
            authentication_type = auth_types[0]

    # Setup credential
    authentication_type = ui.question(
        title="Authentication type",
        text="What authentication type to use",
        default=authentication_type,
        choices=sorted(AUTHENTICATION_TYPES))
    authenticator_class = AUTHENTICATION_TYPES[authentication_type]

    # TODO: need to figure out what fields that authenticator might
    #       need to have setup and ask for them here!

    credential_type = ui.question(
        title="Credential",
        text="What type of credential should be used?",
        choices=sorted(CREDENTIAL_TYPES),
        default=credential_type
        or getattr(authenticator_class, 'DEFAULT_CREDENTIAL_TYPE'))

    cfg = self._CONFIG_TEMPLATE.format(**locals())
    if ui.yesno(
            title="Save provider configuration file",
            text="Following configuration will be written to %s:\n%s"
                 % (filename, cfg),
            default='yes'):
        # Just create a configuration file and reload the thing
        return self._store_new(
            url=url,
            authentication_type=authentication_type,
            authenticator_class=authenticator_class,
            url_re=url_re,
            name=name,
            credential_name=credential_name,
            credential_type=credential_type,
            level='user'
        )
    else:
        return None

def enter_new(self, url=None, auth_types=[]):
    from datalad.ui import ui
    name = None
    if url:
        ri = RI(url)
        for f in ('hostname', 'name'):
            try:
                # might need sanitization
                name = str(getattr(ri, f))
            except AttributeError:
                pass
    known_providers_by_name = {p.name: p for p in self._providers}
    providers_user_dir = self._get_providers_dirs()['user']
    while True:
        name = ui.question(
            title="New provider name",
            text="Unique name to identify 'provider' for %s" % url,
            default=name
        )
        filename = pathjoin(providers_user_dir, '%s.cfg' % name)
        if name in known_providers_by_name:
            if ui.yesno(
                    title="Known provider %s" % name,
                    text="Provider with name %s already known. Do you want to "
                         "use it for this session?" % name,
                    default=True
            ):
                return known_providers_by_name[name]
        elif path.lexists(filename):
            ui.error(
                "File %s already exists, choose another name" % filename)
        else:
            break

    url_re = re.escape(url) if url else None
    while True:
        url_re = ui.question(
            title="New provider regular expression",
            text="A (Python) regular expression to specify for which URLs "
                 "this provider should be used",
            default=url_re
        )
        if not re.match(url_re, url):
            ui.error("Provided regular expression doesn't match the original "
                     "url. Please re-enter")
            # TODO: url_re of another provider might match it as well.
            #  I am not sure if we have any kind of "priority" setting ATM
            #  to differentiate or to try multiple types :-/
        else:
            break

    authentication_type = None
    if auth_types:
        auth_types = [
            t for t in auth_types if t in AUTHENTICATION_TYPES
        ]
        if auth_types:
            authentication_type = auth_types[0]

    # Setup credential
    authentication_type = ui.question(
        title="Authentication type",
        text="What authentication type to use",
        default=authentication_type,
        choices=sorted(AUTHENTICATION_TYPES)
    )
    authenticator_class = AUTHENTICATION_TYPES[authentication_type]

    # TODO: need to figure out what fields that authenticator might
    #       need to have setup and ask for them here!

    credential_type = ui.question(
        title="Credential",
        text="What type of credential should be used?",
        choices=sorted(CREDENTIAL_TYPES),
        default=getattr(authenticator_class, 'DEFAULT_CREDENTIAL_TYPE')
    )

    # Just create a configuration file and reload the thing
    if not path.lexists(providers_user_dir):
        os.makedirs(providers_user_dir)

    cfg = """\
# Provider configuration file created to initially access
# {url}

[provider:{name}]
url_re = {url_re}
authentication_type = {authentication_type}
# Note that you might need to specify additional fields specific to the
# authenticator. For now "look into the docs/source" of {authenticator_class}
# {authentication_type}_
credential = {name}

[credential:{name}]
# If known, specify URL or email to how/where to request credentials
# url = ???
type = {credential_type}
""".format(**locals())

    if ui.yesno(
            title="Save provider configuration file",
            text="Following configuration will be written to %s:\n%s"
                 % (filename, cfg),
            default='yes'
    ):
        with open(filename, 'wb') as f:
            f.write(cfg.encode('utf-8'))
    else:
        return None

    self.reload()
    # XXX see above note about possibly multiple matches etc
    return self.get_provider(url)

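def _example_enter_new_usage(providers):
    """A hypothetical usage sketch, not part of the original module, for the
    interactive ``enter_new`` methods above.  ``providers`` stands for an
    instance of the class defining them (the loaded providers collection);
    the URL and the 'http_basic_auth' choice are made-up illustrations.
    """
    provider = providers.enter_new(
        url='https://example.com/restricted/data.dat',
        auth_types=['http_basic_auth'],
    )
    # the returned provider (or None, if the user declined to save) can then
    # be used for subsequent downloads of matching URLs
    return provider
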
def create_repo(self, ds, reponame, organization, private, dry_run, existing):
    """Create a repository on the target platform

    Returns
    -------
    dict
      Result record, with status='ok' when all is good, status='error'
      when unrecoverably broken, status='impossible' when recoverably broken

    Raises
    ------
    Exception
      Any unhandled condition (in particular unexpected non-success HTTP
      response codes) will raise an exception.
    """
    res = self.repo_create_request(reponame, organization, private, dry_run)

    if res.get('status') == 'impossible' and res.get('preexisted'):
        # we cannot create, because there is something in the target spot
        orguser = organization or self.authenticated_user['login']

        if existing == 'reconfigure':
            # we want to use the existing one instead
            # query properties, report, and be done
            repo_props = self.repo_get_request(orguser, reponame)
            res.update(
                status='notneeded',
                # return in full
                host_response=repo_props,
                # perform some normalization
                **self.normalize_repo_properties(repo_props))
        elif existing == 'replace':
            # only implemented for backward compat with
            # create-sibling-github
            _msg = ('repository "%s" already exists', reponame)
            if ui.is_interactive:
                remove = ui.yesno(
                    "Do you really want to remove it?",
                    title=_msg[0] % _msg[1],
                    default=False)
            else:
                return dict(
                    res,
                    status='impossible',
                    message=(_msg[0] +
                             " Remove it manually first or "
                             "rerun DataLad in an interactive shell "
                             "to confirm this action.",
                             _msg[1]),
                )
            if not remove:
                return dict(
                    res,
                    status='impossible',
                    message=_msg,
                )
            # remove the beast in cold blood
            self.repo_delete_request(
                organization or self.authenticated_user['login'],
                reponame)
            # try creating now
            return self.create_repo(
                ds, reponame, organization, private, dry_run, existing)

    # TODO intermediate error handling?
    return res

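def _example_create_repo_result(platform, ds):
    """A hypothetical sketch, not part of the original module, of consuming
    the result record returned by ``create_repo`` above.  ``platform`` stands
    for an instance of the class defining it; the argument values are made up,
    and only the documented ``status`` semantics are relied upon.
    """
    res = platform.create_repo(
        ds, reponame='myrepo', organization=None,
        private=False, dry_run=False, existing='error')
    if res.get('status') in ('ok', 'notneeded'):
        return res
    # 'impossible' is recoverably broken (e.g. retry with existing='reconfigure'),
    # 'error' is not
    raise RuntimeError("Could not create repository: %s" % (res,))
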
def func2(x):
    assert x == 1
    eq_(ui.yesno("title"), True)
    eq_(ui.question("title2"), "maybe so")
    assert_raises(AssertionError, ui.question, "asking more than we know")
    return x * 2

def __call__(match,
             dataset=None,
             search=None,
             report=None,
             report_matched=False,
             format='custom',
             regex=False):
    lgr.debug("Initiating search for match=%r and dataset %r",
              match, dataset)
    try:
        ds = require_dataset(dataset, check_installed=True,
                             purpose='dataset search')
        if ds.id is None:
            raise NoDatasetArgumentFound(
                "This does not seem to be a dataset (no DataLad dataset ID "
                "found). 'datalad create --force %s' can initialize "
                "this repository as a DataLad dataset" % ds.path)
    except NoDatasetArgumentFound:
        exc_info = sys.exc_info()
        if dataset is None:
            if not ui.is_interactive:
                raise NoDatasetArgumentFound(
                    "No DataLad dataset found. Specify a dataset to be "
                    "searched, or run interactively to get assistance "
                    "installing a queryable superdataset."
                )
            # none was provided, so we could ask the user whether they might
            # want to install our beautiful mega-duper-super-dataset?
            # TODO: following logic could possibly benefit other actions.
            if os.path.exists(LOCAL_CENTRAL_PATH):
                central_ds = Dataset(LOCAL_CENTRAL_PATH)
                if central_ds.is_installed():
                    if ui.yesno(
                            title="No DataLad dataset found at current location",
                            text="Would you like to search the DataLad "
                                 "superdataset at %r?" % LOCAL_CENTRAL_PATH):
                        pass
                    else:
                        reraise(*exc_info)
                else:
                    raise NoDatasetArgumentFound(
                        "No DataLad dataset found at current location. "
                        "The DataLad superdataset location %r exists, "
                        "but does not contain a dataset." % LOCAL_CENTRAL_PATH)
            elif ui.yesno(
                    title="No DataLad dataset found at current location",
                    text="Would you like to install the DataLad "
                         "superdataset at %r?" % LOCAL_CENTRAL_PATH):
                from datalad.api import install
                central_ds = install(LOCAL_CENTRAL_PATH, source='///')
                ui.message(
                    "From now on you can refer to this dataset using the "
                    "label '///'"
                )
            else:
                reraise(*exc_info)

            lgr.info(
                "Performing search using DataLad superdataset %r",
                central_ds.path
            )
            for res in central_ds.search(
                    match,
                    search=search,
                    report=report,
                    report_matched=report_matched,
                    format=format,
                    regex=regex):
                yield res
            return
        else:
            raise

    cache_dir = opj(opj(ds.path, get_git_dir(ds.path)), 'datalad', 'cache')
    mcache_fname = opj(cache_dir, 'metadata.p%d' % pickle.HIGHEST_PROTOCOL)

    meta = None
    if os.path.exists(mcache_fname):
        lgr.debug("use cached metadata of '{}' from {}".format(ds, mcache_fname))
        meta, checksum = pickle.load(open(mcache_fname, 'rb'))
        # TODO add more sophisticated tests to decide when the cache is no longer valid
        if checksum != ds.repo.get_hexsha():
            # errrr, try again below
            meta = None

    # don't put in 'else', as yet-to-be-written tests above might fail and require
    # regenerating meta data
    if meta is None:
        lgr.info("Loading and caching local meta-data... might take a few seconds")
        if not exists(cache_dir):
            os.makedirs(cache_dir)

        meta = get_metadata(ds, guess_type=False, ignore_subdatasets=False,
                            ignore_cache=False)
        # merge all info on datasets into a single dict per dataset
        meta = flatten_metadata_graph(meta)
        # extract graph, if any
        meta = meta.get('@graph', meta)
        # build simple queryable representation
        if not isinstance(meta, list):
            meta = [meta]

        # sort entries by location (if present)
        sort_keys = ('location', 'description', 'id')
        meta = sorted(meta, key=lambda m: tuple(m.get(x, "") for x in sort_keys))

        # use pickle to store the optimized graph in the cache
        pickle.dump(
            # graph plus checksum from what it was built
            (meta, ds.repo.get_hexsha()),
            open(mcache_fname, 'wb'))
        lgr.debug("cached meta data graph of '{}' in {}".format(ds, mcache_fname))

    if report in ('', ['']):
        report = []
    elif report and not isinstance(report, list):
        report = [report]

    match = assure_list(match)
    search = assure_list(search)
    # convert all to lower case for case-insensitive matching
    search = {x.lower() for x in search}

    def get_in_matcher(m):
        """Function generator to provide closure for a specific value of m"""
        mlower = m.lower()

        def matcher(s):
            return mlower in s.lower()
        return matcher

    matchers = [
        re.compile(match_).search
        if regex
        else get_in_matcher(match_)
        for match_ in match
    ]

    # location should be reported relative to current location
    # We will assume that no one chpwd's while we are yielding
    ds_path_prefix = get_path_prefix(ds.path)

    # So we could provide a useful message whenever there was not a single
    # dataset with the specified `--search` properties
    observed_properties = set()

    # for every meta data set
    for mds in meta:
        hit = False
        hits = [False] * len(matchers)
        matched_fields = set()
        if not mds.get('type', mds.get('schema:type', None)) == 'Dataset':
            # we are presently only dealing with datasets
            continue
        # TODO consider the possibility of nested and context/graph dicts
        # but so far we were trying to build simple lists of dicts, as much
        # as possible
        if not isinstance(mds, dict):
            raise NotImplementedError("nested meta data is not yet supported")

        # manual loop for now
        for k, v in iteritems(mds):
            if search:
                k_lower = k.lower()
                if k_lower not in search:
                    if observed_properties is not None:
                        # record for providing a hint later
                        observed_properties.add(k_lower)
                    continue
                # so we have a hit, no need to track
                observed_properties = None
            if isinstance(v, dict) or isinstance(v, list):
                v = text_type(v)
            for imatcher, matcher in enumerate(matchers):
                if matcher(v):
                    hits[imatcher] = True
                    matched_fields.add(k)
            if all(hits):
                hit = True
                # no need to do it longer than necessary
                if not report_matched:
                    break

        if hit:
            location = mds.get('location', '.')
            report_ = matched_fields.union(report if report else {}) \
                if report_matched else report
            if report_ == ['*']:
                report_dict = mds
            elif report_:
                report_dict = {k: mds[k] for k in report_ if k in mds}
                if report_ and not report_dict:
                    lgr.debug(
                        'meta data match for %s, but no to-be-reported '
                        'properties (%s) found. Present properties: %s',
                        location, ", ".join(report_), ", ".join(sorted(mds))
                    )
            else:
                report_dict = {}  # it was empty but not None -- asked to
                # not report any specific field
            if isinstance(location, (list, tuple)):
                # could be that the same dataset is installed into multiple
                # locations. For now report them separately
                for l in location:
                    yield opj(ds_path_prefix, l), report_dict
            else:
                yield opj(ds_path_prefix, location), report_dict

    if search and observed_properties is not None:
        import difflib
        suggestions = {
            s: difflib.get_close_matches(s, observed_properties)
            for s in search
        }
        suggestions_str = "\n ".join(
            "%s for %s" % (", ".join(choices), s)
            for s, choices in iteritems(suggestions) if choices
        )
        lgr.warning(
            "Found no properties which matched any of the ones you "
            "specified (%s). Maybe you meant one among: %s.\n"
            "Suggestions:\n"
            " %s",
            ", ".join(search),
            ", ".join(observed_properties),
            suggestions_str if suggestions_str.strip() else "none"
        )