def tabular(client, datasets):
    """Format datasets with a tabular output."""
    from renku.models._tabulate import tabulate

    click.echo(
        tabulate(
            datasets,
            headers=OrderedDict((
                ('uid', 'id'),
                ('display_name', None),
                ('version', None),
                ('created', None),
                ('creators_csv', 'creators'),
            )),
        )
    )
def tabular(client, datasets=None):
    """Format datasets with a tabular output."""
    from renku.models._tabulate import tabulate

    datasets = datasets or client.datasets

    click.echo(
        tabulate(
            datasets.values(),
            headers=OrderedDict((
                ('short_id', 'id'),
                ('name', None),
                ('created', None),
                ('authors_csv', 'authors'),
            )),
        )
    )
def tabular(client, records):
    """Format dataset files with a tabular output.

    :param client: LocalClient instance.
    :param records: Filtered collection.
    """
    from renku.models._tabulate import tabulate

    echo_via_pager(
        tabulate(
            records,
            headers=OrderedDict((
                ('added', None),
                ('creators_csv', 'creators'),
                ('dataset', None),
                ('full_path', 'path'),
            )),
        )
    )
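The three formatters above all drive the output through the same pattern: an OrderedDict whose keys name attributes to read off each record and whose values, when not None, override the printed column title. Below is a minimal, self-contained sketch of that pattern — not the renku.models._tabulate implementation — with a hypothetical Record type standing in for the real dataset-file objects:

# A minimal sketch (not renku's tabulate) of how an OrderedDict headers
# mapping can drive a tabular view. ``Record`` and the sample data are
# hypothetical stand-ins for the real dataset-file objects.
from collections import OrderedDict, namedtuple

Record = namedtuple('Record', ['added', 'creators_csv', 'dataset', 'full_path'])


def tabulate_sketch(records, headers):
    """Render records as aligned plain-text columns."""
    # Header value overrides the column title; otherwise the key is used.
    titles = [value if value else key for key, value in headers.items()]
    rows = [[str(getattr(r, key)) for key in headers] for r in records]
    # Pad each column to the width of its longest cell (title included).
    widths = [
        max(len(cell) for cell in column)
        for column in zip(titles, *rows)
    ]
    return '\n'.join(
        '  '.join(cell.ljust(width) for cell, width in zip(row, widths))
        for row in [titles] + rows
    )


if __name__ == '__main__':
    records = [Record('2019-01-01', 'Jane Doe', 'demo', 'data/demo/file.csv')]
    print(tabulate_sketch(records, OrderedDict((
        ('added', None),
        ('creators_csv', 'creators'),
        ('dataset', None),
        ('full_path', 'path'),
    ))))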
def import_(ctx, client, uri, name, extract):
    """Import data from a 3rd party provider.

    Supported providers: [Zenodo, ]
    """
    provider, err = ProviderFactory.from_uri(uri)
    if err and provider is None:
        raise BadParameter('Could not process {0}.\n{1}'.format(uri, err))

    try:
        record = provider.find_record(uri)
        dataset_ = record.as_dataset(client)
        files_ = dataset_.files

        click.echo(
            tabulate(
                files_,
                headers=OrderedDict((
                    ('checksum', None),
                    ('filename', 'name'),
                    ('size_in_mb', 'size (mb)'),
                    ('filetype', 'type'),
                ))
            )
        )

        text_prompt = 'Do you wish to download this version?'
        if record.is_last_version(uri) is False:
            text_prompt = WARNING + 'Newer version found.\n' + text_prompt

    except KeyError as e:
        raise BadParameter((
            'Could not process {0}.\n'
            'Unable to fetch metadata due to {1}'.format(uri, e)
        ))
    except LookupError:
        raise BadParameter((
            'Could not process {0}.\n'
            'URI not found.'.format(uri)
        ))

    if files_ and click.confirm(text_prompt):
        data_folder = tempfile.mkdtemp()

        pool_size = min(
            int(os.getenv('RENKU_POOL_SIZE', mp.cpu_count() // 2)), 4
        )

        freeze_support()  # Windows support
        pool = mp.Pool(
            pool_size,
            # Windows support
            initializer=tqdm.set_lock,
            initargs=(RLock(), )
        )
        processing = [
            pool.apply_async(
                download_file, args=(
                    i,
                    extract,
                    data_folder,
                    file_,
                )
            ) for i, file_ in enumerate(files_)
        ]

        for p in processing:
            p.wait()
        pool.close()

        dataset_name = name or dataset_.display_name
        if write_dataset(client, dataset_name):
            add_to_dataset(
                client,
                urls=[str(p) for p in Path(data_folder).glob('*')],
                name=dataset_name,
                with_metadata=dataset_
            )

            click.secho('OK', fg='green')
def import_(ctx, client, uri, name, extract):
    """Import data from a 3rd party provider.

    Supported providers: [Zenodo, Dataverse]
    """
    provider, err = ProviderFactory.from_uri(uri)

    if err and provider is None:
        raise BadParameter('Could not process {0}.\n{1}'.format(uri, err))
    elif err:
        click.echo(WARNING + err)

    try:
        record = provider.find_record(uri)
        dataset_ = record.as_dataset(client)
        files_ = dataset_.files

        click.echo(
            tabulate(
                files_,
                headers=OrderedDict((
                    ('checksum', None),
                    ('filename', 'name'),
                    ('size_in_mb', 'size (mb)'),
                    ('filetype', 'type'),
                ))
            )
        )

        text_prompt = 'Do you wish to download this version?'
        if record.is_last_version(uri) is False:
            text_prompt = WARNING + 'Newer version found at {}\n'.format(
                record.links.get('latest_html')
            ) + text_prompt

    except KeyError as e:
        raise BadParameter((
            'Could not process {0}.\n'
            'Unable to fetch metadata due to {1}'.format(uri, e)
        ))
    except LookupError:
        raise BadParameter((
            'Could not process {0}.\n'
            'URI not found.'.format(uri)
        ))

    if files_ and click.confirm(text_prompt):
        data_folder = tempfile.mkdtemp()

        pool_size = min(
            int(os.getenv('RENKU_POOL_SIZE', mp.cpu_count() // 2)), 4
        )

        manager = mp.Manager()
        id_queue = manager.Queue()

        for i in range(pool_size):
            id_queue.put(i)

        def _init(lock, id_queue):
            """Set up tqdm lock and worker process index.

            See https://stackoverflow.com/a/42817946

            Fixes tqdm line position when |files| > terminal-height,
            so only |workers| progress bars are shown at a time.
            """
            global current_process_position
            current_process_position = id_queue.get()
            tqdm.set_lock(lock)

        freeze_support()  # Windows support
        pool = mp.Pool(
            pool_size,
            # Windows support
            initializer=_init,
            initargs=(RLock(), id_queue)
        )
        processing = [
            pool.apply_async(
                download_file, args=(
                    extract,
                    data_folder,
                    file_,
                )
            ) for file_ in files_
        ]

        try:
            for p in processing:
                p.get()  # Will internally do the wait() as well.
        except HTTPError as e:
            raise BadParameter((
                'Could not process {0}.\n'
                'URI not found.'.format(e.request.url)
            ))
        pool.close()

        dataset_name = name or dataset_.display_name
        if write_dataset(client, dataset_name):
            add_to_dataset(
                client,
                urls=[str(p) for p in Path(data_folder).glob('*')],
                name=dataset_name,
                with_metadata=dataset_
            )

            click.secho('OK', fg='green')
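The download_file worker the pool dispatches to is not shown in this section. Below is a hedged sketch of what such a worker could look like, assuming each file_ object exposes hypothetical url and filename attributes; it reuses the per-worker current_process_position set by _init to pin each tqdm bar to a fixed terminal line, and lets requests raise the HTTPError that the parent catches via p.get():

# A sketch under stated assumptions, not renku's actual download_file:
# ``file_.url`` and ``file_.filename`` are hypothetical attribute names,
# and archive extraction (the ``extract`` flag) is omitted.
import os

import requests
from tqdm import tqdm

current_process_position = 0  # Overridden per worker by _init's id_queue.get().


def download_file(extract, data_folder, file_):
    """Stream one file into ``data_folder`` with a positioned progress bar."""
    response = requests.get(file_.url, stream=True)
    # Raises requests.HTTPError on 4xx/5xx; it propagates to the parent
    # process when it calls ``p.get()`` on the async result.
    response.raise_for_status()

    total = int(response.headers.get('content-length', 0))
    destination = os.path.join(data_folder, file_.filename)

    with tqdm(
        total=total,
        unit='B',
        unit_scale=True,
        desc=file_.filename,
        position=current_process_position,  # Fixed line per worker.
        leave=False,
    ) as progressbar, open(destination, 'wb') as dst:
        for chunk in response.iter_content(chunk_size=16384):
            dst.write(chunk)
            progressbar.update(len(chunk))

Bounding position by the worker index (rather than the file index, as in the earlier enumerate-based version) is what keeps the display to at most pool_size progress bars when there are more files than terminal rows.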