Example #1
File: save.py Project: ypid/datalad
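This excerpt is a single method from datalad's `save` command implementation, so the module-level imports are not shown. For orientation, the identifiers it uses would be satisfied by imports roughly like the following (module paths are assumptions inferred from the identifiers, not verified against this exact revision of the project):

# assumed imports -- exact module paths may differ between datalad versions
from functools import partial

import datalad.utils as ut                       # provides ut.Path
from datalad.utils import ensure_list
from datalad.distribution.dataset import Dataset, require_dataset
from datalad.support.exceptions import CommandError
# Status, get_tree_roots, discover_dataset_trace_to_targets, no_subds_in_futures,
# ProducerConsumerProgressLog, _log_filter_save_dataset and lgr are further
# datalad-internal helpers imported elsewhere in save.py
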
    def __call__(
        path=None,
        message=None,
        dataset=None,
        version_tag=None,
        recursive=False,
        recursion_limit=None,
        updated=False,
        message_file=None,
        to_git=None,
        jobs=None,
        amend=False,
    ):
        if message and message_file:
            raise ValueError(
                "Both a message and message file were specified for save()")

        if amend and recursive:
            raise ValueError("Cannot amend a commit recursively.")

        path = ensure_list(path)

        if message_file:
            with open(message_file) as mfh:
                message = mfh.read()

        # we want 'normal' to achieve the most compact argument list
        # for git calls
        # untracked_mode = 'no' if updated else 'normal'
        # TODO however, Repo.add() would refuse to add any dotfiles
        # in a directory that is itself untracked, hence the only
        # choice is to go with potentially crazy long lists
        # until https://github.com/datalad/datalad/issues/1454
        # has a resolution
        untracked_mode = 'no' if updated else 'all'

        # there are three basic scenarios:
        # 1. save modifications to any already tracked content
        # 2. save any content (including removal of deleted content)
        #    to bring things to a clean state
        # 3. like (2), but only operate on a given subset of content
        #    identified by paths
        # - all three have to work in conjunction with --recursive
        # - the difference between (1) and (2) should be no more
        #   than a switch from --untracked=no to --untracked=all
        #   in Repo.save()
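        # (roughly, on the command line: scenario (1) is `datalad save -u`,
        #  scenario (2) a plain `datalad save`, and scenario (3)
        #  `datalad save <path>...`)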

        # we do not support
        # - simultaneous operations on multiple datasets from disjoint
        #   dataset hierarchies; hence a single reference dataset must be
        #   identifiable from either
        #   - the current directory, or
        #   - the `dataset` argument.
        #   This avoids complex annotation loops and hierarchy tracking.
        # - any modification upwards from the root dataset
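        # (e.g. invoking save() with dataset=None from within /tmp/super makes
        #  /tmp/super the single reference dataset; the path is illustrative only)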

        ds = require_dataset(dataset, check_installed=True, purpose='saving')

        # use status() to do all discovery and annotation of paths
        paths_by_ds = {}
        for s in Status()(
                # ATTN: it is vital to pass the `dataset` argument as is,
                # and not a Dataset instance, in order to maintain the path
                # semantics between here and the status() call
                dataset=dataset,
                path=path,
                untracked=untracked_mode,
                report_filetype=False,
                recursive=recursive,
                recursion_limit=recursion_limit,
                on_failure='ignore',
                # for save without recursion only commit matters
                eval_subdataset_state='full' if recursive else 'commit',
                result_renderer='disabled'):
            if s['status'] == 'error':
                # Downstream code can't do anything with these. Let the caller
                # decide their fate.
                yield s
                continue

            # fish out status dict for this parent dataset
            ds_status = paths_by_ds.get(s['parentds'], {})
            # reassemble path status info as repo.status() would have made it
            ds_status[ut.Path(s['path'])] = \
                {k: v for k, v in s.items()
                 if k not in (
                     'path', 'parentds', 'refds', 'status', 'action',
                     'logger')}
            paths_by_ds[s['parentds']] = ds_status

        lgr.debug('Determined %i datasets for saving from input arguments',
                  len(paths_by_ds))
        # figure out what datasets to process, start with the ones containing
        # the paths that were given as arguments
        discovered_datasets = list(paths_by_ds.keys())
        if dataset:
            # if a reference dataset was given we want to save all the way up
            # to it, so let's throw it into the mix
            discovered_datasets.append(ds.path)
        # sort the datasets into (potentially) disjoint hierarchies,
        # or a single one, if a reference dataset was given
        dataset_hierarchies = get_tree_roots(discovered_datasets)
        for rootds, children in dataset_hierarchies.items():
            edges = {}
            discover_dataset_trace_to_targets(rootds,
                                              children, [],
                                              edges,
                                              includeds=children)
            for superds, subdss in edges.items():
                superds_status = paths_by_ds.get(superds, {})
                for subds in subdss:
                    subds_path = ut.Path(subds)
                    sub_status = superds_status.get(subds_path, {})
                    if not (sub_status.get("state") == "clean"
                            and sub_status.get("type") == "dataset"):
                        # TODO actually start from an entry that may already
                        # exist in the status record
                        superds_status[subds_path] = dict(
                            # shot from the hip, some status config
                            # to trigger this specific super/sub
                            # relation to be saved
                            state='untracked',
                            type='dataset')
                paths_by_ds[superds] = superds_status

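        # at this point `paths_by_ds` maps each parent dataset path to a dict
        # keyed by content path; illustrative shape only:
        #   {'/tmp/super': {Path('/tmp/super/file.txt'):
        #                       {'state': 'modified', 'type': 'file', ...},
        #                   Path('/tmp/super/subds'):
        #                       {'state': 'untracked', 'type': 'dataset'}}}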
        def save_ds(args, version_tag=None):
            pdspath, paths = args

            pds = Dataset(pdspath)
            pds_repo = pds.repo
            # pop status for this dataset; we are not coming back to it
            pds_status = {
                # for handing over to the low-level code, we recode any
                # path to be relative to the real repo location; this avoids
                # cumbersome symlink handling without context in the
                # lower levels
                pds_repo.pathobj / p.relative_to(pdspath): props
                for p, props in paths.items()
            }
            start_commit = pds_repo.get_hexsha()
            if not all(p['state'] == 'clean' for p in pds_status.values()) or \
                    (amend and message):
                for res in pds_repo.save_(
                        message=message,
                        # make sure the `path` arg is None, as we want
                        # to bypass any additional repo.status()
                        # calls
                        paths=None,
                        # prevent whining of GitRepo
                        git=True
                        if not hasattr(ds.repo, 'annexstatus') else to_git,
                        # we are supplying the full status already, do not
                        # detect anything else
                        untracked='no',
                        _status=pds_status,
                        amend=amend):
                    # TODO remove stringification when datalad-core can handle
                    # path objects, or when PY3.6 is the lowest supported
                    # version
                    for k in ('path', 'refds'):
                        if k in res:
                            res[k] = str(
                                # recode path back to dataset path anchor
                                pds.pathobj /
                                res[k].relative_to(pds_repo.pathobj))
                    yield res
            # report on the dataset itself
            dsres = dict(
                action='save',
                type='dataset',
                path=pds.path,
                refds=ds.path,
                status='ok'
                if start_commit != pds_repo.get_hexsha() else 'notneeded',
                logger=lgr,
            )
            if not version_tag:
                yield dsres
                return
            try:
                # method requires str
                version_tag = str(version_tag)
                pds_repo.tag(version_tag)
                dsres.update(status='ok', version_tag=version_tag)
                yield dsres
            except CommandError as e:
                if dsres['status'] == 'ok':
                    # first we yield the result for the actual save
                    # TODO: we will get a duplicate dataset/save record obscuring
                    # progress reporting.  yoh thought to decouple "tag" from "save"
                    # messages but was worried that the original authors would disagree
                    yield dsres.copy()
                # and now complain that tagging didn't work
                dsres.update(status='error',
                             message=('cannot tag this version: %s',
                                      e.stderr.strip()))
                yield dsres

        if not paths_by_ds:
            # Special case: empty repo. There is either a single empty commit or
            # none at all. An empty one we can amend; otherwise there is nothing
            # to do.
            if amend and ds.repo.get_hexsha():
                yield from save_ds((ds.pathobj, dict()),
                                   version_tag=version_tag)

            else:
                yield dict(action='save',
                           type='dataset',
                           path=ds.path,
                           refds=ds.path,
                           status='notneeded',
                           logger=lgr)
            return

        # TODO: in principle logging could be improved to go not by dataset
        # but by path(s) within subdatasets. That should provide a somewhat better ETA
        # and more "dynamic" feedback than a jumpy datasets count.
        # See addurls, where it is implemented that way by providing agg and another
        # log_filter
        yield from ProducerConsumerProgressLog(
            sorted(paths_by_ds.items(), key=lambda v: v[0], reverse=True),
            partial(save_ds, version_tag=version_tag),
            safe_to_consume=no_subds_in_futures,
            producer_future_key=lambda ds_items: ds_items[0],
            jobs=jobs,
            log_filter=_log_filter_save_dataset,
            unit="datasets",
            lgr=lgr,
        )
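
The method above backs datalad's public `save` command; the same parameters are exposed through the Python API. A minimal usage sketch, with made-up paths and messages, could look like this:

import datalad.api as dl

# save modifications across the dataset and all subdatasets, tag the result,
# and let ProducerConsumerProgressLog parallelize the per-dataset commits;
# roughly equivalent to `datalad save -r -m ... --version-tag ... -J 2`
dl.save(
    dataset='.',
    message='Update analysis scripts',
    recursive=True,
    version_tag='v0.2',
    jobs=2,
)
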
Example #2
def _recursive_install_subds_underneath(ds,
                                        recursion_limit,
                                        reckless,
                                        start=None,
                                        refds_path=None,
                                        description=None,
                                        jobs=None,
                                        producer_only=False):
    if isinstance(recursion_limit, int) and recursion_limit <= 0:
        return
    # install using a helper that gives some flexibility regarding where to
    # get the module from

    # Keep only paths, to not drag full Dataset instances along;
    # they are cheap to instantiate
    sub_paths_considered = []
    subs_notneeded = []

    def gen_subs_to_install():  # producer
        for sub in ds.subdatasets(path=start,
                                  return_type='generator',
                                  result_renderer='disabled'):
            sub_path = sub['path']
            sub_paths_considered.append(sub_path)
            if sub.get('gitmodule_datalad-recursiveinstall', '') == 'skip':
                lgr.debug(
                    "subdataset %s is configured to be skipped on recursive installation",
                    sub_path)
                continue
            # TODO: Yarik is lost among all the parentds, ds, start, refds_path variables, so is not
            # brave enough to assume any from the record, and thus passes "ds.path" along to the consumer
            yield ds.path, ReadOnlyDict(sub), recursion_limit

    def consumer(ds_path__sub__limit):
        ds_path, sub, recursion_limit = ds_path__sub__limit
        subds = Dataset(sub['path'])
        if sub.get('state', None) != 'absent':
            rec = get_status_dict('install',
                                  ds=subds,
                                  status='notneeded',
                                  logger=lgr,
                                  refds=refds_path)
            subs_notneeded.append(rec)
            yield rec
            # do not stop here: even if an intermediate dataset exists, that
            # does not imply that everything below it does too
        else:
            # TODO: here we need another "ds"!  is it within "sub"?
            yield from _install_subds_from_flexible_source(
                Dataset(ds_path),
                sub,
                reckless=reckless,
                description=description)

        if not subds.is_installed():
            # an error result was emitted, and the external consumer can decide
            # what to do with it, but there is no point in recursing into
            # something that should be there, but isn't
            lgr.debug('Subdataset %s could not be installed, skipped', subds)
            return

        # recurse
        # we can skip the `start` expression, since we already know we are within it
        for res in _recursive_install_subds_underneath(
                subds,
                recursion_limit=(recursion_limit - 1
                                 if isinstance(recursion_limit, int)
                                 else recursion_limit),
                reckless=reckless,
                refds_path=refds_path,
                jobs=jobs,
                producer_only=True  # we will be adding to producer queue
        ):
            producer_consumer.add_to_producer_queue(res)

    producer = gen_subs_to_install()
    if producer_only:
        yield from producer
    else:
        producer_consumer = ProducerConsumerProgressLog(
            producer,
            consumer,
            # no safe_to_consume= is needed since we are operating only at a single level ATM
            label="Installing",
            unit="datasets",
            jobs=jobs,
            lgr=lgr)
        yield from producer_consumer
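
This helper is driven by datalad's `get`/`install` machinery when recursive operation is requested. A minimal sketch of the public entry point that can end up in _recursive_install_subds_underneath() (the dataset path is a placeholder):

import datalad.api as dl

# install subdatasets two levels deep with several parallel jobs,
# without fetching file content
dl.get(
    '/path/to/superds',           # placeholder path
    recursive=True,
    recursion_limit=2,
    jobs=4,
    get_data=False,               # only install subdatasets, do not fetch content
)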