def _submit_fn(self, userfn, *args, **kwds):
    ctx = self._get_ctx()
    result = False
    retry_cnt = 0
    while True:
        try:
            if ctx is not None:
                result = userfn(ctx, *args, **kwds)
            else:
                result = userfn(*args, **kwds)
        except Exception as e:
            if retry_cnt < self._retry:
                retry_cnt += 1
                log.warn(output_messages['WARN_WORKER_EXCEPTION'] % (e, retry_cnt), class_name=POOL_CLASS_NAME)
                self._retry_wait(retry_cnt)
                continue
            else:
                log.error(output_messages['ERROR_WORKER_FAILURE'] % (e, retry_cnt), class_name=POOL_CLASS_NAME)
                self._release_ctx(ctx)
                raise e
        break
    log.debug(output_messages['DEBUG_WORKER_SUCESS'] % (retry_cnt + 1), class_name=POOL_CLASS_NAME)
    self._release_ctx(ctx)
    self._progress()
    return result

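# `_retry_wait` is called above but not shown in this excerpt. A minimal sketch
# of a compatible backoff helper, assuming exponential backoff with jitter
# (the delay constants are illustrative, not ml-git's actual values):
import random
import time

def _retry_wait(self, retry_cnt, base_delay=0.5, max_delay=30.0):
    # Sleep base_delay * 2^(retry_cnt - 1) seconds, capped at max_delay,
    # with up to 25% random jitter so concurrent workers do not retry in sync.
    delay = min(base_delay * (2 ** (retry_cnt - 1)), max_delay)
    time.sleep(delay * (1 + random.uniform(0, 0.25)))
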
def remote_add(repotype, ml_git_remote, global_conf=False):
    file = get_config_path(global_conf)
    conf = yaml_load(file)
    if repotype in conf:
        if conf[repotype]['git'] is None or len(conf[repotype]['git']) == 0:
            log.info(output_messages['INFO_ADD_REMOTE'] % (ml_git_remote, repotype), class_name=ADMIN_CLASS_NAME)
        else:
            log.warn(output_messages['WARN_HAS_CONFIGURED_REMOTE'], class_name=ADMIN_CLASS_NAME)
            log.info(output_messages['INFO_CHANGING_REMOTE'] % (conf[repotype]['git'], ml_git_remote, repotype),
                     class_name=ADMIN_CLASS_NAME)
    else:
        log.info(output_messages['INFO_ADD_REMOTE'] % (ml_git_remote, repotype), class_name=ADMIN_CLASS_NAME)
    try:
        conf[repotype]['git'] = ml_git_remote
    except Exception:
        conf[repotype] = {}
        conf[repotype]['git'] = ml_git_remote
    yaml_save(conf, file)

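# Illustrative call (the remote URL is hypothetical): point the 'dataset'
# metadata repository at a Git remote, creating the config section if needed.
remote_add('dataset', 'git@github.com:example/mlgit-datasets.git')
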
def checkout(self, tag, samples, options):
    try:
        metadata_path = get_metadata_path(self.__config)
    except RootPathException as e:
        log.warn(e, class_name=REPOSITORY_CLASS_NAME)
        metadata_path = self._initialize_repository_on_the_fly()
    dt_tag, lb_tag = self._checkout(tag, samples, options)
    options['with_dataset'] = False
    options['with_labels'] = False
    if dt_tag is not None:
        try:
            self.__repo_type = 'dataset'
            m = Metadata('', metadata_path, self.__config, self.__repo_type)
            log.info('Initializing related dataset download', class_name=REPOSITORY_CLASS_NAME)
            if not m.check_exists():
                m.init()
            self._checkout(dt_tag, samples, options)
        except Exception as e:
            log.error('LocalRepository: [%s]' % e, class_name=REPOSITORY_CLASS_NAME)
    if lb_tag is not None:
        try:
            self.__repo_type = 'labels'
            m = Metadata('', metadata_path, self.__config, self.__repo_type)
            log.info('Initializing related labels download', class_name=REPOSITORY_CLASS_NAME)
            if not m.check_exists():
                m.init()
            self._checkout(lb_tag, samples, options)
        except Exception as e:
            log.error('LocalRepository: [%s]' % e, class_name=REPOSITORY_CLASS_NAME)

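# Illustrative call (tag and option values are hypothetical): ml-git tags
# follow a '<categories>__<entity-name>__<version>' convention, and the options
# dict carries the flags that control the related dataset/labels downloads.
repo.checkout('computer-vision__images__imagenet8__1', samples=None,
              options={'with_dataset': True, 'with_labels': False})
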
def _update_file_status(self, cache, filepath, fullpath, scid, st, value):
    status = Status.a.name
    prev_hash = value['hash']
    scid_ret = scid
    is_flexible = self._mutability == Mutability.FLEXIBLE.value
    is_strict = self._mutability == Mutability.STRICT.value
    not_unlocked = value['mtime'] != st.st_mtime and 'untime' not in value
    bare_mode = os.path.exists(os.path.join(self._path, 'metadata', self._spec, 'bare'))
    if (is_flexible and not_unlocked) or is_strict:
        status = Status.c.name
        prev_hash = None
        scid_ret = None
        file_path = Cache(cache).get_keypath(value['hash'])
        if os.path.exists(file_path):
            os.unlink(file_path)
    elif bare_mode and self._mutability == Mutability.MUTABLE.value:
        print('\n')
        log.warn('The file %s already exists in the repository. If you commit, the'
                 ' file will be overwritten.' % filepath, class_name=MULTI_HASH_CLASS_NAME)
    self.update_full_index(posix_path(filepath), fullpath, status, scid, prev_hash)
    return scid_ret

def storage_factory(config, storage_string):
    storages = {StorageType.S3.value: S3Storage,
                StorageType.S3H.value: S3MultihashStorage,
                StorageType.AZUREBLOBH.value: AzureMultihashStorage,
                StorageType.GDRIVEH.value: GoogleDriveMultihashStorage,
                StorageType.GDRIVE.value: GoogleDriveStorage,
                StorageType.SFTPH.value: SFtpStorage}
    sp = storage_string.split('/')
    config_bucket_name, bucket_name = None, None
    try:
        storage_type = sp[0][:-1]
        bucket_name = sp[2]
        config_bucket_name = []
        log.debug(output_messages['DEBUG_STORAGE_AND_BUCKET'] % (storage_type, bucket_name),
                  class_name=STORAGE_FACTORY_CLASS_NAME)
        for k in config[STORAGE_CONFIG_KEY][storage_type]:
            config_bucket_name.append(k)
        if bucket_name not in config_bucket_name:
            log.warn(output_messages['WARN_EXCPETION_CREATING_STORAGE'] % (bucket_name, storage_type, config_bucket_name),
                     class_name=STORAGE_FACTORY_CLASS_NAME)
            return None
        bucket = config[STORAGE_CONFIG_KEY][storage_type][bucket_name]
        return storages[storage_type](bucket_name, bucket)
    except ProfileNotFound as pfn:
        log.error(pfn, class_name=STORAGE_FACTORY_CLASS_NAME)
        return None

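# The split above assumes `storage_string` is an ml-git storage URI. A worked
# example of just the parsing (the URI value is illustrative):
storage_string = 's3h://my-bucket/some/prefix'
sp = storage_string.split('/')   # ['s3h:', '', 'my-bucket', 'some', 'prefix']
storage_type = sp[0][:-1]        # 's3h' -- drops the trailing ':'
bucket_name = sp[2]              # 'my-bucket'
assert (storage_type, bucket_name) == ('s3h', 'my-bucket')
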
def _update_file_status(self, cache, filepath, fullpath, scid, st, value):
    status = Status.a.name
    prev_hash = value['hash']
    scid_ret = scid
    is_flexible = self._mutability == MutabilityType.FLEXIBLE.value
    is_strict = self._mutability == MutabilityType.STRICT.value
    not_unlocked = value['mtime'] != st.st_mtime and 'untime' not in value
    bare_mode = os.path.exists(os.path.join(self._path, 'metadata', self._spec, 'bare'))
    if (is_flexible and not_unlocked) or is_strict:
        if value['status'] == Status.c.name and 'previous_hash' in value:
            prev_hash = value['previous_hash']
            if scid == prev_hash:
                prev_hash = None
                status = Status.u.name
                log.debug(output_messages['DEBUG_RESTORED_FILE'].format(posix_path(filepath)),
                          class_name=MULTI_HASH_CLASS_NAME)
        else:
            status = Status.c.name
            scid_ret = None
            file_path = Cache(cache).get_keypath(value['hash'])
            if os.path.exists(file_path):
                os.unlink(file_path)
    elif bare_mode and self._mutability == MutabilityType.MUTABLE.value:
        print('\n')
        log.warn(output_messages['WARN_FILE_EXISTS_IN_REPOSITORY'] % filepath, class_name=MULTI_HASH_CLASS_NAME)
    self.update_full_index(posix_path(filepath), fullpath, status, scid, prev_hash)
    return scid_ret

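# The one-letter statuses above come from a Status enum not shown in this
# excerpt. A compatible sketch, with member meanings inferred from how they are
# used here (an assumption, not ml-git's actual definition):
from enum import Enum

class Status(Enum):
    u = 1  # unchanged/restored: content matches what the index already tracks
    a = 2  # added: new or legitimately modified content, pending commit
    c = 3  # corrupted: changed without unlock under flexible/strict mutability
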
def _submit_fn(self, userfn, *args, **kwds):
    ctx = self._get_ctx()
    result = False
    retry_cnt = 0
    while True:
        try:
            if ctx is not None:
                result = userfn(ctx, *args, **kwds)
            else:
                result = userfn(*args, **kwds)
        except Exception as e:
            if retry_cnt < self._retry:
                retry_cnt += 1
                log.warn('Worker exception - [%s] -- retry [%d]' % (e, retry_cnt), class_name=POOL_CLASS_NAME)
                self._retry_wait(retry_cnt)
                continue
            else:
                log.error('Worker failure - [%s] -- [%d] attempts' % (e, retry_cnt), class_name=POOL_CLASS_NAME)
                self._release_ctx(ctx)
                raise e
        break
    log.debug('Worker success at attempt [%d]' % (retry_cnt + 1), class_name=POOL_CLASS_NAME)
    self._release_ctx(ctx)
    self._progress()
    return result

def store_factory(config, store_string):
    stores = {StoreType.S3.value: S3Store,
              StoreType.S3H.value: S3MultihashStore,
              StoreType.AZUREBLOBH.value: AzureMultihashStore,
              StoreType.GDRIVEH.value: GoogleDriveMultihashStore,
              StoreType.GDRIVE.value: GoogleDriveStore}
    sp = store_string.split('/')
    config_bucket_name, bucket_name = None, None
    try:
        store_type = sp[0][:-1]
        bucket_name = sp[2]
        config_bucket_name = []
        log.debug('Store [%s] ; bucket [%s]' % (store_type, bucket_name), class_name=STORE_FACTORY_CLASS_NAME)
        for k in config['store'][store_type]:
            config_bucket_name.append(k)
        if bucket_name not in config_bucket_name:
            log.warn('Exception creating store -- Configuration not found for bucket [%s]. '
                     'The available buckets in config file for store type [%s] are: %s'
                     % (bucket_name, store_type, config_bucket_name), class_name=STORE_FACTORY_CLASS_NAME)
            return None
        bucket = config['store'][store_type][bucket_name]
        return stores[store_type](bucket_name, bucket)
    except ProfileNotFound as pfn:
        log.error(pfn, class_name=STORE_FACTORY_CLASS_NAME)
        return None

def storage_del(storage_type, bucket, global_conf=False):
    if not valid_storage_type(storage_type):
        return
    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as e:
        log.error(e, class_name=ADMIN_CLASS_NAME)
        return
    storage_exists = (STORAGE_CONFIG_KEY in conf
                      and storage_type in conf[STORAGE_CONFIG_KEY]
                      and bucket in conf[STORAGE_CONFIG_KEY][storage_type])
    if not storage_exists:
        log.warn(output_messages['WARN_STORAGE_NOT_IN_CONFIG'] % (storage_type, bucket), class_name=ADMIN_CLASS_NAME)
        return
    del conf[STORAGE_CONFIG_KEY][storage_type][bucket]
    log.info(output_messages['INFO_REMOVED_STORAGE'] % (storage_type, bucket), class_name=ADMIN_CLASS_NAME)
    yaml_save(conf, config_path)

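# Shape of the configuration that storage_del edits, assuming STORAGE_CONFIG_KEY
# names the storages section of config.yaml (the section key, bucket name and
# credentials below are illustrative):
conf = {
    'storages': {
        's3h': {
            'my-bucket': {'aws-credentials': {'profile': 'default'}, 'region': 'us-east-1'},
        },
    },
}
# storage_del('s3h', 'my-bucket') would delete conf['storages']['s3h']['my-bucket'].
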
def store_del(store_type, bucket, global_conf=False):
    if not valid_store_type(store_type):
        return
    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as e:
        log.error(e, class_name=ADMIN_CLASS_NAME)
        return
    store_exists = ('store' in conf
                    and store_type in conf['store']
                    and bucket in conf['store'][store_type])
    if not store_exists:
        log.warn('Store [%s://%s] not found in configuration file.' % (store_type, bucket), class_name=ADMIN_CLASS_NAME)
        return
    del conf['store'][store_type][bucket]
    log.info('Removed store [%s://%s] from configuration file.' % (store_type, bucket), class_name=ADMIN_CLASS_NAME)
    yaml_save(conf, config_path)

def get_entity_tag(specpath, repotype, entity):
    entity_tag = None
    try:
        spec = yaml_load(specpath)
        entity_tag = spec[repotype][entity]['tag']
    except Exception:
        log.warn('Repository: the %s does not exist for related download.' % entity)
    return entity_tag

def initialize_metadata(self, entity_type):
    super(Metadata, self).__init__(self.__config, entity_type)
    try:
        self.init()
    except Exception as e:
        log.warn('Could not initialize metadata for %s. %s' % (entity_type, e), class_name=METADATA_CLASS_NAME)

def initialize_metadata(self, entity_type):
    super(Metadata, self).__init__(self.__config, entity_type)
    try:
        self.init()
    except Exception as e:
        log.warn(output_messages['WARN_CANNOT_INITIALIZE_METADATA_FOR'] % (entity_type, e),
                 class_name=METADATA_CLASS_NAME)

def _get_user_input(message, default=None, required=False):
    value = input(message)
    if not value.strip():
        if required:
            log.warn(output_messages['ERROR_EMPTY_VALUE'])
            return _get_user_input(message, default, required)
        return default
    return value

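# Illustrative usage (prompt strings are hypothetical): a required prompt
# re-asks while the answer is blank; an optional one falls back to its default.
bucket = _get_user_input('Bucket name: ', required=True)
region = _get_user_input('Region [us-east-1]: ', default='us-east-1')
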
def get_entity_tag(specpath, repo_type, entity):
    entity_tag = None
    entity_spec_key = get_spec_key(repo_type)
    try:
        spec = yaml_load(specpath)
        related_entity_spec_key = get_spec_key(entity)
        entity_tag = spec[entity_spec_key][related_entity_spec_key]['tag']
    except Exception:
        log.warn(output_messages['WARN_NOT_EXIST_FOR_RELATED_DOWNLOAD'] % entity)
    return entity_tag

def process(value, state):
    import inspect
    # Peek at the caller's frame to recover which option spelling ('opt') the
    # click parser is currently processing; drop the frame reference afterwards
    # to avoid a reference cycle.
    frame = inspect.currentframe()
    try:
        opt = frame.f_back.f_locals.get('opt')
    finally:
        del frame
    if opt in deprecated:
        msg = "'{}' has been deprecated, use '{}' instead;"
        log.warn(msg.format(opt, preferred))
    return orig_process(value, state)

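# `process` closes over `deprecated`, `preferred` and `orig_process`, so it is
# presumably built inside a wrapper that monkey-patches click's parser options.
# A self-contained sketch of that wiring, following the widely circulated click
# deprecation recipe (the class name and the `deprecated`/`preferred` option
# attributes are assumptions, not necessarily ml-git's exact code):
import inspect
import click

class DeprecatedOptionsCommand(click.Command):
    def make_parser(self, ctx):
        parser = super(DeprecatedOptionsCommand, self).make_parser(ctx)
        for option in set(parser._short_opt.values()) | set(parser._long_opt.values()):
            deprecated = getattr(option.obj, 'deprecated', None)
            preferred = getattr(option.obj, 'preferred', None)
            if not deprecated:
                continue
            orig_process = option.process

            def process(value, state, orig_process=orig_process,
                        deprecated=deprecated, preferred=preferred):
                # Same frame trick as above: recover the spelling the user
                # typed, warn if it is deprecated, then delegate.
                frame = inspect.currentframe()
                try:
                    opt = frame.f_back.f_locals.get('opt')
                finally:
                    del frame
                if opt in deprecated:
                    log.warn("'{}' has been deprecated, use '{}' instead;".format(opt, preferred))
                return orig_process(value, state)

            option.process = process
        return parser
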
def _check_corrupted_files(self, spec, repo):
    try:
        corrupted_files = repo.get_corrupted_files(spec)
        if corrupted_files is not None and len(corrupted_files) > 0:
            print('\n')
            log.warn('The following files cannot be added because they are corrupted:',
                     class_name=REPOSITORY_CLASS_NAME)
            for file in corrupted_files:
                print('\t %s' % file)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return

def __init_manager(self, type_entity):
    try:
        get_root_path()
        config = config_load()
        if not config[type_entity]['git']:
            log.warn(output_messages['WARN_REPOSITORY_NOT_FOUND_FOR_ENTITY'] % type_entity,
                     class_name=LocalEntityManager.__name__)
            return
        self._manager = MetadataManager(config, repo_type=type_entity)
        if not self._manager.check_exists():
            self._manager.init()
    except Exception as e:
        log.error(e, class_name=LocalEntityManager.__name__)

def add(self, path, manifestpath, files=[]):
    self.wp = pool_factory(pb_elts=0, pb_desc='files')
    if len(files) > 0:
        single_files = filter(lambda x: os.path.isfile(os.path.join(path, x)), files)
        self.wp.progress_bar_total_inc(len(list(single_files)))
        for f in files:
            fullpath = os.path.join(path, f)
            if os.path.isdir(fullpath):
                self._add_dir(path, manifestpath, f)
            elif os.path.isfile(fullpath):
                self._add_single_file(path, manifestpath, f)
            else:
                log.warn('[%s] Not found!' % fullpath, class_name=MULTI_HASH_CLASS_NAME)
    else:
        if os.path.isdir(path):
            self._add_dir(path, manifestpath)
    self.wp.progress_bar_close()

def handle_parse_result(self, ctx, opts, args):
    using_required_option = self.name in opts
    using_dependent_options = all(opt.replace('-', '_') in opts for opt in self.required_option)
    option_name = self.name.replace('_', '-')
    if not using_required_option and using_dependent_options:
        msg = output_messages['ERROR_REQUIRED_OPTION_MISSING'].format(
            option_name, ', '.join(self.required_option), option_name)
        if not is_wizard_enabled():
            raise MissingParameter(ctx=ctx, param=self, message=msg)
        requested_value = wizard_for_field(ctx, None, msg, required=True)
        opts[self.name] = requested_value
        return super(OptionRequiredIf, self).handle_parse_result(ctx, opts, args)
    elif using_required_option and not using_dependent_options:
        log.warn(output_messages['WARN_USELESS_OPTION'].format(option_name, ', '.join(self.required_option)))
    return super(OptionRequiredIf, self).handle_parse_result(ctx, opts, args)

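# Illustrative wiring (option names are hypothetical, and this assumes
# OptionRequiredIf's constructor accepts a `required_option` keyword, which is
# not shown in this excerpt): --credentials only makes sense together with
# --storage-type.
import click

@click.command()
@click.option('--storage-type', default=None)
@click.option('--credentials', cls=OptionRequiredIf, required_option=['storage-type'])
def storage_add(storage_type, credentials):
    click.echo('%s / %s' % (storage_type, credentials))
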
def store_factory(config, store_string):
    stores = {StoreType.S3.value: S3Store,
              StoreType.S3H.value: S3MultihashStore,
              StoreType.AZUREBLOBH.value: AzureMultihashStore,
              StoreType.GDRIVEH.value: GoogleDriveMultihashStore,
              StoreType.GDRIVE.value: GoogleDriveStore}
    sp = store_string.split('/')
    config_bucket_name, bucket_name = None, None
    try:
        store_type = sp[0][:-1]
        bucket_name = sp[2]
        config_bucket_name = []
        log.debug('Store [%s] ; bucket [%s]' % (store_type, bucket_name), class_name=STORE_FACTORY_CLASS_NAME)
        for k in config['store'][store_type]:
            config_bucket_name.append(k)
        bucket = config['store'][store_type][bucket_name]
        return stores[store_type](bucket_name, bucket)
    except ProfileNotFound as pfn:
        log.error(pfn, class_name=STORE_FACTORY_CLASS_NAME)
        return None
    except Exception:
        log.warn('Exception creating store -- bucket name conflicting between config file [%s] and spec file [%s]'
                 % (config_bucket_name, bucket_name), class_name=STORE_FACTORY_CLASS_NAME)
        return None

def add(self, path, manifestpath, files=[]):
    self.wp = pool_factory(pb_elts=0, pb_desc='files')
    ignore_rules = get_ignore_rules(path)
    if len(files) > 0:
        single_files = filter(lambda x: os.path.isfile(os.path.join(path, x)), files)
        self.wp.progress_bar_total_inc(len(list(single_files)))
        for f in files:
            fullpath = os.path.join(path, f)
            if os.path.isdir(fullpath):
                self._add_dir(path, manifestpath, f, ignore_rules=ignore_rules)
            elif os.path.isfile(fullpath):
                # Match the ignore rules against the file being added, not the
                # root path, so individual files can actually be skipped.
                if not should_ignore_file(ignore_rules, f):
                    self._add_single_file(path, manifestpath, f)
            else:
                log.warn(output_messages['WARN_NOT_FOUND'] % fullpath, class_name=MULTI_HASH_CLASS_NAME)
    else:
        if os.path.isdir(path):
            self._add_dir(path, manifestpath, ignore_rules=ignore_rules)
    self.wp.progress_bar_close()

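# Hypothetical call (paths are illustrative): index two explicit entries under
# the entity root; directories are walked, regular files are hashed directly,
# and anything matching the ignore rules is skipped.
index.add('/data/my-dataset', '/data/.ml-git/manifest', files=['imgs', 'labels.csv'])
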
def check_and_update(self, key, value, hfs, filepath, fullpath, cache):
    st = os.stat(fullpath)
    if key == filepath and value['ctime'] == st.st_ctime and value['mtime'] == st.st_mtime:
        log.debug('File [%s] already exists in ml-git repository' % filepath, class_name=MULTI_HASH_CLASS_NAME)
        return None
    elif key == filepath and (value['ctime'] != st.st_ctime or value['mtime'] != st.st_mtime):
        log.debug('File [%s] was modified' % filepath, class_name=MULTI_HASH_CLASS_NAME)
        scid = hfs.get_scid(fullpath)
        if value['hash'] != scid:
            status = Status.a.name
            prev_hash = value['hash']
            scid_ret = scid
            is_flexible = self._mutability == Mutability.FLEXIBLE.value
            is_strict = self._mutability == Mutability.STRICT.value
            not_unlocked = value['mtime'] != st.st_mtime and 'untime' not in value
            bare_mode = os.path.exists(os.path.join(self._path, 'metadata', self._spec, 'bare'))
            if (is_flexible and not_unlocked) or is_strict:
                status = Status.c.name
                prev_hash = None
                scid_ret = None
                file_path = Cache(cache).get_keypath(value['hash'])
                if os.path.exists(file_path):
                    os.unlink(file_path)
            elif bare_mode and self._mutability == Mutability.MUTABLE.value:
                print('\n')
                log.warn('The file %s already exists in the repository. If you commit, the'
                         ' file will be overwritten.' % filepath, class_name=MULTI_HASH_CLASS_NAME)
            self.update_full_index(posix_path(filepath), fullpath, status, scid, prev_hash)
            return scid_ret
    return None