Exemplo n.º 1
0
    def _submit_fn(self, userfn, *args, **kwds):
        """Run *userfn*, retrying up to self._retry times on any exception.

        The worker context (when one exists) is passed as the first positional
        argument. The context is released and progress is reported on success;
        on final failure the context is released and the exception re-raised.
        """
        ctx = self._get_ctx()
        call_args = args if ctx is None else (ctx,) + args

        failures = 0
        result = False
        while True:
            try:
                result = userfn(*call_args, **kwds)
                break
            except Exception as e:
                if failures >= self._retry:
                    # Retry budget exhausted: give the context back and re-raise.
                    log.error(output_messages['ERROR_WORKER_FAILURE'] %
                              (e, failures),
                              class_name=POOL_CLASS_NAME)
                    self._release_ctx(ctx)
                    raise e
                failures += 1
                log.warn(output_messages['WARN_WORKER_EXCEPTION'] %
                         (e, failures),
                         class_name=POOL_CLASS_NAME)
                self._retry_wait(failures)

        log.debug(output_messages['DEBUG_WORKER_SUCESS'] % (failures + 1),
                  class_name=POOL_CLASS_NAME)
        self._release_ctx(ctx)
        self._progress()

        return result
Exemplo n.º 2
0
def remote_add(repotype, ml_git_remote, global_conf=False):
    """Set (or replace) the git remote for *repotype* in the ml-git config file.

    Args:
        repotype: entity section name in the configuration.
        ml_git_remote: git remote URL to store.
        global_conf: when True, edit the global config instead of the local one.
    """
    file = get_config_path(global_conf)
    conf = yaml_load(file)

    # Safe lookup: the original `conf[repotype]['git']` raised KeyError when the
    # section existed without a 'git' key, and TypeError when the section was
    # None (both can come out of a hand-edited YAML file).
    section = conf.get(repotype)
    current_remote = section.get('git') if isinstance(section, dict) else None

    if current_remote:
        log.warn(output_messages['WARN_HAS_CONFIGURED_REMOTE'],
                 class_name=ADMIN_CLASS_NAME)
        log.info(output_messages['INFO_CHANGING_REMOTE'] %
                 (current_remote, ml_git_remote, repotype),
                 class_name=ADMIN_CLASS_NAME)
    else:
        log.info(output_messages['INFO_ADD_REMOTE'] %
                 (ml_git_remote, repotype),
                 class_name=ADMIN_CLASS_NAME)

    if not isinstance(section, dict):
        section = {}
    section['git'] = ml_git_remote
    conf[repotype] = section
    yaml_save(conf, file)
Exemplo n.º 3
0
 def checkout(self, tag, samples, options):
     """Checkout *tag*, then best-effort checkout of related dataset/labels tags.

     When no root path exists yet, the metadata repository is initialized on
     the fly. Failures while fetching the related entities are logged, not
     raised.
     """
     try:
         metadata_path = get_metadata_path(self.__config)
     except RootPathException as e:
         log.warn(e, class_name=REPOSITORY_CLASS_NAME)
         metadata_path = self._initialize_repository_on_the_fly()
     dt_tag, lb_tag = self._checkout(tag, samples, options)
     # Related checkouts must not trigger further dataset/labels downloads.
     options['with_dataset'] = False
     options['with_labels'] = False
     if dt_tag is not None:
         try:
             # NOTE(review): mutates self.__repo_type for the nested checkout
             # and never restores it — confirm callers expect that.
             self.__repo_type = 'dataset'
             m = Metadata('', metadata_path, self.__config,
                          self.__repo_type)
             log.info('Initializing related dataset download',
                      class_name=REPOSITORY_CLASS_NAME)
             if not m.check_exists():
                 m.init()
             self._checkout(dt_tag, samples, options)
         except Exception as e:
             log.error('LocalRepository: [%s]' % e,
                       class_name=REPOSITORY_CLASS_NAME)
     if lb_tag is not None:
         try:
             self.__repo_type = 'labels'
             m = Metadata('', metadata_path, self.__config,
                          self.__repo_type)
             log.info('Initializing related labels download',
                      class_name=REPOSITORY_CLASS_NAME)
             if not m.check_exists():
                 m.init()
             self._checkout(lb_tag, samples, options)
         except Exception as e:
             log.error('LocalRepository: [%s]' % e,
                       class_name=REPOSITORY_CLASS_NAME)
Exemplo n.º 4
0
    def _update_file_status(self, cache, filepath, fullpath, scid, st, value):
        """Decide the index status for a modified file and record it.

        Returns the scid to keep, or None when the file is flagged as
        corrupted (strict mode, or flexible mode without an unlock).
        """
        mutability = self._mutability
        # 'untime' absent + mtime change means the file was touched without an unlock.
        changed_without_unlock = (value['mtime'] != st.st_mtime
                                  and 'untime' not in value)
        mark_corrupted = (mutability == Mutability.STRICT.value
                          or (mutability == Mutability.FLEXIBLE.value
                              and changed_without_unlock))
        bare_marker = os.path.join(self._path, 'metadata', self._spec, 'bare')

        if mark_corrupted:
            status = Status.c.name
            prev_hash = None
            scid_ret = None
            # Evict the stale cached copy of the previous content, if any.
            cached = Cache(cache).get_keypath(value['hash'])
            if os.path.exists(cached):
                os.unlink(cached)
        else:
            status = Status.a.name
            prev_hash = value['hash']
            scid_ret = scid
            if os.path.exists(bare_marker) and mutability == Mutability.MUTABLE.value:
                print('\n')
                log.warn(
                    'The file %s already exists in the repository. If you commit, the'
                    ' file will be overwritten.' % filepath,
                    class_name=MULTI_HASH_CLASS_NAME)

        self.update_full_index(posix_path(filepath), fullpath, status, scid,
                               prev_hash)
        return scid_ret
Exemplo n.º 5
0
def storage_factory(config, storage_string):
    """Instantiate the storage backend referenced by *storage_string*.

    *storage_string* has the form '<type>://<bucket>/...'; the bucket must be
    declared under the storage section of *config*. Returns the storage
    instance, or None when the bucket is not configured or the cloud profile
    cannot be found.
    """
    storages = {StorageType.S3.value: S3Storage, StorageType.S3H.value: S3MultihashStorage,
                StorageType.AZUREBLOBH.value: AzureMultihashStorage,
                StorageType.GDRIVEH.value: GoogleDriveMultihashStorage,
                StorageType.GDRIVE.value: GoogleDriveStorage,
                StorageType.SFTPH.value: SFtpStorage}
    sp = storage_string.split('/')
    config_bucket_name, bucket_name = None, None

    try:
        storage_type = sp[0][:-1]  # strip the trailing ':' from '<type>:'
        bucket_name = sp[2]
        log.debug(output_messages['DEBUG_STORAGE_AND_BUCKET'] % (storage_type, bucket_name), class_name=STORAGE_FACTORY_CLASS_NAME)
        # Idiomatic copy of the configured bucket names (was a manual append loop).
        config_bucket_name = list(config[STORAGE_CONFIG_KEY][storage_type])
        if bucket_name not in config_bucket_name:
            log.warn(output_messages['WARN_EXCPETION_CREATING_STORAGE'] % (
                bucket_name, storage_type, config_bucket_name), class_name=STORAGE_FACTORY_CLASS_NAME)
            return None
        bucket = config[STORAGE_CONFIG_KEY][storage_type][bucket_name]
        return storages[storage_type](bucket_name, bucket)
    except ProfileNotFound as pfn:
        log.error(pfn, class_name=STORAGE_FACTORY_CLASS_NAME)
        return None
Exemplo n.º 6
0
 def _update_file_status(self, cache, filepath, fullpath, scid, st, value):
     """Compute the new index status for a modified file and record it.

     Returns the scid to store, or None when the file must be flagged as
     corrupted (strict mode, or flexible mode without an unlock).
     """
     status = Status.a.name
     prev_hash = value['hash']
     scid_ret = scid
     is_flexible = self._mutability == MutabilityType.FLEXIBLE.value
     is_strict = self._mutability == MutabilityType.STRICT.value
     # 'untime' presumably marks a file explicitly unlocked by the user —
     # TODO confirm against the index writer.
     not_unlocked = value['mtime'] != st.st_mtime and 'untime' not in value
     bare_mode = os.path.exists(
         os.path.join(self._path, 'metadata', self._spec, 'bare'))
     if (is_flexible and not_unlocked) or is_strict:
         # A previously-corrupted file whose content again matches its old
         # hash was restored by the user: mark it unmodified ('u').
         if value['status'] == Status.c.name and 'previous_hash' in value:
             prev_hash = value['previous_hash']
             if scid == prev_hash:
                 prev_hash = None
                 status = Status.u.name
                 log.debug(output_messages['DEBUG_RESTORED_FILE'].format(
                     posix_path(filepath)),
                           class_name=MULTI_HASH_CLASS_NAME)
         else:
             # Illegally-changed content: flag as corrupted and evict the
             # stale entry from the cache.
             status = Status.c.name
             scid_ret = None
             file_path = Cache(cache).get_keypath(value['hash'])
             if os.path.exists(file_path):
                 os.unlink(file_path)
     elif bare_mode and self._mutability == MutabilityType.MUTABLE.value:
         print('\n')
         log.warn(output_messages['WARN_FILE_EXISTS_IN_REPOSITORY'] %
                  filepath,
                  class_name=MULTI_HASH_CLASS_NAME)
     self.update_full_index(posix_path(filepath), fullpath, status, scid,
                            prev_hash)
     return scid_ret
Exemplo n.º 7
0
    def _submit_fn(self, userfn, *args, **kwds):
        """Invoke *userfn* with retry; prepend the worker context when present.

        Releases the context and reports progress on success; on final failure
        the context is released and the last exception is re-raised.
        """
        ctx = self._get_ctx()

        attempt_failures = 0
        result = False
        while True:
            try:
                if ctx is None:
                    result = userfn(*args, **kwds)
                else:
                    result = userfn(ctx, *args, **kwds)
                break
            except Exception as e:
                if attempt_failures >= self._retry:
                    # Out of retries: surface the failure to the caller.
                    log.error('Worker failure - [%s] -- [%d] attempts' %
                              (e, attempt_failures),
                              class_name=POOL_CLASS_NAME)
                    self._release_ctx(ctx)
                    raise e
                attempt_failures += 1
                log.warn('Worker exception - [%s] -- retry [%d]' %
                         (e, attempt_failures),
                         class_name=POOL_CLASS_NAME)
                self._retry_wait(attempt_failures)

        log.debug('Worker success at attempt [%d]' % (attempt_failures + 1),
                  class_name=POOL_CLASS_NAME)
        self._release_ctx(ctx)
        self._progress()

        return result
Exemplo n.º 8
0
def store_factory(config, store_string):
    """Instantiate the store backend referenced by *store_string*.

    *store_string* has the form '<type>://<bucket>/...'; the bucket must be
    declared under config['store'][<type>]. Returns the store instance, or
    None when the bucket is unknown or the cloud profile cannot be found.
    """
    stores = {
        StoreType.S3.value: S3Store,
        StoreType.S3H.value: S3MultihashStore,
        StoreType.AZUREBLOBH.value: AzureMultihashStore,
        StoreType.GDRIVEH.value: GoogleDriveMultihashStore,
        StoreType.GDRIVE.value: GoogleDriveStore
    }
    sp = store_string.split('/')
    config_bucket_name, bucket_name = None, None

    try:
        store_type = sp[0][:-1]  # strip the trailing ':' from '<type>:'
        bucket_name = sp[2]
        log.debug('Store [%s] ; bucket [%s]' % (store_type, bucket_name),
                  class_name=STORE_FACTORY_CLASS_NAME)
        # Idiomatic copy of the configured bucket names (was a manual append loop).
        config_bucket_name = list(config['store'][store_type])
        if bucket_name not in config_bucket_name:
            log.warn(
                'Exception creating store -- Configuration not found for bucket [%s]. '
                'The available buckets in config file for store type [%s] are: %s'
                % (bucket_name, store_type, config_bucket_name),
                class_name=STORE_FACTORY_CLASS_NAME)
            return None
        bucket = config['store'][store_type][bucket_name]
        return stores[store_type](bucket_name, bucket)
    except ProfileNotFound as pfn:
        log.error(pfn, class_name=STORE_FACTORY_CLASS_NAME)
        return None
Exemplo n.º 9
0
def storage_del(storage_type, bucket, global_conf=False):
    """Delete the *bucket* entry for *storage_type* from the ml-git config."""
    if not valid_storage_type(storage_type):
        return

    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as e:
        log.error(e, class_name=ADMIN_CLASS_NAME)
        return

    # Resolve the nested section step by step; an absent level means the
    # bucket is not configured.
    section = conf[STORAGE_CONFIG_KEY] if STORAGE_CONFIG_KEY in conf else {}
    buckets = section[storage_type] if storage_type in section else {}
    if bucket not in buckets:
        log.warn(output_messages['WARN_STORAGE_NOT_IN_CONFIG'] %
                 (storage_type, bucket),
                 class_name=ADMIN_CLASS_NAME)
        return

    buckets.pop(bucket)
    log.info(output_messages['INFO_REMOVED_STORAGE'] % (storage_type, bucket),
             class_name=ADMIN_CLASS_NAME)

    yaml_save(conf, config_path)
Exemplo n.º 10
0
def store_del(store_type, bucket, global_conf=False):
    """Remove the *bucket* entry for *store_type* from the configuration file."""
    if not valid_store_type(store_type):
        return

    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as e:
        log.error(e, class_name=ADMIN_CLASS_NAME)
        return

    # Resolve the nested section step by step; an absent level means the
    # bucket is not configured.
    section = conf['store'] if 'store' in conf else {}
    buckets = section[store_type] if store_type in section else {}
    if bucket not in buckets:
        log.warn('Store [%s://%s] not found in configuration file.' %
                 (store_type, bucket),
                 class_name=ADMIN_CLASS_NAME)
        return

    buckets.pop(bucket)
    log.info('Removed store [%s://%s] from configuration file.' %
             (store_type, bucket),
             class_name=ADMIN_CLASS_NAME)

    yaml_save(conf, config_path)
Exemplo n.º 11
0
def get_entity_tag(specpath, repotype, entity):
    """Return the tag recorded for the related *entity* in the spec, or None."""
    try:
        spec = yaml_load(specpath)
        return spec[repotype][entity]['tag']
    except Exception:
        # Best-effort: a missing spec or key just means there is no related tag.
        log.warn('Repository: the ' + entity + ' does not exist for related download.')
        return None
Exemplo n.º 12
0
 def initialize_metadata(self, entity_type):
     """Initialize the metadata repository for *entity_type*.

     Initialization failures are only logged — presumably callers treat this
     as best-effort; confirm before relying on the return.
     """
     super(Metadata, self).__init__(self.__config, entity_type)
     try:
         self.init()
     except Exception as e:
         log.warn('Could not initialize metadata for %s. %s' %
                  (entity_type, e),
                  class_name=METADATA_CLASS_NAME)
Exemplo n.º 13
0
 def initialize_metadata(self, entity_type):
     """Initialize the metadata repository for *entity_type*.

     Initialization failures are only logged — presumably callers treat this
     as best-effort; confirm before relying on the return.
     """
     super(Metadata, self).__init__(self.__config, entity_type)
     try:
         self.init()
     except Exception as e:
         log.warn(output_messages['WARN_CANNOT_INITIALIZE_METADATA_FOR'] %
                  (entity_type, e),
                  class_name=METADATA_CLASS_NAME)
Exemplo n.º 14
0
def _get_user_input(message, default=None, required=False):
    """Prompt the user with *message* and return the entered value.

    Blank input returns *default* unless *required* is True, in which case
    the prompt repeats until a non-blank value is given.
    """
    # Loop instead of recursing (the original re-called itself on every empty
    # answer, which could exhaust the stack on persistent blank input).
    while True:
        value = input(message)
        if value.strip():
            return value
        if not required:
            return default
        log.warn(output_messages['ERROR_EMPTY_VALUE'])
Exemplo n.º 15
0
def get_entity_tag(specpath, repo_type, entity):
    """Return the tag of the related *entity* from the spec file, or None."""
    entity_spec_key = get_spec_key(repo_type)
    try:
        spec = yaml_load(specpath)
        related_entity_spec_key = get_spec_key(entity)
        return spec[entity_spec_key][related_entity_spec_key]['tag']
    except Exception:
        # Best-effort: a missing spec or key just means there is no related tag.
        log.warn(output_messages['WARN_NOT_EXIST_FOR_RELATED_DOWNLOAD'] % entity)
        return None
Exemplo n.º 16
0
 def process(value, state):
     """Wrap the original option processor and warn on deprecated spellings.

     Relies on closure variables from the enclosing scope: ``deprecated``,
     ``preferred`` and ``orig_process``.
     """
     import inspect
     frame = inspect.currentframe()
     try:
         # Read the 'opt' local of the caller's frame — presumably the option
         # parser — to learn which option spelling triggered this processor.
         opt = frame.f_back.f_locals.get('opt')
     finally:
         # Drop the frame reference promptly to break the reference cycle
         # (recommended by the inspect module documentation).
         del frame
     if opt in deprecated:
         msg = "'{}' has been deprecated, use '{}' instead;"
         log.warn(msg.format(opt, preferred))
     return orig_process(value, state)
Exemplo n.º 17
0
 def _check_corrupted_files(self, spec, repo):
     """Warn about files that cannot be added because they are corrupted.

     Any failure while collecting or printing the list is logged and swallowed.
     """
     try:
         corrupted_files = repo.get_corrupted_files(spec)
         if corrupted_files:
             print('\n')
             log.warn(
                 'The following files cannot be added because they are corrupted:',
                 class_name=REPOSITORY_CLASS_NAME)
             for file in corrupted_files:
                 print('\t %s' % file)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
         return
Exemplo n.º 18
0
 def __init_manager(self, type_entity):
     """Create (and initialize if needed) the MetadataManager for *type_entity*.

     Any failure — missing root path, config errors, git problems — is logged
     and swallowed.
     """
     try:
         get_root_path()
         config = config_load()
         git_remote = config[type_entity]['git']
         if not git_remote:
             # No remote configured for this entity: nothing to manage.
             log.warn(
                 output_messages['WARN_REPOSITORY_NOT_FOUND_FOR_ENTITY'] %
                 type_entity,
                 class_name=LocalEntityManager.__name__)
             return
         self._manager = MetadataManager(config, repo_type=type_entity)
         if not self._manager.check_exists():
             self._manager.init()
     except Exception as e:
         log.error(e, class_name=LocalEntityManager.__name__)
Exemplo n.º 19
0
 def add(self, path, manifestpath, files=None):
     """Add *files* (or everything under *path* when empty) to the manifest.

     Args:
         path: root directory of the entity data.
         manifestpath: manifest file to update.
         files: optional list of paths relative to *path*; an empty list
             (the default) means "add the whole directory tree".
     """
     # None instead of a mutable [] default avoids the shared-default pitfall.
     files = [] if files is None else files
     self.wp = pool_factory(pb_elts=0, pb_desc='files')
     if len(files) > 0:
         # Count only plain files toward the progress bar up front;
         # directories grow it as they are walked.
         single_files = filter(lambda x: os.path.isfile(os.path.join(path, x)), files)
         self.wp.progress_bar_total_inc(len(list(single_files)))
         for f in files:
             fullpath = os.path.join(path, f)
             if os.path.isdir(fullpath):
                 self._add_dir(path, manifestpath, f)
             elif os.path.isfile(fullpath):
                 self._add_single_file(path, manifestpath, f)
             else:
                 log.warn('[%s] Not found!' % fullpath, class_name=MULTI_HASH_CLASS_NAME)
     else:
         if os.path.isdir(path):
             self._add_dir(path, manifestpath)
     self.wp.progress_bar_close()
Exemplo n.º 20
0
 def handle_parse_result(self, ctx, opts, args):
     """Enforce that this option is given whenever its dependent options are.

     When the dependent options are present but this one is missing, either
     raise MissingParameter (wizard disabled) or prompt the user for a value
     (wizard enabled). When this option is present without its dependents it
     has no effect, so only a warning is emitted.
     """
     using_required_option = self.name in opts
     using_dependent_options = all(
         opt.replace('-', '_') in opts for opt in self.required_option)
     # Parsed option keys use underscores; show the CLI spelling in messages.
     option_name = self.name.replace('_', '-')
     if not using_required_option and using_dependent_options:
         msg = output_messages['ERROR_REQUIRED_OPTION_MISSING'].format(
             option_name, ', '.join(self.required_option), option_name)
         if not is_wizard_enabled():
             raise MissingParameter(ctx=ctx, param=self, message=msg)
         requested_value = wizard_for_field(ctx, None, msg, required=True)
         opts[self.name] = requested_value
         return super(OptionRequiredIf,
                      self).handle_parse_result(ctx, opts, args)
     elif using_required_option and not using_dependent_options:
         log.warn(output_messages['WARN_USELESS_OPTION'].format(
             option_name, ', '.join(self.required_option)))
     return super(OptionRequiredIf,
                  self).handle_parse_result(ctx, opts, args)
Exemplo n.º 21
0
def store_factory(config, store_string):
    """Build the store object described by *store_string* ('<type>://<bucket>/...').

    Returns None when the cloud profile is missing or the bucket in the spec
    conflicts with the configuration file.
    """
    stores = {StoreType.S3.value: S3Store, StoreType.S3H.value: S3MultihashStore,
              StoreType.AZUREBLOBH.value: AzureMultihashStore, StoreType.GDRIVEH.value: GoogleDriveMultihashStore,
              StoreType.GDRIVE.value: GoogleDriveStore}
    parts = store_string.split('/')
    config_bucket_name, bucket_name = None, None

    try:
        store_type = parts[0][:-1]
        bucket_name = parts[2]
        config_bucket_name = []
        log.debug('Store [%s] ; bucket [%s]' % (store_type, bucket_name), class_name=STORE_FACTORY_CLASS_NAME)
        config_bucket_name.extend(config['store'][store_type])
        bucket = config['store'][store_type][bucket_name]
        return stores[store_type](bucket_name, bucket)
    except ProfileNotFound as pfn:
        log.error(pfn, class_name=STORE_FACTORY_CLASS_NAME)
        return None
    except Exception:
        log.warn('Exception creating store -- bucket name conflicting between config file [%s] and spec file [%s]' % (
            config_bucket_name, bucket_name), class_name=STORE_FACTORY_CLASS_NAME)
        return None
Exemplo n.º 22
0
 def add(self, path, manifestpath, files=None):
     """Add *files* (or the whole *path* tree when empty) to the manifest,
     honoring the repository's ignore rules.

     Args:
         path: root directory of the entity data.
         manifestpath: manifest file to update.
         files: optional list of paths relative to *path*; an empty list
             (the default) means "add the whole directory tree".
     """
     # None instead of a mutable [] default avoids the shared-default pitfall.
     files = [] if files is None else files
     self.wp = pool_factory(pb_elts=0, pb_desc='files')
     ignore_rules = get_ignore_rules(path)
     if len(files) > 0:
         # Count only plain files toward the progress bar up front.
         single_files = filter(
             lambda x: os.path.isfile(os.path.join(path, x)), files)
         self.wp.progress_bar_total_inc(len(list(single_files)))
         for f in files:
             fullpath = os.path.join(path, f)
             if os.path.isdir(fullpath):
                 self._add_dir(path,
                               manifestpath,
                               f,
                               ignore_rules=ignore_rules)
             elif os.path.isfile(fullpath):
                 # NOTE(review): the ignore rules are checked against *path*
                 # (the root), not the file — confirm this should not be
                 # `fullpath` (behavior kept as-is).
                 if not should_ignore_file(ignore_rules, path):
                     self._add_single_file(path, manifestpath, f)
             else:
                 log.warn(output_messages['WARN_NOT_FOUND'] % fullpath,
                          class_name=MULTI_HASH_CLASS_NAME)
     else:
         if os.path.isdir(path):
             self._add_dir(path, manifestpath, ignore_rules=ignore_rules)
     self.wp.progress_bar_close()
Exemplo n.º 23
0
    def check_and_update(self, key, value, hfs, filepath, fullpath, cache):
        """Re-examine an indexed file against the filesystem and update its status.

        Returns the new scid when the file changed and should be re-added,
        None when it is unchanged, corrupted, or not matched.
        """
        st = os.stat(fullpath)
        if key == filepath and value['ctime'] == st.st_ctime and value['mtime'] == st.st_mtime:
            log.debug('File [%s] already exists in ml-git repository' % filepath, class_name=MULTI_HASH_CLASS_NAME)
            return None
        # NOTE(review): `and` binds tighter than `or`, so this branch fires
        # whenever mtime changed even if key != filepath — confirm whether
        # `key == filepath and (ctime or mtime changed)` was intended.
        elif key == filepath and value['ctime'] != st.st_ctime or value['mtime'] != st.st_mtime:
            log.debug('File [%s] was modified' % filepath, class_name=MULTI_HASH_CLASS_NAME)
            scid = hfs.get_scid(fullpath)
            if value['hash'] != scid:
                status = Status.a.name
                prev_hash = value['hash']
                scid_ret = scid

                is_flexible = self._mutability == Mutability.FLEXIBLE.value
                is_strict = self._mutability == Mutability.STRICT.value
                # 'untime' presumably marks an explicitly unlocked file —
                # TODO confirm against the index writer.
                not_unlocked = value['mtime'] != st.st_mtime and 'untime' not in value

                bare_mode = os.path.exists(os.path.join(self._path, 'metadata', self._spec, 'bare'))
                if (is_flexible and not_unlocked) or is_strict:
                    # Content changed without permission: flag as corrupted
                    # and evict the stale cached copy.
                    status = Status.c.name
                    prev_hash = None
                    scid_ret = None

                    file_path = Cache(cache).get_keypath(value['hash'])
                    if os.path.exists(file_path):
                        os.unlink(file_path)
                elif bare_mode and self._mutability == Mutability.MUTABLE.value:
                    print('\n')
                    log.warn('The file %s already exists in the repository. If you commit, the'
                             ' file will be overwritten.' % filepath,
                             class_name=MULTI_HASH_CLASS_NAME)

                self.update_full_index(posix_path(filepath), fullpath, status, scid, prev_hash)

                return scid_ret
        return None