def _validate_source(self, config, source_name):
    """Validate one data source section and merge the result into ``self.values``.

    :param config: The raw section of the source. Must contain a ``'type'``
                   entry naming a ``vergeml.io`` plugin.
    :param source_name: Name of the section below ``data`` being validated.
    :raises: The error built by ``_invalid_option`` when the type is unknown
             or when the section contains keys the source does not define.
    """
    type_name = config['type']
    if not type_name:
        # Nothing to validate without a source type.
        return

    io_plugin = self.plugins.get("vergeml.io", type_name)
    if not io_plugin:
        raise _invalid_option(
            f"data.{source_name}.type",
            help_topic='data',
            suggestion=did_you_mean(self.plugins.keys('vergeml.io'), type_name))

    source = Source.discover(io_plugin)

    # 'type' only selects the plugin, so it is not validated as an option.
    plugin_options = [opt for opt in source.options if opt.name != 'type']
    validator = ValidateOptions(plugin_options, source_name, self.plugins)

    # Validate a private copy nested below the source name, without 'type'.
    nested = {source_name: deepcopy(config)}
    dict_del_path(nested, source_name + ".type")

    leftover = apply_config(nested, {type_name: validator})
    if leftover:
        # Whatever apply_config() did not consume is reported as unknown.
        unknown_key = dict_paths(leftover)[0]
        option_names = [opt.name for opt in source.options]
        raise _invalid_option(
            f"data.{source_name}.{unknown_key}",
            help_topic=type_name,
            suggestion=did_you_mean(option_names, unknown_key),
            kind='key')

    merged = dict(data=validator.values)
    data_values = merged['data']
    if source_name not in data_values:
        data_values[source_name] = {}
    data_values[source_name]['type'] = type_name
    dict_merge(self.values, merged)
def apply_config(raw, validators):
    """Validate a raw configuration and hand each known option to its validator.

    :param raw: The raw configuration. It is passed through ``_normalize``
                before anything else happens.
    :param validators: A dict mapping a section name to a validator object.
    :return: The normalized configuration with every configured option removed.
    :raises: The error built by ``_invalid_option`` when a validator's section
             still has entries after all of its known options were removed.
    """
    raw = _normalize(raw, validators)

    # Known options are stripped from a copy; whatever stays is invalid.
    unknown = deepcopy(raw)

    for section, validator in validators.items():
        known = [opt for opt in validator.options() if opt.alias is None]

        for opt in known:
            if dict_has_path(unknown, opt.name):
                dict_del_path(unknown, opt.name)

        if section in unknown or (not section and unknown):
            offending = dict_paths(unknown, section)[0]
            names = [opt.name for opt in known]
            hint = did_you_mean(names, offending)
            raise _invalid_option(offending, help_topic=validator.name,
                                  suggestion=hint, kind='key')

        # Validate and cast: move each known option from raw into the validator.
        for opt in known:
            if not dict_has_path(raw, opt.name):
                continue
            value = dict_get_path(raw, opt.name)
            dict_del_path(raw, opt.name)
            validator.configure(opt, value)

    return raw
def _raise_unknown_option(key, valid, options, help_topic):
    """Raise an invalid-option error for the first of `options` not in `valid`.

    :param key: Dotted prefix of the section being checked; the offending
                option name is appended to it in the error.
    :param valid: Iterable of accepted option names. One-shot iterators such
                  as ``map`` objects are supported.
    :param options: Iterable of option names that were actually given.
    :param help_topic: Help topic attached to the error.
    :raises: The error built by ``_invalid_option`` if an unknown name exists.
             Returns ``None`` otherwise.
    """
    # Materialize once: `valid` is needed for both the membership test and
    # the suggestion, and an iterator would be exhausted after the first use.
    valid = list(valid)
    valid_set = set(valid)

    # Follow the iteration order of `options` so that the reported key is
    # stable whenever the caller passes an ordered collection.
    unknown = [name for name in options if name not in valid_set]
    if not unknown:
        return

    first = unknown[0]
    suggestion = did_you_mean(valid, first)
    raise _invalid_option(f'{key}.{first}', help_topic=help_topic,
                          suggestion=suggestion, kind='key')
def _parse_data_cache(res, section):
    """Copy a valid ``cache`` setting from `section` into `res`.

    Does nothing when `section` has no ``'cache'`` entry.

    :raises: The error built by ``_invalid_option`` when the value is not
             one of ``_VALID_CACHE_VALUES``.
    """
    if 'cache' not in section:
        return

    cache_mode = section['cache']
    if cache_mode not in _VALID_CACHE_VALUES:
        hint = did_you_mean(_VALID_CACHE_VALUES, cache_mode)
        raise _invalid_option('data.cache', help_topic='cache', suggestion=hint)

    res['cache'] = cache_mode
def _parse_opts(self, rest):
    """Run getopt over `rest` using the option definitions in ``self.options``.

    :param rest: The list of command line tokens to parse.
    :return: A tuple ``(args, extra)`` as returned by ``getopt.getopt``.
    :raises VergeMLError: When getopt rejects the command line.
    """
    long_specs = []
    short_spec = ""

    for opt in self.options:
        # Arguments and @names are dealt with elsewhere.
        if opt.is_at_option() or opt.is_argument_option():
            continue

        # Long option: flags take no value, everything else does ("name=").
        if opt.flag:
            assert opt.has_type(str, bool)
            long_specs.append(opt.name)
        else:
            long_specs.append(opt.name + "=")

        # Short option: a trailing ":" tells getopt that a value follows.
        if opt.short:
            assert opt.short not in short_spec
            short_spec += opt.short if opt.has_type(bool) else opt.short + ":"

    try:
        args, extra = getopt.getopt(rest, short_spec, long_specs)
    except getopt.GetoptError as err:
        if not err.opt:
            raise VergeMLError("Invalid option.", help_topic=self.name)

        # getopt named the offending option, so a close match can be offered.
        short_names = list(short_spec.replace(":", ""))
        long_names = [spec.rstrip("=") for spec in long_specs]
        suggestion = did_you_mean(short_names + long_names, err.opt)
        dashes = '-' if len(err.opt) == 1 else '--'
        raise VergeMLError(f"Invalid option {dashes}{err.opt}", suggestion,
                           help_topic=self.name)

    return args, extra
def get_custom_architecture(name, trainings_dir, output_layer):
    """Load a saved model and cut it off at `output_layer`.

    :param name: Name of the training; leading ``@`` characters are stripped.
    :param trainings_dir: Directory containing the trainings.
    :param output_layer: Index (int) or name of the layer whose output
                         becomes the output of the returned model.
    :return: A keras ``Model`` from the loaded model's input to that layer.
    :raises VergeMLError: When the layer can not be found.
    """
    from keras.models import load_model, Model

    name = name.lstrip("@")
    checkpoint = os.path.join(trainings_dir, name, 'checkpoints', 'model.h5')
    model = load_model(checkpoint)

    by_index = isinstance(output_layer, int)
    try:
        layer = model.layers[output_layer] if by_index else model.get_layer(output_layer)
    except Exception:
        if by_index:
            raise VergeMLError(f'output-layer {output_layer} not found - model has only {len(model.layers)} layers.')
        # Looked up by name: offer the closest existing layer name.
        layer_names = [lyr.name for lyr in model.layers]
        raise VergeMLError(f'output-layer named {output_layer} not found.',
                           suggestion=did_you_mean(layer_names, output_layer))

    return Model(inputs=model.input, outputs=layer.output)
def validate_value(self, value):
    """Check `value` against the rule stored in ``self.validate``.

    The rule may be:

    * falsy - nothing is checked,
    * a tuple or list - `value` must be one of its members,
    * a callable - called as ``validate(self, value)``,
    * a string - comma separated comparisons matched by ``_VALIDATE_REGEX``
      (operators ``>``, ``<``, ``>=`` and ``<=`` are handled here).

    For options that are not required, ``None`` and the strings ``'null'``,
    ``'Null'`` and ``'NULL'`` are accepted without further checks.

    :raises: The error built by ``self._invalid_value`` on a failed check.
    """
    rule = self.validate
    if not rule:
        return

    if not self.is_required() and value in (None, 'null', 'Null', 'NULL'):
        return

    if isinstance(rule, (tuple, list)):
        if value not in rule:
            suggestion = None
            # Close matches can only be computed among string candidates.
            if all(isinstance(entry, str) for entry in rule):
                suggestion = did_you_mean(rule, value)
            raise self._invalid_value(value, suggestion)
    elif callable(rule):
        rule(self, value)
    elif isinstance(rule, str):
        try:
            number = float(value)
        except (TypeError, ValueError) as err:
            # TypeError covers None and containers, which float() does not
            # report as ValueError; both mean "not a number" here.
            raise self._invalid_value(value) from err

        for constraint in rule.split(","):
            op, num_str = re.match(_VALIDATE_REGEX, constraint.strip()).group(1, 2)
            bound = float(num_str)

            # "not (a > b)" rather than "a <= b" so that NaN fails the check.
            if op == '>':
                if not number > bound:
                    raise self._invalid_value(
                        number, f"Must be greater than {num_str}")
            elif op == '<':
                if not number < bound:
                    raise self._invalid_value(
                        number, f"Must be less than {num_str}")
            elif op == '>=':
                if not number >= bound:
                    raise self._invalid_value(
                        number, f"Must be greater or equal to {num_str}")
            elif op == '<=':
                if not number <= bound:
                    raise self._invalid_value(
                        number, f"Must be less than or equal to {num_str}")
def _load_model_plugin(self, project_file, data_file, model_via_flag):
    """Look up and instantiate the model plugin named in ``self._config``.

    Does nothing when no model is configured. On success
    ``self.model_plugin`` holds the instantiated plugin.

    :param project_file: Path of the project file, or a falsy value.
    :param data_file: Path of the data file, or a falsy value.
    :param model_via_flag: True if the model was passed in via --model.
    :raises VergeMLError: When no plugin is registered under the model name.
    """
    modelname = self._config['model']
    if not modelname:
        return

    # load the model plugin
    self.model_plugin = self.plugins.get("vergeml.model", modelname)
    if self.model_plugin:
        # instantiate the model plugin
        self.model_plugin = self.model_plugin(modelname, self.plugins)
        return

    message = f"Unknown model name '{modelname}'"
    suggestion = (did_you_mean(self.plugins.keys('vergeml.model'), modelname)
                  or "See 'ml help models'.")

    if model_via_flag and ENV == self:
        # the model was passed in via --model
        message = f"Invalid value for option --model: {message}"
    else:
        # The data file is checked first, then the project file.
        location = None
        if data_file:
            location = _check_definition(data_file, 'model', 'value')
        if not location and project_file:
            location = _check_definition(project_file, 'model', 'value')

        if location:
            filename, (line, column, length) = location
            # display a nice error message
            message = display_err_in_file(filename, line, column,
                                          f"{message} {suggestion}", length)
            # the suggestion is now contained in the message
            suggestion = None

    raise VergeMLError(message, suggestion)
def __init__(self, model=None, project_file=None, samples_dir=None, test_split=None,
             val_split=None, cache_dir=None, random_seed=None, trainings_dir=None,
             project_dir=None, AI=None, is_global_instance=False, config=None,
             plugins=PLUGINS, display=DISPLAY):
    """Configure, train and save the results.

    :param model: Name of the model plugin.
    :param project_file: Optional path to the project file.
    :param samples_dir: The directory where samples can be found. [default: samples]
    :param test_split: The test split. [default: 10%]
    :param val_split: The val split. [default: 10%]
    :param cache_dir: The directory used for caching. [default: .cache]
    :param random_seed: Random seed. [default: 2204]
    :param trainings_dir: The directory to save training results to. [default: trainings]
    :param project_dir: The directory of the project. [default: current directory]
    :param AI: Optional name of a trained AI.
    :param is_global_instance: If true, this env can be accessed under the global
                               var env.ENV. [default: false]
    :param config: Additional configuration to pass to env, i.e. if not using a
                   project file. [default: empty]
    :param plugins: The plugin registry used to look up commands and models.
    :param display: The display object, stored as ``self.display``.
    :raises VergeMLError: For an invalid split directory, a missing AI or
                          data.yaml, invalid configuration or an unknown model.
    """
    super().__init__()

    # A fresh dict per call: a shared mutable default would leak state
    # between instances.
    if config is None:
        config = {}

    # when called from the command line, we need to have a global instance
    if is_global_instance:
        global ENV
        ENV = self

    # setup the display
    self.display = display
    # set the name of the AI if given
    self.AI = AI
    # this holds the model object (not the name of the model)
    self.model = None
    # the results class (responsible for updating data.yaml with the latest
    # results during training)
    self.results = None
    # when a training is started, this holds the object responsible for
    # coordinating the training
    self.training = None
    # hold a proxy to the data loader
    self._data = None

    self.plugins = plugins

    # set up the base options from constructor arguments
    self._config = {}
    self._config['samples-dir'] = samples_dir
    self._config['test-split'] = test_split
    self._config['val-split'] = val_split
    self._config['cache-dir'] = cache_dir
    self._config['random-seed'] = random_seed
    self._config['trainings-dir'] = trainings_dir
    self._config['model'] = model

    validators = {}
    # add validators for commands
    for k, v in plugins.all('vergeml.cmd').items():
        cmd = Command.discover(v)
        validators[cmd.name] = ValidateOptions(cmd.options, k, plugins=plugins)

    # now it gets a bit tricky - we need to peek at the model name
    # to find the right validators to create for model commands.
    peek_model_name = model
    peek_trainings_dir = trainings_dir

    # to do this, we have to first have a look at the project file
    try:
        project_doc = load_yaml_file(project_file) if project_file else {}
        # only update model name if empty (project file does not override
        # command line)
        peek_model_name = peek_model_name or project_doc.get('model', None)
        # pick up trainings-dir in the same way
        peek_trainings_dir = peek_trainings_dir or project_doc.get(
            'trainings-dir', None)
        # if we don't have a trainings dir yet, set to default
        peek_trainings_dir = peek_trainings_dir or os.path.join(
            project_dir or "", "trainings")
        # now, try to load the data.yaml file and see if we have a model
        # definition there
        data_doc = load_yaml_file(peek_trainings_dir, AI, "data.yaml") if AI else {}
        # if we do, this overrides everything, also the one from the command line
        peek_model_name = data_doc.get('model', peek_model_name)
        # finally, if we have a model name, set up validators
        if peek_model_name:
            for fn in Command.find_functions(plugins.get(
                    "vergeml.model", peek_model_name), plugins=plugins):
                cmd = Command.discover(fn)
                validators[cmd.name] = ValidateOptions(
                    cmd.options, cmd.name, plugins)
    except Exception:
        # in this case we don't care if something went wrong - the error
        # will be reported later
        pass

    # finally, validators for device and data sections
    validators['device'] = ValidateDevice('device', plugins)
    validators['data'] = ValidateData('data', plugins)

    # merge project file
    if project_file:
        doc = _load_and_configure(project_file, 'project file', validators)
        # the project file DOES NOT override values passed to the environment
        # TODO reserved: hyperparameters and results
        for k, v in doc.items():
            if k not in self._config or self._config[k] is None:
                self._config[k] = v

    # after the project file is loaded, fill missing values
    project_dir = project_dir or ''
    defaults = {
        'samples-dir': os.path.join(project_dir, "samples"),
        'test-split': '10%',
        'val-split': '10%',
        'cache-dir': os.path.join(project_dir, ".cache"),
        'random-seed': 2204,
        'trainings-dir': os.path.join(project_dir, "trainings"),
    }
    for k, v in defaults.items():
        if self._config[k] is None:
            self._config[k] = v

    # verify split values
    for split in ('val-split', 'test-split'):
        spltype, splval = parse_split(self._config[split])
        if spltype == 'dir':
            path = os.path.join(project_dir, splval)
            if not os.path.exists(path):
                raise VergeMLError(
                    f"Invalid value for option {split} - no such directory: {splval}",
                    f"Please set {split} to a percentage, number or directory.",
                    hint_key=split,
                    hint_type='value',
                    help_topic='split')
            self._config[split] = path

    # need to have data_file variable in outer scope for later when
    # reporting errors
    data_file = None
    if self.AI:
        ai_path = os.path.join(self._config['trainings-dir'], self.AI)
        if not os.path.exists(ai_path):
            raise VergeMLError("AI not found: {}".format(self.AI))

        # merge data.yaml
        data_file = os.path.join(self._config['trainings-dir'], self.AI, 'data.yaml')
        if not os.path.exists(data_file):
            raise VergeMLError(
                "data.yaml file not found for AI {}: {}".format(
                    self.AI, data_file))

        # NOTE(review): load_yaml_file() is called with path components
        # further up; confirm that 'data file' is meant as a label here.
        doc = load_yaml_file(data_file, 'data file')
        self._config['hyperparameters'] = doc.get('hyperparameters', {})
        self._config['results'] = doc.get('results', {})
        self._config['model'] = doc.get('model')
        self.results = _Results(self, data_file)

    try:
        # merge device and data config
        self._config.update(apply_config(config, validators))
    except VergeMLError as e:
        # improve the error message when this runs on the command line
        if is_global_instance and e.hint_key:
            key = e.hint_key
            e.message = f"Option --{key}: " + e.message
        raise

    if self._config['model']:
        # load the model plugin
        modelname = self._config['model']
        self.model = plugins.get("vergeml.model", modelname)

        if not self.model:
            message = f"Unknown model name '{modelname}'"
            suggestion = (did_you_mean(plugins.keys('vergeml.model'), modelname)
                          or "See 'ml help models'.")

            # if model was passed in via --model
            if model and is_global_instance:
                message = f"Invalid value for option --model: {message}"
            else:
                res = None
                if data_file:
                    # first check if model was defined in the data file
                    res = _check_definition(data_file, 'model', 'value')
                if not res and project_file:
                    # next check the project file
                    res = _check_definition(project_file, 'model', 'value')
                if res:
                    filename, definition = res
                    line, column, length = definition
                    # display a nice error message
                    message = display_err_in_file(
                        filename, line, column, f"{message} {suggestion}", length)
                    # set suggestion to None since it is now contained in message
                    suggestion = None
            raise VergeMLError(message, suggestion)
        else:
            # instantiate the model plugin
            self.model = self.model(modelname, plugins)

    # update env from validators
    for plugin in validators.values():
        for k, v in plugin.values.items():
            self._config[k] = v

    # always set up numpy and python
    self.configure('python')
    self.configure('numpy')
def parse(self, argv, env_options=None):
    """Parse the command and return the result.

    :param argv: The command line tokens, including AI names and the
                 command name itself.
    :param env_options: Optional dict of fallback values, used for options
                        that were not given on the command line.
    :return: A dict mapping option names to their parsed values. For free
             form commands, a tuple ``(ai, rest)`` is returned instead.
    :raises VergeMLError: For invalid options or option values, a missing
                          or invalid subcommand.
    """
    if env_options is None:
        env_options = {}

    res = {}
    ai_names, rest = parse_ai_names(argv)

    # subcommand
    subcommand_param = next((filter(lambda o: bool(o.subcommand), self.options)), None)
    if subcommand_param:
        # `rest` may be empty, so check before indexing.
        if not rest or ":" not in rest[0]:
            raise VergeMLError(f"Missing {subcommand_param.name}.", help_topic=self.name)

        command, subcommand = rest[0].split(":", 1)
        assert command == self.name

        # Replace "command:subcommand" by "subcommand" in a copy of argv and
        # let the subcommand parse it.
        argv = deepcopy(argv)
        argv[argv.index(rest[0])] = subcommand

        plugin = self.plugins.get(subcommand_param.subcommand, subcommand)
        if not plugin:
            raise VergeMLError(f"Invalid {subcommand_param.name}.", help_topic=self.name)

        cmd = Command.discover(plugin)
        try:
            res = cmd.parse(argv, env_options)
            res[subcommand_param.name] = subcommand
            # fill in the defaults of the subcommand
            for opt in cmd.options:
                if opt.name not in res:
                    res[opt.name] = opt.default
            return res
        except VergeMLError as e:
            e.help_topic = f"{command}:{subcommand}"
            raise

    # AI params
    ai_param = next((filter(lambda o: o.is_ai_option(), self.options)), None)
    if ai_param:
        if ai_param.type in ('AI', None, str):
            ai_conf = 'required'
        elif ai_param.type == 'Optional[AI]':
            ai_conf = 'optional'
        elif ai_param.type in (list, 'list', 'List[AI]'):
            ai_conf = 'list'
    else:
        ai_conf = 'none'

    if (ai_conf == 'optional' and len(ai_names) > 1) or \
       (ai_conf == 'required' and len(ai_names) != 1) or \
       (ai_conf == 'none' and len(ai_names) != 0):
        raise self._invalid_arguments(help_topic=self.name)

    if ai_conf in ('required', 'optional'):
        res[ai_param.name] = next(iter(ai_names), None)
    elif ai_conf == 'list':
        res[ai_param.name] = ai_names

    # command name
    # The pop must happen outside the assert: assert statements are removed
    # when Python runs with -O, which would leave the name in `rest`.
    command_name = rest.pop(0)
    assert self.name == command_name

    # in case of free form commands, just return AI and rest
    if self.free_form:
        ai_res = None
        if ai_param:
            ai_res = res.get(ai_param.name)
        return (ai_res, rest)

    # Build the getopt specification.
    longopts = []
    shortopts = ""

    for opt in self.options:
        if opt.is_ai_option() or opt.is_argument_option():
            continue

        if opt.flag:
            assert opt.type in (bool, None)
            longopts.append(opt.name)
        else:
            longopts.append(opt.name + "=")

        if opt.short:
            letter = opt.short
            assert letter not in shortopts
            if opt.type == bool:
                shortopts += letter
            else:
                shortopts += letter + ":"

    try:
        args, extra = getopt.getopt(rest, shortopts, longopts)
    except getopt.GetoptError as err:
        if err.opt:
            candidates = (list(shortopts.replace(":", ""))
                          + [o.rstrip("=") for o in longopts])
            suggestion = did_you_mean(candidates, err.opt)
            dashes = '-' if len(err.opt) == 1 else '--'
            raise VergeMLError(f"Invalid option {dashes}{err.opt}", suggestion,
                               help_topic=self.name)
        else:
            raise VergeMLError("Invalid option.", help_topic=self.name)

    # Split the getopt result into long and short options, without dashes.
    shorts_dict = {}
    longs_dict = {}
    for k, v in args:
        if k.startswith("--"):
            longs_dict[k.lstrip("-")] = v
        else:
            shorts_dict[k.lstrip("-")] = v

    # Positional arguments left over by getopt.
    extra_param = next((filter(lambda o: o.is_argument_option(), self.options)), None)
    if extra_param:
        if extra_param.is_optional():
            extra_conf = 'optional'
        elif isinstance(extra_param.type, str) and extra_param.type.startswith("List"):
            extra_conf = 'list'
        elif hasattr(extra_param.type, '__origin__') and extra_param.type.__origin__ == list:
            extra_conf = 'list'
        elif extra_param.type == list:
            extra_conf = 'list'
        else:
            extra_conf = 'required'
    else:
        extra_conf = 'none'

    if (extra_conf == 'optional' and len(extra) > 1) or \
       (extra_conf == 'none' and len(extra) != 0):
        raise self._invalid_arguments(help_topic=self.name)
    elif extra_conf == 'required' and len(extra) == 0:
        raise self._invalid_arguments(f"Missing argument {extra_param.name}.",
                                      help_topic=self.name)
    elif extra_conf == 'required' and len(extra) > 1:
        raise self._invalid_arguments("Invalid arguments.", help_topic=self.name)

    if extra_conf in ('optional', 'required'):
        res[extra_param.name] = next(iter(extra), None)
    elif extra_conf == 'list':
        res[extra_param.name] = extra

    for opt in self.options:
        if opt.is_ai_option() or opt.is_argument_option():
            continue

        value = None

        if opt.flag:
            if opt.name in longs_dict:
                value = True
        elif opt.name in longs_dict:
            value = longs_dict[opt.name]

        # A short option given on the command line overrides the long form.
        if opt.short:
            letter = opt.short
            if letter in shorts_dict:
                if opt.type == bool:
                    value = True
                else:
                    value = shorts_dict[letter]

        # Fall back to the value from the environment.
        if value is None and opt.name in env_options:
            value = env_options[opt.name]

        if value is None and not opt.is_optional():
            raise self._invalid_arguments(message=f'Missing argument --{opt.name}.',
                                          help_topic=self.name)
        elif value is not None:
            try:
                value = opt.cast_value(value)
                value = opt.transform_value(value)
                opt.validate_value(value)
                res[opt.name] = value
            except VergeMLError as err:
                err.message = f"Invalid value for option --{opt.name}."
                raise

    return res
def get_help(self, env=None, topic="", short=False):
    """Return the help text for `topic`.

    :param env: Optional environment. When it has a model, the commands of
                that model are available as topics too.
    :param topic: The help topic. When empty, the general help is returned.
    :param short: Passed on to the general help and to command usage texts.
    :return: The help text. For an unknown topic, a "No help found" message
             is returned, with a suggestion if one was found.
    :raises VergeMLError: For a ``command:subcommand`` topic when the command
                          takes no subcommand or the subcommand is unknown.
    """
    if not topic:
        return self.format_general_help(env, short=short)

    model_commands = {}
    if env and env.model:
        for fn in Command.find_functions(env.model):
            cmd = Command.discover(fn)
            model_commands[cmd.name] = cmd

    # The order of the checks below matters: earlier topics take precedence
    # over plugins or glossary entries with the same name.
    if topic == "-a":
        return self.format_topics(env)

    # show VergeML options
    if topic == "options":
        return self.format_options()

    # display the glossary
    if topic == "glossary":
        return self.format_glossary()

    # show available models
    if topic == "models":
        return _with_header(self.format_models(), help="models", topic=topic)

    # explain the data.input section
    if topic == "input":
        return _with_header(self.format_input_list(),
                            help="configuration", topic=topic)

    # explain the data.cache section
    if topic == "cache":
        return _with_header(format_info_text(_CACHE_HELP),
                            help="configuration", topic=topic)

    # explain the data.output section
    if topic == "output":
        return _with_header(format_info_text(_OUTPUT_HELP),
                            help="configuration", topic=topic)

    # explain preprocessing
    if topic in ("preprocess", "preprocessing"):
        return _with_header(self.format_preprocessing_list(),
                            help="configuration", topic=topic)

    # explain the data section
    if topic == "data":
        return _with_header(format_info_text(_DATA_HELP.strip()),
                            help="configuration", topic=topic)

    # explain the device section
    if topic == "device":
        return _with_header(format_info_text(_DEVICE_HELP.strip()),
                            help="configuration", topic=topic)

    # show a random robot
    if topic == "random robot":
        robot = ascii_robot(datetime.datetime.now(),
                            random_robot_name(datetime.datetime.now()))
        return f"\n{robot}\n"

    # display subcommand help
    if ":" in topic and topic.split(":", 1)[0] in self.plugins.keys('vergeml.cmd'):
        # Split once only, like the check above: a second colon belongs to
        # the subcommand and must not break the unpacking.
        command, subcommand = topic.split(":", 1)
        cmd = Command.discover(self.plugins.get('vergeml.cmd', command))
        subcommand_option = next(
            filter(lambda o: bool(o.subcommand), cmd.options), None)
        if not subcommand_option:
            raise VergeMLError(f"{command} takes no subcommand", help_topic=command)

        plugin = self.plugins.get(subcommand_option.subcommand, subcommand)
        if not plugin:
            raise VergeMLError(f"Invalid {subcommand_option.name}", help_topic=command)

        cmd = Command.discover(plugin)
        return cmd.usage(short, parent_command=command)

    # display model command help
    if topic in model_commands:
        return model_commands[topic].usage(short)

    # display command help
    if topic in self.plugins.keys('vergeml.cmd'):
        cmd = Command.discover(self.plugins.get('vergeml.cmd', topic))
        return cmd.usage(short)

    if topic in self.plugins.keys('vergeml.operation'):
        return _with_header(
            self.format_source_or_operation(topic, 'vergeml.operation', Operation),
            help="preprocessing operation", topic=topic)

    if topic in self.plugins.keys('vergeml.io'):
        return _with_header(
            self.format_source_or_operation(topic, 'vergeml.io', Source),
            help="data source", topic=topic)

    if topic in self.plugins.keys('vergeml.model'):
        return _with_header(self.format_model(topic), help="models", topic=topic)

    # show a glossary entry
    if glossary.long_descr(topic):
        topic = glossary.SYNONYMS.get(topic, topic)
        return _with_header(format_info_text(glossary.long_descr(topic)),
                            help="glossary", topic=topic)

    # show base options help
    help_options = dict(HELP_OPTIONS)
    if topic in help_options:
        return _with_header(format_info_text(help_options.get(topic)),
                            help="base options", topic=topic)

    # Unknown topic: collect every known topic name to find a close match.
    candidates = set()
    candidates.update(map(lambda h: h[0], _GENERAL_HELP))
    candidates.update(self.plugins.keys("vergeml.cmd"))
    candidates.update(map(lambda h: h[0], _CONFIGURATION_HELP))
    candidates.update(self.plugins.keys("vergeml.io"))
    candidates.update(self.plugins.keys("vergeml.operation"))
    candidates.update(self.plugins.keys("vergeml.model"))
    # the names of the model commands were already collected above
    candidates.update(model_commands)
    candidates.update(glossary.LONG_DESCR.keys())
    candidates.update(glossary.SYNONYMS.keys())

    suggestion = did_you_mean(list(candidates), topic)
    if suggestion:
        return f"No help found for topic '{topic}'. " + suggestion
    return f"No help found for topic '{topic}'."
def run(argv, plugins=PLUGINS):
    """Parse `argv`, set up the environment and run the requested command.

    :param argv: The command line tokens.
    :param plugins: The plugin registry used to look up commands.
    :return: The result of the command plugin. ``None`` when the command
             was interrupted with Ctrl-C.
    :raises VergeMLError: For invalid base options or an unknown command.
    """
    # Imported locally; sys.exit() replaces the exit() helper, which only
    # exists when the site module was loaded.
    import sys

    try:
        argv = _forgive_wrong_option_order(argv)
        args, config, rest = _parsebase(argv, plugins=plugins)
    except getopt.GetoptError as err:
        if err.opt:
            opt = err.opt.lstrip("-")
            dashes = '-' if len(opt) == 1 else '--'
            raise VergeMLError(f"Invalid option {dashes}{opt}.",
                               help_topic='options') from err
        else:
            raise VergeMLError("Invalid option.", help_topic='options') from err

    if 'version' in args:
        print_version()
        sys.exit()

    args = _prepare_args(args)
    ai_names, after_names = parse_ai_names(rest)

    AI = next(iter(ai_names), None)
    env = _env_from_args(args, config, AI, plugins=plugins)

    if after_names:
        cmdname = after_names.pop(0)
    else:
        # no command given - show the help
        cmdname = 'help'
        rest = ['help']

    # "command:subcommand" is looked up under the command name
    if ":" in cmdname:
        cmdname = cmdname.split(":")[0]

    plugin = None

    cmd_plugin = plugins.get('vergeml.cmd', cmdname)
    if cmd_plugin:
        plugin = cmd_plugin(cmdname, plugins=plugins)
    elif env.model:
        # not a general command - try the commands of the model
        for model_fn in Command.find_functions(env.model):
            if cmdname == Command.discover(model_fn).name:
                plugin = model_fn
                break

    if not plugin:
        # collect all possible command names
        command_names = set(plugins.keys('vergeml.cmd'))
        if env.model:
            model_commands = set(map(lambda f: Command.discover(f).name,
                                     Command.find_functions(env.model)))
            command_names.update(model_commands)

        raise VergeMLError(f"Invalid command '{cmdname}'.",
                           suggestion=did_you_mean(command_names, cmdname),
                           help_topic='*help*')

    try:
        env_conf = env.get(cmdname) or {}
        cmd = Command.discover(plugin)
        assert cmd
        args = cmd.parse(rest, env_conf)

        if not cmd.free_form:
            # set defaults
            for opt in cmd.options:
                if opt.name not in args:
                    args[opt.name] = opt.default

            # merge back into env
            for k, v in args.items():
                env.set(f"{cmdname}.{k}", v)

        # NOTE(review): the flattened source does not show whether the two
        # calls below belong inside the "if not cmd.free_form" block - confirm.
        env.set("command", cmdname)
        env.set_defaults(cmdname, args, plugins=plugins)

        try:
            # return the result for unit testing
            return plugin(args, env)
        finally:
            if env.training is not None:
                env.cancel_training()

    except KeyboardInterrupt:
        # silence the stacktrace
        pass
def _parse_data_source(res, section, key, plugins):
    """Validate the data source configured under `key` and store it in `res`.

    Does nothing when `section` has no entry for `key`. Otherwise
    ``res[key]`` becomes a dict with the source ``'type'`` and every option
    that was given or has a truthy default.

    :param res: The dict receiving the parsed configuration.
    :param section: The raw data section.
    :param key: Name of the source entry inside `section`.
    :param plugins: The plugin registry used to look up ``vergeml.io`` plugins.
    :raises VergeMLError: When the type or a required option is missing, or
                          when an option value is rejected.
    :raises: The error built by ``_invalid_option`` for an unknown type or
             an unknown option.
    """
    if key not in section:
        return

    source_section = section[key]

    # Type must be set explicitly when input or output is specified in the
    # config file.
    if 'type' not in source_section:
        raise VergeMLError(
            "Missing option 'type'.",
            suggestion=f'Please specify the {key} type of your data',
            help_topic='data',
            hint_type='key',
            hint_key=f'data.{key}')

    type_ = source_section['type']
    res[key] = {'type': type_}

    # Find the source definition
    plugin = plugins.get("vergeml.io", type_)
    if not plugin:
        suggestion = did_you_mean(plugins.keys('vergeml.io'), type_)
        raise _invalid_option(f"data.{key}.type", help_topic='data',
                              suggestion=suggestion)

    source = Source.discover(plugin)

    # check for unknown options
    # Lists instead of a one-shot map(): the valid names are needed for both
    # the membership test and the suggestion.
    valid_names = [opt.name for opt in source.options]
    given_names = [name for name in source_section if name != 'type']
    _raise_unknown_option(f'data.{key}', valid_names, given_names, 'data')

    options = [opt for opt in source.options if opt.name != 'type']

    # validate and transform each option
    for option in options:
        try:
            if option.name in source_section:
                value = source_section[option.name]
                value = option.cast_value(value)
                value = option.transform_value(value)
                option.validate_value(value)
                res[key][option.name] = value
            # deal with missing options
            elif option.is_required():
                raise VergeMLError(
                    f"Missing option '{option.name}'.",
                    suggestion='Please add the missing option.',
                    help_topic='data',
                    hint_type='key',
                    hint_key=f'data.{key}')
            elif option.default:
                res[key][option.name] = option.default
        except VergeMLError as err:
            # Point the error at the option it belongs to.
            err.hint_key = f'data.{key}.{option.name}'
            raise