def __init__(self, env):
    self.env = env
    self.fns = OrderedDict()

    for model_fn in Command.find_functions(env.model):
        cmd = Command.discover(model_fn)
        if cmd.kind == 'predict':
            self.fns[cmd.name] = (cmd, model_fn)

    if not len(self.fns):
        raise VergeMLError(f"@{env.AI} can't be run as a REST service.")
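# Illustrative sketch (not part of the original source): the registry built by the
# constructor above maps command names to (Command, function) pairs and keeps only
# functions whose command kind is 'predict'. The instance name below is an assumption
# for illustration; only the OrderedDict layout and cmd.descr come from the code above.
#
#     service = some_rest_service            # an instance of the class defined above
#     for name, (cmd, model_fn) in service.fns.items():
#         print(name, cmd.descr)             # list the callable predict endpoints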
def main(argv=None, plugins=PLUGINS):
    if argv is None:
        argv = sys.argv[1:]

    _configure_logging()

    try:
        run(argv, plugins=plugins)
    except VergeMLError as e:
        # NOTE: when the error is encountered before the environment is created, ENV will be empty.
        from vergeml.env import ENV

        # In case there is an error with the config file, but the user just says 'ml help <topic>'
        # where <topic> is the topic suggested by VergeML, try to display the help message
        # instead of the error the user is experiencing.
        if ["help", e.help_topic] == argv:
            help = HelpCommand('help')
            print(help.get_help(ENV, e.help_topic))
        else:
            # Display the error. Can't use logging.error because for an unknown reason pytest does
            # not capture stderr when using logging, so fall back to print.
            err_string = str(e).strip()
            print("Error! " + err_string, file=sys.stderr)

            # find all command topics
            commands = list(plugins.keys('vergeml.cmd'))
            if ENV and ENV.model_plugin:
                fns = Command.find_functions(ENV.model_plugin)
                mcommands = list(map(lambda f: Command.discover(f).name, fns))
                commands.extend(mcommands)

            # If the error is just one line and command help is available, display the help message too.
            if e.help_topic and len(err_string.splitlines()) == 1 \
                    and e.help_topic in plugins.keys('vergeml.cmd'):
                print("")
                help = HelpCommand('help')
                help_topic = "" if e.help_topic == "*help*" else e.help_topic
                print(help.get_help(ENV, help_topic, short=True))
            # otherwise just hint at the help topic
            elif e.help_topic:
                print("", file=sys.stderr)
                help_topic = "" if e.help_topic == "*help*" else " " + e.help_topic
                print(f"See 'ml help{help_topic}'.", file=sys.stderr)
    except Exception as err:  # pylint: disable=W0703
        if err.__class__.__name__ == 'ResourceExhaustedError':
            print("Error! Your GPU ran out of memory.")
            print("Try lowering resource usage by decreasing model parameters such as batch size.")
        else:
            raise err
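# Illustrative usage (not part of the original module): `main` can be driven with an
# explicit argument list, which is how a test might exercise the CLI without touching
# sys.argv. The argument values shown are assumptions; only the argv/plugins parameters
# come from the function above.
#
#     main(["help"])              # renders the general help screen via run()
#     main(["help", "models"])    # renders help for the 'models' topic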
def format_general_help(self, env=None, short=False):
    buffer = io.StringIO()
    print(USAGE, file=buffer)
    print("", file=buffer)

    terms = set(glossary.LONG_DESCR.keys())
    terms.update(self.plugins.keys('vergeml.cmd'))
    rng = random.Random()
    # seed from the current time (as a float, which random.seed accepts on all Python 3 versions)
    rng.seed(datetime.datetime.now().timestamp())
    random_term = rng.choice(list(terms))

    if env and env.model:
        print(f"Current Model: {env.get('model')}", file=buffer)
        print("", file=buffer)

    print("General Help:", file=buffer)
    for topic, descr in _GENERAL_HELP:
        print(" {:<16} {}".format(topic, descr), file=buffer)
    print("", file=buffer)

    print("Commands:", file=buffer)
    for cmd_name in self.plugins.keys('vergeml.cmd'):
        descr = Command.discover(self.plugins.get('vergeml.cmd', cmd_name)).descr
        print(" {:<16} {}".format(cmd_name, descr), file=buffer)
    print("", file=buffer)

    if env and env.model:
        print("Model Commands:", file=buffer)
        for fn in Command.find_functions(env.model):
            cmd = Command.discover(fn)
            print(" {:<16} {}".format(cmd.name, cmd.descr), file=buffer)
        print("", file=buffer)

    if not short:
        print("See 'ml help <command>' or 'ml help <topic>' to read about a specific subcommand or topic.",
              file=buffer)
        print(f"For example, try 'ml help {random_term}'", file=buffer)

    return buffer.getvalue().strip()
def __init__(self,
             model=None,
             project_file=None,
             samples_dir=None,
             test_split=None,
             val_split=None,
             cache_dir=None,
             random_seed=None,
             trainings_dir=None,
             project_dir=None,
             AI=None,
             is_global_instance=False,
             config=None,
             plugins=PLUGINS,
             display=DISPLAY):
    """Configure, train and save the results.

    :param model:              Name of the model plugin.
    :param project_file:       Optional path to the project file.
    :param samples_dir:        The directory where samples can be found. [default: samples]
    :param test_split:         The test split. [default: 10%]
    :param val_split:          The val split. [default: 10%]
    :param cache_dir:          The directory used for caching. [default: .cache]
    :param random_seed:        Random seed. [default: 2204]
    :param trainings_dir:      The directory to save training results to. [default: trainings]
    :param project_dir:        The directory of the project. [default: current directory]
    :param AI:                 Optional name of a trained AI.
    :param is_global_instance: If True, this env can be accessed via the global variable env.ENV.
                               [default: False]
    :param config:             Additional configuration to pass to the env, e.g. when not using a
                               project file.
    """
    super().__init__()

    # when called from the command line, we need to have a global instance
    if is_global_instance:
        global ENV
        ENV = self

    # set up the display
    self.display = display

    # set the name of the AI if given
    self.AI = AI

    # this holds the model object (not the name of the model)
    self.model = None

    # the results class (responsible for updating data.yaml with the latest results during training)
    self.results = None

    # when a training is started, this holds the object responsible for coordinating the training
    self.training = None

    # holds a proxy to the data loader
    self._data = None

    self.plugins = plugins

    # avoid a shared mutable default argument
    config = config or {}

    # set up the base options from constructor arguments
    self._config = {}
    self._config['samples-dir'] = samples_dir
    self._config['test-split'] = test_split
    self._config['val-split'] = val_split
    self._config['cache-dir'] = cache_dir
    self._config['random-seed'] = random_seed
    self._config['trainings-dir'] = trainings_dir
    self._config['model'] = model

    validators = {}

    # add validators for commands
    for k, v in plugins.all('vergeml.cmd').items():
        cmd = Command.discover(v)
        validators[cmd.name] = ValidateOptions(cmd.options, k, plugins=plugins)

    # Now it gets a bit tricky - we need to peek at the model name
    # to find the right validators to create for model commands.
    peek_model_name = model
    peek_trainings_dir = trainings_dir

    # to do this, we first have to have a look at the project file
    try:
        project_doc = load_yaml_file(project_file) if project_file else {}

        # only update the model name if empty (the project file does not override the command line)
        peek_model_name = peek_model_name or project_doc.get('model', None)

        # pick up trainings-dir in the same way
        peek_trainings_dir = peek_trainings_dir or project_doc.get('trainings-dir', None)

        # if we don't have a trainings dir yet, set it to the default
        peek_trainings_dir = peek_trainings_dir or os.path.join(project_dir or "", "trainings")

        # now, try to load the data.yaml file and see if we have a model definition there
        data_doc = load_yaml_file(peek_trainings_dir, AI, "data.yaml") if AI else {}

        # if we do, this overrides everything, including the name from the command line
        peek_model_name = data_doc.get('model', peek_model_name)

        # finally, if we have a model name, set up validators for its commands
        if peek_model_name:
            for fn in Command.find_functions(plugins.get("vergeml.model", peek_model_name),
                                             plugins=plugins):
                cmd = Command.discover(fn)
                validators[cmd.name] = ValidateOptions(cmd.options, cmd.name, plugins)
    except Exception:
        # in this case we don't care if something went wrong - the error
        # will be reported later
        pass

    # finally, validators for the device and data sections
    validators['device'] = ValidateDevice('device', plugins)
    validators['data'] = ValidateData('data', plugins)

    # merge the project file
    if project_file:
        doc = _load_and_configure(project_file, 'project file', validators)

        # the project file DOES NOT override values passed to the environment
        # TODO reserved: hyperparameters and results
        for k, v in doc.items():
            if k not in self._config or self._config[k] is None:
                self._config[k] = v

    # after the project file is loaded, fill in missing values
    project_dir = project_dir or ''
    defaults = {
        'samples-dir': os.path.join(project_dir, "samples"),
        'test-split': '10%',
        'val-split': '10%',
        'cache-dir': os.path.join(project_dir, ".cache"),
        'random-seed': 2204,
        'trainings-dir': os.path.join(project_dir, "trainings"),
    }
    for k, v in defaults.items():
        if self._config[k] is None:
            self._config[k] = v

    # verify split values
    for split in ('val-split', 'test-split'):
        spltype, splval = parse_split(self._config[split])
        if spltype == 'dir':
            path = os.path.join(project_dir, splval)
            if not os.path.exists(path):
                raise VergeMLError(
                    f"Invalid value for option {split} - no such directory: {splval}",
                    f"Please set {split} to a percentage, number or directory.",
                    hint_key=split,
                    hint_type='value',
                    help_topic='split')
            self._config[split] = path

    # data_file needs to live in the outer scope for error reporting later
    data_file = None

    if self.AI:
        ai_path = os.path.join(self._config['trainings-dir'], self.AI)

        if not os.path.exists(ai_path):
            raise VergeMLError("AI not found: {}".format(self.AI))

        # merge data.yaml
        data_file = os.path.join(self._config['trainings-dir'], self.AI, 'data.yaml')

        if not os.path.exists(data_file):
            raise VergeMLError("data.yaml file not found for AI {}: {}".format(self.AI, data_file))

        doc = load_yaml_file(data_file, 'data file')
        self._config['hyperparameters'] = doc.get('hyperparameters', {})
        self._config['results'] = doc.get('results', {})
        self._config['model'] = doc.get('model')
        self.results = _Results(self, data_file)

    try:
        # merge the device and data config
        self._config.update(apply_config(config, validators))
    except VergeMLError as e:
        # improve the error message when this runs on the command line
        if is_global_instance and e.hint_key:
            key = e.hint_key
            e.message = f"Option --{key}: " + e.message
        raise e

    if self._config['model']:
        # load the model plugin
        modelname = self._config['model']
        self.model = plugins.get("vergeml.model", modelname)

        if not self.model:
            message = f"Unknown model name '{modelname}'"
            suggestion = did_you_mean(plugins.keys('vergeml.model'), modelname) or "See 'ml help models'."

            if model and is_global_instance:
                # the model was passed in via --model
                message = f"Invalid value for option --model: {message}"
            else:
                res = None

                if not res and data_file:
                    # first check if the model was defined in the data file
                    res = _check_definition(data_file, 'model', 'value')

                if not res and project_file:
                    # next, check the project file
                    res = _check_definition(project_file, 'model', 'value')

                if res:
                    filename, definition = res
                    line, column, length = definition

                    # display a nice error message
                    message = display_err_in_file(filename, line, column,
                                                  f"{message} {suggestion}", length)

                    # set suggestion to None since it is now contained in the message
                    suggestion = None

            raise VergeMLError(message, suggestion)
        else:
            # instantiate the model plugin
            self.model = self.model(modelname, plugins)

    # update the env from validators
    for _, plugin in validators.items():
        for k, v in plugin.values.items():
            self._config[k] = v

    # always set up numpy and python
    self.configure('python')
    self.configure('numpy')
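# Illustrative sketch (not part of the original source): constructing the environment
# directly. The class name `Environment` and the model plugin name 'imagenet' are
# assumptions for illustration; the keyword arguments and the defaults that get filled
# in ('samples-dir', '10%' splits, '.cache', seed 2204, 'trainings-dir') come from the
# constructor above.
#
#     env = Environment(model='imagenet',          # hypothetical model plugin name
#                       project_dir='.',
#                       is_global_instance=True)   # also exposes the instance as env.ENV
#     env.get('samples-dir')                       # -> './samples' unless overridden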
def format_topics(self, env):
    buffer = io.StringIO()

    print("General Help:", file=buffer)
    for topic, descr in _GENERAL_HELP:
        print(" {:<16} {}".format(topic, descr), file=buffer)
    print("", file=buffer)

    print("Commands:", file=buffer)
    for cmd_name in self.plugins.keys('vergeml.cmd'):
        descr = Command.discover(self.plugins.get('vergeml.cmd', cmd_name)).descr
        print(" {:<16} {}".format(cmd_name, descr), file=buffer)
    print("", file=buffer)

    if env and env.model:
        print("Model Commands:", file=buffer)
        for fn in Command.find_functions(env.model):
            cmd = Command.discover(fn)
            print(" {:<16} {}".format(cmd.name, cmd.descr), file=buffer)
        print("", file=buffer)

    print("Configuration:", file=buffer)
    for topic, descr in _CONFIGURATION_HELP:
        print(" {:<16} {}".format(topic, descr), file=buffer)
    print("", file=buffer)

    inputs = []
    for k in self.plugins.keys('vergeml.io'):
        plugin = self.plugins.get('vergeml.io', k)
        source = Source.discover(plugin)
        inputs.append((k, source.descr))

    print("Data Input:", file=buffer)
    print(_get_table(inputs, IND=2, colon=False), file=buffer)
    print("", file=buffer)

    ops = {}
    for k in self.plugins.keys('vergeml.operation'):
        plugin = self.plugins.get('vergeml.operation', k)
        op = Operation.discover(plugin)
        topic = op.topic or "general"
        descr = op.descr
        ops.setdefault(topic, [])
        ops[topic].append((k, descr))

    for k, v in sorted(ops.items()):
        topic = k.capitalize()
        print(f"{topic} Operations:", file=buffer)
        print(format_info_text(_get_table(v), indent=2), file=buffer)
        print("", file=buffer)

    models = []
    for name in self.plugins.keys("vergeml.model"):
        plugin = self.plugins.get('vergeml.model', name)
        model = Model.discover(plugin)
        models.append((name, model.descr))

    if models:
        print(_get_table(models), file=buffer)

    print("Glossary:", file=buffer)
    items = ", ".join(glossary.LONG_DESCR.keys())
    print(format_info_text(items, indent=2), file=buffer)

    return buffer.getvalue().strip()
def get_help(self, env=None, topic="", short=False):
    if topic:
        model_commands = {}
        if env and env.model:
            for fn in Command.find_functions(env.model):
                cmd = Command.discover(fn)
                model_commands[cmd.name] = cmd

        if topic == "-a":
            return self.format_topics(env)

        # show VergeML options
        elif topic == "options":
            return self.format_options()

        # display the glossary
        elif topic == "glossary":
            return self.format_glossary()

        # show available models
        elif topic == "models":
            return _with_header(self.format_models(), help="models", topic=topic)

        # explain the data.input section
        elif topic == "input":
            return _with_header(self.format_input_list(), help="configuration", topic=topic)

        # explain the data.cache section
        elif topic == "cache":
            return _with_header(format_info_text(_CACHE_HELP), help="configuration", topic=topic)

        # explain the data.output section
        elif topic == "output":
            return _with_header(format_info_text(_OUTPUT_HELP), help="configuration", topic=topic)

        # explain preprocessing
        elif topic in ("preprocess", "preprocessing"):
            return _with_header(self.format_preprocessing_list(), help="configuration", topic=topic)

        # explain the data section
        elif topic == "data":
            return _with_header(format_info_text(_DATA_HELP.strip()), help="configuration", topic=topic)

        # explain the device section
        elif topic == "device":
            return _with_header(format_info_text(_DEVICE_HELP.strip()), help="configuration", topic=topic)

        # show a random robot
        elif topic == "random robot":
            robot = ascii_robot(datetime.datetime.now(), random_robot_name(datetime.datetime.now()))
            return f"\n{robot}\n"

        elif ":" in topic and topic.split(":", 1)[0] in self.plugins.keys('vergeml.cmd'):
            command, subcommand = topic.split(":")
            cmd = Command.discover(self.plugins.get('vergeml.cmd', command))
            subcommand_option = next(filter(lambda o: bool(o.subcommand), cmd.options), None)

            if not subcommand_option:
                raise VergeMLError(f"{command} takes no subcommand", help_topic=command)

            plugin = self.plugins.get(subcommand_option.subcommand, subcommand)

            if not plugin:
                raise VergeMLError(f"Invalid {subcommand_option.name}", help_topic=command)

            cmd = Command.discover(plugin)
            return cmd.usage(short, parent_command=command)

        # display model command help
        elif topic in model_commands:
            return model_commands[topic].usage(short)

        # display command help
        elif topic in self.plugins.keys('vergeml.cmd'):
            cmd = Command.discover(self.plugins.get('vergeml.cmd', topic))
            return cmd.usage(short)

        elif topic in self.plugins.keys('vergeml.operation'):
            return _with_header(self.format_source_or_operation(topic, 'vergeml.operation', Operation),
                                help="preprocessing operation", topic=topic)

        elif topic in self.plugins.keys('vergeml.io'):
            return _with_header(self.format_source_or_operation(topic, 'vergeml.io', Source),
                                help="data source", topic=topic)

        elif topic in self.plugins.keys('vergeml.model'):
            return _with_header(self.format_model(topic), help="models", topic=topic)

        # show a glossary entry
        elif glossary.long_descr(topic):
            topic = glossary.SYNONYMS.get(topic, topic)
            return _with_header(format_info_text(glossary.long_descr(topic)),
                                help="glossary", topic=topic)

        # show base options help
        elif topic in dict(HELP_OPTIONS):
            return _with_header(format_info_text(dict(HELP_OPTIONS).get(topic)),
                                help="base options", topic=topic)

        else:
            candidates = set()
            candidates.update(map(lambda h: h[0], _GENERAL_HELP))
            candidates.update(self.plugins.keys("vergeml.cmd"))
            candidates.update(map(lambda h: h[0], _CONFIGURATION_HELP))
            candidates.update(self.plugins.keys("vergeml.io"))
            candidates.update(self.plugins.keys("vergeml.operation"))
            candidates.update(self.plugins.keys("vergeml.model"))

            if env and env.model:
                for fn in Command.find_functions(env.model):
                    cmd = Command.discover(fn)
                    candidates.add(cmd.name)

            candidates.update(glossary.LONG_DESCR.keys())
            candidates.update(glossary.SYNONYMS.keys())

            suggestion = did_you_mean(list(candidates), topic)

            if suggestion:
                return f"No help found for topic '{topic}'. " + suggestion
            else:
                return f"No help found for topic '{topic}'."
    else:
        return self.format_general_help(env, short=short)
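# Illustrative usage (not part of the original source): the dispatcher above is what
# backs 'ml help <topic>'. The topic strings come from the branches above, and
# HelpCommand('help') mirrors how main() constructs the command.
#
#     help_cmd = HelpCommand('help')
#     print(help_cmd.get_help(env, "models"))   # lists the available models
#     print(help_cmd.get_help(env, "device"))   # explains the device section
#     print(help_cmd.get_help(env))             # empty topic -> format_general_help()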
def run(argv, plugins=PLUGINS):
    try:
        argv = _forgive_wrong_option_order(argv)
        args, config, rest = _parsebase(argv, plugins=plugins)
    except getopt.GetoptError as err:
        if err.opt:
            opt = err.opt.lstrip("-")
            dashes = '-' if len(opt) == 1 else '--'
            raise VergeMLError(f"Invalid option {dashes}{opt}.", help_topic='options')
        else:
            raise VergeMLError("Invalid option.", help_topic='options')

    if 'version' in args:
        print_version()
        exit()

    args = _prepare_args(args)

    ai_names, after_names = parse_ai_names(rest)
    AI = next(iter(ai_names), None)

    env = _env_from_args(args, config, AI, plugins=plugins)

    if after_names:
        cmdname = after_names.pop(0)
    else:
        cmdname = 'help'
        rest = ['help']

    if ":" in cmdname:
        cmdname = cmdname.split(":")[0]

    plugin = None
    cmd_plugin = plugins.get('vergeml.cmd', cmdname)

    if cmd_plugin:
        plugin = cmd_plugin(cmdname, plugins=plugins)
    elif env.model:
        for model_fn in Command.find_functions(env.model):
            if cmdname == Command.discover(model_fn).name:
                plugin = model_fn
                break

    if not plugin:
        # collect all possible command names
        command_names = set(plugins.keys('vergeml.cmd'))

        if env.model:
            model_commands = set(map(lambda f: Command.discover(f).name,
                                     Command.find_functions(env.model)))
            command_names.update(model_commands)

        raise VergeMLError(f"Invalid command '{cmdname}'.",
                           suggestion=did_you_mean(command_names, cmdname),
                           help_topic='*help*')

    try:
        env_conf = env.get(cmdname) or {}
        cmd = Command.discover(plugin)
        assert cmd

        args = cmd.parse(rest, env_conf)

        if not cmd.free_form:
            # set defaults
            for opt in cmd.options:
                if opt.name not in args:
                    args[opt.name] = opt.default

            # merge back into the env
            for k, v in args.items():
                env.set(f"{cmdname}.{k}", v)

            env.set("command", cmdname)
            env.set_defaults(cmdname, args, plugins=plugins)

        try:
            # return the result for unit testing
            return plugin(args, env)
        finally:
            if env.training is not None:
                env.cancel_training()
    except KeyboardInterrupt:
        # silence the stacktrace
        pass
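# Illustrative usage (not part of the original module): `run` parses the argument list,
# resolves the command plugin and returns whatever that command returns, which is what
# unit tests rely on (see the comment above). 'frobnicate' is a made-up command name
# used only to show the error path.
#
#     result = run(["help"])     # dispatches to the 'help' command and returns its result
#     run(["frobnicate"])        # unknown command -> raises VergeMLError with a suggestion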