def write_preview(self, output_dir: str, split: str, sample: Sample):
    """Save a sample image and link it into one directory per label.

    The image is written below ``<output_dir>/.data``. For every label in
    ``sample.y`` a symlink to that image is created in
    ``<output_dir>/<split>/<label>`` so that split and label are visible
    in a file manager.

    :param output_dir: root directory of the preview
    :param split: name of the split, used as a directory name
    :param sample: the sample to write
    :raises VergeMLError: if ``sample.x`` is not an ``ImageType`` or
        ``sample.y`` is not a ``Labels`` instance
    """
    # make sure x and y have the right types
    image = sample.x
    if not isinstance(image, ImageType):
        raise VergeMLError("Can't write sample with type: {}".format(
            type(image)))

    truth = sample.y
    if not isinstance(truth, Labels):
        raise VergeMLError("Can't write ground truth with type: {}".format(
            type(truth)))

    # the image itself lives in the hidden .data directory
    name = fixext(os.path.basename(sample.meta['filename']), image)
    path = self.preview_filename(os.path.join(output_dir, ".data", name))
    image.save(path)

    # one symlink per label, pointing back at the stored image
    for label in truth:
        link_dir = os.path.join(output_dir, split, label)
        if not os.path.exists(link_dir):
            os.makedirs(link_dir)
        link_path = self.preview_filename(os.path.join(link_dir, name))
        os.symlink(os.path.abspath(path), link_path)
def transform_xy(self, x, y, rng):
    """Crop x and/or y at the same random position.

    Only values that are instances of ``ImageType`` are cropped; other
    values are returned unchanged. The crop window is ``self.width`` by
    ``self.height`` and is chosen so that it fits into the smallest image.

    :param x: sample input
    :param y: sample ground truth
    :param rng: random number generator providing ``randint(a, b)``
    :return: the tuple ``(x, y)`` after cropping
    :raises VergeMLError: if neither value is an image or an image is
        smaller than the crop size
    """
    images = [item for item in (x, y) if isinstance(item, ImageType)]
    if len(images) == 0:
        raise VergeMLError("random_crop needs samples of type image")

    # the smallest image limits where the crop window can be placed
    min_width = min(image.size[0] for image in images)
    min_height = min(image.size[1] for image in images)

    if self.width > min_width:
        raise VergeMLError("Can't crop sample with width {} to {}.".format(
            min_width, self.width))
    if self.height > min_height:
        raise VergeMLError(
            "Can't crop sample with height {} to {}.".format(
                min_height, self.height))

    left = rng.randint(0, min_width - self.width)
    top = rng.randint(0, min_height - self.height)
    box = left, top, left + self.width, top + self.height

    cropped = [item.crop(box) if isinstance(item, ImageType) else item
               for item in (x, y)]
    return cropped[0], cropped[1]
def load_yaml_file(filename, label='config file', loader=yaml.Loader):
    """Read a YAML file and return its content as a dictionary.

    An empty document yields an empty dictionary.

    NOTE(review): ``yaml.load`` with the default ``yaml.Loader`` can
    construct arbitrary Python objects - only use it on trusted files.

    :param filename: path of the file to load
    :param label: human readable name of the file, used in error messages
    :param loader: the YAML loader class passed to ``yaml.load``
    :return: the parsed document
    :raises VergeMLError: if the file can't be opened, contains a YAML
        error or does not consist of key value pairs
    """
    try:
        with open(filename, "r") as file:
            content = file.read()
        res = yaml.load(content, Loader=loader) or {}
        if isinstance(res, dict):
            return res
        msg = f"Please ensure that {label} consists of key value pairs."
        raise VergeMLError(f"Invalid {label}: {filename}", msg)
    except yaml.YAMLError as err:
        if hasattr(err, 'problem_mark'):
            # the error carries a position - show the offending location
            mark = getattr(err, 'problem_mark')
            problem = getattr(err, 'problem')
            excerpt = display_err_in_file(filename, mark.line, mark.column, problem)
            message = f"Could not read {label} {filename}:" + "\n" + excerpt
        elif hasattr(err, 'problem'):
            problem = getattr(err, 'problem')
            message = f"Could not read {label} {filename}: {problem}"
        else:
            message = f"Could not read {label} {filename}: YAML Error"

        suggestion = f"There is a syntax error in your {label} - please fix it and try again."
        raise VergeMLError(message, suggestion)
    except OSError as err:
        msg = "Please ensure the file exists and you have the required access privileges."
        raise VergeMLError(
            f"Could not open {label} {filename}: {err.strerror}", msg)
def __init__(self, width: int, height: int, x: int = None, y: int = None,
             position: str = "center", apply=None):
    """Configure a crop to a fixed size.

    The crop window is located either by absolute coordinates (``x`` and
    ``y``) or by a named ``position``.

    :param width: width of the crop window
    :param height: height of the crop window
    :param x: absolute x coordinate of the window; requires ``y``
    :param y: absolute y coordinate of the window; requires ``x``
    :param position: one of top-left, top-right, bottom-left, bottom-right
        or center
    :param apply: passed through to the base class constructor
    :raises VergeMLError: if only one of ``x``/``y`` is given or
        ``position`` is not a valid name
    """
    super().__init__(apply)

    # Test against None, not truthiness: 0 is a valid coordinate, so e.g.
    # x=0, y=10 must be accepted while x=0 without y must be rejected.
    if (x is None) != (y is None):
        raise VergeMLError(
            "Must specify both x and y when using absolute coordinates")

    valid_positions = ("top-left", "top-right", "bottom-left",
                       "bottom-right", "center")
    if position not in valid_positions:
        raise VergeMLError("position must be one of: {}".format(
            ", ".join(valid_positions)))

    self.width = width
    self.height = height
    self.x = x
    self.y = y
    self.position = position
def _parse_subcommand(self, argv, rest):
    """Delegate parsing to a subcommand plugin, if this command has one.

    A subcommand is written as ``<command>:<sub>`` in the first element
    of ``rest``. When one of ``self.options`` declares a subcommand, the
    matching plugin is looked up and parses a copy of ``argv`` in which
    that element is replaced by the bare sub name.

    :param argv: the full argument list
    :param rest: the arguments starting at the command name
    :return: the parsed arguments of the subcommand, or ``None`` if this
        command has no subcommand option
    :raises VergeMLError: if the sub name is missing or unknown, or if
        the subcommand fails to parse
    """
    sub_option = next((opt for opt in self.options if bool(opt.subcommand)), None)
    if not sub_option:
        return None

    head = rest[0]
    if ":" not in head:
        raise VergeMLError(f"Missing {sub_option.name}.", help_topic=self.name)

    cmd_name, sub_name = head.split(":", 1)
    assert cmd_name == self.name

    # work on a copy so the caller's argv stays untouched
    argv = deepcopy(argv)
    argv[argv.index(head)] = sub_name

    plugin = self.plugins.get(sub_option.subcommand, sub_name)
    if not plugin:
        raise VergeMLError(f"Invalid {sub_option.name}.", help_topic=self.name)

    cmd = Command.discover(plugin)
    try:
        res = cmd.parse(argv)
        res[sub_option.name] = sub_name
        return res
    except VergeMLError as err:
        # point the help topic at the subcommand that failed
        err.help_topic = f"{cmd_name}:{sub_name}"
        raise err
def __call__(self, args, env):
    """Start tensorboard for the requested trainings.

    Every non-hidden directory below the trainings directory that has a
    ``stats`` directory containing a file starting with ``events.out``
    counts as having tensorboard stats. Output lines of the tensorboard
    process are printed; when a line contains an ``http:`` URL, that URL
    is opened in the web browser.

    :param args: command arguments; reads ``@AIs``, ``port`` and the
        optional ``host``
    :param env: environment; reads ``trainings-dir``
    :raises VergeMLError: if there are no trainings, a requested AI has
        no stats, or the tensorboard executable is not found
    """
    trainings_dir = env.get('trainings-dir')
    if not os.path.exists(trainings_dir):
        raise VergeMLError(
            "No trainings found.",
            "To run tensorboard, please train an AI first.")

    ais_with_tbstats = []
    for entry in os.listdir(trainings_dir):
        if entry.startswith("."):
            continue
        stats_dir = os.path.join(trainings_dir, entry, "stats")
        if not os.path.exists(stats_dir):
            continue
        if any(fname.startswith("events.out") for fname in os.listdir(stats_dir)):
            ais_with_tbstats.append(entry)

    # fall back to all trainings with stats when none were requested
    AIs = args["@AIs"] or ais_with_tbstats
    if not AIs:
        raise VergeMLError(
            "No trainings found.",
            "To run tensorboard, please train an AI first.")

    for AI in AIs:
        if AI not in ais_with_tbstats:
            raise VergeMLError(
                "Not tensorboard stats found for @{}".format(AI))

    # tensorboard takes a comma separated list of name:path pairs
    logdirs = ",".join(
        AI + ":" + os.path.join(trainings_dir, AI, "stats") for AI in AIs)

    cmd = ["tensorboard", "--logdir", logdirs, "--port", str(args['port'])]
    if 'host' in args and args['host']:
        cmd.extend(['--host', args['host']])

    try:
        for raw_line in _run_command(cmd):
            line = raw_line.decode('utf-8').rstrip()
            match = re.match(r".*?(http:[^ ]*)", line)
            if match:
                webbrowser.open(match.group(1))
            print(line)
    except FileNotFoundError:
        raise VergeMLError(
            "Command 'tensorboard' not found.",
            "Please install tensorboard (pip install tensorboard)")
def samples_dir(self):
    """Return the configured samples directory.

    :return: the value of ``samples-dir`` from the configuration
    :raises VergeMLError: if the path does not exist or is not a directory
    """
    samples_dir = self._config['samples-dir']

    if os.path.isdir(samples_dir):
        return samples_dir

    # the path is not a directory: either something else is there or nothing
    if os.path.exists(samples_dir):
        raise VergeMLError(f'Configured samples-dir is not a directory: {samples_dir}')
    raise VergeMLError(f'Could not find samples directory: {samples_dir}')
def evaluate_args(cnn, trainings_dir, variant, alpha, size):
    """Validate the CNN related arguments.

    ``cnn`` is either the name of a known architecture or ``@<name>``,
    referring to a previously trained model whose ``model.h5`` checkpoint
    must exist below ``trainings_dir``.

    :param cnn: architecture name or ``@<trained model>``
    :param trainings_dir: directory containing the trainings
    :param variant: variant for densenet and nasnet, or ``*auto*``
    :param alpha: alpha value for mobilenet and mobilenet-v2
    :param size: input size for mobilenet and mobilenet-v2, or ``*auto*``
    :raises VergeMLError: if one of the arguments is invalid
    """
    if cnn.startswith('@'):
        checkpoint = os.path.join(
            trainings_dir, cnn.lstrip('@'), 'checkpoints', 'model.h5')
        if not os.path.isfile(checkpoint):
            raise VergeMLError("Unknown CNN: {}".format(cnn))
    elif cnn not in ARCHITECTURES:
        raise VergeMLError("Unknown CNN: {}".format(cnn))

    if cnn == 'densenet':
        if variant == '*auto*':
            variant = DENSENET_VARIANTS[0]
        if variant not in DENSENET_VARIANTS:
            raise VergeMLError("Invalid densenet variant: {}".format(variant))

    elif cnn in ('mobilenet', 'mobilenet-v2'):
        # both mobilenet generations are checked alike, against their own tables
        if cnn == 'mobilenet':
            sizes, alphas = MOBILENET_SIZES, MOBILENET_ALPHA_VALUES
        else:
            sizes, alphas = MOBILENET_V2_SIZES, MOBILENET_V2_ALPHA_VALUES

        if size not in sizes and size != "*auto*":
            raise VergeMLError("Invalid mobilenet size: {}".format(size))
        if alpha not in alphas:
            raise VergeMLError("Invalid alpha value: {}".format(alpha))

    elif cnn == 'nasnet':
        if variant not in NASNET_VARIANTS and variant != '*auto*':
            raise VergeMLError("Invalid nasnet variant: {}".format(variant))
def _parse_args(args, env):
    """Parse the arguments of the list command.

    Comparison triples (``<left> -gt <right>`` and friends) are cut out of
    the argument list before the remaining arguments are handed to the
    regular parser of the list command. Missing arguments are then filled
    in from the configuration and from the option defaults.

    Note that the list in ``args[1]`` is modified in place.

    :param args: sequence whose second element is the argument list
    :param env: environment used to read the command configuration
    :return: a tuple of (parsed arguments, list of comparison triples)
    :raises VergeMLError: if a comparison operator lacks an operand
    """
    args = args[1]
    operators = ('-gt', '-lt', '-eq', '-neq', '-gte', '-lte')

    comps = []
    for idx, arg in enumerate(args):
        if arg not in operators:
            continue
        start, end = idx - 1, idx + 1
        if start < 0 or end >= len(args):
            raise VergeMLError("Invalid options.", help_topic='list')
        comps.append((start, end))

    # remove from the back so that earlier indices stay valid
    cargs = []
    for start, end in reversed(comps):
        cargs.append(args[start:end+1])
        del args[start:end+1]

    cmd = deepcopy(Command.discover(ListCommand))
    cmd.free_form = False
    args.insert(0, 'list')
    args = cmd.parse(args)

    # If existent, read settings from the config file
    config = parse_command(cmd, env.get(cmd.name))

    # Set missing args from the config file
    for key, value in config.items():
        args.setdefault(key, value)

    # Set missing args from default
    for opt in cmd.options:
        if opt.name in args:
            continue
        if opt.default is not None or not opt.is_required():
            args[opt.name] = opt.default

    return args, cargs
def _load_yaml_and_configure(self, path, label, cache, device, device_memory): # pylint: disable=R0913
    """Load a YAML file and parse its device and data sections.

    When validation fails with an error that carries a hint key, the
    definition of that key is looked up in the file and the error message
    is replaced by an excerpt of the file pointing at it.

    :param path: path of the YAML file
    :param label: human readable name of the file, used in error messages
    :param cache: passed to ``parse_data``
    :param device: passed to ``parse_device`` as ``device_id``
    :param device_memory: passed to ``parse_device``
    :return: the loaded document with parsed ``device`` and ``data``
    :raises VergeMLError: if the file is invalid
    """
    doc = load_yaml_file(path, label)

    try:
        device_conf = doc.get('device', {})
        doc['device'] = parse_device(device_conf,
                                     device_id=device,
                                     device_memory=device_memory)

        data_conf = doc.get('data', {})
        doc['data'] = parse_data(data_conf, cache=cache, plugins=self.plugins)

        seed_ok = 'random-seed' not in doc or isinstance(doc['random-seed'], int)
        if not seed_ok:
            raise VergeMLError('Invalid value option random-seed.',
                               'random-seed must be an integer value.',
                               hint_type='value', hint_key='random-seed')
    except VergeMLError as err:
        # every path re-raises; the message is only decorated when possible
        if err.hint_key:
            with open(path) as file:
                definition = yaml_find_definition(file, err.hint_key, err.hint_type)
            if definition:
                line, column, length = definition
                err.message = display_err_in_file(path, line, column, str(err), length)
                # clear suggestion because it is already contained in the error message.
                err.suggestion = None
        raise

    return doc
def split(self, num_samples: int):
    """Split the dataset in train, val and test sets by percentage or absolute count.

    The size of the val and test split is taken from ``self.val_perc`` /
    ``self.test_perc`` (a percentage of ``num_samples``, rounded down) or,
    if no percentage is set, from the absolute ``self.val_num`` /
    ``self.test_num``. The indices are drawn in random order from a
    generator seeded with ``self.random_seed``, so the result is
    reproducible for a given seed.

    :param num_samples: the total number of samples
    :return: a tuple of index lists for (train, val, test)
    :raises VergeMLError: if val and test together need more samples than
        are available
    """
    val_num = int(num_samples * self.val_perc // 100) if self.val_perc else self.val_num or 0
    test_num = int(num_samples * self.test_perc // 100) if self.test_perc else self.test_num or 0

    if val_num + test_num > num_samples:
        # Point the error at the absolute setting that is too large. The
        # key is the option name and the type says what to highlight,
        # matching the hint_type='value' / hint_key=<option> convention
        # used elsewhere in this file.
        hint_key = None
        hint_type = None
        if self.val_num:
            hint_key = 'val-split'
            hint_type = 'value'
        elif self.test_num:
            hint_key = 'test-split'
            hint_type = 'value'

        raise VergeMLError("There are not enough samples to provide the configured number for the val and test split",
                           "If you use absolute numbers for 'val-split' or 'test-split', try to lower them",
                           help_topic='split',
                           hint_key=hint_key,
                           hint_type=hint_type)

    rng = random.Random(self.random_seed)
    # a full-length sample is a random permutation of all indices
    indices = rng.sample(range(num_samples), num_samples)

    val_end = val_num
    test_end = val_num + test_num
    val, test, train = indices[:val_end], indices[val_end:test_end], indices[test_end:]
    return train, val, test
def __call__(self, args, env):
    """Download the cats and dogs dataset into the samples directory.

    The archive is downloaded and extracted into a temporary source
    directory; the dog and cat images are then copied to
    ``<samples-dir>/dog`` and ``<samples-dir>/cat`` and the source
    directory is removed.

    :param args: command arguments (not used here)
    :param env: environment; reads ``samples-dir``
    :raises VergeMLError: if a ``cat`` or ``dog`` directory already
        exists in the samples directory
    """
    samples_dir = env.get('samples-dir')

    # refuse to overwrite existing samples
    for label in ("cat", "dog"):
        dest = os.path.join(samples_dir, label)
        if os.path.exists(dest):
            raise VergeMLError(
                "Directory {} already exists in samples dir: {}".format(
                    label, dest))

    print("Downloading cats and dogs to {}.".format(samples_dir))

    src_dir = self.download_files([(_URL, "catsdogs.zip")], env)
    path = os.path.join(src_dir, "catsdogs.zip")

    print("Extracting data.")

    # the context manager closes the archive even if extraction fails
    with zipfile.ZipFile(path, 'r') as zipf:
        zipf.extractall(src_dir)

    for file, dest in (("PetImages/Dog", "dog"), ("PetImages/Cat", "cat")):
        shutil.copytree(os.path.join(src_dir, file),
                        os.path.join(samples_dir, dest))

    shutil.rmtree(src_dir)

    # NOTE(review): these two files are removed unconditionally -
    # presumably they are unusable images in the archive; confirm.
    os.unlink(os.path.join(samples_dir, "cat", "666.jpg"))
    os.unlink(os.path.join(samples_dir, "dog", "11702.jpg"))

    print("Finished downloading cats and dogs.")
def _invalid_option(key, help_topic=None, suggestion=None, kind='value'):
    """Build (but do not raise) the error for an invalid option.

    :param key: name of the offending option; also used as hint key
    :param help_topic: optional help topic attached to the error
    :param suggestion: optional suggestion shown with the error
    :param kind: ``'value'`` when the value is invalid; anything else
        marks the option itself as invalid. Also used as hint type.
    :return: a ``VergeMLError`` instance
    """
    if kind == 'value':
        label = "Invalid value for option"
    else:
        label = "Invalid option"

    return VergeMLError(f"{label} '{key}'.",
                        suggestion,
                        help_topic=help_topic,
                        hint_type=kind,
                        hint_key=key)
def __init__(self, variants, apply=None):
    """Configure the augment operation.

    :param variants: an integer, stored as ``self.variants``
    :param apply: passed through to the base class constructor
    :raises VergeMLError: if ``variants`` is not an integer
    """
    super().__init__(apply)

    if isinstance(variants, int):
        self.variants = variants
    else:
        raise VergeMLError("The parameter 'variants' of 'augment' must be of type 'int'.")
def _load_and_configure(file, label, validators):
    """Load a YAML file and apply the given validators to it.

    When validation fails with an error that carries a hint key, the
    definition of that key is looked up in the file and the error message
    is replaced by an excerpt of the file pointing at it.

    :param file: path of the YAML file
    :param label: human readable name of the file, used in error messages
    :param validators: validators passed to ``apply_config``
    :return: the validated document
    :raises VergeMLError: if the file is invalid
    """
    doc = load_yaml_file(file, label)

    try:
        doc = apply_config(doc, validators)

        seed_ok = 'random-seed' not in doc or isinstance(doc['random-seed'], int)
        if not seed_ok:
            raise VergeMLError('Invalid value option random-seed.',
                               'random-seed must be an integer value.',
                               hint_type='value', hint_key='random-seed')
    except VergeMLError as e:
        # every path re-raises; the message is only decorated when possible
        if e.hint_key:
            with open(file) as f:
                definition = yaml_find_definition(f, e.hint_key, e.hint_type)
            if definition:
                line, column, length = definition
                e.message = display_err_in_file(file, line, column, str(e), length)
                # clear suggestion because it is already contained in the formatted error message.
                e.suggestion = None
        raise

    return doc
def _get_classes_from_json(self):
    """Read ``labels.txt`` and ``classes.json`` from the samples directory.

    The non-empty, stripped lines of ``labels.txt`` become the labels
    stored in ``self.meta['labels']``. ``classes.json`` is stored in
    ``self.classes``; the keys of its ``files`` entry are turned into
    paths below the samples directory.

    :raises VergeMLError: if one of the two files is missing
    """
    for filename in ("labels.txt", "classes.json"):
        path = os.path.join(self.samples_dir, filename)
        if not os.path.exists(path):
            raise VergeMLError("{} is missing".format(filename))

        with open(path) as f:
            if filename == "labels.txt":
                stripped = (line.strip() for line in f.read().splitlines())
                labels = Labels(filter(None, stripped))
            else:
                self.classes = json.load(f)

    # prefix the sample with input_dir
    # (keys use / as separator on windows and linux alike)
    self.classes['files'] = {
        os.path.join(self.samples_dir, *key.split("/")): value
        for key, value in self.classes['files'].items()
    }
    self.meta['labels'] = labels
def _prepare_args(args):
    """Prepare args by appending the project dir and setting defaults.

    Works on a deep copy; the passed dictionary is not modified.

    * ``file`` is renamed to ``project-file``. When no file is given,
      ``vergeml.yaml`` in the project directory is used if it exists.
    * ``random-seed`` is converted to an integer.
    * ``cache``, ``device`` and ``device-memory`` are validated. The whole
      value has to match: e.g. ``cpu0`` or ``50%abc`` are rejected.

    :param args: dictionary of command line arguments
    :return: the prepared copy of ``args``
    :raises VergeMLError: if one of the values is invalid
    """
    args = deepcopy(args)

    project_dir = args.get('project-dir', '')

    if 'file' not in args:
        default_file = os.path.join(project_dir, "vergeml.yaml")
        if os.path.exists(default_file):
            args['file'] = default_file

    if 'file' in args:
        args['project-file'] = args.pop('file')

    if 'random-seed' in args:
        try:
            args['random-seed'] = int(args['random-seed'])
        except ValueError:
            # NOTE(review): the third positional argument is a tuple here,
            # while other call sites use hint_type=/hint_key= keywords -
            # confirm against the VergeMLError signature.
            raise VergeMLError("Invalid value for --random-seed.",
                               "--random-seed must be an integer value.",
                               ('value', 'random-seed'))

    cache_opts = ('none', 'disk', 'mem', 'disk-in', 'mem-in')
    if 'cache' in args:
        if args['cache'] not in cache_opts:
            raise VergeMLError("Invalid value for --cache.",
                               "Must be one of: " + ", ".join(cache_opts),
                               help_topic='cache')

    if 'device' in args:
        # fullmatch: a valid prefix followed by garbage must not pass
        if not re.fullmatch(r"gpu:[0-9]+|gpu|cpu|auto", args['device']):
            raise VergeMLError(
                "Invalid value for --device.",
                "Please specify a valid device, e.g gpu:0 or cpu.",
                help_topic='device')

    if 'device-memory' in args:
        if not re.fullmatch(r"(([1-9]?[0-9]|100)%|(0\.[0-9]+)|1\.0)|auto",
                            args['device-memory']):
            raise VergeMLError(
                "Invalid value for --device-memory.",
                "Please specify device memory as a percentage, e.g. 100%.",
                help_topic='device')

    return args
def __call__(self, args, env):
    """Run the download plugin for the requested dataset.

    :param args: command arguments; ``dataset`` selects the plugin
    :param env: environment; reads ``samples-dir``
    :raises VergeMLError: if the samples directory does not exist
    """
    samples_dir = env.get('samples-dir')

    if os.path.exists(samples_dir):
        # look up the plugin class, instantiate it and hand over control
        plugin_class = self.plugins.get('vergeml.download', args['dataset'])
        plugin = plugin_class()
        plugin(args, env)
    else:
        raise VergeMLError(
            "samples dir does not exist: {}".format(samples_dir))
def _setup_ops(self):
    """Set up ops from env.

    Reads the list in ``data.preprocess`` and instantiates one operation
    plugin per entry, stored in order in ``self.ops``. An entry is either
    a dictionary with an ``op`` key naming the operation plus its
    arguments, or a plain string naming an operation without arguments.

    :raises VergeMLError: if an entry has no name, the operation is
        unknown, or the arguments don't match the operation
    """
    # set up preprocessing operations
    self.ops = []

    for conf in self.env.get('data.preprocess') or []:
        if isinstance(conf, str):
            # Shorthand: store the name under 'op', the key it is read
            # from below. (It used to be stored under 'name', which made
            # every string entry fail with "Name missing".)
            conf = dict(op=conf)
        else:
            # copy, because the name is removed from the arguments
            conf = conf.copy()

        # every preprocessing operations needs a name property
        name = conf.pop('op', None)
        if not name:
            raise VergeMLError("Name missing in data.preprocess item.")

        # instantiate the preprocessing plugin
        plugin = self.plugins.get('vergeml.operation', name)
        if not plugin:
            raise VergeMLError(
                "preprocess plugin not found: {}".format(name))

        # check arguments; the first introspected argument is skipped
        # (presumably self - verify against introspect)
        intro = introspect(plugin)
        accepted = intro.args[1:]
        mandatory = set(accepted).difference(intro.defaults.keys())
        missing = mandatory.difference(conf.keys())
        unknown = set(conf.keys()).difference(accepted)

        # TODO type checking

        # report missing or unknown arguments
        if missing:
            msg = "preprocess operation {} is missing argument(s): {}"
            raise VergeMLError(msg.format(name, missing))

        if unknown:
            msg = "preprocess operation {} received unknown argument(s): {}"
            raise VergeMLError(msg.format(name, unknown))

        self.ops.append(plugin(**conf))
def _parse_opts(self, rest):
    """Parse command line options with getopt.

    Builds the getopt specification from ``self.options``: flags become
    long options without value, all other options take a value; short
    options take a value unless they are of type bool.

    :param rest: the arguments to parse
    :return: the tuple ``(args, extra)`` returned by ``getopt.getopt``
    :raises VergeMLError: if getopt rejects the arguments
    """
    long_specs = []
    short_spec = ""

    for opt in self.options:
        # Arguments and @names are dealt with elsewhere.
        if opt.is_at_option() or opt.is_argument_option():
            continue

        # Prepare getopt syntax for long options.
        if opt.flag:
            assert opt.has_type(str, bool)
        long_specs.append(opt.name if opt.flag else opt.name + "=")

        # Getopt for short options
        if not opt.short:
            continue
        assert opt.short not in short_spec
        short_spec += opt.short if opt.has_type(bool) else opt.short + ":"

    try:
        # Run getopt. Returns parsed arguments and leftover.
        args, extra = getopt.getopt(rest, short_spec, long_specs)
    except getopt.GetoptError as err:
        if not err.opt:
            raise VergeMLError(f"Invalid option.", help_topic=self.name)

        # in case of an error hint, display a nicer error message.
        candidates = list(short_spec.replace(":", ""))
        candidates += [spec.rstrip("=") for spec in long_specs]
        suggestion = did_you_mean(candidates, err.opt)
        dashes = '-' if len(err.opt) == 1 else '--'
        raise VergeMLError(f"Invalid option {dashes}{err.opt}", suggestion,
                           help_topic=self.name)

    return args, extra
def get_custom_architecture(name, trainings_dir, output_layer):
    """Load a trained model and cut it off at the given layer.

    :param name: name of the training, with or without leading ``@``
    :param trainings_dir: directory containing the trainings
    :param output_layer: index (int) or name of the layer whose output
        becomes the output of the returned model
    :return: a keras model from the original input to the chosen layer
    :raises VergeMLError: if the layer can't be found
    """
    from keras.models import load_model, Model

    checkpoint = os.path.join(
        trainings_dir, name.lstrip("@"), 'checkpoints', 'model.h5')
    model = load_model(checkpoint)

    by_index = isinstance(output_layer, int)
    try:
        layer = model.layers[output_layer] if by_index else model.get_layer(output_layer)
    except Exception:
        if by_index:
            raise VergeMLError(f'output-layer {output_layer} not found - model has only {len(model.layers)} layers.')

        # offer the closest layer name as a suggestion
        candidates = [l.name for l in model.layers]
        raise VergeMLError(f'output-layer named {output_layer} not found.',
                           suggestion=did_you_mean(candidates, output_layer))

    return Model(inputs=model.input, outputs=layer.output)
def __init__(self, env):
    """Collect the predict functions of the model.

    Every function found on ``env.model`` whose command is of kind
    ``predict`` is stored in ``self.fns`` under the command name, as a
    tuple of (command, function).

    :param env: the environment providing ``model`` and ``AI``
    :raises VergeMLError: if the model has no predict function
    """
    self.env = env
    self.fns = OrderedDict()

    for model_fn in Command.find_functions(env.model):
        cmd = Command.discover(model_fn)
        if cmd.kind != 'predict':
            continue
        self.fns[cmd.name] = (cmd, model_fn)

    if not self.fns:
        raise VergeMLError(f"@{env.AI} can't be run as a REST service.")
def get_imagenet_architecture(architecture, variant, size, alpha, output_layer,
                              include_top=False, weights='imagenet'):
    """Create a keras application model, optionally cut off at a layer.

    :param architecture: name of the architecture, e.g. ``resnet-50``
    :param variant: variant for densenet and nasnet, or ``auto``
    :param size: edge length of the square input, or ``auto`` to use
        ``get_image_size``
    :param alpha: alpha value, only used by mobilenet and mobilenet-v2
    :param output_layer: ``'last'`` to keep the whole model, otherwise the
        index (int) or name of the layer to use as output
    :param include_top: passed to the keras application; requires
        ``output_layer == 'last'``
    :param weights: passed to the keras application
    :return: the keras model
    :raises VergeMLError: if the architecture, the densenet variant or the
        output layer is unknown
    """
    from keras import applications, Model

    if include_top:
        assert output_layer == 'last'

    if size == 'auto':
        size = get_image_size(architecture, variant, size)

    shape = (size, size, 3)
    common = dict(weights=weights, include_top=include_top, input_shape=shape)

    # Constructors are looked up by name so that only the requested one
    # has to exist in the installed keras version.
    densenets = {
        'densenet-121': 'DenseNet121',
        'densenet-169': 'DenseNet169',
        'densenet-201': 'DenseNet201',
    }
    with_alpha = {
        'mobilenet': 'MobileNet',
        'mobilenet-v2': 'MobileNetV2',
    }
    plain = {
        'inception-resnet-v2': 'InceptionResNetV2',
        'resnet-50': 'ResNet50',
        'vgg-16': 'VGG16',
        'vgg-19': 'VGG19',
        'xception': 'Xception',
        'inception-v3': 'InceptionV3',
    }

    if architecture == 'densenet':
        if variant == 'auto':
            variant = 'densenet-121'
        if variant not in densenets:
            # previously this fell through and failed with an unbound 'model'
            raise VergeMLError("Invalid densenet variant: {}".format(variant))
        model = getattr(applications, densenets[variant])(**common)
    elif architecture in with_alpha:
        model = getattr(applications, with_alpha[architecture])(alpha=alpha, **common)
    elif architecture == 'nasnet':
        if variant == 'auto':
            variant = 'large'
        # every variant other than 'large' selects the mobile model
        nasnet_name = 'NASNetLarge' if variant == 'large' else 'NASNetMobile'
        model = getattr(applications, nasnet_name)(**common)
    elif architecture in plain:
        model = getattr(applications, plain[architecture])(**common)
    else:
        # previously this fell through and failed with an unbound 'model'
        raise VergeMLError("Unknown CNN: {}".format(architecture))

    if output_layer != 'last':
        try:
            if isinstance(output_layer, int):
                layer = model.layers[output_layer]
            else:
                layer = model.get_layer(output_layer)
        except Exception:
            raise VergeMLError('layer not found: {}'.format(output_layer))

        model = Model(inputs=model.input, outputs=layer.output)

    return model
def __call__(self, args, env):
    """Plot the precision-recall curve for one class.

    Loads the labels and the stored predictions via ``vergeml.plots``,
    selects the column of the requested class and shows the plot.

    :param args: command arguments; reads ``class`` and ``@AI``
    :param env: environment passed to ``load_labels`` and
        ``load_predictions``
    :raises VergeMLError: if labels or predictions are not available or
        the class is unknown
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.metrics import average_precision_score
    from sklearn.metrics import precision_recall_curve
    from vergeml.plots import load_labels, load_predictions

    try:
        labels = load_labels(env)
    except FileNotFoundError:
        raise VergeMLError("Can't plot PR curve - not supported by model.")

    nclasses = len(labels)
    if args['class'] not in labels:
        raise VergeMLError("Unknown class: " + args['class'])

    try:
        y_test, y_score = load_predictions(env, nclasses)
    except FileNotFoundError:
        raise VergeMLError("Can't plot PR curve - not supported by model.")

    # From:
    # https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html#sphx-glr-auto-examples-model-selection-plot-precision-recall-py

    ix = labels.index(args['class'])
    # The builtin int replaces the np.int alias, which was removed in
    # NumPy 1.24; both mean the same dtype.
    y_test = y_test[:, ix].astype(int)
    y_score = y_score[:, ix]

    precision, recall, _ = precision_recall_curve(y_test, y_score)
    average_precision = average_precision_score(y_test, y_score)

    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, alpha=0.2, color='b', step='post')

    plt.xlabel('Recall ({})'.format(args['class']))
    plt.ylabel('Precision ({})'.format(args['class']))
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall curve for @{0}: AP={1:0.2f}'.format(
        args['@AI'], average_precision))
    plt.show()
def __call__(self, args, env):
    """Write a preview of the samples to a directory.

    The directory is created if it does not exist yet.

    :param args: command arguments; reads ``<directory>``, ``split`` and
        ``num-samples``
    :param env: environment; ``env.data`` is passed to ``_preview``
    :raises VergeMLError: if ``_preview`` returns a falsy result
    """
    output_dir = args['<directory>']
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    supported = _preview(env.data, output_dir, args['split'], args['num-samples'])
    if supported:
        return
    raise VergeMLError("Command preprocess not supported.")
def transform(self, img, rng):
    """Crop an image to ``self.width`` by ``self.height``.

    When absolute coordinates are configured (``self.x`` and ``self.y``
    are both set, including the value 0), the crop window starts there.
    Otherwise the window is placed according to ``self.position``.

    :param img: the image; must provide ``size`` and ``crop(box)``
    :param rng: random number generator (not used by this operation)
    :return: the result of ``img.crop`` for the computed box
    :raises VergeMLError: if the image is too small for the crop window
    """
    width, height = img.size

    if width < self.width:
        raise VergeMLError("Can't crop sample with width {} to {}.".format(
            width, self.width))

    if height < self.height:
        raise VergeMLError(
            "Can't crop sample with height {} to {}.".format(
                height, self.height))

    # Test against None, not truthiness: an explicit x=0, y=0 is an
    # absolute position and must not fall back to self.position.
    if self.x is not None and self.y is not None:
        if width < self.width + self.x:
            raise VergeMLError(
                "Can't crop sample with width {} to {} from x {}.".format(
                    width, self.width, self.x))

        if height < self.height + self.y:
            raise VergeMLError(
                "Can't crop sample with height {} to {} from y {}.".format(
                    height, self.height, self.y))

        x, y = self.x, self.y
    elif self.position == "top-left":
        x, y = 0, 0
    elif self.position == "top-right":
        x, y = width - self.width, 0
    elif self.position == "bottom-left":
        x, y = 0, height - self.height
    elif self.position == "bottom-right":
        x, y = width - self.width, height - self.height
    elif self.position == "center":
        x = math.floor(width / 2 - self.width / 2)
        y = math.floor(height / 2 - self.height / 2)

    # box is (left, upper, right, lower)
    params = x, y, x + self.width, y + self.height
    return img.crop(params)
def _setup_input(self):
    """Set up input from env.

    Instantiates the input plugin named in ``data.input.type`` with a
    copy of the ``data.input`` configuration, merged with the base
    configuration and without the ``type`` entry. The instance is stored
    in ``self.input``.

    :raises VergeMLError: if no input type is configured or the plugin
        can't be found
    """
    # get the name of the input plugin
    input_name = self.env.get('data.input.type')
    if not input_name:
        raise VergeMLError("data.input.type is not defined.")

    # get input configuration and merge base config; work on a copy so
    # the configuration held by env is left alone
    input_conf = self.env.get('data.input').copy()
    input_conf.update(self._base_env_config())

    # instantiate the input plugin
    input_class = self.plugins.get('vergeml.io', input_name)
    if not input_class:
        raise VergeMLError("input name not found: {}".format(input_name))

    # TODO validate configuration and set defaults

    # the type selected the plugin and is not part of its configuration
    input_conf.pop('type')
    self.input = input_class(input_conf)
def _wrap_call(cmd, fun, args, env):
    """Complete the arguments of a command and call its function.

    Unless the command is free form, missing arguments are filled in from
    the configuration and the option defaults, and required arguments are
    checked. The command is then registered as the current command of the
    environment before ``fun`` is called.

    :param cmd: the command being executed
    :param fun: the function implementing the command
    :param args: the arguments given by the caller; not modified
    :param env: the environment
    :return: the result of ``fun(fn_args, env)``
    :raises VergeMLError: if a required argument is missing
    """
    fn_args = deepcopy(args)
    config_name = cmd.name

    previous = env.current_command
    if previous:
        # find the previous command and check for sub option
        parent = previous[0]
        sub_option = next((opt for opt in parent.options if opt.subcommand), None)
        if sub_option and args.get(sub_option.name) == cmd.name:
            # we are a sub command
            config_name = parent.name + '.' + cmd.name

    # Free form commands deal with this manually
    if not cmd.free_form:
        # If existent, read settings from the config file
        config = parse_command(cmd, env.get(config_name))

        # Set missing args from the config file
        for key, value in config.items():
            fn_args.setdefault(key, value)

        # Set missing args from default
        for opt in cmd.options:
            if opt.name in fn_args:
                continue
            if opt.default is not None or not opt.is_required():
                fn_args[opt.name] = opt.default

        # When required arguments are missing now, raise an error
        for opt in cmd.options:
            if opt.is_required() and opt.name not in fn_args:
                # TODO show --name only when called via the command line
                raise VergeMLError(f'Missing argument --{opt.name}.', help_topic=cmd.name)

    # Let the environment know about the name of the command being
    # executed
    env.current_command = (cmd, fn_args)

    # Set up defaults for the command. This will also give models a chance
    # to alter the configuration of the environment before command
    # execution.
    env.set_defaults(cmd.name, fn_args)

    return fun(fn_args, env)
def _validate_preprocess(self, value):
    """Validate the entries of ``data.preprocess``.

    Each entry must be a dictionary with an ``op`` key naming an operation
    plugin. The remaining keys are validated against the options of that
    operation. The validated entries are merged into ``self.values`` under
    ``data.preprocess``.

    :param value: the list of preprocess entries
    :raises VergeMLError: if an entry is invalid; the hint key points at
        the entry (``data.preprocess.<index>``) or at the offending option
        below it
    """
    operations = []

    for ix, config in enumerate(value):
        entry_key = 'data.preprocess.' + str(ix)

        if not isinstance(config, dict):
            raise VergeMLError("Invalid entry in preprocess - must be key value pairs.",
                               "Please fix the entry in the project file.",
                               help_topic="preprocess",
                               hint_type='key',
                               hint_key=entry_key)
        if 'op' not in config:
            raise VergeMLError("Invalid entry in preprocess - missing 'op' key.",
                               "Please fix the entry in the project file.",
                               help_topic="preprocess",
                               hint_type='key',
                               hint_key=entry_key)

        op_name = config['op']
        plugin = self.plugins.get("vergeml.operation", op_name)
        if not plugin:
            # The index has to be interpolated into the key (the f prefix
            # was missing, leaving a literal "{ix}" in the hint key).
            raise VergeMLError(f"Invalid entry in preprocess - unknown operation '{op_name}'.",
                               "Please fix the entry in the project file.",
                               help_topic="preprocess",
                               hint_type='value',
                               hint_key=f"{entry_key}.op")

        op = Operation.discover(plugin)
        # 'op' selects the operation and is not validated as an option
        options = [option for option in op.options if option.name != 'op']
        validator = ValidatePreprocess(options, op_name, self.plugins)

        config = deepcopy(config)
        del config['op']

        try:
            apply_config(config, {None: validator})
        except VergeMLError as err:
            # make the hint key relative to the whole document; errors
            # without a hint key are passed on unchanged
            if err.hint_key:
                err.hint_key = f"{entry_key}.{err.hint_key}"
            raise

        validator.values['op'] = op_name
        operations.append(validator.values)

    dict_merge(self.values, dict(data=dict(preprocess=operations)))
def _load_trained_model(self):
    """Load a trained models hyperparameters and results.

    Reads ``data.yaml`` from the directory of ``self.trained_model`` and
    copies its ``hyperparameters``, ``results`` and ``model`` entries into
    the configuration. Also sets ``self.results``.

    :return: the path of the ``data.yaml`` file
    :raises VergeMLError: if the trained model or its ``data.yaml`` does
        not exist
    """
    trainings_dir = self._config['trainings-dir']

    train_mod_path = os.path.join(trainings_dir, self.trained_model)
    if not os.path.exists(train_mod_path):
        raise VergeMLError("Trained model not found: {}".format(self.trained_model))

    # Merge data.yaml
    data_file = os.path.join(trainings_dir, self.trained_model, 'data.yaml')
    if not os.path.exists(data_file):
        raise VergeMLError("data.yaml file not found for {}: {}".format(
            self.trained_model, data_file))

    doc = load_yaml_file(data_file, 'data file')
    merged = {
        'hyperparameters': doc.get('hyperparameters', {}),
        'results': doc.get('results', {}),
        'model': doc.get('model'),
    }
    self._config.update(merged)

    self.results = _Results(self, data_file)
    return data_file