def get_export_params(
    config,
    output_dir=None,
    project=None,
    name=None,
    model_version=None,
    exporter_type=None,
    return_labels=None,
    is_remote=None,
):
    """Merge export settings, preferring cli arguments over the config block.

    :param config: `dict` The export block of the config.
    :param output_dir: `str` The base of export paths. (defaults to './models')
    :param project: `str` The name of the project this model is for.
    :param name: `str` The name of this model (often the use case for it, `ner`, `intent` etc).
    :param model_version: `str` The version of this model.
    :param exporter_type: `str` The name of the exporter to use (defaults to 'default')
    :param return_labels: `str` Should labels be returned? (defaults to False)
    :param is_remote: `str` Should the bundle be split into client and server dirs.

    :returns: `Tuple[str, str, str, str, str, bool, bool]` The output_dir, project,
        name, model_version, exporter_type, return_labels, and remote
    """
    def _resolve(cli_value, config_value):
        # A cli argument always wins; `None` means "not given on the cli".
        return cli_value if cli_value is not None else config_value

    project = _resolve(project, config.get('project'))
    name = _resolve(name, config.get('name'))
    output_dir = os.path.expanduser(_resolve(output_dir, config.get('output_dir', './models')))
    model_version = _resolve(model_version, config.get('model_version'))
    # The config key 'type' is preferred, falling back to the legacy 'exporter_type'.
    exporter_type = _resolve(exporter_type, config.get('type', config.get('exporter_type', 'default')))
    # Values may arrive as strings like 'true'/'false'; normalize to bool.
    return_labels = str2bool(_resolve(return_labels, config.get('return_labels', False)))
    is_remote = str2bool(_resolve(is_remote, config.get('is_remote', True)))
    return output_dir, project, name, model_version, exporter_type, return_labels, is_remote
def _infer_type_or_str(x):
    """Coerce a raw value to the richest type it supports: bool, then float, then str.

    :param x: The raw (usually string) value to convert.
    :returns: `Union[bool, float]` conversion of `x`, or `x` unchanged if neither applies.
    """
    try:
        return str2bool(x)
    # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
    # `str2bool` is a project helper; presumably it raises a plain Exception on
    # non-boolean input — TODO confirm and narrow further if possible.
    except Exception:
        try:
            return float(x)
        except ValueError:
            return x
# NOTE(review): this is a byte-identical redefinition of `_infer_type_or_str`
# defined earlier in the file; it silently shadows the first one and should be
# deleted once confirmed no module relies on this duplicate.
def _infer_type_or_str(x):
    """Coerce a raw value to the richest type it supports: bool, then float, then str.

    :param x: The raw (usually string) value to convert.
    :returns: `Union[bool, float]` conversion of `x`, or `x` unchanged if neither applies.
    """
    try:
        return str2bool(x)
    # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
    # `str2bool` is a project helper; presumably it raises a plain Exception on
    # non-boolean input — TODO confirm and narrow further if possible.
    except Exception:
        try:
            return float(x)
        except ValueError:
            return x
def test_get_export_config():
    """With no cli overrides, every returned value comes from the config block."""
    config = {
        'project': rand_str(),
        'name': rand_str(),
        'output_dir': os.path.join(rand_str(), rand_str()),
        'model_version': str(random.randint(1, 5)),
        'exporter_type': rand_str(),
        'return_labels': random.choice(['true', 'false']),
        'is_remote': random.choice(['true', 'false']),
    }
    out_dir, proj, model_name, version, exporter, labels, remote = get_export_params(config)
    assert out_dir == config['output_dir']
    assert proj == config['project']
    assert model_name == config['name']
    assert version == config['model_version']
    assert exporter == config['exporter_type']
    # The booleans are normalized from their string forms by get_export_params.
    assert labels == str2bool(config['return_labels'])
    assert remote == str2bool(config['is_remote'])
def test():
    """Each field independently honors the cli value if present, else config, else its default."""
    cli = make_data()
    cfg = make_data()
    config = {
        'output_dir': cfg.dir,
        'project': cfg.proj,
        'name': cfg.name,
        'model_version': cfg.version,
        'exporter_type': rand_str(),
        'return_labels': random.choice(['true', 'false']),
        'is_remote': random.choice(['true', 'false']),
    }
    # `choice` randomly keeps or drops the cli/config value and yields the
    # expected ("gold") outcome; None means the hard-coded default applies.
    in_output, gold_output = choice(cli.dir, config, 'output_dir')
    if gold_output is None:
        gold_output = './models'
    in_project, gold_project = choice(cli.proj, config, 'project')
    in_name, gold_name = choice(cli.name, config, 'name')
    in_version, gold_version = choice(cli.version, config, 'model_version')
    in_export, gold_export = choice(rand_str(), config, 'exporter_type')
    if gold_export is None:
        gold_export = 'default'
    in_labels, gold_labels = choice(random.choice(['true', 'false']), config, 'return_labels')
    gold_labels = False if gold_labels is None else str2bool(gold_labels)
    in_remote, gold_remote = choice(random.choice(['true', 'false']), config, 'is_remote')
    gold_remote = True if gold_remote is None else str2bool(gold_remote)
    results = get_export_params(
        config,
        in_output,
        in_project,
        in_name,
        in_version,
        in_export,
        in_labels,
        in_remote,
    )
    golds = (gold_output, gold_project, gold_name, gold_version, gold_export, gold_labels, gold_remote)
    for got, expected in zip(results, golds):
        assert got == expected
def load(self, task_name=None):
    """Import the backend framework package and its baseline/mead plugin modules.

    Resolves `baseline.<backend>` plus its optimizer, embeddings, and exporter
    plugins, and stashes the backend's `transition_mask` on this object.

    :param task_name: `str` Optional task whose backend-specific module
        (`baseline.<backend>.<task_name>`) should also be imported; a missing
        task module is tolerated with a warning.
    """
    if self.name == 'tf':
        from eight_mile.tf.layers import set_tf_log_level, set_tf_eager_debug
        set_tf_log_level(os.getenv("MEAD_TF_LOG_LEVEL", "ERROR"))
        set_tf_eager_debug(str2bool(os.getenv("MEAD_TF_EAGER_DEBUG", "FALSE")))
    base_pkg_name = 'baseline.{}'.format(self.name)
    # Backends may not be downloaded to the cache, they must exist locally
    mod = import_user_module(base_pkg_name)
    import_user_module('baseline.{}.optz'.format(self.name))
    import_user_module('baseline.{}.embeddings'.format(self.name))
    import_user_module('mead.{}.exporters'.format(self.name))
    if task_name is not None:
        try:
            import_user_module(f'{base_pkg_name}.{task_name}')
        # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        # The task module is optional, so any import failure is only a warning.
        except Exception:
            logger.warning(f"No module found [{base_pkg_name}.{task_name}]")
    self.transition_mask = mod.transition_mask
def train(self, checkpoint=None):
    """Execute the full training procedure by delegating to several sub-hooks.

    1. `_load_dataset()` initializes the `DataFeed` fields of this class
    2. `baseline.save_vectorizers()` writes the bound `vectorizers` fields
       to a file in the `basedir`
    3. `baseline.train.fit()` runs the training procedure and yields a saved model
    4. `baseline.zip_files()` zips all files in the `basedir` with the same
       `PID` as this process (skipped when `zip_checkpoint` is falsy)
    5. `_close_reporting_hooks()` tells the reporting hooks the job is finished

    :param checkpoint: Optional checkpoint to resume training from.
    :return: Nothing
    """
    self._reorganize_params()
    baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
    self._load_dataset()
    model_config = self.config_params['model']
    model_config['features'] = self._get_features()
    model_config['labels'] = self._get_labels()
    model_config['task'] = self.task_name()
    fit_config = self.config_params['train']
    fit_config['checkpoint'] = checkpoint
    baseline.train.fit(
        model_config,
        self.train_data,
        self.valid_data,
        self.test_data,
        **fit_config,
    )
    # Zipping is on by default but can be disabled via config ('zip_checkpoint').
    if str2bool(self.config_params.get('zip_checkpoint', True)):
        baseline.zip_files(self.get_basedir())
    self._close_reporting_hooks()