# NOTE: these functions are excerpted from several modules of the same code base; they
# assume the usual surrounding imports (e.g. torch, tensorflow as tf, DataLoader,
# logger, listify, get_model_file, get_metric_cmp, create_trainer) are in scope.
def create_model(embeddings, d_model, d_ff, num_heads, num_layers, rpr_k, d_k, activation, checkpoint_name, device):
    # Treat an empty/disabled rpr_k as "no relative position representations"
    if len(rpr_k) == 0 or rpr_k[0] < 1:
        rpr_k = [None]
    else:
        rpr_k = listify(rpr_k)
    logger.info("Creating tied encoder decoder model")
    hps = {"dsz": d_model,
           "hsz": d_model,
           "d_ff": d_ff,
           "dropout": 0.0,
           "num_heads": num_heads,
           "layers": num_layers,
           "encoder_type": "transformer",
           "decoder_type": "transformer",
           "src_lengths_key": "x_lengths",
           "d_k": d_k,
           "activation": activation,
           "rpr_k": rpr_k}
    model = TiedEmbeddingsSeq2SeqModel({'x': embeddings}, None, **hps)
    # Checkpoints may be numpy archives (.npz) or PyTorch state dicts
    if checkpoint_name.endswith('npz'):
        load_transformer_seq2seq_npz(model, checkpoint_name)
    else:
        model.load_state_dict(torch.load(checkpoint_name, map_location=torch.device(device)))
    print(model)
    return model
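
# A hedged usage sketch: the embeddings object and the hyper-parameter values below are
# placeholders, and the checkpoint path is illustrative only.
model = create_model(embeddings, d_model=512, d_ff=2048, num_heads=8, num_layers=6,
                     rpr_k=[8], d_k=64, activation='relu',
                     checkpoint_name='checkpoint-step-100000.npz', device='cpu')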
def register_model(cls, task, name=None):
    """Register a function as a plug-in"""
    if name is None:
        name = cls.__name__
    names = listify(name)

    if task not in BASELINE_MODELS:
        BASELINE_MODELS[task] = {}

    if task not in BASELINE_LOADERS:
        BASELINE_LOADERS[task] = {}

    if hasattr(cls, 'create'):
        def create(*args, **kwargs):
            return cls.create(*args, **kwargs)
    else:
        def create(*args, **kwargs):
            return cls(*args, **kwargs)

    for alias in names:
        if alias in BASELINE_MODELS[task]:
            raise Exception(
                'Error: attempt to re-define previously registered handler {} (old: {}, new: {}) for task {} in registry'.format(
                    alias, BASELINE_MODELS[task], cls, task))

        BASELINE_MODELS[task][alias] = create

        if hasattr(cls, 'load'):
            BASELINE_LOADERS[task][alias] = cls.load
    return cls
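
# A minimal sketch of registering a model class via this hook. `MyClassifier` is
# hypothetical; any class exposing optional `create`/`load` classmethods works the same way.
class MyClassifier:
    @classmethod
    def create(cls, *args, **kwargs):
        return cls()

    @classmethod
    def load(cls, filename, **kwargs):
        return cls()

register_model(MyClassifier, task='classify', name=['my-clf', 'my-classifier'])
assert 'my-clf' in BASELINE_MODELS['classify']
assert BASELINE_LOADERS['classify']['my-clf'] is MyClassifier.load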
def fit(model_params, ts, vs, es=None, **kwargs):
    do_early_stopping = bool(kwargs.get('do_early_stopping', True))
    epochs = int(kwargs.get('epochs', 20))
    model_file = get_model_file('seq2seq', 'pytorch', kwargs.get('basedir'))

    num_loader_workers = int(kwargs.get('num_loader_workers', 0))
    pin_memory = bool(kwargs.get('pin_memory', True))
    ts = DataLoader(ts, num_workers=num_loader_workers, batch_size=None, pin_memory=pin_memory)
    vs = DataLoader(vs, batch_size=None, pin_memory=pin_memory)
    es = DataLoader(es, batch_size=None, pin_memory=pin_memory) if es is not None else None

    best_metric = 0
    if do_early_stopping:
        early_stopping_metric = kwargs.get('early_stopping_metric', 'perplexity')
        early_stopping_cmp, best_metric = get_metric_cmp(early_stopping_metric, kwargs.get('early_stopping_cmp'))
        patience = kwargs.get('patience', epochs)
        logger.info('Doing early stopping on [%s] with patience [%d]', early_stopping_metric, patience)

    reporting_fns = listify(kwargs.get('reporting', []))
    logger.info('reporting %s', reporting_fns)

    after_train_fn = kwargs.get('after_train_fn', None)
    trainer = create_trainer(model_params, **kwargs)

    last_improved = 0
    for epoch in range(epochs):
        trainer.train(ts, reporting_fns)
        if after_train_fn is not None:
            after_train_fn(trainer.model)
        test_metrics = trainer.test(vs, reporting_fns, phase='Valid')

        if do_early_stopping is False:
            trainer.save(model_file)
        elif early_stopping_cmp(test_metrics[early_stopping_metric], best_metric):
            last_improved = epoch
            best_metric = test_metrics[early_stopping_metric]
            logger.info('New best %.3f', best_metric)
            trainer.save(model_file)
        elif (epoch - last_improved) > patience:
            logger.info('Stopping due to persistent failures to improve')
            break

    if do_early_stopping is True:
        logger.info('Best performance on %s: %.3f at epoch %d', early_stopping_metric, best_metric, last_improved)

    if es is not None:
        model = torch.load(model_file)
        trainer = Seq2SeqTrainerPyTorch(model, **kwargs)
        test_metrics = trainer.test(es, reporting_fns, phase='Test')
    return test_metrics
def register_remote(cls, name=None):
    """Register a class as a plug-in"""
    if name is None:
        name = cls.__name__
    names = listify(name)
    for alias in names:
        if alias in BASELINE_REMOTES:
            raise Exception(
                'Error: attempt to re-define previously registered handler {} (old: {}, new: {}) in registry'.format(
                    alias, BASELINE_REMOTES, cls))
        BASELINE_REMOTES[alias] = cls
    return cls
def init_stacked(self, input_dim: int, **kwargs) -> BaseLayer:
    """Produce a stacking operation that will be used in the model

    :param input_dim: The input dimension size
    :param kwargs: See below

    :Keyword Arguments:
        * *hsz* (``list``) -- Defaults to nothing, in which case this function is a pass-through
        * *stacked_name* (``str``) -- Optional override to the stacking name

    :return: A stacking operation (or a pass-through)
    """
    hszs = listify(kwargs.get('hsz', []))
    if not hszs:
        return PassThru(input_dim)
    name = kwargs.get('stacked_name')
    return DenseStack(input_dim, hszs, pdrop_value=self.pdrop_value, name=name)
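
# A hedged illustration of the kwargs contract (the `model` instance here is hypothetical):
# with no 'hsz' the input is passed through unchanged; with 'hsz' a stack of dense
# layers is produced, one per hidden size in the list.
passthru = model.init_stacked(512)                 # PassThru(512)
stacked = model.init_stacked(512, hsz=[256, 128])  # DenseStack: 512 -> 256 -> 128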
def create_lr_scheduler(**kwargs):
    """Create a learning rate scheduler.

    :Keyword Arguments:
        * *lr_scheduler_type* (``str`` or ``list``) -- The name of the learning rate scheduler. If this is a list,
          the first scheduler must be a warmup scheduler.
    """
    sched_type = kwargs.get("lr_scheduler_type")
    if sched_type is None:
        return None
    sched_type = listify(sched_type)
    if len(sched_type) == 2:
        warm = MEAD_LAYERS_LR_SCHEDULERS.get(sched_type[0])(**kwargs)
        assert isinstance(warm, WarmupLearningRateScheduler), "First LR Scheduler must be a warmup scheduler."
        rest = MEAD_LAYERS_LR_SCHEDULERS.get(sched_type[1])(**kwargs)
        return MEAD_LAYERS_LR_SCHEDULERS.get("composite")(warm=warm, rest=rest, **kwargs)
    Constructor = MEAD_LAYERS_LR_SCHEDULERS.get(sched_type[0])
    return Constructor(**kwargs)
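
# A minimal sketch of the two call shapes. The scheduler names are assumed to be
# registered in MEAD_LAYERS_LR_SCHEDULERS; the exact names here are illustrative.
single = create_lr_scheduler(lr_scheduler_type='cosine', lr=1.0e-3)
# A two-element list means: warmup scheduler first, then the main schedule,
# combined via the registered "composite" scheduler.
combined = create_lr_scheduler(lr_scheduler_type=['warmup_linear', 'cosine'], lr=1.0e-3, warmup_steps=1000)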
def fit(model_params, ts, vs, es, **kwargs):
    """
    Train a classifier using PyTorch

    :param model_params: The model to train
    :param ts: A training data set
    :param vs: A validation data set
    :param es: A test data set, can be None
    :param kwargs: See below

    :Keyword Arguments:
        * *do_early_stopping* (``bool``) -- Stop after eval data is not improving. Defaults to True
        * *epochs* (``int``) -- How many epochs. Defaults to 20
        * *outfile* -- Model output file, defaults to classifier-model.pyth
        * *patience* -- How many epochs where evaluation is no longer improving before we give up
        * *reporting* -- Callbacks which may be used on reporting updates
        * *optim* -- Optimizer to use, defaults to `sgd`
        * *eta, lr* (``float``) -- Learning rate, defaults to 0.01
        * *mom* (``float``) -- Momentum (SGD only), defaults to 0.9 if optim is `sgd`
    :return: The test metrics if a test set was given, otherwise the last validation metrics
    """
    do_early_stopping = bool(kwargs.get('do_early_stopping', True))
    verbose = kwargs.get('verbose', {'console': kwargs.get('verbose_console', False), 'file': kwargs.get('verbose_file', None)})
    epochs = int(kwargs.get('epochs', 20))
    model_file = get_model_file('classify', 'pytorch', kwargs.get('basedir'))
    output = kwargs.get('output')
    txts = kwargs.get('txts')

    num_loader_workers = int(kwargs.get('num_loader_workers', 0))
    pin_memory = bool(kwargs.get('pin_memory', True))

    if not isinstance(ts, DataLoader):
        ts = DataLoader(ts, num_workers=num_loader_workers, batch_size=None, pin_memory=pin_memory)
    if not isinstance(vs, DataLoader):
        vs = DataLoader(vs, batch_size=None, pin_memory=pin_memory)
    if es and not isinstance(es, DataLoader):
        es = DataLoader(es, batch_size=None, pin_memory=pin_memory)

    best_metric = 0
    if do_early_stopping:
        early_stopping_metric = kwargs.get('early_stopping_metric', 'acc')
        early_stopping_cmp, best_metric = get_metric_cmp(early_stopping_metric, kwargs.get('early_stopping_cmp'))
        patience = kwargs.get('patience', epochs)
        logger.info('Doing early stopping on [%s] with patience [%d]', early_stopping_metric, patience)

    reporting_fns = listify(kwargs.get('reporting', []))
    logger.info('reporting %s', reporting_fns)

    trainer = create_trainer(model_params, **kwargs)

    last_improved = 0
    for epoch in range(epochs):
        trainer.train(ts, reporting_fns)
        test_metrics = trainer.test(vs, reporting_fns)

        if do_early_stopping is False:
            trainer.save(model_file)
        elif early_stopping_cmp(test_metrics[early_stopping_metric], best_metric):
            last_improved = epoch
            best_metric = test_metrics[early_stopping_metric]
            logger.info('New best %.3f', best_metric)
            trainer.save(model_file)
        elif (epoch - last_improved) > patience:
            logger.info('Stopping due to persistent failures to improve')
            break

    if do_early_stopping is True:
        logger.info('Best performance on %s: %.3f at epoch %d', early_stopping_metric, best_metric, last_improved)

    if es is not None:
        logger.info('Reloading best checkpoint')
        model = torch.load(model_file)
        trainer = create_trainer(model, **kwargs)
        test_metrics = trainer.test(es, reporting_fns, phase='Test', verbose=verbose, output=output, txts=txts)
    return test_metrics
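
# A hedged usage sketch: `model_params`, `train_set`, `valid_set` and `test_set` stand in
# for whatever the surrounding driver would normally construct.
metrics = fit(model_params, train_set, valid_set, test_set,
              epochs=20,
              early_stopping_metric='acc',
              patience=5,
              optim='sgd', lr=0.01, mom=0.9)
print(metrics)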
def fit_eager_distributed(model_params, ts, vs, es=None, **kwargs):
    """
    Train a language model using TensorFlow with `tf.dataset`.  This is the default behavior for training.

    :param model_params: The model (or parameters to create the model) to train
    :param ts: A training data set
    :param vs: A validation data set
    :param es: A test data set, can be None
    :param kwargs: See below

    :Keyword Arguments:
        * *do_early_stopping* (``bool``) -- Stop after evaluation data is no longer improving. Defaults to True
        * *verbose* (``dict``) -- A dictionary containing `console` boolean and `file` name if on
        * *epochs* (``int``) -- How many epochs. Defaults to 5
        * *outfile* -- Model output file
        * *patience* -- How many epochs where evaluation is no longer improving before we give up
        * *reporting* -- Callbacks which may be used on reporting updates
        * *nsteps* (``int``) -- If we should report every n-steps, this should be passed
        * *ema_decay* (``float``) -- If we are doing an exponential moving average, what decay to use
        * *clip* (``int``) -- If we are doing gradient clipping, what value to use
        * *optim* (``str``) -- The name of the optimizer we are using
        * *lr* (``float``) -- The learning rate we are using
        * *mom* (``float``) -- If we are using SGD, what value to use for momentum
        * *beta1* (``float``) -- Adam-specific hyper-param, defaults to `0.9`
        * *beta2* (``float``) -- Adam-specific hyper-param, defaults to `0.999`
        * *epsilon* (``float``) -- Adam-specific hyper-param, defaults to `1e-8`

    :return: None
    """
    epochs = int(kwargs.get('epochs', 5))
    patience = int(kwargs.get('patience', epochs))
    model_file = get_model_file('lm', 'tf', kwargs.get('basedir'))

    do_early_stopping = bool(kwargs.get('do_early_stopping', True))

    best_metric = 0
    if do_early_stopping:
        early_stopping_metric = kwargs.get('early_stopping_metric', 'avg_loss')
        early_stopping_cmp, best_metric = get_metric_cmp(early_stopping_metric, kwargs.get('early_stopping_cmp'))
        patience = kwargs.get('patience', epochs)
        print('Doing early stopping on [%s] with patience [%d]' % (early_stopping_metric, patience))

    reporting_fns = listify(kwargs.get('reporting', []))
    print('reporting', reporting_fns)

    batchsz = kwargs['batchsz']
    test_batchsz = kwargs.get('test_batchsz', batchsz)
    tgt_key = model_params.get('tgt_key')

    train_dataset = tf.data.Dataset.from_tensor_slices(to_tensors(ts))
    train_dataset = train_dataset.shuffle(buffer_size=SHUF_BUF_SZ)
    train_dataset = train_dataset.batch(batchsz, drop_remainder=True)
    train_dataset = train_dataset.prefetch(NUM_PREFETCH)

    valid_dataset = tf.data.Dataset.from_tensor_slices(to_tensors(vs))
    valid_dataset = valid_dataset.batch(batchsz, drop_remainder=True)
    valid_dataset = valid_dataset.prefetch(NUM_PREFETCH)

    trainer = LanguageModelTrainerDistributedTf(model_params, **kwargs)
    train_dataset = trainer.distribute(train_dataset)
    valid_dataset = trainer.distribute(valid_dataset)

    last_improved = 0
    SET_TRAIN_FLAG(True)

    for epoch in range(epochs):
        trainer.train(train_dataset, reporting_fns, steps=len(ts))
        test_metrics = trainer.test(valid_dataset, reporting_fns, phase='Valid', steps=len(vs))

        if do_early_stopping is False:
            trainer.checkpoint()
            trainer.model.save(model_file)
        elif early_stopping_cmp(test_metrics[early_stopping_metric], best_metric):
            last_improved = epoch
            best_metric = test_metrics[early_stopping_metric]
            print('New best %.3f' % best_metric)
            trainer.checkpoint()
            trainer.model.save(model_file)
        elif (epoch - last_improved) > patience:
            print('Stopping due to persistent failures to improve')
            break

    if do_early_stopping is True:
        print('Best performance on %s: %.3f at epoch %d' % (early_stopping_metric, best_metric, last_improved))

    if es is not None:
        print('Reloading best checkpoint')
        trainer.recover_last_checkpoint()
        trainer.strategy = tf.distribute.OneDeviceStrategy('/device:GPU:0')
        test_dataset = tf.data.Dataset.from_tensor_slices(to_tensors(es))
        test_dataset = test_dataset.batch(test_batchsz, drop_remainder=False)
        test_dataset = test_dataset.prefetch(NUM_PREFETCH)
        test_dataset = trainer.distribute(test_dataset)
        trainer.test(test_dataset, reporting_fns, phase='Test', steps=len(es))
def fit(model_params, ts, vs, es, **kwargs):
    """
    Train a tagger using TensorFlow with a `feed_dict`.  This is the previous default behavior for training.
    To use this, you need to pass `fit_func: feed_dict` in your MEAD config

    :param model_params: The model to train
    :param ts: A training data set
    :param vs: A validation data set
    :param es: A test data set, can be None
    :param kwargs: See below

    :Keyword Arguments:
        * *do_early_stopping* (``bool``) -- Stop after evaluation data is no longer improving. Defaults to True
        * *verbose* (``dict``) -- A dictionary containing `console` boolean and `file` name if on
        * *epochs* (``int``) -- How many epochs. Defaults to 5
        * *outfile* -- Model output file
        * *patience* -- How many epochs where evaluation is no longer improving before we give up
        * *reporting* -- Callbacks which may be used on reporting updates
        * *nsteps* (``int``) -- If we should report every n-steps, this should be passed
        * *ema_decay* (``float``) -- If we are doing an exponential moving average, what decay to use
        * *clip* (``int``) -- If we are doing gradient clipping, what value to use
        * *optim* (``str``) -- The name of the optimizer we are using
        * *lr* (``float``) -- The learning rate we are using
        * *mom* (``float``) -- If we are using SGD, what value to use for momentum
        * *beta1* (``float``) -- Adam-specific hyper-param, defaults to `0.9`
        * *beta2* (``float``) -- Adam-specific hyper-param, defaults to `0.999`
        * *epsilon* (``float``) -- Adam-specific hyper-param, defaults to `1e-8`

    :return: None
    """
    epochs = int(kwargs.get('epochs', 5))
    patience = int(kwargs.get('patience', epochs))
    conll_output = kwargs.get('conll_output', None)
    span_type = kwargs.get('span_type', 'iob')
    txts = kwargs.get('txts', None)
    model_file = get_model_file('tagger', 'tf', kwargs.get('basedir'))
    TRAIN_FLAG()

    trainer = create_trainer(model_params, **kwargs)

    do_early_stopping = bool(kwargs.get('do_early_stopping', True))
    verbose = bool(kwargs.get('verbose', False))

    best_metric = 0
    if do_early_stopping:
        early_stopping_metric = kwargs.get('early_stopping_metric', 'acc')
        early_stopping_cmp, best_metric = get_metric_cmp(early_stopping_metric, kwargs.get('early_stopping_cmp'))
        patience = kwargs.get('patience', epochs)
        print('Doing early stopping on [%s] with patience [%d]' % (early_stopping_metric, patience))

    reporting_fns = listify(kwargs.get('reporting', []))
    print('reporting', reporting_fns)

    last_improved = 0
    for epoch in range(epochs):

        trainer.train(ts, reporting_fns)
        test_metrics = trainer.test(vs, reporting_fns, phase='Valid')

        if do_early_stopping is False:
            trainer.checkpoint()
            trainer.model.save(model_file)
        elif early_stopping_cmp(test_metrics[early_stopping_metric], best_metric):
            last_improved = epoch
            best_metric = test_metrics[early_stopping_metric]
            print('New best %.3f' % best_metric)
            trainer.checkpoint()
            trainer.model.save(model_file)
        elif (epoch - last_improved) > patience:
            print('Stopping due to persistent failures to improve')
            break

    if do_early_stopping is True:
        print('Best performance on %s: %.3f at epoch %d' % (early_stopping_metric, best_metric, last_improved))

    if es is not None:
        trainer.recover_last_checkpoint()
        # What to do about overloading this??
        evaluator = TaggerEvaluatorTf(trainer.model, span_type, verbose)
        timer = Timer()
        test_metrics = evaluator.test(es, conll_output=conll_output, txts=txts)
        duration = timer.elapsed()
        for reporting in reporting_fns:
            reporting(test_metrics, 0, 'Test')
        trainer.log.debug({'phase': 'Test', 'time': duration})
def fit(model_params, ts, vs, es=None, **kwargs):
    """
    Train a language model using TensorFlow with a `feed_dict`.

    :param model_params: The model (or parameters to create the model) to train
    :param ts: A training data set
    :param vs: A validation data set
    :param es: A test data set, can be None
    :param kwargs: See below

    :Keyword Arguments:
        * *do_early_stopping* (``bool``) -- Stop after evaluation data is no longer improving. Defaults to True
        * *verbose* (``dict``) -- A dictionary containing `console` boolean and `file` name if on
        * *epochs* (``int``) -- How many epochs. Defaults to 5
        * *outfile* -- Model output file
        * *patience* -- How many epochs where evaluation is no longer improving before we give up
        * *reporting* -- Callbacks which may be used on reporting updates
        * *nsteps* (``int``) -- If we should report every n-steps, this should be passed
        * *ema_decay* (``float``) -- If we are doing an exponential moving average, what decay to use
        * *clip* (``int``) -- If we are doing gradient clipping, what value to use
        * *optim* (``str``) -- The name of the optimizer we are using
        * *lr* (``float``) -- The learning rate we are using
        * *mom* (``float``) -- If we are using SGD, what value to use for momentum
        * *beta1* (``float``) -- Adam-specific hyper-param, defaults to `0.9`
        * *beta2* (``float``) -- Adam-specific hyper-param, defaults to `0.999`
        * *epsilon* (``float``) -- Adam-specific hyper-param, defaults to `1e-8`
        * *after_train_fn* (``func``) -- A callback to fire after every epoch of training

    :return: None
    """
    epochs = int(kwargs.get('epochs', 5))
    patience = int(kwargs.get('patience', epochs))
    model_file = get_model_file('lm', 'tf', kwargs.get('basedir'))
    after_train_fn = kwargs.get('after_train_fn')

    trainer = create_trainer(model_params, **kwargs)

    do_early_stopping = bool(kwargs.get('do_early_stopping', True))

    best_metric = 1000
    if do_early_stopping:
        early_stopping_metric = kwargs.get('early_stopping_metric', 'avg_loss')
        early_stopping_cmp, best_metric = get_metric_cmp(early_stopping_metric, kwargs.get('early_stopping_cmp'))
        patience = kwargs.get('patience', epochs)
        print('Doing early stopping on [%s] with patience [%d]' % (early_stopping_metric, patience))

    reporting_fns = listify(kwargs.get('reporting', []))
    print('reporting', reporting_fns)

    last_improved = 0

    for epoch in range(epochs):

        trainer.train(ts, reporting_fns)
        if after_train_fn is not None:
            after_train_fn(trainer.model)

        test_metrics = trainer.test(vs, reporting_fns, phase='Valid')

        if do_early_stopping is False:
            trainer.checkpoint()
            trainer.model.save(model_file)
        elif early_stopping_cmp(test_metrics[early_stopping_metric], best_metric):
            last_improved = epoch
            best_metric = test_metrics[early_stopping_metric]
            print('New best %.3f' % best_metric)
            trainer.checkpoint()
            trainer.model.save(model_file)
        elif (epoch - last_improved) > patience:
            print('Stopping due to persistent failures to improve')
            break

    if do_early_stopping is True:
        print('Best performance on %s: %.3f at epoch %d' % (early_stopping_metric, best_metric, last_improved))

    if es is not None:
        trainer.recover_last_checkpoint()
        trainer.test(es, reporting_fns, phase='Test')
def fit_datasets(model_params, ts, vs, es=None, **kwargs):
    """
    Train a tagger using TensorFlow with `tf.dataset`.  This is the default behavior for training.

    :param model_params: The model (or parameters to create the model) to train
    :param ts: A training data set
    :param vs: A validation data set
    :param es: A test data set, can be None
    :param kwargs: See below

    :Keyword Arguments:
        * *do_early_stopping* (``bool``) -- Stop after evaluation data is no longer improving. Defaults to True
        * *verbose* (``dict``) -- A dictionary containing `console` boolean and `file` name if on
        * *epochs* (``int``) -- How many epochs. Defaults to 20
        * *outfile* -- Model output file, defaults to classifier-model.pyth
        * *patience* -- How many epochs where evaluation is no longer improving before we give up
        * *reporting* -- Callbacks which may be used on reporting updates
        * *nsteps* (``int``) -- If we should report every n-steps, this should be passed
        * *ema_decay* (``float``) -- If we are doing an exponential moving average, what decay to use
        * *clip* (``int``) -- If we are doing gradient clipping, what value to use
        * *optim* (``str``) -- The name of the optimizer we are using
        * *lr* (``float``) -- The learning rate we are using
        * *mom* (``float``) -- If we are using SGD, what value to use for momentum
        * *beta1* (``float``) -- Adam-specific hyper-param, defaults to `0.9`
        * *beta2* (``float``) -- Adam-specific hyper-param, defaults to `0.999`
        * *epsilon* (``float``) -- Adam-specific hyper-param, defaults to `1e-8`

    :return: None
    """
    conll_output = kwargs.get('conll_output', None)
    span_type = kwargs.get('span_type', 'iob')
    txts = kwargs.get('txts', None)

    model_file = get_model_file('tagger', 'tf', kwargs.get('basedir'))

    do_early_stopping = bool(kwargs.get('do_early_stopping', True))
    verbose = kwargs.get('verbose', {'console': kwargs.get('verbose_console', False), 'file': kwargs.get('verbose_file', None)})
    epochs = int(kwargs.get('epochs', 20))

    batchsz = kwargs['batchsz']
    # First, make tf.datasets for ts, vs and es
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/distribute/README.md
    # effective_batch_sz = args.batchsz*args.gpus
    test_batchsz = kwargs.get('test_batchsz', batchsz)
    # This is a little awkward:
    lengths_key = model_params.get('lengths_key')

    train_dataset = tf.data.Dataset.from_tensor_slices(to_tensors(ts, lengths_key))
    train_dataset = train_dataset.shuffle(buffer_size=SHUF_BUF_SZ)
    train_dataset = train_dataset.batch(batchsz, drop_remainder=False)
    train_dataset = train_dataset.repeat(epochs + 1)
    train_dataset = train_dataset.prefetch(NUM_PREFETCH)

    valid_dataset = tf.data.Dataset.from_tensor_slices(to_tensors(vs, lengths_key))
    valid_dataset = valid_dataset.batch(batchsz, drop_remainder=False)
    valid_dataset = valid_dataset.repeat(epochs + 1)
    valid_dataset = valid_dataset.prefetch(NUM_PREFETCH)

    # A reinitializable iterator shared by the train/valid/test datasets
    dataset_iter = tf.compat.v1.data.Iterator.from_structure(tf.compat.v1.data.get_output_types(train_dataset),
                                                             tf.compat.v1.data.get_output_shapes(train_dataset))

    features, y = dataset_iter.get_next()
    # Add features to the model params
    model_params.update(features)
    model_params['y'] = y

    # Create the initialization operations
    train_init_op = dataset_iter.make_initializer(train_dataset)
    valid_init_op = dataset_iter.make_initializer(valid_dataset)

    best_metric = 0
    if do_early_stopping:
        early_stopping_metric = kwargs.get('early_stopping_metric', 'acc')
        early_stopping_cmp, best_metric = get_metric_cmp(early_stopping_metric, kwargs.get('early_stopping_cmp'))
        patience = kwargs.get('patience', epochs)
        print('Doing early stopping on [%s] with patience [%d]' % (early_stopping_metric, patience))

    reporting_fns = listify(kwargs.get('reporting', []))
    print('reporting', reporting_fns)

    TRAIN_FLAG()
    trainer = create_trainer(model_params, **kwargs)

    last_improved = 0
    for epoch in range(epochs):

        trainer.sess.run(train_init_op)
        trainer.train(ts, reporting_fns)
        trainer.sess.run(valid_init_op)
        test_metrics = trainer.test(vs, reporting_fns, phase='Valid')

        if do_early_stopping is False:
            trainer.checkpoint()
            trainer.model.save(model_file)
        elif early_stopping_cmp(test_metrics[early_stopping_metric], best_metric):
            last_improved = epoch
            best_metric = test_metrics[early_stopping_metric]
            print('New best %.3f' % best_metric)
            trainer.checkpoint()
            trainer.model.save(model_file)
        elif (epoch - last_improved) > patience:
            print('Stopping due to persistent failures to improve')
            break

    if do_early_stopping is True:
        print('Best performance on %s: %.3f at epoch %d' % (early_stopping_metric, best_metric, last_improved))

    if es is not None:
        print('Reloading best checkpoint')
        trainer.recover_last_checkpoint()

        test_dataset = tf.data.Dataset.from_tensor_slices(to_tensors(es, lengths_key))
        test_dataset = test_dataset.batch(test_batchsz, drop_remainder=False)
        test_dataset = test_dataset.repeat(epochs + 1)
        test_dataset = test_dataset.prefetch(NUM_PREFETCH)
        test_init_op = dataset_iter.make_initializer(test_dataset)
        trainer.sess.run(test_init_op)

        # What to do about overloading this??
        evaluator = TaggerEvaluatorTf(trainer.model, span_type, verbose)
        start = time.time()
        test_metrics = evaluator.test(es, conll_output=conll_output, txts=txts)
        duration = time.time() - start
        for reporting in reporting_fns:
            reporting(test_metrics, 0, 'Test')
        trainer.log.debug({'phase': 'Test', 'time': duration})
def load_embeddings_overlay(global_embeddings_settings, embeddings_section, vocab, data_download_cache=DEFAULT_DATA_CACHE, name=None):
    """Creates a set of arbitrary sub-graph, DL-framework-specific embeddings by delegating to a wired sub-module.

    As part of this process, we take in an index of embeddings by name, the `embeddings` sub-section of the mead
    config for a feature, and a known ``Counter`` for that feature's vocabulary, containing the number of times
    each token has been seen.  This method's job is to either create a sub-graph from a pretrained model, or to
    create a new randomly initialized sub-graph, taking into account the input vocabulary counters.  The
    embeddings model has control to determine the actual word indices and sub-graph for the embeddings, both of
    which are returned from this method.  If some sort of feature selection is performed (such as low-count
    removal), it is required to happen via the delegated methods.

    :param global_embeddings_settings: The embeddings index passed to the mead driver
    :param embeddings_section: The `embeddings` sub-section of the mead config for this feature
    :param vocab: A known ``Counter`` for this vocabulary, consisting of a token key and count for each
    :param data_download_cache: Where to cache downloaded embedding files
    :param name: An optional name for the embeddings, defaulting to the embeddings label
    :return: Returns an embedding bundle: a ``dict`` with an `embeddings` sub-graph and an updated `vocab`
    """
    # Get the label out of the embeddings section in the features block of mead config
    embed_label = embeddings_section.get('label', embeddings_section.get('labels'))
    if name is None:
        name = embed_label
    # Get the type of embedding out of the embeddings section in the features block of mead config
    embed_type = embeddings_section.get('type', 'default')
    is_stacked = is_sequence(embed_label)
    if is_stacked:
        if embed_type != 'default':
            logger.warning("You have requested a stack of pretrained embeddings but didn't request the 'default' representation")
    # Backwards compat, copy from main block if not present locally
    embeddings_section['unif'] = embeddings_section.get('unif', 0.1)

    # Backwards compat, copy from main block if not present locally
    embeddings_section['keep_unused'] = embeddings_section.get('keep_unused', False)

    # Overlay any backend parameters
    # Also, if we are in eager mode, we might have to place the embeddings explicitly on the CPU
    embeddings_section['cpu_placement'] = bool(embeddings_section.get('cpu_placement', False))
    if embed_label is not None:
        # Allow local overrides to uniform initializer
        embed_labels = listify(embed_label)

        embed_files = []
        for embed_label in embed_labels:

            embeddings_global_config_i = global_embeddings_settings[embed_label]
            if 'type' in embeddings_global_config_i:
                embed_type_i = embeddings_global_config_i['type']
                embed_type = embed_type_i
                if embed_type_i != 'default' and is_stacked:
                    raise Exception("Stacking embeddings only works for 'default' pretrained word embeddings")

            embed_file = embeddings_global_config_i.get('file')
            unzip_file = embeddings_global_config_i.get('unzip', True)
            embed_dsz = embeddings_global_config_i['dsz']
            embed_sha1 = embeddings_global_config_i.get('sha1')
            # Should we grab vocab here too?

            embed_model = embeddings_global_config_i.get('model', {})
            if 'dsz' not in embed_model and not is_stacked:
                embed_model['dsz'] = embed_dsz

            embeddings_section = {**embed_model, **embeddings_section}
            try:
                # We aren't necessarily going to get an `embed_file`.  For instance, using the HuggingFace
                # models in the Hub addon, the `embed_file` should be downloaded using HuggingFace's library,
                # not by us.  In this case we want it to be None and we don't want to download it
                if embed_file:
                    embed_file = EmbeddingDownloader(embed_file, embed_dsz, embed_sha1, data_download_cache,
                                                     unzip_file=unzip_file).download()
                    embed_files.append(embed_file)
                else:
                    embed_files.append(None)
            except Exception as e:
                if is_stacked:
                    raise e
                logger.warning(f"We were not able to download {embed_file}, passing to the addon")
                embed_files.append(embed_file)
        # If we have stacked embeddings (which only works with the `default` model), we need to pass the list.
        # If not, grab the first item
        embed_file = embed_files if is_stacked else embed_files[0]
        embedding_bundle = load_embeddings(name,
                                           embed_file=embed_file,
                                           known_vocab=vocab,
                                           embed_type=embed_type,
                                           data_download_cache=data_download_cache,
                                           **embeddings_section)
    else:  # if there is no label given, assume we need random initialization vectors
        dsz = embeddings_section.pop('dsz')
        embedding_bundle = load_embeddings(name,
                                           dsz=dsz,
                                           known_vocab=vocab,
                                           embed_type=embed_type,
                                           data_download_cache=data_download_cache,
                                           **embeddings_section)

    return embedding_bundle
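
# A hedged illustration of the inputs this expects.  The embeddings index entry and the
# per-feature section below are hypothetical (the label, URL and `word_counter` are made
# up), but the keys are the ones the function actually reads.
global_embeddings_settings = {
    'glove-6B-100': {'file': 'https://example.com/glove.6B.100d.txt.gz', 'dsz': 100},
}
embeddings_section = {'label': 'glove-6B-100', 'type': 'default'}
bundle = load_embeddings_overlay(global_embeddings_settings, embeddings_section, vocab=word_counter)
embeddings, vocab = bundle['embeddings'], bundle['vocab']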
def load_embeddings(name, **kwargs):
    """This method negotiates loading an embeddings sub-graph AND a corresponding vocabulary (lookup from word to int)

    Embeddings and their addons may be downloaded from an http `GET` either via raw URL or using hub notation
    (hub:v1:embeddings/hub:v1:addons)

    This function behaves differently depending on its keyword arguments and the `embed_type`.
    If the registered embeddings class contains a load method on it and we are given an `embed_file`,
    we will assume that we need to load that file, and that the embeddings object wants its own load function used
    for that.  This would be typical, e.g., for a user-defined sub-graph LM.

    For cases where no `embed_file` is provided and there is a `create` method on this class, we assume that the
    user wants us to build a VSM (`baseline.embeddings.PretrainedEmbeddingsModel`) ourselves, and call
    their create function, which will take in this VSM.  The VSM is then used to provide the vocabulary back,
    and the `create` function invokes the class constructor with the sub-parts of VSM required to build the graph.

    If there is no create method provided, and there is no load function provided, we simply invoke the
    registered embeddings' constructor with the args, and assume there is a `get_vocab()` method on the
    provided implementation

    :param name: A unique string name for these embeddings
    :param kwargs: See below

    :Keyword Arguments:
        * *embed_type* -- The key identifying the embedding type in the registry
    :return: An embedding bundle: a ``dict`` with `embeddings` and `vocab` keys
    """
    embed_type = kwargs.pop("embed_type", "default")
    # Dynamically load a module if it's needed
    for module in listify(kwargs.get('module', kwargs.get('modules', []))):
        import_user_module(module, kwargs.get('data_download_cache'))
    embeddings_cls = MEAD_LAYERS_EMBEDDINGS[embed_type]

    filename = kwargs.get("embed_file")

    # If the embedding model has a load function, defer all the work to that.  Basically just pass the kwargs in
    # and let it do its magic
    if hasattr(embeddings_cls, "load") and filename is not None:
        model = embeddings_cls.load(filename, **kwargs)
        return {"embeddings": model, "vocab": model.get_vocab()}

    # If there isn't a load function, there must be a create() function where the first arg is a type of
    # EmbeddingsModel
    elif hasattr(embeddings_cls, "create"):
        unif = kwargs.pop("unif", 0.1)
        known_vocab = kwargs.pop("known_vocab", None)
        keep_unused = kwargs.pop("keep_unused", False)
        normalize = kwargs.pop("normalized", False)
        preserve_vocab_indices = bool(kwargs.get('preserve_vocab_indices', False))

        # If there is no filename, use a random-init model
        if filename is None:
            dsz = kwargs.pop("dsz")
            model = RandomInitVecModel(dsz, known_vocab=known_vocab, unif_weight=unif, counts=not preserve_vocab_indices)
        # If there is, use the PretrainedEmbeddingsModel loader
        else:
            if is_sequence(filename) or preserve_vocab_indices:
                model = PretrainedEmbeddingsStack(
                    listify(filename),
                    known_vocab=known_vocab,
                    normalize=normalize,
                    counts=not preserve_vocab_indices,
                    **kwargs
                )
            else:
                model = PretrainedEmbeddingsModel(
                    filename,
                    known_vocab=known_vocab,
                    unif_weight=unif,
                    keep_unused=keep_unused,
                    normalize=normalize,
                    **kwargs,
                )

        # Then call create(model, name, **kwargs)
        return {"embeddings": embeddings_cls.create(model, name, **kwargs), "vocab": model.get_vocab()}

    # If we don't have a load function, but filename is None, we should just instantiate the class
    model = embeddings_cls(name, **kwargs)
    return {"embeddings": model, "vocab": model.get_vocab()}
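
# A minimal sketch of the common call shapes (the paths and counters are placeholders,
# and `embed_type='default'` assumes the stock registry entry):
bundle = load_embeddings('word', embed_file='/path/to/glove.6B.100d.txt',
                         known_vocab=word_counter, embed_type='default')
word_embeddings, word_vocab = bundle['embeddings'], bundle['vocab']

# With no embed_file, a randomly initialized model of dimension `dsz` is built instead
rand_bundle = load_embeddings('char', dsz=30, known_vocab=char_counter)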
def fit(model, ts, vs, es, **kwargs):
    epochs = int(kwargs.get('epochs', 5))
    patience = int(kwargs.get('patience', epochs))

    do_early_stopping = bool(kwargs.get('do_early_stopping', True))
    model_file = get_model_file('lm', 'pytorch', kwargs.get('basedir'))

    num_loader_workers = int(kwargs.get('num_loader_workers', 0))
    pin_memory = bool(kwargs.get('pin_memory', True))

    if not isinstance(ts, DataLoader):
        ts = DataLoader(ts, num_workers=num_loader_workers, batch_size=None, pin_memory=pin_memory)
    if not isinstance(vs, DataLoader):
        vs = DataLoader(vs, batch_size=None, pin_memory=pin_memory)
    if es and not isinstance(es, DataLoader):
        es = DataLoader(es, batch_size=None, pin_memory=pin_memory)

    best_metric = 10000
    if do_early_stopping:
        early_stopping_metric = kwargs.get('early_stopping_metric', 'avg_loss')
        early_stopping_cmp, best_metric = get_metric_cmp(early_stopping_metric, kwargs.get('early_stopping_cmp'))
        patience = kwargs.get('patience', epochs)
        logger.info('Doing early stopping on [%s] with patience [%d]', early_stopping_metric, patience)

    reporting_fns = listify(kwargs.get('reporting', []))
    logger.info('reporting %s', reporting_fns)

    after_train_fn = kwargs.get('after_train_fn', None)
    trainer = create_trainer(model, **kwargs)

    last_improved = 0
    for epoch in range(epochs):
        trainer.train(ts, reporting_fns)
        if after_train_fn is not None:
            after_train_fn(model)
        test_metrics = trainer.test(vs, reporting_fns, phase='Valid')

        if do_early_stopping is False:
            trainer.save(model_file)
        elif early_stopping_cmp(test_metrics[early_stopping_metric], best_metric):
            last_improved = epoch
            best_metric = test_metrics[early_stopping_metric]
            logger.info('New best %.3f', best_metric)
            trainer.save(model_file)
        elif (epoch - last_improved) > patience:
            logger.info('Stopping due to persistent failures to improve')
            break

    if do_early_stopping is True:
        logger.info('Best performance on %s: %.3f at epoch %d', early_stopping_metric, best_metric, last_improved)

    if es is not None:
        logger.info('Reloading best checkpoint')
        model = torch.load(model_file)
        trainer = create_trainer(model, **kwargs)
        test_metrics = trainer.test(es, reporting_fns, phase='Test')
    return test_metrics