def run(ModelClass, output_dir, pipeline_opts, model_opts):
    """Run the prediction pipeline and save the resulting predictions.

    Loads a saved model, builds the test dataset from the model options,
    predicts on it and passes the predictions to
    ``save_predicted_probabilities`` together with ``output_dir``.

    Args:
        ModelClass (type): Model class to predict with. Its ``title``
            attribute (or ``__name__`` when absent) is logged, and its
            ``fieldset`` is used to build the test dataset.
        output_dir: Directory to save predictions.
        pipeline_opts (Namespace): Generic predict options. Read here:
            load_model: Path of the saved model; for non-linear models it
                is also passed on as ``load_vocab``.
            gpu_id: Device to move the model to; ``None`` or a negative
                value results in ``model.to(None)``.
            batch_size: Max batch size for predicting.
        model_opts (Namespace): Model specific options. All of its
            attributes are forwarded to ``build_test_dataset`` as keyword
            arguments.

    Returns:
        Whatever ``predicter.run`` returns; the original documentation
        describes it as a dict of the form ``{'target': predictions}``.
    """
    title = getattr(ModelClass, "title", ModelClass.__name__)
    logger.info("Predict with the {} model".format(title))

    if ModelClass == LinearWordQEClassifier:
        load_vocab = None
        model = LinearWordQEClassifier(
            evaluation_metric=model_opts.evaluation_metric
        )
        model.load(pipeline_opts.load_model)
        # NOTE(review): as this block reads, no predicter is created on this
        # path (the `LinearTester(model)` line was commented out), so the
        # `predicter.run` call below would fail for linear models.
        # Confirm whether LinearTester should be restored.
    else:
        # The vocabulary is read from the model file itself rather than from
        # a separate `load_vocab` option.
        load_vocab = pipeline_opts.load_model
        model = Model.create_from_file(pipeline_opts.load_model)

        # Only a non-negative gpu_id selects a device; otherwise pass None.
        gpu_id = pipeline_opts.gpu_id
        device_id = gpu_id if gpu_id is not None and gpu_id >= 0 else None
        model.to(device_id)

        predicter = Predicter(model)

    options = vars(model_opts)
    fieldset = ModelClass.fieldset(wmt18_format=options.get("wmt18_format"))
    dataset = build_test_dataset(
        fieldset=fieldset,
        load_vocab=load_vocab,
        **options,
    )

    predictions = predicter.run(dataset, batch_size=pipeline_opts.batch_size)
    save_predicted_probabilities(output_dir, predictions)
    return predictions
def load_model(model_path):
    """Load a pretrained model and wrap it in a `Predicter` object.

    Args:
        model_path (str or Path): A path to the saved model file.

    Returns:
        Predicter: Built from the loaded model and the fields deserialized
        from the model's vocabularies.

    Raises:
        Exception: If the path does not exist, or if
            ``Model.create_from_file`` returns a falsy value for it.
    """
    path = Path(model_path)
    if not path.exists():
        raise Exception('Path "{}" does not exist!'.format(path))

    loaded = Model.create_from_file(path)
    if not loaded:
        raise Exception('No model found in "{}"'.format(path))

    # Rebuild the fields from the vocabularies stored with the model.
    return Predicter(
        loaded,
        fields=deserialize_fields_from_vocabs(
            loaded.fieldset().fields, loaded.vocabs
        ),
    )
def from_directory(cls, directory, device_id=None):
    """Rebuild a trainer from the training state saved under `directory`.

    Reads three files from the directory, named by ``const.MODEL_FILE``,
    ``const.OPTIMIZER`` and ``const.TRAINER``: the model, the optimizer
    (its name plus state dict) and the trainer's own attributes.

    Args:
        directory (str or Path): Directory holding the saved files.
        device_id: When not ``None``, the model is moved with
            ``model.to(device_id)`` before the optimizer is created.

    Returns:
        An instance of ``cls`` created with ``checkpointer=None`` whose
        ``__dict__`` has been updated with the saved trainer state.
    """
    logger.info('Loading training state from {}'.format(directory))
    checkpoint_dir = Path(directory)

    restored_model = Model.create_from_file(checkpoint_dir / const.MODEL_FILE)
    if device_id is not None:
        restored_model.to(device_id)

    saved_optimizer = load_torch_file(str(checkpoint_dir / const.OPTIMIZER))
    make_optimizer = optimizer_class(saved_optimizer['name'])
    # lr=0.0 looks like a placeholder; presumably the saved state dict
    # supplies the real hyperparameters -- TODO confirm.
    restored_optimizer = make_optimizer(restored_model.parameters(), lr=0.0)
    restored_optimizer.load_state_dict(saved_optimizer['state_dict'])

    instance = cls(restored_model, restored_optimizer, checkpointer=None)
    saved_state = load_torch_file(str(checkpoint_dir / const.TRAINER))
    # Overwrite the fresh instance's attributes with the saved ones.
    instance.__dict__.update(saved_state)
    return instance
def test_get_mask():
    """Check ``Model.get_mask`` against hand-written masks for five fields.

    Builds a batch of four source/target rows of width 6 in which column 0
    holds START, columns 1..length keep random ids, column length + 1 holds
    STOP and every other column is padding. The expected masks are 1 only
    on columns 1..length. The tag masks drop the first and last column of
    the corresponding token mask; the gap mask drops only the first column
    of the target mask.
    """
    target_lengths = torch.LongTensor([1, 2, 3, 4])
    source_lengths = torch.LongTensor([4, 3, 2, 1])

    # NOTE(review): byte-typed masks are used with `masked_fill` throughout;
    # confirm this is supported by the PyTorch version the project pins.
    expected_source = torch.ByteTensor(
        [
            [0, 1, 1, 1, 1, 0],
            [0, 1, 1, 1, 0, 0],
            [0, 1, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0],
        ]
    )
    expected_target = torch.ByteTensor(
        [
            [0, 1, 0, 0, 0, 0],
            [0, 1, 1, 0, 0, 0],
            [0, 1, 1, 1, 0, 0],
            [0, 1, 1, 1, 1, 0],
        ]
    )
    expected_gaps = expected_target[:, 1:]
    expected_target_tags = expected_target[:, 1:-1]
    expected_source_tags = expected_source[:, 1:-1]

    # Random payloads, drawn in a fixed order: tokens first, then tags.
    source = torch.LongTensor(np.random.randint(4, 100, size=(4, 6)))
    target = torch.LongTensor(np.random.randint(4, 100, size=(4, 6)))
    source_tags = torch.LongTensor(np.random.randint(0, 2, size=(4, 4)))
    target_tags = torch.LongTensor(np.random.randint(0, 2, size=(4, 4)))
    gap_tags = torch.LongTensor(np.random.randint(0, 2, size=(4, 5)))

    def add_specials(tokens, keep_mask, lengths):
        # Pad everything outside the mask, then write START in column 0 and
        # STOP in column length + 1 of every row.
        tokens = tokens.masked_fill(1 - keep_mask, const.PAD_ID)
        tokens[:, 0] = const.START_ID
        stop_column = (lengths + 1).unsqueeze(1)
        is_stop = (
            torch.arange(6).unsqueeze(0).expand_as(tokens) == stop_column
        )
        return tokens.masked_fill(is_stop, const.STOP_ID)

    source = add_specials(source, expected_source, source_lengths)
    target = add_specials(target, expected_target, target_lengths)

    target_tags = target_tags.masked_fill(
        1 - expected_target_tags, const.PAD_TAGS_ID
    )
    source_tags = source_tags.masked_fill(
        1 - expected_source_tags, const.PAD_TAGS_ID
    )
    gap_tags = gap_tags.masked_fill(1 - expected_gaps, const.PAD_TAGS_ID)

    batch_fields = {
        const.TARGET: target,
        const.SOURCE: source,
        const.TARGET_TAGS: target_tags,
        const.SOURCE_TAGS: source_tags,
        const.GAP_TAGS: gap_tags,
    }
    batch = SimpleNamespace(**batch_fields)

    # Minimal vocabularies: only the special symbols are registered.
    vocab = Vocabulary(collections.Counter())
    vocab.stoi = {
        const.UNK: const.UNK_ID,
        const.PAD: const.PAD_ID,
        const.START: const.START_ID,
        const.STOP: const.STOP_ID,
    }
    tags_vocab = Vocabulary(collections.Counter())
    tags_vocab.stoi = {const.PAD: const.PAD_TAGS_ID}

    model = Model(
        vocabs={
            const.TARGET: vocab,
            const.SOURCE: vocab,
            const.TARGET_TAGS: tags_vocab,
            const.SOURCE_TAGS: tags_vocab,
            const.GAP_TAGS: tags_vocab,
        }
    )

    checks = [
        (const.SOURCE, expected_source),
        (const.TARGET, expected_target),
        (const.TARGET_TAGS, expected_target_tags),
        (const.SOURCE_TAGS, expected_source_tags),
        (const.GAP_TAGS, expected_gaps),
    ]
    # Compute every mask before asserting anything.
    actual_masks = [model.get_mask(batch, side) for side, _ in checks]
    for actual, (_, expected) in zip(actual_masks, checks):
        assert (actual == expected).all()
def retrieve_trainer(
    ModelClass, pipeline_options, model_options, vocabs, output_dir, device_id
):
    """Create a Trainer object with an associated model.

    The trainer encapsulates the logic behind training the model and
    checkpointing. The received pipeline options decide whether a previous
    run is resumed, whether the model is loaded from disk or built from
    options, and which optimizer and scheduler are attached.

    Args:
        ModelClass: Model class used to build a new model via
            ``from_options`` when ``load_model`` is not set.
        pipeline_options (Namespace): Generic training options
            resume (bool): Set to true if resuming an existing run.
            load_model (str): Directory containing model.torch for loading
                pre-created model.
            checkpoint_save (bool): Boolean indicating if snapshots should
                be saved after validation runs. Warning: if false, will
                never save the model.
            checkpoint_keep_only_best (int): Indicates kiwi to keep the
                best `n` models.
            checkpoint_early_stop_patience (int): Stops training if metrics
                don't improve after `n` validation runs.
            checkpoint_validation_steps (int): Perform validation every `n`
                training steps.
            optimizer (string): The optimizer to be used in training.
            learning_rate (float): Starting learning rate.
            learning_rate_decay (float): Factor of learning rate decay. A
                scheduler is only created when it lies strictly between
                0 and 1.
            learning_rate_decay_start (int): Start decay after epoch `x`;
                passed to the scheduler as its ``patience``.
            log_interval (int): Log after `k` batches.
        model_options (Namespace): Model specific options.
        vocabs (dict): Vocab dictionary.
        output_dir (str or Path): Output directory for models and stats
            concerning training.
        device_id (int): The gpu id to be used in training. Passed to
            ``model.to`` and ``Trainer.resume`` unchanged; the original
            docs say a negative value means cpu -- TODO confirm where that
            is handled.

    Returns:
        Trainer
    """
    if pipeline_options.resume:
        return Trainer.resume(local_path=output_dir, device_id=device_id)

    model = (
        Model.create_from_file(pipeline_options.load_model)
        if pipeline_options.load_model
        else ModelClass.from_options(vocabs=vocabs, opts=model_options)
    )

    checkpointer = Checkpoint(
        output_dir,
        pipeline_options.checkpoint_save,
        pipeline_options.checkpoint_keep_only_best,
        pipeline_options.checkpoint_early_stop_patience,
        pipeline_options.checkpoint_validation_steps,
    )

    # Linear models have their own trainer and need no optimizer/scheduler.
    if isinstance(model, LinearWordQEClassifier):
        return LinearWordQETrainer(
            model,
            model_options.training_algorithm,
            model_options.regularization_constant,
            checkpointer,
        )

    # Set GPU or CPU; has to be before instantiating the optimizer.
    model.to(device_id)

    build_optimizer = optimizer_class(pipeline_options.optimizer)
    optimizer = build_optimizer(
        model.parameters(), lr=pipeline_options.learning_rate
    )

    decay = pipeline_options.learning_rate_decay
    scheduler = None
    if 0.0 < decay < 1.0:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=decay,
            patience=pipeline_options.learning_rate_decay_start,
            verbose=True,
            mode="max",
        )

    return Trainer(
        model,
        optimizer,
        checkpointer,
        log_interval=pipeline_options.log_interval,
        scheduler=scheduler,
    )