def __init__(
    self,
    config: Config,
    model: SILTransformer,
    src_spp: Optional[sp.SentencePieceProcessor],
    trg_spp: Optional[sp.SentencePieceProcessor],
    step: int,
    checkpoint_path: Path,
    type: Optional[str] = None,
):
    self.types: List[str] = []
    if type is not None:
        self.types.append(type)
    self.step = step

    # Configuration priority: user config > auto config > default config.
    new_config = copy.deepcopy(_CONFIG_FALLBACK)
    merge_dict(new_config, model.auto_config())
    merge_dict(new_config, config.root)
    new_config["params"].setdefault("num_hypotheses", new_config["infer"].get("n_best", 1))
    new_config["params"].setdefault(
        "average_loss_in_time", new_config["train"]["batch_type"] == "tokens"
    )
    new_config["infer"]["n_best"] = 1
    self.config = new_config

    self.src_spp = src_spp
    self.trg_spp = trg_spp
    self.model: SILTransformer = clone_layer(model)
    self.model.initialize(self.config["data"], params=self.config["params"])
    self._analyze_fn: Optional[Function] = None
    self.checkpoint_path = checkpoint_path
    self.checkpoint: Optional[Checkpoint] = None

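# The snippets in this file all rely on merge_dict for the priority chain
# above (user config > auto config > default config). A minimal sketch of the
# recursive merge it is assumed to perform: values from `override` take
# priority, and nested dicts are merged key by key rather than replaced.
def _merge_dict_sketch(base: dict, override: dict) -> dict:
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            _merge_dict_sketch(base[key], value)
        else:
            base[key] = value
    return base

# Example: the user value overrides the auto value; unrelated keys survive.
merged = _merge_dict_sketch(
    {"train": {"batch_size": 3072, "batch_type": "tokens"}},  # auto config
    {"train": {"batch_size": 4096}},                          # user config
)
assert merged == {"train": {"batch_size": 4096, "batch_type": "tokens"}}
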
def _finalize_config(self, training=False, num_devices=1):
    # Configuration priority: user config > auto config > default config.
    config = copy.deepcopy(_CONFIG_FALLBACK)
    if self._auto_config:
        model_config = self._model.auto_config(num_replicas=num_devices)
        if not model_config:
            raise NotImplementedError(
                "This model does not define any automatic configuration values"
            )
        misc.merge_dict(config, model_config)
    misc.merge_dict(config, self._config)
    config["params"].setdefault("num_hypotheses", config["infer"].get("n_best", 1))
    config["params"].setdefault(
        "average_loss_in_time", config["train"]["batch_type"] == "tokens"
    )

    if training:
        train_config = config["train"]
        batch_size = train_config.get("batch_size")

        # Auto tune batch size.
        if batch_size is None or batch_size == 0:
            if train_config["batch_type"] == "examples":
                raise ValueError(
                    'Batch size autotuning is only supported for the "tokens" batch type'
                )
            max_batch_size = 16384
            if train_config.get("effective_batch_size") is not None:
                max_batch_size = min(max_batch_size, train_config["effective_batch_size"])
            train_config["batch_size"] = _auto_tune_batch_size(
                config, max_batch_size=max_batch_size, num_devices=num_devices
            )

    tf.get_logger().info(
        "Using parameters:\n%s", yaml.dump(config, indent=2, default_flow_style=False)
    )
    return config

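# Illustration of the autotune guard in _finalize_config above (values here
# are hypothetical): a missing or zero batch size with the "tokens" batch
# type triggers autotuning, capped by effective_batch_size when it is set.
train = {"batch_type": "tokens", "batch_size": 0, "effective_batch_size": 25000}
if train.get("batch_size") in (None, 0):
    assert train["batch_type"] != "examples"  # would raise ValueError above
    max_batch_size = min(16384, train["effective_batch_size"])
    # _auto_tune_batch_size is then expected to search for the largest batch
    # size (in tokens) that still fits in memory, up to max_batch_size.
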
def auto_config(self, num_devices=1):
    config = super(Transformer, self).auto_config(num_devices=num_devices)
    return merge_dict(
        config,
        {
            "params": {
                "average_loss_in_time": True,
                "label_smoothing": 0.1,
                "optimizer": "LazyAdamOptimizer",
                "optimizer_params": {"beta1": 0.9, "beta2": 0.998},
                "learning_rate": 2.0,
                "gradients_accum": max(1, 8 // num_devices),
                "decay_type": "noam_decay_v2",
                "decay_params": {"model_dim": self._num_units, "warmup_steps": 8000},
            },
            "train": {
                "batch_size": 3072,
                "batch_type": "tokens",
                "maximum_features_length": 100,
                "maximum_labels_length": 100,
                "keep_checkpoint_max": 8,
                "average_last_checkpoints": 8,
            },
        },
    )

def auto_config(self, num_replicas=1):
    config = super(Transformer, self).auto_config(num_replicas=num_replicas)
    return merge_dict(
        config,
        {
            "params": {
                "average_loss_in_time": True,
                "label_smoothing": 0.1,
                "optimizer": "LazyAdam",
                "optimizer_params": {"beta_1": 0.9, "beta_2": 0.998},
                "learning_rate": 2.0,
                "decay_type": "NoamDecay",
                "decay_params": {"model_dim": self._num_units, "warmup_steps": 8000},
            },
            "train": {
                "effective_batch_size": 25000,
                "batch_size": 3072,
                "batch_type": "tokens",
                "maximum_features_length": 100,
                "maximum_labels_length": 100,
                "keep_checkpoint_max": 8,
                "average_last_checkpoints": 8,
            },
        },
    )

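# The "NoamDecay" schedule above pairs learning_rate=2.0 with model_dim and
# warmup_steps. A sketch assuming the standard formulation from "Attention Is
# All You Need" (the library's schedule is assumed to match):
#   lr(step) = scale * model_dim**-0.5 * min(step**-0.5, step * warmup**-1.5)
def _noam_lr_sketch(step, model_dim=512, warmup_steps=8000, scale=2.0):
    step = max(step, 1)
    return scale * model_dim ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

# Linear ramp during warmup, then inverse-square-root decay:
print(_noam_lr_sketch(1))      # ~1.2e-07, start of warmup
print(_noam_lr_sketch(8000))   # ~9.9e-04, peak at warmup_steps
print(_noam_lr_sketch(32000))  # ~4.9e-04, decaying as 1/sqrt(step)
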
def auto_config(self, num_replicas=1):
    config = super(LanguageModel, self).auto_config(num_replicas=num_replicas)
    return misc.merge_dict(
        config,
        {
            "infer": {
                "length_bucket_width": 1  # To ensure fixed length in each batch.
            }
        },
    )

def auto_config(self, num_devices=1):
    config = super(DualSourceTransformer, self).auto_config(num_devices=num_devices)
    # Apply the same maximum length to each of the two source inputs.
    max_length = config["train"]["maximum_features_length"]
    return misc.merge_dict(
        config, {"train": {"maximum_features_length": [max_length, max_length]}}
    )

def auto_config(self, num_replicas=1):
    config = super().auto_config(num_replicas=num_replicas)
    max_length = config["train"]["maximum_features_length"]
    return misc.merge_dict(
        config, {"train": {"maximum_features_length": [max_length, max_length]}}
    )

def auto_config(self, num_replicas=1):
    config = super(LstmCnnCrfTagger, self).auto_config(num_replicas=num_replicas)
    return merge_dict(
        config,
        {
            "params": {"optimizer": "Adam", "learning_rate": 0.001},
            "train": {"batch_size": 32},
        },
    )

def load_config(config_paths, config=None): """Loads configuration files. Args: config_paths: A list of configuration files. config: A (possibly non empty) config dictionary to fill. Returns: The configuration dictionary. """ if config is None: config = {} for config_path in config_paths: with compat.gfile_open(config_path, mode="rb") as config_file: subconfig = yaml.load(config_file.read(), Loader=yaml.UnsafeLoader) # Add or update section in main configuration. merge_dict(config, subconfig) return config
def auto_config(self, num_devices=1):
    config = super(SeqTagger, self).auto_config(num_devices=num_devices)
    return merge_dict(
        config,
        {
            "params": {"optimizer": "AdamOptimizer", "learning_rate": 0.001},
            "train": {"batch_size": 32},
        },
    )

def load_config(config_paths, config=None): """Loads YAML configuration files. Args: config_paths: A list of configuration files that will be merged to a single configuration. The rightmost configuration takes priority. config: A (possibly non empty) config dictionary to fill. Returns: The configuration as Python dictionary. """ if config is None: config = {} for config_path in config_paths: with tf.io.gfile.GFile(config_path, mode="rb") as config_file: subconfig = yaml.load(config_file.read(), Loader=yaml.UnsafeLoader) # Add or update section in main configuration. merge_dict(config, subconfig) return config
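# A self-contained usage sketch for load_config above, demonstrating that the
# rightmost file takes priority (file names and contents are hypothetical):
import os
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    base_path = os.path.join(tmp, "base.yml")
    override_path = os.path.join(tmp, "override.yml")
    with open(base_path, "w") as f:
        f.write("train:\n  batch_size: 3072\n  batch_type: tokens\n")
    with open(override_path, "w") as f:
        f.write("train:\n  batch_size: 4096\n")
    config = load_config([base_path, override_path])
    assert config["train"] == {"batch_size": 4096, "batch_type": "tokens"}
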
def auto_config(self, num_replicas=1):
    config = super(_RNNBase, self).auto_config(num_replicas=num_replicas)
    return merge_dict(
        config,
        {
            "params": {"optimizer": "Adam", "learning_rate": 0.0002},
            "train": {
                "batch_size": 64,
                "maximum_features_length": 80,
                "maximum_labels_length": 80,
            },
        },
    )

def auto_config(self, num_devices=1):
    config = super(SequenceToSequence, self).auto_config(num_devices=num_devices)
    return merge_dict(
        config,
        {
            "params": {"beam_width": 4, "length_penalty": 0.6},
            "train": {
                "sample_buffer_size": -1,  # -1: shuffle over the full dataset.
                "train_steps": 500000,
            },
        },
    )

def _getTransliterationRunner(self, base_config=None, model_version="v2"):
    model_dir = os.path.join(self.get_temp_dir(), "model")
    shutil.copytree(
        os.path.join(test_data, "transliteration-aren-v2", model_version), model_dir
    )
    config = {}
    config["model_dir"] = model_dir
    config["data"] = {
        "source_vocabulary": os.path.join(model_dir, "ar.vocab"),
        "target_vocabulary": os.path.join(model_dir, "en.vocab"),
    }
    if base_config is not None:
        config = misc.merge_dict(config, base_config)
    model = load_model(model_dir)
    runner = Runner(model, config)
    return runner

def auto_config(self, num_replicas=1):
    config = super().auto_config(num_replicas=num_replicas)
    return misc.merge_dict(
        config,
        {
            "params": {
                "optimizer": "Adam",
                "learning_rate": 0.001,
            },
            "train": {
                "batch_size": 32,
            },
        },
    )

def auto_config(self, num_devices=1):
    config = super(SequenceToSequence, self).auto_config(num_devices=num_devices)
    return merge_dict(
        config,
        {
            "params": {"beam_width": 4},
            "train": {"sample_buffer_size": -1, "train_steps": 500000},
            "infer": {"batch_size": 32, "bucket_width": 5},
        },
    )

def auto_config(self, num_devices=1):
    config = super(_RNNBase, self).auto_config(num_devices=num_devices)
    return merge_dict(
        config,
        {
            "params": {
                "optimizer": "AdamOptimizer",
                "learning_rate": 0.0002,
                "param_init": 0.1,
                "clip_gradients": 5.0,
            },
            "train": {
                "batch_size": 64,
                "maximum_features_length": 80,
                "maximum_labels_length": 80,
            },
        },
    )

def auto_config(self, num_replicas=1):
    config = super(ListenAttendSpell, self).auto_config(num_replicas=num_replicas)
    return merge_dict(
        config,
        {
            "params": {
                "optimizer": "SGD",
                "learning_rate": 0.2,
                "scheduled_sampling_type": "constant",
                "scheduled_sampling_read_probability": 0.9,
            },
            "train": {
                "batch_size": 32,
                "length_bucket_width": 15,
                "maximum_features_length": 2450,
                "maximum_labels_length": 330,
            },
        },
    )

def auto_config(self, num_replicas=1):
    config = super(SequenceToSequence, self).auto_config(num_replicas=num_replicas)
    return misc.merge_dict(
        config,
        {
            "params": {"beam_width": 4},
            "train": {"sample_buffer_size": -1, "max_step": 500000},
            "infer": {"batch_size": 32, "length_bucket_width": 5},
        },
    )

def auto_config(self, num_replicas=1):
    config = super(GPT2Small, self).auto_config(num_replicas=num_replicas)
    return merge_dict(
        config,
        {
            "params": {
                "average_loss_in_time": True,
                "optimizer": "Adam",
                "learning_rate": 2.5e-4,
                "decay_type": "CosineAnnealing",
                "decay_params": {
                    "max_step": 1000000,
                    "warmup_steps": 2000,
                },
            },
            "train": {
                # Below options are from GPT-1.
                "batch_size": 64,
                "maximum_features_length": 512,
            },
        },
    )

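# The "CosineAnnealing" decay above combines a warmup phase with a cosine
# curve toward max_step. A sketch of one common formulation; the library's
# exact parameterization may differ:
import math

def _cosine_annealing_lr_sketch(step, lr_max=2.5e-4, max_step=1000000, warmup_steps=2000):
    if step < warmup_steps:
        return lr_max * step / warmup_steps  # linear warmup
    progress = min(step, max_step) / max_step
    return 0.5 * lr_max * (1.0 + math.cos(math.pi * progress))  # cosine decay

print(_cosine_annealing_lr_sketch(1000))     # mid-warmup: half of lr_max
print(_cosine_annealing_lr_sketch(500000))   # halfway: ~half of lr_max
print(_cosine_annealing_lr_sketch(1000000))  # end: annealed to 0
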
def auto_config(self, num_devices=1):
    config = super(GPT2Small, self).auto_config(num_devices=num_devices)
    return misc.merge_dict(
        config,
        {
            "params": {
                "average_loss_in_time": True,
                "optimizer": "AdamOptimizer",
                "weight_decay": 0.01,
                "learning_rate": 2.5e-4,
                "decay_type": "cosine_annealing",
                "decay_params": {
                    "max_step": 1000000,
                    "warmup_steps": 2000,
                },
            },
            "train": {
                "bucket_width": 1,
                # Below options are from GPT-1.
                "batch_size": 64,
                "maximum_features_length": 512,
            },
        },
    )

def auto_config(self, num_replicas=1):
    config = super().auto_config(num_replicas=num_replicas)
    return misc.merge_dict(
        config,
        {
            "params": {
                "beam_width": 4,
            },
            "train": {
                "sample_buffer_size": -1,
                "max_step": 500000,
            },
            "eval": {
                "length_bucket_width": 5,
            },
            "score": {
                "length_bucket_width": 5,
            },
            "infer": {
                "batch_size": 32,
                "length_bucket_width": 5,
            },
        },
    )

def __init__(self,
             model,
             config,
             seed=None,
             num_devices=1,
             gpu_allow_growth=False,
             session_config=None,
             auto_config=False,
             hvd=None):
    """Initializes the runner parameters.

    Args:
      model: A :class:`opennmt.models.model.Model` instance to run.
      config: The run configuration.
      seed: The random seed to set.
      num_devices: The number of devices (GPUs) to use for training.
      gpu_allow_growth: Allow GPU memory to grow dynamically.
      session_config: ``tf.ConfigProto`` overrides.
      auto_config: If ``True``, use automatic configuration values defined by
        :obj:`model`.
      hvd: Optional Horovod object.

    Raises:
      NotImplementedError: If :obj:`auto_config` is ``True`` but :obj:`model`
        does not define any automatic configuration values.
    """
    self._model = model
    self._num_devices = num_devices
    self._num_replicas = hvd.size() if hvd is not None else num_devices
    self._seed = seed
    self._hvd = hvd

    # Configuration priority: user config > auto config > default config.
    self._config = copy.deepcopy(_CONFIG_FALLBACK)
    if auto_config:
        model_config = self._model.auto_config(num_devices=self._num_replicas)
        if not model_config:
            raise NotImplementedError(
                "This model does not define any automatic configuration values")
        misc.merge_dict(self._config, model_config)
    misc.merge_dict(self._config, config)
    self._model.initialize(self._config["data"])
    tf.logging.info(
        "Using parameters:\n%s",
        yaml.dump(self._config, indent=2, default_flow_style=False))

    session_config_base = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=gpu_allow_growth))
    if self._hvd is not None:
        session_config_base.gpu_options.visible_device_list = str(
            self._hvd.local_rank())

    # Disable layout optimizer for better conv1d performance, see:
    # https://github.com/tensorflow/tensorflow/issues/20309
    # This field does not exist in TensorFlow 1.4, so guard against the
    # exception.
    try:
        rewrite_options = text_format.Parse("""
            graph_options {
              rewrite_options {
                layout_optimizer: OFF
              }
            }
            """, tf.ConfigProto())
        session_config_base.MergeFrom(rewrite_options)
    except text_format.ParseError:
        pass

    if session_config is not None:
        session_config_base.MergeFrom(session_config)
    self._session_config = session_config_base

    np.random.seed(seed)
    random.seed(seed)

def __init__(self,
             model,
             config,
             seed=None,
             num_devices=1,
             gpu_allow_growth=False,
             session_config=None,
             auto_config=False):
    """Initializes the runner parameters.

    Args:
      model: A :class:`opennmt.models.model.Model` instance to run.
      config: The run configuration.
      seed: The random seed to set.
      num_devices: The number of devices (GPUs) to use for training.
      gpu_allow_growth: Allow GPU memory to grow dynamically.
      session_config: ``tf.ConfigProto`` overrides.
      auto_config: If ``True``, use automatic configuration values defined by
        :obj:`model`.

    Raises:
      NotImplementedError: If :obj:`auto_config` is ``True`` but :obj:`model`
        does not define any automatic configuration values.
    """
    self._model = model
    self._num_devices = num_devices

    # Configuration priority: user config > auto config > default config.
    self._config = copy.deepcopy(_CONFIG_FALLBACK)
    if auto_config:
        model_config = self._model.auto_config(num_devices=num_devices)
        if not model_config:
            raise NotImplementedError(
                "This model does not define any automatic configuration values")
        misc.merge_dict(self._config, model_config)
    misc.merge_dict(self._config, config)
    tf.logging.info(
        "Using parameters: %s", json.dumps(self._config, indent=2, sort_keys=True))

    session_config_base = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=gpu_allow_growth))

    # Disable layout optimizer for better conv1d performance, see:
    # https://github.com/tensorflow/tensorflow/issues/20309
    # This field does not exist in TensorFlow 1.4, so guard against the
    # exception.
    try:
        rewrite_options = text_format.Parse("""
            graph_options {
              rewrite_options {
                layout_optimizer: OFF
              }
            }
            """, tf.ConfigProto())
        session_config_base.MergeFrom(rewrite_options)
    except text_format.ParseError:
        pass

    if session_config is not None:
        session_config_base.MergeFrom(session_config)
    session_config = session_config_base

    run_config = tf.estimator.RunConfig(
        model_dir=self._config["model_dir"],
        session_config=session_config,
        tf_random_seed=seed)

    # Create a first session to enforce GPU options.
    # See https://github.com/OpenNMT/OpenNMT-tf/issues/80.
    _ = tf.Session(config=session_config)

    np.random.seed(seed)
    random.seed(seed)

    if "train" in self._config:
        if "save_summary_steps" in self._config["train"]:
            run_config = run_config.replace(
                save_summary_steps=self._config["train"]["save_summary_steps"],
                log_step_count_steps=self._config["train"]["save_summary_steps"])
        if "save_checkpoints_steps" in self._config["train"]:
            run_config = run_config.replace(
                save_checkpoints_secs=None,
                save_checkpoints_steps=self._config["train"]["save_checkpoints_steps"])
        if "keep_checkpoint_max" in self._config["train"]:
            run_config = run_config.replace(
                keep_checkpoint_max=self._config["train"]["keep_checkpoint_max"])

    self._estimator = tf.estimator.Estimator(
        self._model.model_fn(
            num_devices=self._num_devices,
            eval_prediction_hooks_fn=self._make_eval_prediction_hooks_fn()),
        config=run_config,
        params=self._config["params"])

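# Hypothetical usage of the Runner above (the model class and config file name
# are placeholders, not from the source): auto_config=True lets the model's
# auto_config() values fill in anything the user configuration leaves unset.
config = load_config(["my_config.yml"])
model = MyTransformerModel()  # any opennmt.models.model.Model subclass
runner = Runner(model, config, seed=42, num_devices=1, auto_config=True)
runner.train_and_evaluate()  # train with periodic evaluation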