def model_fn(model_configs,
             mode,
             dataset,
             name=None,
             reuse=None,
             distributed_mode=False,
             is_chief=True,
             verbose=True):
    """ Creates NMT model for training, evaluation or inference.

    Builds the model graph once per data-parallel replica via `Parallelism`
    and wraps the resulting tensors/ops in an `EstimatorSpec`.

    Args:
        model_configs: A dictionary of all configurations.
        mode: A mode.
        dataset: A `Dataset` object.
        name: A string, the name of top-level of the variable scope.
        reuse: Whether to reuse all variables, the parameter passed
          to `tf.variable_scope()`.
        verbose: Print model parameters if set True.
        distributed_mode: Whether training is on distributed mode.
        is_chief: Whether is the chief worker.

    Returns: A `EstimatorSpec` object.
    """
    # Create model template function
    model_str = model_configs["model"]
    if model_str is None:
        model_str = "SequenceToSequence"
    # model_name = name or model_str.split(".")[-1]
    model_name = get_model_top_scope_name(model_str, name)
    if verbose:
        tf.logging.info("Create model: {} for {}".format(model_str, mode))
    # create model instance
    # NOTE(review): the model class is resolved with eval() on a
    # config-supplied name — assumes the configuration is trusted.
    model = eval(model_str)(params=model_configs["model_params"],
                            mode=mode,
                            vocab_source=dataset.vocab_source,
                            vocab_target=dataset.vocab_target,
                            name=model_name,
                            verbose=verbose)
    # create expert_utils.Parallelism
    parallelism = Parallelism(mode, reuse=reuse)
    if mode == ModeKeys.TRAIN:
        # Optimizer is only needed to compute per-replica gradients below.
        opt = OptimizerWrapper(model_configs["optimizer_params"])

    def _build_model():
        # Builds one replica of the graph; called by `parallelism`, which
        # presumably aggregates the per-replica return values — TODO confirm.
        if verbose:
            tf.logging.info("Building Model.......")
        _input_fields = eval(model_str).create_input_fields(mode)
        _model_output = model.build(_input_fields)
        if verbose:
            tf.logging.info("Finish Building Model.......")
        if mode == ModeKeys.INFER:
            # model_output is prediction
            return _input_fields, _model_output
        elif mode == ModeKeys.EVAL:
            # model_output = (loss_sum, weight_sum), attention
            return _input_fields, _model_output[0], _model_output[1]
        else:  # mode == TRAIN
            # model_output = loss_sum, weight_sum
            _loss = _model_output[0] / _model_output[1]
            grads = opt.optimizer.compute_gradients(
                _loss, colocate_gradients_with_ops=True)
            return _input_fields, _model_output[0], _model_output[1], \
                _loss, grads

    model_returns = parallelism(_build_model)
    input_fields = model_returns[0]
    if mode == ModeKeys.INFER:
        predictions = model_returns[1]
        return EstimatorSpec(mode, input_fields=input_fields,
                             predictions=predictions)
    if mode == ModeKeys.EVAL:
        loss_op, attention = model_returns[1:]
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            # a list of tuples [(loss_sum0, weight_sum0),
            # (loss_sum1, weight_sum1), ...]
            loss=loss_op,
            # attentions for force decoding
            predictions=attention)
    assert mode == ModeKeys.TRAIN
    loss_sums = model_returns[1]
    weight_sums = model_returns[2]
    loss_per_gpu = model_returns[3]
    grads = model_returns[4]
    # Global (weighted-average) loss across all replicas, used for display.
    loss = tf.reduce_sum(loss_sums) / tf.reduce_sum(weight_sums)
    # Register the training loss in collections so display hooks can fetch it.
    tf.add_to_collection(Constants.DISPLAY_KEY_COLLECTION_NAME,
                         Constants.TRAIN_LOSS_KEY_NAME)
    tf.add_to_collection(Constants.DISPLAY_VALUE_COLLECTION_NAME, loss)
    _add_to_display_collection(input_fields)
    # build train op
    train_op = opt.optimize(loss_per_gpu, gradients=grads)
    # build training hooks
    hooks = build_hooks(model_configs,
                        distributed_mode=distributed_mode,
                        is_chief=is_chief)
    from njunmt.training.text_metrics_spec import build_eval_metrics
    # NOTE(review): "is_cheif" looks like a typo of is_chief — confirm
    # against build_eval_metrics' signature before renaming.
    hooks.extend(build_eval_metrics(model_configs, dataset,
                                    is_cheif=is_chief,
                                    model_name=model_name))
    return EstimatorSpec(mode,
                         input_fields=input_fields,
                         loss=loss,
                         train_op=train_op,
                         training_hooks=hooks,
                         training_chief_hooks=None)
def model_fn(model_configs,
             mode,
             dataset,
             name=None,
             reuse=None,
             distributed_mode=False,
             is_chief=True,
             verbose=True):
    """ Creates NMT model for training, evaluation or inference.

    Args:
        model_configs: A dictionary of all configurations.
        mode: A mode.
        dataset: A `Dataset` object.
        name: A string, the name of top-level of the variable scope.
        reuse: Whether to reuse all variables, the parameter passed
          to `tf.variable_scope()`.
        verbose: Print model parameters if set True.
        distributed_mode: Whether training is on distributed mode.
        is_chief: Whether is the chief worker.

    Returns: A `EstimatorSpec` object.
    """
    # Resolve the model class name, defaulting to the vanilla seq2seq model.
    model_cls_name = model_configs["model"]
    model_cls_name = ("SequenceToSequence"
                      if model_cls_name is None else model_cls_name)
    top_scope_name = get_model_top_scope_name(model_cls_name, name)
    if verbose:
        tf.logging.info("Create model: {} for {}".format(
            model_cls_name, mode))
    # NOTE: the class is looked up by evaluating its configured name;
    # the configuration is assumed to be trusted.
    model_cls = eval(model_cls_name)
    nmt_model = model_cls(
        params=model_configs["model_params"],
        mode=mode,
        vocab_source=dataset.vocab_source,
        vocab_target=dataset.vocab_target,
        name=top_scope_name,
        verbose=verbose)
    # Placeholders / input tensors for this mode, then the model graph itself.
    input_fields = model_cls.create_input_fields(mode)
    with tf.variable_scope("", reuse=reuse):
        model_output = nmt_model.build(input_fields=input_fields)

    # --- training mode ---
    if mode == ModeKeys.TRAIN:
        loss = model_output
        # Register the training loss in collections so display hooks
        # can fetch it.
        tf.add_to_collection(Constants.DISPLAY_KEY_COLLECTION_NAME,
                             Constants.TRAIN_LOSS_KEY_NAME)
        tf.add_to_collection(Constants.DISPLAY_VALUE_COLLECTION_NAME, loss)
        _add_to_display_collection(input_fields)
        # Build the train op and the training-time hooks.
        train_op = optimize(loss, model_configs["optimizer_params"])
        hooks = build_hooks(model_configs,
                            distributed_mode=distributed_mode,
                            is_chief=is_chief)
        from njunmt.training.text_metrics_spec import build_eval_metrics
        hooks.extend(build_eval_metrics(model_configs, dataset,
                                        is_cheif=is_chief,
                                        model_name=top_scope_name))
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            loss=loss,
            train_op=train_op,
            training_hooks=hooks,
            training_chief_hooks=None)

    # --- evaluation mode ---
    if mode == ModeKeys.EVAL:
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            loss=model_output[0],
            # attentions for force decoding
            predictions=model_output[1])

    # --- inference mode ---
    assert mode == ModeKeys.INFER
    return EstimatorSpec(
        mode,
        input_fields=input_fields,
        predictions=model_output)
def run(self):
    """ Trains the model.

    Builds vocabularies, the training graph (via `model_fn`), a
    `tf.train.MonitoredSession` with training/eval hooks, and then drives
    a gradient-accumulation training loop until a hook requests a stop.
    """
    # vocabulary
    vocab_source = Vocab(
        filename=self._model_configs["data"]["source_words_vocabulary"],
        bpe_codes=self._model_configs["data"]["source_bpecodes"],
        reverse_seq=self._model_configs["train"]["features_r2l"])
    vocab_target = Vocab(
        filename=self._model_configs["data"]["target_words_vocabulary"],
        bpe_codes=self._model_configs["data"]["target_bpecodes"],
        reverse_seq=self._model_configs["train"]["labels_r2l"])
    # Held-out data used by the evaluation hooks built below.
    eval_dataset = {
        "vocab_source": vocab_source,
        "vocab_target": vocab_target,
        "features_file": self._model_configs["data"]["eval_features_file"],
        "labels_file": self._model_configs["data"]["eval_labels_file"]}
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    # NOTE(review): this passes vocab_source/vocab_target kwargs, but the
    # model_fn signatures visible in this file take a `dataset` argument —
    # confirm which model_fn revision this call is meant to match.
    estimator_spec = model_fn(model_configs=self._model_configs,
                              mode=ModeKeys.TRAIN,
                              vocab_source=vocab_source,
                              vocab_target=vocab_target,
                              name=self._model_configs["problem_name"])
    # `train_ops` is expected to hold "zeros_op"/"collect_op"/"train_op"
    # for gradient accumulation (see step_fn below).
    train_ops = estimator_spec.train_ops
    hooks = estimator_spec.training_hooks
    # build training session
    sess = tf.train.MonitoredSession(
        session_creator=tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(),
            checkpoint_dir=None,
            master="",
            config=config),
        hooks=tuple(hooks) + tuple(
            build_eval_metrics(self._model_configs, eval_dataset,
                               model_name=estimator_spec.name)))
    # Paired source/target readers with length filtering and id conversion.
    train_text_inputter = ParallelTextInputter(
        LineReader(
            data=self._model_configs["data"]["train_features_file"],
            maximum_length=self._model_configs["train"]
            ["maximum_features_length"],
            preprocessing_fn=lambda x: vocab_source.convert_to_idlist(x)),
        LineReader(
            data=self._model_configs["data"]["train_labels_file"],
            maximum_length=self._model_configs["train"]
            ["maximum_labels_length"],
            preprocessing_fn=lambda x: vocab_target.convert_to_idlist(x)),
        vocab_source.pad_id,
        vocab_target.pad_id,
        batch_size=self._model_configs["train"]["batch_size"],
        batch_tokens_size=self._model_configs["train"]
        ["batch_tokens_size"],
        shuffle_every_epoch=self._model_configs["train"]
        ["shuffle_every_epoch"],
        fill_full_batch=True,
        bucketing=True)
    train_data = train_text_inputter.make_feeding_data(
        input_fields=estimator_spec.input_fields)
    # Mutable one-element-style counters so the nested step_fn closure can
    # update them: eidx = [displayed epoch, actual epoch],
    # update_cycle = [configured cycle length, current position in cycle].
    eidx = [0, 0]
    update_cycle = [self._model_configs["train"]["update_cycle"], 1]

    def step_fn(step_context):
        # One optimizer step with gradient accumulation: reset the
        # accumulators, run `collect_op` on (cycle-1) batches, then apply
        # the accumulated update with `train_op` on the last batch.
        step_context.session.run(train_ops["zeros_op"])
        try:
            while update_cycle[0] != update_cycle[1]:
                # NOTE(review): `.next()` is Python-2 iterator style;
                # Python 3 would need `next(train_data)`.
                data = train_data.next()
                step_context.session.run(train_ops["collect_op"],
                                         feed_dict=data["feed_dict"])
                update_cycle[1] += 1
            data = train_data.next()
            update_cycle[1] = 1
            return step_context.run_with_hooks(train_ops["train_op"],
                                               feed_dict=data["feed_dict"])
        except StopIteration:
            # End of epoch: any partially accumulated cycle is discarded
            # and the step returns None — presumably intentional.
            eidx[1] += 1

    while not sess.should_stop():
        if eidx[0] != eidx[1]:
            tf.logging.info("STARTUP Epoch {}".format(eidx[1]))
            eidx[0] = eidx[1]
        sess.run_step_fn(step_fn)
def model_fn(model_configs,
             mode,
             dataset,
             name=None,
             reuse=None,
             distributed_mode=False,
             is_chief=True,
             verbose=True):
    """ Creates NMT model for training, evaluation or inference.

    Builds the model graph once per data-parallel replica via `Parallelism`
    and wraps the resulting tensors/ops in an `EstimatorSpec`. In TRAIN
    mode the optimizer produces a dict of `train_ops` supporting gradient
    accumulation (`update_cycle`).

    Args:
        model_configs: A dictionary of all configurations.
        mode: A mode.
        dataset: A `Dataset` object.
        name: A string, the name of top-level of the variable scope.
        reuse: Whether to reuse all variables, the parameter passed
          to `tf.variable_scope()`.
        verbose: Print model parameters if set True.
        distributed_mode: Whether training is on distributed mode.
        is_chief: Whether is the chief worker.

    Returns: A `EstimatorSpec` object.
    """
    # Create model template function
    model_str = model_configs["model"]
    if model_str is None:
        model_str = "SequenceToSequence"
    # model_name = name or model_str.split(".")[-1]
    model_name = get_model_top_scope_name(model_str, name)
    if verbose:
        tf.logging.info("Create model: {} for {}".format(
            model_str, mode))
    # create model instance
    # NOTE(review): the model class is resolved with eval() on a
    # config-supplied name — assumes the configuration is trusted.
    model = eval(model_str)(
        params=model_configs["model_params"],
        mode=mode,
        vocab_source=dataset.vocab_source,
        vocab_target=dataset.vocab_target,
        name=model_name,
        verbose=verbose)
    # create expert_utils.Parallelism
    parallelism = Parallelism(mode, reuse=reuse)
    if mode == ModeKeys.TRAIN:
        # Optimizer is only needed to compute per-replica gradients below.
        opt = OptimizerWrapper(model_configs["optimizer_params"])

    def _build_model():
        # Builds one replica of the graph; called by `parallelism`, which
        # presumably aggregates the per-replica return values — TODO confirm.
        if verbose:
            tf.logging.info("Building Model.......")
        _input_fields = eval(model_str).create_input_fields(mode)
        _model_output = model.build(_input_fields)
        if verbose:
            tf.logging.info("Finish Building Model.......")
        if mode == ModeKeys.INFER:
            # model_output is prediction
            return _input_fields, _model_output
        elif mode == ModeKeys.EVAL:
            # model_output = (loss_sum, weight_sum), attention
            return _input_fields, _model_output[0], _model_output[1]
        else:  # mode == TRAIN
            # model_output = loss_sum, weight_sum
            _loss = _model_output[0] / _model_output[1]
            grads = opt.optimizer.compute_gradients(
                _loss, var_list=tf.trainable_variables(),
                colocate_gradients_with_ops=True)
            return _input_fields, _loss, \
                grads

    model_returns = parallelism(_build_model)
    input_fields = model_returns[0]
    if mode == ModeKeys.INFER:
        predictions = model_returns[1]
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            predictions=predictions)
    if mode == ModeKeys.EVAL:
        loss_op, attention = model_returns[1:]
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            # a list of tuples [(loss_sum0, weight_sum0),
            # (loss_sum1, weight_sum1), ...]
            loss=loss_op,
            # attentions for force decoding
            predictions=attention)
    assert mode == ModeKeys.TRAIN
    loss_per_dp, grads = model_returns[1:]
    _add_to_display_collection(input_fields)
    # build train op; optimize() returns the aggregated display loss and a
    # dict of ops (zeros/collect/train) implementing gradient accumulation —
    # see the training loop's step_fn for how they are used.
    train_loss, train_ops = opt.optimize(
        loss_per_dp, grads,
        update_cycle=model_configs["train"]["update_cycle"])
    # Register the training loss in collections so display hooks can fetch it.
    tf.add_to_collection(Constants.DISPLAY_KEY_COLLECTION_NAME,
                         Constants.TRAIN_LOSS_KEY_NAME)
    tf.add_to_collection(Constants.DISPLAY_VALUE_COLLECTION_NAME, train_loss)
    # build training hooks
    hooks = build_hooks(model_configs,
                        distributed_mode=distributed_mode,
                        is_chief=is_chief)
    from njunmt.training.text_metrics_spec import build_eval_metrics
    # NOTE(review): "is_cheif" looks like a typo of is_chief — confirm
    # against build_eval_metrics' signature before renaming.
    hooks.extend(build_eval_metrics(model_configs, dataset,
                                    is_cheif=is_chief,
                                    model_name=model_name))
    return EstimatorSpec(
        mode,
        input_fields=input_fields,
        loss=train_loss,
        train_ops=train_ops,
        training_hooks=hooks,
        training_chief_hooks=None)