Example #1
    def __init__(self,
                 model,
                 src_file=None,
                 trg_file=None,
                 dev_every=0,
                 batcher=bare(SrcBatcher, batch_size=32),
                 loss_calculator=None,
                 run_for_epochs=None,
                 lr_decay=1.0,
                 lr_decay_times=3,
                 patience=1,
                 initial_patience=None,
                 dev_tasks=None,
                 restart_trainer=False,
                 reload_command=None,
                 name=None,
                 sample_train_sents=None,
                 max_num_train_sents=None,
                 max_src_len=None,
                 max_trg_len=None,
                 exp_global=Ref(Path("exp_global"))):
        """
    Args:
      exp_global:
      model: a generator.GeneratorModel object
      src_file: The file for the source data.
      trg_file: The file for the target data.
      dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
      batcher: Type of batcher
      loss_calculator:
      lr_decay (float):
      lr_decay_times (int):  Early stopping after decaying learning rate a certain number of times
      patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
      initial_patience (int): if given, allows adjusting patience for the first LR decay
      dev_tasks: A list of tasks to run on the development set
      restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
      reload_command: Command to change the input data after each epoch.
                           --epoch EPOCH_NUM will be appended to the command.
                           To just reload the data after each epoch set the command to 'true'.
      sample_train_sents:
      max_num_train_sents:
      max_src_len:
      max_trg_len:
      name: will be prepended to log outputs if given
    """
        self.exp_global = exp_global
        self.model_file = self.exp_global.dynet_param_collection.model_file
        self.src_file = src_file
        self.trg_file = trg_file
        self.dev_tasks = dev_tasks

        if lr_decay > 1.0 or lr_decay <= 0.0:
            raise RuntimeError(
                "illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")
        self.lr_decay = lr_decay
        self.patience = patience
        self.initial_patience = initial_patience
        self.lr_decay_times = lr_decay_times
        self.restart_trainer = restart_trainer
        self.run_for_epochs = run_for_epochs

        self.early_stopping_reached = False
        # training state
        self.training_state = TrainingState()

        self.reload_command = reload_command

        self.model = model
        self.loss_calculator = loss_calculator or LossCalculator(MLELoss())

        self.sample_train_sents = sample_train_sents
        self.max_num_train_sents = max_num_train_sents
        self.max_src_len = max_src_len
        self.max_trg_len = max_trg_len

        self.batcher = batcher
        self.logger = BatchLossTracker(self, dev_every, name)
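
The interaction of lr_decay, patience, initial_patience, and lr_decay_times is easier to see in isolation than in the docstring. Below is a minimal standalone sketch of that checkpoint logic (all names are hypothetical, higher dev scores are assumed to be better, and the real logic lives in the training loop, not in this constructor):

# Sketch of the dev-checkpoint behavior described in the docstring above.
def dev_checkpoint(state, dev_score, lr_decay=0.5, patience=1,
                   initial_patience=None, lr_decay_times=3):
    """Updates `state` in place; returns True once early stopping is reached."""
    if state["best_score"] is None or dev_score > state["best_score"]:
        state["best_score"] = dev_score
        state["checks_without_improvement"] = 0
        return False
    state["checks_without_improvement"] += 1
    # initial_patience, if given, applies until the first decay has happened.
    cur_patience = (initial_patience
                    if initial_patience is not None and state["num_decays"] == 0
                    else patience)
    if state["checks_without_improvement"] >= cur_patience:
        state["num_decays"] += 1
        state["learning_rate"] *= lr_decay  # must satisfy 0.0 < lr_decay <= 1.0
        state["checks_without_improvement"] = 0
        # Early stopping after decaying the learning rate lr_decay_times times.
        return state["num_decays"] >= lr_decay_times
    return False

# Usage: feed one dev score per checkpoint.
state = {"best_score": None, "checks_without_improvement": 0,
         "num_decays": 0, "learning_rate": 0.1}
for score in [0.31, 0.35, 0.34, 0.34, 0.33]:
    if dev_checkpoint(state, score):
        break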
Example #2
 def __init__(self,
              exp_global=Ref(Path("exp_global")),
              layers=1,
              input_dim=None,
              hidden_dim=None,
              dropout=None,
              weightnoise_std=None,
              param_init=None,
              bias_init=None):
     register_handler(self)
     self.num_layers = layers
     input_dim = input_dim or exp_global.default_layer_dim
     hidden_dim = hidden_dim or exp_global.default_layer_dim
     self.hidden_dim = hidden_dim
     self.dropout_rate = dropout or exp_global.dropout
     self.weightnoise_std = weightnoise_std or exp_global.weight_noise
     assert hidden_dim % 2 == 0, "hidden_dim must be even to split across directions"
     param_init = param_init or exp_global.param_init
     bias_init = bias_init or exp_global.bias_init
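     # First layer: input_dim -> hidden_dim // 2 per direction; the forward
     # and backward outputs are concatenated back to hidden_dim.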
     self.forward_layers = [
         UniLSTMSeqTransducer(exp_global=exp_global,
                              input_dim=input_dim,
                              hidden_dim=hidden_dim // 2,
                              dropout=dropout,
                              weightnoise_std=weightnoise_std,
                              param_init=param_init[0] if isinstance(
                                  param_init, Sequence) else param_init,
                              bias_init=bias_init[0] if isinstance(
                                  bias_init, Sequence) else bias_init)
     ]
     self.backward_layers = [
         UniLSTMSeqTransducer(exp_global=exp_global,
                              input_dim=input_dim,
                              hidden_dim=hidden_dim // 2,
                              dropout=dropout,
                              weightnoise_std=weightnoise_std,
                              param_init=param_init[0] if isinstance(
                                  param_init, Sequence) else param_init,
                              bias_init=bias_init[0] if isinstance(
                                  bias_init, Sequence) else bias_init)
     ]
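     # Layers 2..num_layers consume the concatenated hidden_dim output of
     # the previous layer, again split into two hidden_dim // 2 directions.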
     self.forward_layers += [
         UniLSTMSeqTransducer(exp_global=exp_global,
                              input_dim=hidden_dim,
                              hidden_dim=hidden_dim // 2,
                              dropout=dropout,
                              weightnoise_std=weightnoise_std,
                              param_init=param_init[i] if isinstance(
                                  param_init, Sequence) else param_init,
                              bias_init=bias_init[i] if isinstance(
                                  bias_init, Sequence) else bias_init)
         for i in range(1, layers)
     ]
     self.backward_layers += [
         UniLSTMSeqTransducer(exp_global=exp_global,
                              input_dim=hidden_dim,
                              hidden_dim=hidden_dim // 2,
                              dropout=dropout,
                              weightnoise_std=weightnoise_std,
                              param_init=param_init[i] if isinstance(
                                  param_init, Sequence) else param_init,
                              bias_init=bias_init[i] if isinstance(
                                  bias_init, Sequence) else bias_init)
         for i in range(1, layers)
     ]
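
Every layer gives half of hidden_dim to each direction, which is why the constructor asserts that hidden_dim is even and why the integer division above matters (hidden_dim / 2 yields a float in Python 3 and would break the dimension argument). A minimal sketch of the per-layer combination, with hypothetical stand-ins for the two unidirectional passes:

# Hypothetical sketch: a BiLSTM layer runs two unidirectional LSTMs of size
# hidden_dim // 2 and concatenates their outputs at every time step.
# `fwd_lstm` and `bwd_lstm` are assumed to map a list of input vectors
# (plain lists of floats here) to a list of output vectors of equal length.
def bilstm_layer(xs, fwd_lstm, bwd_lstm):
    fwd_states = fwd_lstm(xs)               # left-to-right pass
    bwd_states = bwd_lstm(xs[::-1])[::-1]   # right-to-left pass, re-aligned
    # Per-step concatenation: hidden_dim//2 + hidden_dim//2 == hidden_dim,
    # so deeper layers can take input_dim=hidden_dim, as in the constructor.
    return [f + b for f, b in zip(fwd_states, bwd_states)]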
Example #3
 def __init__(self,
              tasks,
              trainer=None,
              exp_global=Ref(Path("exp_global"))):
     super().__init__(exp_global=exp_global, tasks=tasks, trainer=trainer)
     self.exp_global = exp_global
Example #4
 def __init__(self, exp_global=Ref(Path("exp_global")),
              ## COMPONENTS
              embed_encoder=None, segment_composer=None, final_transducer=None,
              ## OPTIONS
              length_prior=3.3,
              length_prior_alpha=None, # GeometricSequence
              epsilon_greedy=None,     # GeometricSequence
              reinforce_scale=None,    # GeometricSequence
              confidence_penalty=None, # SegmentationConfidencePenalty
              # For segmentation warmup (Always use the poisson prior)
              segmentation_warmup=0,
              ## FLAGS
              learn_delete       = False,
              use_baseline       = True,
              z_normalization    = True,
              learn_segmentation = True,
              compose_char       = False,
              log_reward         = True,
              debug=False,
              print_sample=False):
   register_handler(self)
   model = exp_global.dynet_param_collection.param_col
   # Sanity check
   assert embed_encoder is not None
   assert segment_composer is not None
   assert final_transducer is not None
    # The embed encoder transduces the embedding vectors to a sequence of vectors
   self.embed_encoder = embed_encoder
    if not hasattr(embed_encoder, "hidden_dim"):
      embed_encoder_dim = exp_global.default_layer_dim
    else:
      embed_encoder_dim = embed_encoder.hidden_dim
    # The segment composer produces word embeddings from a sequence of character embeddings
   self.segment_composer = segment_composer
   # The final transducer
   self.final_transducer = final_transducer
   # Decision layer of segmentation
   self.segment_transform = linear.Linear(input_dim  = embed_encoder_dim,
                                          output_dim = 3 if learn_delete else 2,
                                          model=model)
   # The baseline linear regression model
   self.baseline = linear.Linear(input_dim = embed_encoder_dim,
                                 output_dim = 1,
                                 model = model)
   # Flags
   self.use_baseline = use_baseline
   self.learn_segmentation = learn_segmentation
   self.learn_delete = learn_delete
   self.z_normalization = z_normalization
   self.debug = debug
   self.compose_char = compose_char
   self.print_sample = print_sample
   self.log_reward = log_reward
   # Fixed Parameters
   self.length_prior = length_prior
   self.segmentation_warmup = segmentation_warmup
   # Variable Parameters
   self.length_prior_alpha = length_prior_alpha
   self.lmbd = reinforce_scale
   self.eps = epsilon_greedy
   self.confidence_penalty = confidence_penalty
   # States of the object
   self.train = False
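
The components wired up above amount to a REINFORCE-style segmentation policy: segment_transform scores each encoder state with 2 decisions (3 when learn_delete is set), epsilon_greedy mixes random exploration into the sampling, and the baseline regressor predicts the expected reward to reduce gradient variance. A minimal numpy sketch of the sampling step under those assumptions (hypothetical names, not xnmt's actual implementation):

import numpy as np

# `segment_transform` is assumed to map an encoder state to a vector of
# 2 (or 3, with delete) unnormalized decision scores.
def sample_segmentation(encoder_states, segment_transform, epsilon=0.0, rng=None):
    rng = rng or np.random.default_rng()
    decisions = []
    for h in encoder_states:
        logits = segment_transform(h)
        probs = np.exp(logits - logits.max())  # softmax over decisions
        probs /= probs.sum()
        if rng.random() < epsilon:             # epsilon-greedy exploration
            decisions.append(int(rng.integers(len(probs))))
        else:
            decisions.append(int(rng.choice(len(probs), p=probs)))
    return decisions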