def __init__(self,
             model_file: str = settings.DEFAULT_MOD_PATH,
             log_file: str = settings.DEFAULT_LOG_PATH,
             dropout: float = 0.3,
             weight_noise: float = 0.0,
             default_layer_dim: int = 512,
             param_init: ParamInitializer = bare(GlorotInitializer),
             bias_init: ParamInitializer = bare(ZeroInitializer),
             save_num_checkpoints: int = 1,
             eval_only: bool = False,
             commandline_args=None,
             dynet_param_collection: Optional['ParamCollection'] = None,
             placeholders: Dict[str, str] = {}):
  self.model_file = model_file
  self.log_file = log_file
  self.dropout = dropout
  self.weight_noise = weight_noise
  self.default_layer_dim = default_layer_dim
  self.param_init = param_init
  self.bias_init = bias_init
  self.model_file = None  # resets the attribute set above; the model_file path is still passed to the param collection below
  self.eval_only = eval_only
  self.dynet_param_collection = dynet_param_collection or PersistentParamCollection(model_file, save_num_checkpoints)
  self.commandline_args = commandline_args
  self.placeholders = placeholders
def __init__(self, exp_global=bare(ExpGlobal), load=None, overwrite=None, preproc=None,
             model=None, train=None, evaluate=None, random_search_report=None):
  """
  This is called after all other components have been initialized, so we can
  safely load DyNet weights here.
  """
  self.exp_global = exp_global
  self.load = load
  self.overwrite = overwrite
  self.preproc = preproc
  self.model = model
  self.train = train
  self.evaluate = evaluate

  if load:
    exp_global.dynet_param_collection.load_from_data_file(f"{load}.data")
    logger.info(f"> populated DyNet weights from {load}.data")

  if random_search_report:
    logger.info(f"> instantiated random parameter search: {random_search_report}")
def __init__(self, model=Ref(path=Path("model")), src_file=None, trg_file=None,
             dev_every=0, batcher=bare(xnmt.batcher.SrcBatcher, batch_size=32),
             loss_calculator=None, trainer=None, run_for_epochs=None,
             lr_decay=1.0, lr_decay_times=3, patience=1, initial_patience=None,
             dev_tasks=None, restart_trainer: bool = False, reload_command=None,
             name=None, sample_train_sents=None, max_num_train_sents=None,
             max_src_len=None, max_trg_len=None, exp_global=Ref(Path("exp_global"))):
  super().__init__(model=model, src_file=src_file, trg_file=trg_file,
                   dev_every=dev_every, batcher=batcher, loss_calculator=loss_calculator,
                   run_for_epochs=run_for_epochs, lr_decay=lr_decay,
                   lr_decay_times=lr_decay_times, patience=patience,
                   initial_patience=initial_patience, dev_tasks=dev_tasks,
                   restart_trainer=restart_trainer, reload_command=reload_command,
                   name=name, sample_train_sents=sample_train_sents,
                   max_num_train_sents=max_num_train_sents, max_src_len=max_src_len,
                   max_trg_len=max_trg_len, exp_global=exp_global)
  self.trainer = trainer or xnmt.optimizer.SimpleSGDTrainer(exp_global=self.exp_global, e0=0.1)
  self.dynet_profiling = getattr(exp_global.commandline_args, "dynet_profiling", 0)
def __init__(self,
             exp_global=bare(ExpGlobal),
             load: Optional[str] = None,
             overwrite: Optional[str] = None,
             preproc: PreprocRunner = None,
             model: Optional[GeneratorModel] = None,
             train: TrainingRegimen = None,
             evaluate: Optional[List[EvalTask]] = None,
             random_search_report: Optional[dict] = None) -> None:
  """
  This is called after all other components have been initialized, so we can
  safely load DyNet weights here.
  """
  self.exp_global = exp_global
  self.load = load
  self.overwrite = overwrite
  self.preproc = preproc
  self.model = model
  self.train = train
  self.evaluate = evaluate

  if load:
    exp_global.dynet_param_collection.load_from_data_file(f"{load}.data")
    logger.info(f"> populated DyNet weights from {load}.data")

  if random_search_report:
    logger.info(f"> instantiated random parameter search: {random_search_report}")
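# Standalone toy illustration (not xnmt code) of why weight loading happens only
# in this constructor: by the time it runs, every sub-component has already
# allocated its parameters, so a saved array of matching shape can simply be
# copied over them. All names below (ToyParamCollection, the file paths) are
# invented for this sketch.
import pickle
import numpy as np

class ToyParamCollection:
  def __init__(self):
    self.params = {}  # name -> np.ndarray, filled as components are constructed

  def add(self, name, shape):
    self.params[name] = np.random.randn(*shape) * 0.1
    return self.params[name]

  def save_to_data_file(self, path):
    with open(path, "wb") as f:
      pickle.dump(self.params, f)

  def load_from_data_file(self, path):
    with open(path, "rb") as f:
      saved = pickle.load(f)
    for name, value in saved.items():
      self.params[name][...] = value  # overwrite in place; shapes must match

if __name__ == "__main__":
  col = ToyParamCollection()
  col.add("encoder/W", (4, 4))         # components register parameters first ...
  col.save_to_data_file("toy.data")
  col.load_from_data_file("toy.data")  # ... then saved weights can be restored over them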
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=None,
             lstm_dim=None, mlp_hidden_dim=None, trg_embed_dim=None, dropout=None,
             rnn_spec="lstm", residual_to_output=False, input_feeding=True,
             bridge=bare(CopyBridge), label_smoothing=0.0, vocab_projector=None,
             vocab_size=None, vocab=None,
             trg_reader=Ref(path=Path("model.trg_reader"), required=False)):
  register_handler(self)
  self.param_col = exp_global.dynet_param_collection.param_col
  # Define dim
  lstm_dim = lstm_dim or exp_global.default_layer_dim
  self.mlp_hidden_dim = mlp_hidden_dim = mlp_hidden_dim or exp_global.default_layer_dim
  trg_embed_dim = trg_embed_dim or exp_global.default_layer_dim
  input_dim = input_dim or exp_global.default_layer_dim
  self.input_dim = input_dim
  self.label_smoothing = label_smoothing
  # Input feeding
  self.input_feeding = input_feeding
  self.lstm_dim = lstm_dim
  lstm_input = trg_embed_dim
  if input_feeding:
    lstm_input += input_dim
  # Bridge
  self.lstm_layers = layers
  self.bridge = bridge
  # LSTM
  self.fwd_lstm = RnnDecoder.rnn_from_spec(spec=rnn_spec,
                                           num_layers=layers,
                                           input_dim=lstm_input,
                                           hidden_dim=lstm_dim,
                                           model=self.param_col,
                                           residual_to_output=residual_to_output)
  # MLP
  self.context_projector = xnmt.linear.Linear(input_dim=input_dim + lstm_dim,
                                              output_dim=mlp_hidden_dim,
                                              model=self.param_col)
  self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader)
  self.vocab_projector = vocab_projector or xnmt.linear.Linear(input_dim=self.mlp_hidden_dim,
                                                               output_dim=self.vocab_size,
                                                               model=self.param_col)
  # Dropout
  self.dropout = dropout or exp_global.dropout
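# Minimal arithmetic sketch of the input-feeding dimensions in the decoder above:
# with input feeding, the previous context vector (of size input_dim) is
# concatenated to the target embedding, so the decoder LSTM consumes
# trg_embed_dim + input_dim values per step. The numbers just mirror the
# default_layer_dim=512 default and are illustrative.
trg_embed_dim = 512
input_dim = 512
input_feeding = True

lstm_input = trg_embed_dim
if input_feeding:
  lstm_input += input_dim
assert lstm_input == 1024  # decoder LSTM input size when input feeding is on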
def __init__(self, src_reader, trg_reader, src_embedder=bare(SimpleWordEmbedder),
             encoder=bare(BiLSTMSeqTransducer), attender=bare(MlpAttender),
             trg_embedder=bare(SimpleWordEmbedder), decoder=bare(MlpSoftmaxDecoder),
             inference=bare(SimpleInference), calc_global_fertility=False,
             calc_attention_entropy=False):
  '''Constructor.

  :param src_reader: A reader for the source side.
  :param src_embedder: A word embedder for the input language
  :param encoder: An encoder to generate encoded inputs
  :param attender: An attention module
  :param trg_reader: A reader for the target side.
  :param trg_embedder: A word embedder for the output language
  :param decoder: A decoder
  :param inference: The default inference strategy used for this model
  '''
  register_handler(self)
  self.src_reader = src_reader
  self.trg_reader = trg_reader
  self.src_embedder = src_embedder
  self.encoder = encoder
  self.attender = attender
  self.trg_embedder = trg_embedder
  self.decoder = decoder
  self.calc_global_fertility = calc_global_fertility
  self.calc_attention_entropy = calc_attention_entropy
  self.inference = inference
def __init__(self, exp_global=bare(ExpGlobal), load=None, overwrite=None, preproc=None,
             model=None, train=None, evaluate=None, random_search_report=None):
  self.exp_global = exp_global
  self.load = load
  self.overwrite = overwrite
  self.preproc = preproc
  self.model = model
  self.train = train
  self.evaluate = evaluate

  if load:
    exp_global.dynet_param_collection.load_from_data_file(f"{load}.data")
    logger.info(f"> populated DyNet weights from {load}.data")

  if random_search_report:
    logger.info(f"> instantiated random parameter search: {random_search_report}")
def __init__(self, model_file=settings.DEFAULT_MOD_PATH, log_file=settings.DEFAULT_LOG_PATH,
             dropout=0.3, weight_noise=0.0, default_layer_dim=512,
             param_init=bare(GlorotInitializer), bias_init=bare(ZeroInitializer),
             save_num_checkpoints=1, eval_only=False, commandline_args=None,
             dynet_param_collection=None):
  self.model_file = model_file
  self.log_file = log_file
  self.dropout = dropout
  self.weight_noise = weight_noise
  self.default_layer_dim = default_layer_dim
  self.param_init = param_init
  self.bias_init = bias_init
  self.model_file = None  # resets the attribute set above; the model_file path is still passed to the param collection below
  self.eval_only = eval_only
  self.dynet_param_collection = dynet_param_collection or PersistentParamCollection(model_file, save_num_checkpoints)
  self.commandline_args = commandline_args
def __init__(self, src_reader, trg_reader, src_embedder=bare(SimpleWordEmbedder),
             encoder=bare(BiLSTMSeqTransducer), attender=bare(MlpAttender),
             trg_embedder=bare(SimpleWordEmbedder), decoder=bare(MlpSoftmaxDecoder),
             inference=bare(SimpleInference), calc_global_fertility=False,
             calc_attention_entropy=False):
  register_handler(self)
  self.src_reader = src_reader
  self.trg_reader = trg_reader
  self.src_embedder = src_embedder
  self.encoder = encoder
  self.attender = attender
  self.trg_embedder = trg_embedder
  self.decoder = decoder
  self.calc_global_fertility = calc_global_fertility
  self.calc_attention_entropy = calc_attention_entropy
  self.inference = inference
def __init__(self, beam_size, max_len=100, len_norm=bare(NoNormalization)):
  self.beam_size = beam_size
  self.max_len = max_len
  self.len_norm = len_norm
  self.entrs = []
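# Generic illustration (not the xnmt NoNormalization API) of why a beam search
# keeps a length-normalization strategy around: summed log-probabilities favour
# short hypotheses, so scores are often divided by hypothesis length before
# ranking. The numbers are made up.
short_hyp_logprobs = [-0.9, -1.0]              # 2 tokens
long_hyp_logprobs = [-0.5, -0.6, -0.55, -0.5]  # 4 tokens

def no_normalization(logprobs):
  return sum(logprobs)

def length_normalization(logprobs):
  return sum(logprobs) / len(logprobs)

# Without normalization the short hypothesis wins; with it, the long one does.
assert no_normalization(short_hyp_logprobs) > no_normalization(long_hyp_logprobs)
assert length_normalization(long_hyp_logprobs) > length_normalization(short_hyp_logprobs)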
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=None,
             lstm_dim=None, mlp_hidden_dim=None, trg_embed_dim=None, dropout=None,
             rnn_spec="lstm", residual_to_output=False, input_feeding=True,
             param_init_lstm=None, param_init_context=None, bias_init_context=None,
             param_init_output=None, bias_init_output=None, bridge=bare(CopyBridge),
             label_smoothing=0.0, vocab_projector=None, vocab_size=None, vocab=None,
             trg_reader=Ref(path=Path("model.trg_reader"), required=False)):
  register_handler(self)
  self.param_col = exp_global.dynet_param_collection.param_col
  # Define dim
  lstm_dim = lstm_dim or exp_global.default_layer_dim
  self.mlp_hidden_dim = mlp_hidden_dim = mlp_hidden_dim or exp_global.default_layer_dim
  trg_embed_dim = trg_embed_dim or exp_global.default_layer_dim
  input_dim = input_dim or exp_global.default_layer_dim
  self.input_dim = input_dim
  self.label_smoothing = label_smoothing
  # Input feeding
  self.input_feeding = input_feeding
  self.lstm_dim = lstm_dim
  lstm_input = trg_embed_dim
  if input_feeding:
    lstm_input += input_dim
  # Bridge
  self.lstm_layers = layers
  self.bridge = bridge
  # LSTM
  self.fwd_lstm = RnnDecoder.rnn_from_spec(spec=rnn_spec,
                                           num_layers=layers,
                                           input_dim=lstm_input,
                                           hidden_dim=lstm_dim,
                                           model=self.param_col,
                                           residual_to_output=residual_to_output)
  param_init_lstm = param_init_lstm or exp_global.param_init
  if not isinstance(param_init_lstm, GlorotInitializer):
    raise NotImplementedError("For the decoder LSTM, only Glorot initialization is currently supported")
  if getattr(param_init_lstm, "gain", 1.0) != 1.0:
    # apply the non-default gain to parameters 3*l and 3*l+1 of each LSTM layer
    # (presumably the input and recurrent weight matrices); the parameter at
    # 3*l+2 (presumably the bias) is left unscaled
    for l in range(layers):
      for i in [0, 1]:
        self.fwd_lstm.param_collection().parameters_list()[3 * l + i].scale(param_init_lstm.gain)
  # MLP
  self.context_projector = xnmt.linear.Linear(input_dim=input_dim + lstm_dim,
                                              output_dim=mlp_hidden_dim,
                                              model=self.param_col,
                                              param_init=param_init_context or exp_global.param_init,
                                              bias_init=bias_init_context or exp_global.bias_init)
  self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader)
  self.vocab_projector = vocab_projector or xnmt.linear.Linear(input_dim=self.mlp_hidden_dim,
                                                               output_dim=self.vocab_size,
                                                               model=self.param_col,
                                                               param_init=param_init_output or exp_global.param_init,
                                                               bias_init=bias_init_output or exp_global.bias_init)
  # Dropout
  self.dropout = dropout or exp_global.dropout
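# Worked sketch of the Glorot-gain rescaling applied to the decoder LSTM above:
# a Glorot/Xavier-initialized weight matrix has standard deviation
# sqrt(2 / (fan_in + fan_out)), and a gain g simply scales every entry (and
# hence the std) by g. This is plain numpy for illustration, not DyNet; the
# dimensions are made up.
import numpy as np

fan_in, fan_out, gain = 1024, 512, 0.5
std = np.sqrt(2.0 / (fan_in + fan_out))
W = np.random.randn(fan_out, fan_in) * std  # a "gain 1.0" Glorot sample
W_scaled = W * gain                         # same effect as scaling the parameter in place

print(W.std(), W_scaled.std())  # second value is roughly gain * the first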
def __init__(self, model, src_file=None, trg_file=None, dev_every=0,
             batcher=bare(SrcBatcher, batch_size=32), loss_calculator=None,
             run_for_epochs=None, lr_decay=1.0, lr_decay_times=3, patience=1,
             initial_patience=None, dev_tasks=None, restart_trainer=False,
             reload_command=None, name=None, sample_train_sents=None,
             max_num_train_sents=None, max_src_len=None, max_trg_len=None,
             exp_global=Ref(Path("exp_global"))):
  """
  Args:
    exp_global:
    model: a generator.GeneratorModel object
    src_file: The file for the source data.
    trg_file: The file for the target data.
    dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
    batcher: Type of batcher
    loss_calculator:
    lr_decay (float):
    lr_decay_times (int): Early stopping after decaying learning rate a certain number of times
    patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
    initial_patience (int): if given, allows adjusting patience for the first LR decay
    dev_tasks: A list of tasks to run on the development set
    restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
    reload_command: Command to change the input data after each epoch. --epoch EPOCH_NUM will be appended to the command. To just reload the data after each epoch set the command to 'true'.
    sample_train_sents:
    max_num_train_sents:
    max_src_len:
    max_trg_len:
    name: will be prepended to log outputs if given
  """
  self.exp_global = exp_global
  self.model_file = self.exp_global.dynet_param_collection.model_file
  self.src_file = src_file
  self.trg_file = trg_file
  self.dev_tasks = dev_tasks

  if lr_decay > 1.0 or lr_decay <= 0.0:
    raise RuntimeError("illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")
  self.lr_decay = lr_decay
  self.patience = patience
  self.initial_patience = initial_patience
  self.lr_decay_times = lr_decay_times
  self.restart_trainer = restart_trainer
  self.run_for_epochs = run_for_epochs

  self.early_stopping_reached = False
  # training state
  self.training_state = TrainingState()

  self.reload_command = reload_command

  self.model = model
  self.loss_calculator = loss_calculator or LossCalculator(MLELoss())

  self.sample_train_sents = sample_train_sents
  self.max_num_train_sents = max_num_train_sents
  self.max_src_len = max_src_len
  self.max_trg_len = max_trg_len

  self.batcher = batcher
  self.logger = BatchLossTracker(self, dev_every, name)
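# Small standalone sketch of the learning-rate decay schedule implied by the
# arguments above: each time the dev score fails to improve for `patience`
# checkpoints the rate is multiplied by lr_decay, and (per the docstring)
# training stops early after lr_decay_times such decays. The numbers below,
# including the initial rate e0, are illustrative.
e0, lr_decay, lr_decay_times = 0.1, 0.5, 3

rates = [e0 * lr_decay ** k for k in range(lr_decay_times + 1)]
print(rates)  # roughly 0.1, 0.05, 0.025, 0.0125 -> early stopping after the third decay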
def __init__(self, model=Ref(path=Path("model")), src_file=None, trg_file=None,
             dev_every=0, batcher=bare(xnmt.batcher.SrcBatcher, batch_size=32),
             loss_calculator=None, trainer=None, run_for_epochs=None, lr_decay=1.0,
             lr_decay_times=3, patience=1, initial_patience=None, dev_tasks=None,
             restart_trainer=False, reload_command=None, name=None,
             sample_train_sents=None, max_num_train_sents=None, max_src_len=None,
             max_trg_len=None, exp_global=Ref(Path("exp_global"))):
  """
  :param model: a generator.GeneratorModel object
  :param src_file: the source training file
  :param trg_file: the target training file
  :param dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
  :param batcher: Type of batcher
  :param loss_calculator: The method for calculating the loss.
  :param trainer: Trainer object, default is SGD with learning rate 0.1
  :param run_for_epochs:
  :param lr_decay (float):
  :param lr_decay_times (int): Early stopping after decaying learning rate a certain number of times
  :param patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
  :param initial_patience (int): if given, allows adjusting patience for the first LR decay
  :param dev_tasks: A list of tasks to use during the development stage.
  :param restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
  :param reload_command: Command to change the input data after each epoch. --epoch EPOCH_NUM will be appended to the command. To just reload the data after each epoch set the command to 'true'.
  :param name: will be prepended to log outputs if given
  :param sample_train_sents:
  :param max_num_train_sents:
  :param max_src_len:
  :param max_trg_len:
  :param exp_global:
  """
  super().__init__(model=model, src_file=src_file, trg_file=trg_file,
                   dev_every=dev_every, batcher=batcher, loss_calculator=loss_calculator,
                   run_for_epochs=run_for_epochs, lr_decay=lr_decay,
                   lr_decay_times=lr_decay_times, patience=patience,
                   initial_patience=initial_patience, dev_tasks=dev_tasks,
                   restart_trainer=restart_trainer, reload_command=reload_command,
                   name=name, sample_train_sents=sample_train_sents,
                   max_num_train_sents=max_num_train_sents, max_src_len=max_src_len,
                   max_trg_len=max_trg_len, exp_global=exp_global)
  self.trainer = trainer or xnmt.optimizer.SimpleSGDTrainer(exp_global=self.exp_global, e0=0.1)
  self.dynet_profiling = getattr(exp_global.commandline_args, "dynet_profiling", 0)
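# Tiny sketch of the getattr(...) fallback used above: commandline_args may be
# None, or an argparse.Namespace that simply lacks a dynet_profiling attribute;
# in both cases the regimen falls back to 0. The values below are illustrative.
from argparse import Namespace

for args in (None, Namespace(), Namespace(dynet_profiling=2)):
  print(getattr(args, "dynet_profiling", 0))  # prints 0, 0, 2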