def __init__(
    self,
    input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    embedder: embedders.Embedder = bare(embedders.LookupEmbedder),
    input_feeding: bool = True,
    bridge: bridges.Bridge = bare(bridges.CopyBridge),
    rnn: recurrent.UniLSTMSeqTransducer = bare(recurrent.UniLSTMSeqTransducer),
    transform: transforms.Transform = bare(transforms.AuxNonLinear),
    scorer: scorers.Scorer = bare(scorers.Softmax),
    truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False)
) -> None:
  self.param_col = param_collections.ParamManager.my_params(self)
  self.input_dim = input_dim
  self.embedder = embedder
  self.truncate_dec_batches = truncate_dec_batches
  self.bridge = bridge
  self.rnn = rnn
  self.transform = transform
  self.scorer = scorer
  # Input feeding
  self.input_feeding = input_feeding
  rnn_input_dim = embedder.emb_dim
  if input_feeding:
    rnn_input_dim += input_dim
  assert rnn_input_dim == rnn.total_input_dim, \
    "Wrong input dimension in RNN layer: {} != {}".format(rnn_input_dim, rnn.total_input_dim)
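# Illustrative sketch (not part of the class above): with input feeding, the decoder RNN
# consumes the previous target embedding concatenated with the previous attention context,
# so the RNN must be configured with total_input_dim = emb_dim + input_dim. The numbers
# below are hypothetical example values.
emb_dim, input_dim, input_feeding = 512, 512, True
rnn_input_dim = emb_dim + (input_dim if input_feeding else 0)
assert rnn_input_dim == 1024  # the UniLSTMSeqTransducer must accept inputs of this width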
def __init__(
    self,
    layers: numbers.Integral,
    input_dim: numbers.Integral,
    hidden_dim: numbers.Integral,
    param_init: param_initializers.ParamInitializer = Ref(
      "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
    bias_init: param_initializers.ParamInitializer = Ref(
      "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer))
) -> None:
  if layers != 1:
    raise RuntimeError("CustomLSTMSeqTransducer supports only exactly one layer")
  self.input_dim = input_dim
  self.hidden_dim = hidden_dim
  model = param_collections.ParamManager.my_params(self)

  # [i; f; o; g]
  self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                   init=param_init.initializer((hidden_dim * 4, input_dim)))
  self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                   init=param_init.initializer((hidden_dim * 4, hidden_dim)))
  self.p_b = model.add_parameters(dim=(hidden_dim * 4,),
                                  init=bias_init.initializer((hidden_dim * 4,)))
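# Minimal numpy sketch (not the transducer's actual code) of the LSTM step implied by the
# stacked [i; f; o; g] parameters above: one matrix multiply produces all four gates, which
# are then split into hidden_dim-sized chunks. All names below are hypothetical.
import numpy as np

def lstm_step(x, h_prev, c_prev, Wx, Wh, b):
  sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
  gates = Wx @ x + Wh @ h_prev + b                    # shape: (hidden_dim * 4,)
  i, f, o, g = np.split(gates, 4)                     # input, forget, output, candidate
  c = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)   # new cell state
  h = sigmoid(o) * np.tanh(c)                         # new hidden state
  return h, c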
def __init__(
    self,
    layers: numbers.Integral = 1,
    input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    dropout: numbers.Real = Ref("exp_global.dropout", default=0.0),
    forward_layers: Optional[Sequence[LatticeLSTMTransducer]] = None,
    backward_layers: Optional[Sequence[LatticeLSTMTransducer]] = None
) -> None:
  self.num_layers = layers
  self.hidden_dim = hidden_dim
  self.dropout_rate = dropout
  assert hidden_dim % 2 == 0
  self.forward_layers = self.add_serializable_component(
    "forward_layers", forward_layers,
    lambda: self._make_dir_layers(input_dim=input_dim, hidden_dim=hidden_dim,
                                  dropout=dropout, layers=layers))
  self.backward_layers = self.add_serializable_component(
    "backward_layers", backward_layers,
    lambda: self._make_dir_layers(input_dim=input_dim, hidden_dim=hidden_dim,
                                  dropout=dropout, layers=layers))
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), output_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), bias: bool = True, activation: str = 'tanh', hidden_layers: numbers.Integral = 1, param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)), layers: Optional[Sequence[Transform]] = None) -> None: self.layers = self.add_serializable_component( "layers", layers, lambda: MLP._create_layers(num_layers=hidden_layers, input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, bias=bias, activation=activation, param_init=param_init, bias_init=bias_init))
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)) ) -> None: self.input_dim = input_dim self.state_dim = state_dim self.hidden_dim = hidden_dim my_params = param_collections.ParamManager.my_params(self) self.linear_context = my_params.add_parameters( (hidden_dim, input_dim), init=param_init[0].initializer((hidden_dim, input_dim))) self.linear_query = my_params.add_parameters( (hidden_dim, state_dim), init=param_init[1].initializer((hidden_dim, state_dim))) self.bias_context = my_params.add_parameters( (hidden_dim, ), init=bias_init.initializer((hidden_dim, ))) self.pU = my_params.add_parameters((1, hidden_dim), init=param_init[2].initializer( (1, hidden_dim))) self.curr_sent = None self.attention_vecs = None self.WI = None
def __init__(
    self,
    layers: numbers.Integral = 1,
    input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    downsampling_method: str = "concat",
    reduce_factor: Union[numbers.Integral, Sequence[numbers.Integral]] = 2,
    dropout: float = Ref("exp_global.dropout", default=0.0),
    builder_layers: Any = None):
  self.dropout = dropout
  assert layers > 0
  assert hidden_dim % 2 == 0
  assert type(reduce_factor) == int or \
         (type(reduce_factor) == list and len(reduce_factor) == layers - 1)
  assert downsampling_method in ["concat", "skip"]
  self.downsampling_method = downsampling_method
  self.reduce_factor = reduce_factor
  self.input_dim = input_dim
  self.hidden_dim = hidden_dim
  self.builder_layers = self.add_serializable_component(
    "builder_layers", builder_layers,
    lambda: self.make_builder_layers(input_dim, hidden_dim, layers, dropout,
                                     downsampling_method, reduce_factor))
def __init__(
    self,
    emb_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    weight_noise: numbers.Real = Ref("exp_global.weight_noise", default=0.0),
    word_dropout: numbers.Real = 0.0,
    fix_norm: Optional[numbers.Real] = None,
    param_init: param_initializers.ParamInitializer = Ref(
      "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
    vocab_size: Optional[numbers.Integral] = None,
    vocab: Optional[vocabs.Vocab] = None,
    yaml_path=None,
    src_reader: Optional[input_readers.InputReader] = Ref("model.src_reader", default=None),
    trg_reader: Optional[input_readers.InputReader] = Ref("model.trg_reader", default=None)
) -> None:
  self.emb_dim = emb_dim
  self.weight_noise = weight_noise
  self.word_dropout = word_dropout
  self.fix_norm = fix_norm
  self.word_id_mask = None
  self.train = False
  param_collection = param_collections.ParamManager.my_params(self)
  self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path, src_reader, trg_reader)
  self.save_processed_arg("vocab_size", self.vocab_size)
  self.embeddings = param_collection.add_lookup_parameters(
    (self.vocab_size, self.emb_dim),
    init=param_init.initializer((self.vocab_size, self.emb_dim), is_lookup=True))
def __init__(self,
             max_pos: numbers.Integral,
             op: str = 'sum',
             emb_type: str = 'param',
             input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
             dropout: numbers.Real = Ref("exp_global.dropout", default=0.0),
             param_init: param_initializers.ParamInitializer = Ref(
               "exp_global.param_init",
               default=bare(param_initializers.GlorotInitializer))) -> None:
  """
  max_pos: largest embedded position
  op: how to combine positional encodings with the original encodings, can be "sum" or "concat"
  emb_type: what type of embeddings to use; "param" = parameterized (others, such as the
    trigonometric embeddings, are todo)
  input_dim: embedding size
  dropout: apply dropout to the output of this transducer
  param_init: how to initialize the embedding matrix
  """
  self.max_pos = max_pos
  self.input_dim = input_dim
  self.dropout = dropout
  self.op = op
  self.emb_type = emb_type
  my_params = param_collections.ParamManager.my_params(self)
  self.embeddings = nn.Embedding(self.max_pos, self.input_dim).to(xnmt.device)
  my_params.append(self.embeddings)
  my_params.init_params(param_init)
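# Minimal torch sketch (hypothetical, not this transducer's transduce() code) of how the two
# `op` modes combine positional embeddings with the input sequence: "sum" keeps the feature
# dimension at input_dim, while "concat" doubles it. All values below are example numbers.
import torch
import torch.nn as nn

seq_len, input_dim, max_pos = 7, 4, 100
inputs = torch.randn(seq_len, input_dim)
pos_emb = nn.Embedding(max_pos, input_dim)(torch.arange(seq_len))
summed = inputs + pos_emb                        # shape: (seq_len, input_dim)
concatenated = torch.cat([inputs, pos_emb], -1)  # shape: (seq_len, 2 * input_dim)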
def __init__(self,
             layers=1,
             input_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             downsampling_method="concat",
             reduce_factor=2,
             dropout=Ref("exp_global.dropout", default=0.0)):
  self.dropout = dropout
  assert layers > 0
  assert hidden_dim % 2 == 0
  assert type(reduce_factor) == int or \
         (type(reduce_factor) == list and len(reduce_factor) == layers - 1)
  assert downsampling_method in ["concat", "skip"]
  self.builder_layers = []
  self.downsampling_method = downsampling_method
  self.reduce_factor = reduce_factor
  self.input_dim = input_dim
  f = UniLSTMSeqTransducer(input_dim=input_dim, hidden_dim=hidden_dim // 2, dropout=dropout)
  b = UniLSTMSeqTransducer(input_dim=input_dim, hidden_dim=hidden_dim // 2, dropout=dropout)
  self.builder_layers.append((f, b))
  for _ in range(layers - 1):
    layer_input_dim = hidden_dim if downsampling_method == "skip" else hidden_dim * reduce_factor
    f = UniLSTMSeqTransducer(input_dim=layer_input_dim, hidden_dim=hidden_dim // 2, dropout=dropout)
    b = UniLSTMSeqTransducer(input_dim=layer_input_dim, hidden_dim=hidden_dim // 2, dropout=dropout)
    self.builder_layers.append((f, b))
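# Hypothetical helper (not part of the class above) showing the per-layer input widths implied
# by the loop above: with "concat" downsampling, each layer after the first consumes
# reduce_factor consecutive outputs of the previous BiLSTM layer (each of width hidden_dim),
# while "skip" keeps the width at hidden_dim. Assumes an integer reduce_factor.
def pyramidal_layer_input_dims(layers, input_dim, hidden_dim, downsampling_method, reduce_factor):
  dims = [input_dim]
  for _ in range(layers - 1):
    dims.append(hidden_dim if downsampling_method == "skip" else hidden_dim * reduce_factor)
  return dims

assert pyramidal_layer_input_dims(3, 512, 512, "concat", 2) == [512, 1024, 1024]
assert pyramidal_layer_input_dims(3, 512, 512, "skip", 2) == [512, 512, 512]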
def __init__(
    self,
    input_dim: int = Ref("exp_global.default_layer_dim"),
    trg_embed_dim: int = Ref("exp_global.default_layer_dim"),
    input_feeding: bool = True,
    bridge: Bridge = bare(CopyBridge),
    rnn: UniLSTMSeqTransducer = bare(UniLSTMSeqTransducer),
    transform: Transform = bare(AuxNonLinear),
    scorer: Scorer = bare(Softmax),
    truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False)
) -> None:
  self.param_col = ParamManager.my_params(self)
  self.input_dim = input_dim
  self.truncate_dec_batches = truncate_dec_batches
  self.bridge = bridge
  self.rnn = rnn
  self.transform = transform
  self.scorer = scorer
  # Input feeding
  self.input_feeding = input_feeding
  rnn_input_dim = trg_embed_dim
  if input_feeding:
    rnn_input_dim += input_dim
  assert rnn_input_dim == rnn.input_dim, \
    "Wrong input dimension in RNN layer: {} != {}".format(rnn_input_dim, rnn.input_dim)
def __init__(self,
             filename,
             emb_dim=Ref("exp_global.default_layer_dim"),
             weight_noise=Ref("exp_global.weight_noise", default=0.0),
             word_dropout=0.0,
             fix_norm=None,
             vocab=None,
             yaml_path=None,
             src_reader=Ref("model.src_reader", default=None),
             trg_reader=Ref("model.trg_reader", default=None)):
  self.emb_dim = emb_dim
  self.weight_noise = weight_noise
  self.word_dropout = word_dropout
  self.word_id_mask = None
  self.train = False
  self.fix_norm = fix_norm
  self.pretrained_filename = filename
  param_collection = ParamManager.my_params(self)
  self.vocab = self.choose_vocab(vocab, yaml_path, src_reader, trg_reader)
  self.vocab_size = len(self.vocab)
  self.save_processed_arg("vocab", self.vocab)
  with open(self.pretrained_filename, encoding='utf-8') as embeddings_file:
    total_embs, in_vocab, missing, initial_embeddings = \
      self._read_fasttext_embeddings(self.vocab, embeddings_file)
  self.embeddings = param_collection.lookup_parameters_from_numpy(initial_embeddings)
  logger.info(f"{in_vocab} vocabulary matches out of {total_embs} total embeddings; "
              f"{missing} vocabulary words without a pretrained embedding out of {self.vocab_size}")
def __init__(
    self,
    emb_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    weight_noise: numbers.Real = Ref("exp_global.weight_noise", default=0.0),
    word_dropout: numbers.Real = 0.0,
    fix_norm: Optional[numbers.Real] = None,
    param_init: param_initializers.ParamInitializer = Ref(
      "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
    vocab_size: Optional[numbers.Integral] = None,
    vocab: Optional[vocabs.Vocab] = None,
    yaml_path: Path = Path(),
    src_reader: Optional[input_readers.InputReader] = Ref("model.src_reader", default=None),
    trg_reader: Optional[input_readers.InputReader] = Ref("model.trg_reader", default=None)
) -> None:
  self.emb_dim = emb_dim
  self.weight_noise = weight_noise
  self.word_dropout = word_dropout
  self.fix_norm = fix_norm
  self.word_id_mask = None
  self.train = False
  self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path, src_reader, trg_reader)
  self.save_processed_arg("vocab_size", self.vocab_size)
  self.embeddings = nn.Embedding(self.vocab_size, self.emb_dim).to(xnmt.device)
  my_params = param_collections.ParamManager.my_params(self)
  my_params.append(self.embeddings)
  my_params.init_params(param_init)
def __init__(
    self,
    policy_network=None,
    baseline=None,
    z_normalization=True,
    conf_penalty=None,
    weight=1.0,
    input_dim=Ref("exp_global.default_layer_dim"),
    output_dim=2,
    param_init=Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
    bias_init=Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer))):
  self.input_dim = input_dim
  self.policy_network = self.add_serializable_component(
    "policy_network", policy_network,
    lambda: transforms.Linear(input_dim=self.input_dim, output_dim=output_dim,
                              param_init=param_init, bias_init=bias_init))
  self.baseline = self.add_serializable_component(
    "baseline", baseline,
    lambda: transforms.Linear(input_dim=self.input_dim, output_dim=1,
                              param_init=param_init, bias_init=bias_init))
  self.confidence_penalty = self.add_serializable_component(
    "conf_penalty", conf_penalty, lambda: conf_penalty) if conf_penalty is not None else None
  self.weight = weight
  self.z_normalization = z_normalization
def __init__(self,
             word_vocab=None,
             embedding=None,
             ngram_size=4,
             vocab_size=32000,
             cache_id_pool=None,
             cache_word_table=None,
             char_vocab=Ref(Path("model.src_reader.vocab")),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  super().__init__(word_vocab, vocab_size, cache_id_pool, cache_word_table)
  # Attributes
  if word_vocab is None:
    self.dict_entry = vocab_size + 1
  else:
    self.dict_entry = len(word_vocab)
  self.char_vocab = char_vocab
  self.param_init = param_init
  self.bias_init = bias_init
  self.hidden_dim = hidden_dim
  self.word_vect = None
  # Word Embedding
  self.ngram_size = ngram_size
  self.embedding = self.add_serializable_component(
    "embedding", embedding,
    lambda: Linear(input_dim=self.dict_entry, output_dim=hidden_dim,
                   param_init=param_init, bias_init=bias_init))
def __init__(
    self,
    tasks: Sequence[train_tasks.TrainingTask],
    task_weights: Optional[Sequence[numbers.Real]] = None,
    trainer: optimizers.XnmtOptimizer = bare(optimizers.SimpleSGDTrainer, e0=0.1),
    dev_zero: bool = False,
    loss_comb_method: str = Ref("exp_global.loss_comb_method", default="sum"),
    update_every_within: numbers.Integral = 1,
    update_every_across: numbers.Integral = 1,
    commandline_args=Ref("exp_global.commandline_args", default=None)
) -> None:
  super().__init__(tasks=tasks, trainer=trainer, dev_zero=dev_zero,
                   update_every=update_every_across, commandline_args=commandline_args)
  if update_every_within != 1 and update_every_across != 1:
    raise ValueError("update_every_within and update_every_across cannot be mixed.")
  self.update_every_within = update_every_within
  self.task_weights = task_weights or [1. / len(tasks)] * len(tasks)
  if len(self.task_weights) != len(self.tasks):
    raise ValueError(f"number of tasks must match number of task weights; "
                     f"found: {len(self.task_weights)} != {len(self.tasks)}")
  self.train_loss_trackers = {task: loss_trackers.TrainLossTracker(task) for task in tasks}
  self.loss_comb_method = loss_comb_method
def __init__(self,
             input_dim=Ref("exp_global.default_layer_dim"),
             state_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  self.input_dim = input_dim
  self.state_dim = state_dim
  self.hidden_dim = hidden_dim
  param_collection = ParamManager.my_params(self)
  self.pW = param_collection.add_parameters((hidden_dim, input_dim),
                                            init=param_init.initializer((hidden_dim, input_dim)))
  self.pV = param_collection.add_parameters((hidden_dim, state_dim),
                                            init=param_init.initializer((hidden_dim, state_dim)))
  self.pb = param_collection.add_parameters((hidden_dim,),
                                            init=bias_init.initializer((hidden_dim,)))
  self.pU = param_collection.add_parameters((1, hidden_dim),
                                            init=param_init.initializer((1, hidden_dim)))
  self.curr_sent = None
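# Minimal numpy sketch (an assumption, not the attender's actual calc_attention code) of the
# scoring function that pW, pV, pb, pU parameterize, in the usual MLP/Bahdanau form:
# score_i = U . tanh(W @ x_i + V @ s + b), followed by a softmax over source positions i.
# X holds the encoder states (src_len, input_dim); s is the decoder state (state_dim,).
import numpy as np

def mlp_attention_weights(X, s, W, V, b, U):
  scores = np.array([U @ np.tanh(W @ x + V @ s + b) for x in X]).flatten()
  exp = np.exp(scores - scores.max())
  return exp / exp.sum()  # attention weights over source positions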
def __init__(
    self,
    model: models.ConditionedModel = Ref("model"),
    src_file: Union[None, str, Sequence[str]] = None,
    trg_file: Optional[str] = None,
    dev_every: numbers.Integral = 0,
    dev_zero: bool = False,
    batcher: batchers.Batcher = bare(batchers.SrcBatcher, batch_size=32),
    loss_calculator: loss_calculators.LossCalculator = bare(loss_calculators.MLELoss),
    trainer: optimizers.XnmtOptimizer = bare(optimizers.SimpleSGDTrainer, e0=0.1),
    run_for_epochs: Optional[numbers.Integral] = None,
    lr_decay: numbers.Real = 1.0,
    lr_decay_times: numbers.Integral = 3,
    patience: numbers.Integral = 1,
    initial_patience: Optional[numbers.Integral] = None,
    dev_tasks: Sequence[eval_tasks.EvalTask] = None,
    dev_combinator: Optional[str] = None,
    restart_trainer: bool = False,
    reload_command: Optional[str] = None,
    name: str = "{EXP}",
    sample_train_sents: Optional[numbers.Integral] = None,
    max_num_train_sents: Optional[numbers.Integral] = None,
    max_src_len: Optional[numbers.Integral] = None,
    max_trg_len: Optional[numbers.Integral] = None,
    loss_comb_method: str = Ref("exp_global.loss_comb_method", default="sum"),
    update_every: numbers.Integral = 1,
    commandline_args: dict = Ref("exp_global.commandline_args", default={})
) -> None:
  super().__init__(model=model,
                   src_file=src_file,
                   trg_file=trg_file,
                   dev_every=dev_every,
                   batcher=batcher,
                   loss_calculator=loss_calculator,
                   run_for_epochs=run_for_epochs,
                   lr_decay=lr_decay,
                   lr_decay_times=lr_decay_times,
                   patience=patience,
                   initial_patience=initial_patience,
                   dev_tasks=dev_tasks,
                   dev_combinator=dev_combinator,
                   restart_trainer=restart_trainer,
                   reload_command=reload_command,
                   name=name,
                   sample_train_sents=sample_train_sents,
                   max_num_train_sents=max_num_train_sents,
                   max_src_len=max_src_len,
                   max_trg_len=max_trg_len)
  self.dev_zero = dev_zero
  self.trainer = trainer or optimizers.SimpleSGDTrainer(e0=0.1)
  self.dynet_profiling = commandline_args.get("dynet_profiling", 0) if commandline_args else 0
  self.train_loss_tracker = loss_trackers.TrainLossTracker(self)
  self.loss_comb_method = loss_comb_method
  self.update_every = update_every
  self.num_updates_skipped = 0
def __init__(
    self,
    layers,
    input_dim=Ref("exp_global.default_layer_dim"),
    hidden_dim=Ref("exp_global.default_layer_dim"),
    dropout=Ref("exp_global.dropout", default=0.0),
    stride=1,
    filter_width=2,
    param_init=Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
    bias_init=Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)),
    forward_layers=None,
    backward_layers=None):
  self.num_layers = layers
  self.hidden_dim = hidden_dim
  assert hidden_dim % 2 == 0
  self.forward_layers = self.add_serializable_component(
    "forward_layers", forward_layers,
    lambda: self.init_layers(input_dim, hidden_dim, dropout, stride, filter_width,
                             param_init, bias_init))
  self.backward_layers = self.add_serializable_component(
    "backward_layers", backward_layers,
    lambda: self.init_layers(input_dim, hidden_dim, dropout, stride, filter_width,
                             param_init, bias_init))
def __init__(self, input_dim: numbers.Integral = Ref( "exp_global.default_layer_dim"), vocab_size: Optional[numbers.Integral] = None, vocab: Optional[vocabs.Vocab] = None, trg_reader: Optional[input_readers.InputReader] = Ref( "model.trg_reader", default=None), label_smoothing: numbers.Real = 0.0, param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)), output_projector: transforms.Linear = None) -> None: self.param_col = param_collections.ParamManager.my_params(self) self.input_dim = input_dim self.output_dim = self._choose_vocab_size(vocab_size, vocab, trg_reader) self.label_smoothing = label_smoothing self.output_projector = self.add_serializable_component( "output_projector", output_projector, lambda: output_projector or transforms.Linear(input_dim=self.input_dim, output_dim=self.output_dim, param_init=param_init, bias_init=bias_init))
def __init__(
    self,
    input_dim=Ref("exp_global.default_layer_dim"),
    hidden_dim=Ref("exp_global.default_layer_dim"),
    dropout=Ref("exp_global.dropout", default=0.0),
    filter_width=2,
    stride=1,
    param_init=Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
    bias_init=Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer))):
  model = param_collections.ParamManager.my_params(self)
  self.hidden_dim = hidden_dim
  self.dropout = dropout
  self.input_dim = input_dim
  self.stride = stride
  dim_f = (filter_width, 1, input_dim, hidden_dim * 3)
  self.p_f = model.add_parameters(dim=dim_f,
                                  init=param_init.initializer(dim_f, num_shared=3))  # f, o, z
  dim_b = (hidden_dim * 3,)
  self.p_b = model.add_parameters(dim=dim_b,
                                  init=bias_init.initializer(dim_b, num_shared=3))
def __init__(self,
             layers,
             input_dim,
             hidden_dim,
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  if layers != 1:
    raise RuntimeError("CustomLSTMSeqTransducer supports only exactly one layer")
  self.input_dim = input_dim
  self.hidden_dim = hidden_dim
  model = ParamManager.my_params(self)

  # [i; f; o; g]
  self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                   init=param_init.initializer((hidden_dim * 4, input_dim)))
  self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                   init=param_init.initializer((hidden_dim * 4, hidden_dim)))
  self.p_b = model.add_parameters(dim=(hidden_dim * 4,),
                                  init=bias_init.initializer((hidden_dim * 4,)))
def __init__(self,
             input_dim=Ref("exp_global.default_layer_dim"),
             trg_embed_dim=Ref("exp_global.default_layer_dim"),
             input_feeding=True,
             rnn_layer=bare(UniLSTMSeqTransducer),
             mlp_layer=bare(MLP),
             bridge=bare(CopyBridge),
             label_smoothing=0.0):
  self.param_col = ParamManager.my_params(self)
  self.input_dim = input_dim
  self.label_smoothing = label_smoothing
  # Input feeding
  self.input_feeding = input_feeding
  rnn_input_dim = trg_embed_dim
  if input_feeding:
    rnn_input_dim += input_dim
  assert rnn_input_dim == rnn_layer.input_dim, \
    "Wrong input dimension in RNN layer: {} != {}".format(rnn_input_dim, rnn_layer.input_dim)
  # Bridge
  self.bridge = bridge
  # LSTM
  self.rnn_layer = rnn_layer
  # MLP
  self.mlp_layer = mlp_layer
def __init__(
    self,
    input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    param_init: param_initializers.ParamInitializer = Ref(
      "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
    bias_init: param_initializers.ParamInitializer = Ref(
      "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer))
) -> None:
  self.input_dim = input_dim
  self.state_dim = state_dim
  self.hidden_dim = hidden_dim
  my_params = param_collections.ParamManager.my_params(self)
  self.linear_context = nn.Linear(input_dim, hidden_dim, bias=True).to(xnmt.device)
  self.linear_query = nn.Linear(state_dim, hidden_dim, bias=False).to(xnmt.device)
  self.pU = nn.Linear(hidden_dim, 1, bias=False).to(xnmt.device)
  my_params.append(self.linear_context)
  my_params.append(self.linear_query)
  my_params.append(self.pU)
  my_params.init_params(param_init, bias_init)
  self.curr_sent = None
  self.attention_vecs = None
  self.WI = None
def __init__(self,
             src_file: Union[str, Sequence[str]],
             ref_file: Optional[str] = None,
             model: 'model_base.GeneratorModel' = Ref("model"),
             batcher: Batcher = Ref("train.batcher",
                                    default=bare(xnmt.batchers.SrcBatcher, batch_size=32)),
             loss_calculator: LossCalculator = bare(MLELoss),
             max_src_len: Optional[int] = None,
             max_trg_len: Optional[int] = None,
             max_num_sents: Optional[int] = None,
             loss_comb_method: str = Ref("exp_global.loss_comb_method", default="sum"),
             desc: Any = None):
  self.model = model
  self.loss_calculator = loss_calculator
  self.src_file = src_file
  self.ref_file = ref_file
  self.batcher = batcher
  self.src_data = None
  self.max_src_len = max_src_len
  self.max_trg_len = max_trg_len
  self.max_num_sents = max_num_sents
  self.loss_comb_method = loss_comb_method
  self.desc = desc
def __init__(self,
             input_dim: int = Ref("exp_global.default_layer_dim"),
             hidden_dim: int = Ref("exp_global.default_layer_dim"),
             downsample_by: int = 1,
             param_init=Ref("exp_global.param_init",
                            default=bare(param_initializers.GlorotInitializer)),
             projection=None,
             batch_norm=None,
             nonlinearity=None):
  self.projection = self.add_serializable_component(
    "projection", projection,
    lambda: base.TransformSeqTransducer(
      modelparts_transforms.Linear(input_dim=input_dim * downsample_by,
                                   output_dim=hidden_dim,
                                   bias=False,
                                   param_init=param_init),
      downsample_by=downsample_by))
  self.batch_norm = self.add_serializable_component(
    "batch_norm", batch_norm,
    lambda: norms.BatchNorm(hidden_dim=hidden_dim, num_dim=2))
  self.nonlinearity = self.add_serializable_component(
    "nonlinearity", nonlinearity,
    lambda: base.TransformSeqTransducer(modelparts_transforms.Cwise("rectify")))
  self.modules = [self.projection, self.batch_norm, self.nonlinearity]
def __init__(self,
             model=Ref("model"),
             src_file=None,
             trg_file=None,
             dev_every=0,
             dev_zero=False,
             batcher=bare(xnmt.batcher.SrcBatcher, batch_size=32),
             loss_calculator=None,
             trainer=None,
             run_for_epochs=None,
             lr_decay=1.0,
             lr_decay_times=3,
             patience=1,
             initial_patience=None,
             dev_tasks=None,
             restart_trainer: bool = False,
             reload_command=None,
             name="{EXP}",
             sample_train_sents=None,
             max_num_train_sents=None,
             max_src_len=None,
             max_trg_len=None,
             commandline_args=Ref("exp_global.commandline_args", default=None)):
  super().__init__(model=model,
                   src_file=src_file,
                   trg_file=trg_file,
                   dev_every=dev_every,
                   batcher=batcher,
                   loss_calculator=loss_calculator,
                   run_for_epochs=run_for_epochs,
                   lr_decay=lr_decay,
                   lr_decay_times=lr_decay_times,
                   patience=patience,
                   initial_patience=initial_patience,
                   dev_tasks=dev_tasks,
                   restart_trainer=restart_trainer,
                   reload_command=reload_command,
                   name=name,
                   sample_train_sents=sample_train_sents,
                   max_num_train_sents=max_num_train_sents,
                   max_src_len=max_src_len,
                   max_trg_len=max_trg_len)
  self.dev_zero = dev_zero
  self.trainer = trainer or xnmt.optimizer.SimpleSGDTrainer(e0=0.1)
  self.dynet_profiling = getattr(commandline_args, "dynet_profiling", 0) if commandline_args else 0
def __init__(self,
             input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
             state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
             hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
             param_init: param_initializers.ParamInitializer = Ref(
               "exp_global.param_init",
               default=bare(param_initializers.GlorotInitializer)),
             bias_init: param_initializers.ParamInitializer = Ref(
               "exp_global.bias_init",
               default=bare(param_initializers.ZeroInitializer))) -> None:
  self.input_dim = input_dim
  self.state_dim = state_dim
  self.hidden_dim = hidden_dim
  param_collection = param_collections.ParamManager.my_params(self)
  self.pW = param_collection.add_parameters((hidden_dim, input_dim),
                                            init=param_init.initializer((hidden_dim, input_dim)))
  self.pV = param_collection.add_parameters((hidden_dim, state_dim),
                                            init=param_init.initializer((hidden_dim, state_dim)))
  self.pb = param_collection.add_parameters((hidden_dim,),
                                            init=bias_init.initializer((hidden_dim,)))
  self.pU = param_collection.add_parameters((1, hidden_dim),
                                            init=param_init.initializer((1, hidden_dim)))
  self.pL = param_collection.add_parameters((100, 1, 1, hidden_dim),
                                            init=param_init.initializer((100, 1, 1, hidden_dim)))
  self.curr_sent = None
def __init__(
    self,
    tasks: Sequence[train_tasks.TrainingTask],
    trainer: optimizers.XnmtOptimizer = bare(optimizers.SimpleSGDTrainer, e0=0.1),
    dev_zero: bool = False,
    per_task_backward: bool = True,
    loss_comb_method: str = Ref("exp_global.loss_comb_method", default="sum"),
    update_every: numbers.Integral = 1,
    n_task_steps: Optional[Sequence[numbers.Integral]] = None,
    commandline_args: dict = Ref("exp_global.commandline_args", default=None)
) -> None:
  super().__init__(tasks=tasks, trainer=trainer, dev_zero=dev_zero,
                   update_every=update_every, commandline_args=commandline_args)
  self.train_loss_trackers = {task: loss_trackers.TrainLossTracker(task) for task in tasks}
  self.per_task_backward = per_task_backward
  self.loss_comb_method = loss_comb_method
  self.n_task_steps = n_task_steps or [1] * len(tasks)
  if len(self.n_task_steps) != len(tasks):
    raise ValueError(
      f"number of tasks and steps per task do not match: {len(tasks)} != {len(self.n_task_steps)}")
def __init__(self,
             input_dim=512,
             layers=1,
             hidden_dim=Ref("exp_global.default_layer_dim"),
             residual_to_output=False,
             dropout=0.0,
             bidirectional=True,
             builder=None,
             yaml_path=None,
             decoder_input_dim=Ref("exp_global.default_layer_dim", default=None),
             decoder_input_feeding=True):
  self._final_states = None
  if yaml_path is not None and "decoder" in yaml_path:
    bidirectional = False
    if decoder_input_feeding:
      input_dim += decoder_input_dim
  if bidirectional:
    self.builder = self.add_serializable_component(
      "builder", builder,
      lambda: ResidualBiRNNBuilder(num_layers=layers,
                                   input_dim=input_dim,
                                   hidden_dim=hidden_dim,
                                   add_to_output=residual_to_output,
                                   dropout=dropout))
  else:
    self.builder = self.add_serializable_component(
      "builder", builder,
      lambda: ResidualRNNBuilder(num_layers=layers,
                                 input_dim=input_dim,
                                 hidden_dim=hidden_dim,
                                 add_to_output=residual_to_output,
                                 dropout=dropout))
def __init__(
    self,
    input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
    dropout: numbers.Real = Ref("exp_global.dropout", default=0.0),
    param_init: param_initializers.ParamInitializer = Ref(
      "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
    bias_init: param_initializers.ParamInitializer = Ref(
      "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)),
    num_heads: numbers.Integral = 8):
  assert input_dim % num_heads == 0
  self.dropout = dropout
  param_collection = param_collections.ParamManager.my_params(self)
  self.input_dim = input_dim
  self.num_heads = num_heads
  self.head_dim = input_dim // num_heads
  self.pWq, self.pWk, self.pWv, self.pWo = [
    param_collection.add_parameters(dim=(input_dim, input_dim),
                                    init=param_init.initializer((input_dim, input_dim)))
    for _ in range(4)
  ]
  self.pbq, self.pbk, self.pbv, self.pbo = [
    param_collection.add_parameters(dim=(1, input_dim),
                                    init=bias_init.initializer((1, input_dim)))
    for _ in range(4)
  ]
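# Minimal numpy sketch (hypothetical, not the transducer's transduce() code) of how the
# query/key/value projections above are split into num_heads independent heads of width
# head_dim = input_dim // num_heads and concatenated back afterwards. Example numbers only.
import numpy as np

seq_len, input_dim, num_heads = 5, 8, 4
head_dim = input_dim // num_heads
Q = np.random.randn(seq_len, input_dim)          # e.g. the output of the Wq projection
heads = Q.reshape(seq_len, num_heads, head_dim)  # split the feature axis into heads
recombined = heads.reshape(seq_len, input_dim)   # concatenating the heads restores input_dim
assert np.allclose(Q, recombined)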