def __init__( self, input_dim: int = Ref("exp_global.default_layer_dim"), state_dim: int = Ref("exp_global.default_layer_dim"), hidden_dim: int = Ref("exp_global.default_layer_dim"), param_init: ParamInitializer = Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init: ParamInitializer = Ref("exp_global.bias_init", default=bare(ZeroInitializer)), truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False) ) -> None: self.input_dim = input_dim self.state_dim = state_dim self.hidden_dim = hidden_dim self.truncate_dec_batches = truncate_dec_batches param_collection = ParamManager.my_params(self) self.pW = param_collection.add_parameters((hidden_dim, input_dim), init=param_init.initializer( (hidden_dim, input_dim))) self.pV = param_collection.add_parameters((hidden_dim, state_dim), init=param_init.initializer( (hidden_dim, state_dim))) self.pb = param_collection.add_parameters((hidden_dim, ), init=bias_init.initializer( (hidden_dim, ))) self.pU = param_collection.add_parameters((1, hidden_dim), init=param_init.initializer( (1, hidden_dim))) self.curr_sent = None
def __init__(self, layers=1, input_dim=Ref("exp_global.default_layer_dim"), hidden_dim=Ref("exp_global.default_layer_dim"), dropout = Ref("exp_global.dropout", default=0.0), weightnoise_std=Ref("exp_global.weight_noise", default=0.0), param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)), yaml_path=None, decoder_input_dim=Ref("exp_global.default_layer_dim", default=None), decoder_input_feeding=True): self.num_layers = layers model = ParamManager.my_params(self) if yaml_path is not None and "decoder" in yaml_path: if decoder_input_feeding: input_dim += decoder_input_dim self.hidden_dim = hidden_dim self.dropout_rate = dropout self.weightnoise_std = weightnoise_std self.input_dim = input_dim if not isinstance(param_init, Sequence): param_init = [param_init] * layers if not isinstance(bias_init, Sequence): bias_init = [bias_init] * layers # [i; f; o; g] self.p_Wx = [model.add_parameters(dim=(hidden_dim*4, input_dim), init=param_init[0].initializer((hidden_dim*4, input_dim), num_shared=4))] self.p_Wx += [model.add_parameters(dim=(hidden_dim*4, hidden_dim), init=param_init[i].initializer((hidden_dim*4, hidden_dim), num_shared=4)) for i in range(1, layers)] self.p_Wh = [model.add_parameters(dim=(hidden_dim*4, hidden_dim), init=param_init[i].initializer((hidden_dim*4, hidden_dim), num_shared=4)) for i in range(layers)] self.p_b = [model.add_parameters(dim=(hidden_dim*4,), init=bias_init[i].initializer((hidden_dim*4,), num_shared=4)) for i in range(layers)] self.dropout_mask_x = None self.dropout_mask_h = None
def __init__(self, filename, emb_dim=Ref("exp_global.default_layer_dim"), weight_noise=Ref("exp_global.weight_noise", default=0.0), word_dropout=0.0, fix_norm = None, vocab = None, yaml_path = None, src_reader = Ref("model.src_reader", default=None), trg_reader = Ref("model.trg_reader", default=None)): self.emb_dim = emb_dim self.weight_noise = weight_noise self.word_dropout = word_dropout self.word_id_mask = None self.train = False self.fix_norm = fix_norm self.pretrained_filename = filename param_collection = ParamManager.my_params(self) self.vocab = self.choose_vocab(vocab, yaml_path, src_reader, trg_reader) self.vocab_size = len(vocab) self.save_processed_arg("vocab", self.vocab) with open(self.pretrained_filename, encoding='utf-8') as embeddings_file: total_embs, in_vocab, missing, initial_embeddings = self._read_fasttext_embeddings(vocab, embeddings_file) self.embeddings = param_collection.lookup_parameters_from_numpy(initial_embeddings) logger.info(f"{in_vocab} vocabulary matches out of {total_embs} total embeddings; " f"{missing} vocabulary words without a pretrained embedding out of {self.vocab_size}")
def __init__(self,
             layers,
             input_dim,
             hidden_dim,
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  if layers != 1:
    raise RuntimeError("CustomLSTMSeqTransducer supports only exactly one layer")
  self.input_dim = input_dim
  self.hidden_dim = hidden_dim
  model = ParamManager.my_params(self)
  # Packed gate order: [i; f; o; g]
  self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                   init=param_init.initializer((hidden_dim * 4, input_dim)))
  self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                   init=param_init.initializer((hidden_dim * 4, hidden_dim)))
  self.p_b = model.add_parameters(dim=(hidden_dim * 4,),
                                  init=bias_init.initializer((hidden_dim * 4,)))
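# Hedged illustration of the single recurrent step these parameters imply, with
# the packed gate order [i; f; o; g]. This is an assumed NumPy sketch, not the
# transducer's actual DyNet implementation.
import numpy as np

def lstm_step(x, h_prev, c_prev, Wx, Wh, b, hidden_dim):
  sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
  gates = Wx @ x + Wh @ h_prev + b                   # (hidden_dim * 4,)
  i = sigmoid(gates[:hidden_dim])                    # input gate
  f = sigmoid(gates[hidden_dim:2 * hidden_dim])      # forget gate
  o = sigmoid(gates[2 * hidden_dim:3 * hidden_dim])  # output gate
  g = np.tanh(gates[3 * hidden_dim:])                # candidate cell update
  c = f * c_prev + i * g
  h = o * np.tanh(c)
  return h, c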
def __init__(self, input_dim=Ref("exp_global.default_layer_dim"), trg_embed_dim=Ref("exp_global.default_layer_dim"), input_feeding=True, rnn_layer=bare(UniLSTMSeqTransducer), mlp_layer=bare(MLP), bridge=bare(CopyBridge), label_smoothing=0.0): self.param_col = ParamManager.my_params(self) self.input_dim = input_dim self.label_smoothing = label_smoothing # Input feeding self.input_feeding = input_feeding rnn_input_dim = trg_embed_dim if input_feeding: rnn_input_dim += input_dim assert rnn_input_dim == rnn_layer.input_dim, "Wrong input dimension in RNN layer" # Bridge self.bridge = bridge # LSTM self.rnn_layer = rnn_layer # MLP self.mlp_layer = mlp_layer
def __init__(self, input_dim=Ref("exp_global.default_layer_dim"), param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)), num_heads=8): assert (input_dim % num_heads == 0) param_collection = ParamManager.my_params(self) self.input_dim = input_dim self.num_heads = num_heads self.head_dim = input_dim // num_heads self.pWq, self.pWk, self.pWv, self.pWo = [ param_collection.add_parameters(dim=(input_dim, input_dim), init=param_init.initializer( (input_dim, input_dim))) for _ in range(4) ] self.pbq, self.pbk, self.pbv, self.pbo = [ param_collection.add_parameters(dim=(1, input_dim), init=bias_init.initializer(( 1, input_dim, ))) for _ in range(4) ]
def __init__(self, emb_dim=Ref("exp_global.default_layer_dim"), weight_noise=Ref("exp_global.weight_noise", default=0.0), word_dropout=0.0, fix_norm=None, param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)), vocab_size=None, vocab=None, yaml_path=None, src_reader=Ref("model.src_reader", default=None), trg_reader=Ref("model.trg_reader", default=None)): #print(f"embedder received param_init: {param_init}") self.emb_dim = emb_dim self.weight_noise = weight_noise self.word_dropout = word_dropout self.fix_norm = fix_norm self.word_id_mask = None self.train = False param_collection = ParamManager.my_params(self) self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path, src_reader, trg_reader) self.save_processed_arg("vocab_size", self.vocab_size) self.embeddings = param_collection.add_lookup_parameters( (self.vocab_size, self.emb_dim), init=param_init.initializer((self.vocab_size, self.emb_dim), is_lookup=True))
def __init__(self, filter_height, filter_width, channels, num_filters, stride):
  """
  Args:
    filter_height: filter heights for the three convolutional layers
    filter_width: filter widths for the three convolutional layers
    channels: number of input channels for each convolutional layer
    num_filters: number of filters (output channels) for each convolutional layer
    stride: convolution stride, e.g. (2, 2)
  """
  model = ParamManager.my_params(self)
  self.filter_height = filter_height
  self.filter_width = filter_width
  self.channels = channels
  self.num_filters = num_filters
  self.stride = stride  # e.g. (2, 2)
  self.hidden_states = {}
  normalInit = dy.NormalInitializer(0, 0.1)
  self.filters1 = model.add_parameters(dim=(self.filter_height[0], self.filter_width[0],
                                            self.channels[0], self.num_filters[0]),
                                       init=normalInit)
  self.filters2 = model.add_parameters(dim=(self.filter_height[1], self.filter_width[1],
                                            self.channels[1], self.num_filters[1]),
                                       init=normalInit)
  self.filters3 = model.add_parameters(dim=(self.filter_height[2], self.filter_width[2],
                                            self.channels[2], self.num_filters[2]),
                                       init=normalInit)
def __init__(self,
             ## COMPONENTS
             embed_encoder=None,
             segment_composer=None,
             final_transducer=None,
             ## OPTIONS
             length_prior=3.3,
             length_prior_alpha=None,  # GeometricSequence
             epsilon_greedy=None,      # GeometricSequence
             reinforce_scale=None,     # GeometricSequence
             confidence_penalty=None,  # SegmentationConfidencePenalty
             # For segmentation warmup (always use the Poisson prior)
             segmentation_warmup=0,
             ## FLAGS
             learn_delete=False,
             use_baseline=True,
             z_normalization=True,
             learn_segmentation=True,
             compose_char=False,
             log_reward=True,
             debug=False,
             print_sample=False):
  model = ParamManager.my_params(self)
  # Sanity check
  assert embed_encoder is not None
  assert segment_composer is not None
  assert final_transducer is not None
  # The embed encoder transduces the embedding vectors into a sequence of vectors
  self.embed_encoder = embed_encoder
  if not hasattr(embed_encoder, "hidden_dim"):
    embed_encoder_dim = yaml_context.default_layer_dim
  else:
    embed_encoder_dim = embed_encoder.hidden_dim
  # The segment composer produces word embeddings from sequences of character embeddings
  self.segment_composer = segment_composer
  # The final transducer
  self.final_transducer = final_transducer
  # Decision layer of segmentation
  self.segment_transform = linear.Linear(input_dim=embed_encoder_dim,
                                         output_dim=3 if learn_delete else 2)
  # The baseline linear regression model
  self.baseline = linear.Linear(input_dim=embed_encoder_dim, output_dim=1)
  # Flags
  self.use_baseline = use_baseline
  self.learn_segmentation = learn_segmentation
  self.learn_delete = learn_delete
  self.z_normalization = z_normalization
  self.debug = debug
  self.compose_char = compose_char
  self.print_sample = print_sample
  self.log_reward = log_reward
  # Fixed parameters
  self.length_prior = length_prior
  self.segmentation_warmup = segmentation_warmup
  # Variable parameters
  self.length_prior_alpha = length_prior_alpha
  self.lmbd = reinforce_scale
  self.eps = epsilon_greedy
  self.confidence_penalty = confidence_penalty
  # States of the object
  self.train = False
def __init__(self, input_dim=Ref("exp_global.default_layer_dim"), state_dim=Ref("exp_global.default_layer_dim"), hidden_dim=Ref("exp_global.default_layer_dim"), param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))): self.input_dim = input_dim self.state_dim = state_dim self.hidden_dim = hidden_dim param_collection = ParamManager.my_params(self) self.pW = param_collection.add_parameters((hidden_dim, input_dim), init=param_init.initializer( (hidden_dim, input_dim))) self.pV = param_collection.add_parameters((hidden_dim, state_dim), init=param_init.initializer( (hidden_dim, state_dim))) self.pb = param_collection.add_parameters((hidden_dim, ), init=bias_init.initializer( (hidden_dim, ))) self.pU = param_collection.add_parameters((1, hidden_dim), init=param_init.initializer( (1, hidden_dim))) self.curr_sent = None
def __init__( self, input_dim: int = Ref("exp_global.default_layer_dim"), trg_embed_dim: int = Ref("exp_global.default_layer_dim"), input_feeding: bool = True, bridge: Bridge = bare(CopyBridge), rnn: UniLSTMSeqTransducer = bare(UniLSTMSeqTransducer), transform: Transform = bare(AuxNonLinear), scorer: Scorer = bare(Softmax), truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False) ) -> None: self.param_col = ParamManager.my_params(self) self.input_dim = input_dim self.truncate_dec_batches = truncate_dec_batches self.bridge = bridge self.rnn = rnn self.transform = transform self.scorer = scorer # Input feeding self.input_feeding = input_feeding rnn_input_dim = trg_embed_dim if input_feeding: rnn_input_dim += input_dim assert rnn_input_dim == rnn.input_dim, "Wrong input dimension in RNN layer: {} != {}".format( rnn_input_dim, rnn.input_dim)
def __init__(self, child: SeqTransducer, input_dim: int, layer_norm: bool = False):
  self.child = child
  self.input_dim = input_dim
  self.layer_norm = layer_norm
  if layer_norm:
    model = ParamManager.my_params(self)
    self.ln_g = model.add_parameters(dim=(input_dim,))
    self.ln_b = model.add_parameters(dim=(input_dim,))
def __init__(self, filter_height, filter_width, channels, num_filters, stride,
             rhn_num_hidden_layers, rhn_dim, rhn_microsteps, attention_dim, residual=False):
  self.filter_height = filter_height
  self.filter_width = filter_width
  self.channels = channels
  self.num_filters = num_filters
  self.stride = stride
  self.rhn_num_hidden_layers = rhn_num_hidden_layers
  self.rhn_dim = rhn_dim
  self.rhn_microsteps = rhn_microsteps
  self.attention_dim = attention_dim
  self.residual = residual
  model = ParamManager.my_params(self)
  # Convolutional layer
  self.filter_conv = model.add_parameters(dim=(self.filter_height, self.filter_width,
                                               self.channels, self.num_filters))
  # Recurrent highway layers
  self.recur = []
  self.linear = []
  self.init = []
  self.attention = []
  input_dim = num_filters
  for _ in range(rhn_num_hidden_layers):
    self.init.append(model.add_parameters((rhn_dim,)))
    self.linear.append((model.add_parameters((rhn_dim, input_dim)),
                        model.add_parameters((rhn_dim, input_dim))))
    input_dim = rhn_dim
    recur_layer = []
    for _ in range(self.rhn_microsteps):
      recur_layer.append((model.add_parameters((rhn_dim, rhn_dim)),
                          model.add_parameters((rhn_dim,)),
                          model.add_parameters((rhn_dim, rhn_dim)),
                          model.add_parameters((rhn_dim,))))
    self.recur.append(recur_layer)
    # Attention layer
    self.attention.append((model.add_parameters((attention_dim, rhn_dim)),
                           model.add_parameters(attention_dim, )))
def __init__(self, input_dim=Ref("exp_global.default_layer_dim"), state_dim=Ref("exp_global.default_layer_dim"), param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer))): self.input_dim = input_dim self.state_dim = state_dim param_collection = ParamManager.my_params(self) self.pWa = param_collection.add_parameters((input_dim, state_dim), init=param_init.initializer( (input_dim, state_dim))) self.curr_sent = None
def __init__(self,
             max_pos: int,
             emb_dim: int = Ref("exp_global.default_layer_dim"),
             param_init: ParamInitializer = Ref("exp_global.param_init", default=bare(GlorotInitializer))):
  """
  Args:
    max_pos: largest embedded position
    emb_dim: embedding size
    param_init: how to initialize the embedding matrix
  """
  self.max_pos = max_pos
  self.emb_dim = emb_dim
  param_collection = ParamManager.my_params(self)
  dim = (self.emb_dim, max_pos)
  self.embeddings = param_collection.add_parameters(dim, init=param_init.initializer(dim, is_lookup=True))
def __init__(self, input_dim, window_receptor, output_dim, num_layers, internal_dim, non_linearity='linear'):
  """
  Args:
    num_layers: number of layers after the first receptor convolution
    input_dim: size of the inputs
    window_receptor: window size of the receptor
    output_dim: size of the outputs
    internal_dim: size of the hidden (internal) dimension
    non_linearity: nonlinearity to apply between layers
  """
  model = ParamManager.my_params(self)
  self.input_dim = input_dim
  self.window_receptor = window_receptor
  self.internal_dim = internal_dim
  self.non_linearity = non_linearity
  self.output_dim = output_dim
  if self.non_linearity == 'linear':
    self.gain = 1.0
  elif self.non_linearity == 'tanh':
    self.gain = 1.0
  elif self.non_linearity == 'relu':
    self.gain = 0.5
  elif self.non_linearity == 'sigmoid':
    self.gain = 4.0
  normalInit = dy.NormalInitializer(0, 0.1)
  self.pConv1 = model.add_parameters(dim=(self.input_dim, self.window_receptor, 1, self.internal_dim),
                                     init=normalInit)
  self.pBias1 = model.add_parameters(dim=(self.internal_dim,))
  self.builder_layers = []
  for _ in range(num_layers):
    conv = model.add_parameters(dim=(self.internal_dim, 1, 1, self.internal_dim), init=normalInit)
    bias = model.add_parameters(dim=(self.internal_dim,))
    self.builder_layers.append((conv, bias))
  self.last_conv = model.add_parameters(dim=(self.internal_dim, 1, 1, self.output_dim), init=normalInit)
  self.last_bias = model.add_parameters(dim=(self.output_dim,))
def __init__(self,
             dec_layers=1,
             enc_dim=Ref("exp_global.default_layer_dim"),
             dec_dim=Ref("exp_global.default_layer_dim"),
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  param_col = ParamManager.my_params(self)
  self.dec_layers = dec_layers
  self.enc_dim = enc_dim
  self.dec_dim = dec_dim
  self.projector = xnmt.linear.Linear(input_dim=enc_dim,
                                      output_dim=dec_dim,
                                      param_init=param_init,
                                      bias_init=bias_init)
def __init__(self, input_dim=Ref("exp_global.default_layer_dim"), hidden_dim=Ref("exp_global.default_layer_dim"), output_dim=Ref("exp_global.default_layer_dim", default=None), param_init_hidden=Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init_hidden=Ref("exp_global.bias_init", default=bare(ZeroInitializer)), param_init_output=Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init_output=Ref("exp_global.bias_init", default=bare(ZeroInitializer)), activation='tanh', hidden_layer=None, output_projector=None, yaml_path=None, vocab_size=None, vocab=None, trg_reader=Ref("model.trg_reader", default=None), decoder_rnn_dim=Ref("exp_global.default_layer_dim", default=None)): model = ParamManager.my_params(self) self.input_dim = input_dim self.hidden_dim = hidden_dim self.output_dim = output_dim if yaml_path is not None and "decoder" in yaml_path: self.input_dim += decoder_rnn_dim self.output_dim = self.choose_vocab_size(vocab_size, vocab, trg_reader) self.save_processed_arg("vocab_size", self.output_dim) self.hidden_layer = self.add_serializable_component("hidden_layer", hidden_layer, lambda: xnmt.linear.Linear(input_dim=self.input_dim, output_dim=self.hidden_dim, param_init=param_init_hidden, bias_init=bias_init_hidden)) if activation == 'tanh': self.activation = dy.tanh elif activation == 'relu': self.activation = dy.rectify elif activation == 'sigmoid': self.activation = dy.sigmoid elif activation == 'elu': self.activation = dy.elu elif activation == 'selu': self.activation = dy.selu elif activation == 'asinh': self.activation = dy.asinh else: raise ValueError('Unknown activation %s' % activation) self.output_projector = self.add_serializable_component("output_projector", output_projector, lambda: output_projector or xnmt.linear.Linear( input_dim=self.hidden_dim, output_dim=self.output_dim, param_init=param_init_output, bias_init=bias_init_output))
def __init__(self, in_height, out_height):
  """
  Args:
    in_height: input dimension of the affine transform
    out_height: output dimension of the affine transform
  """
  model = ParamManager.my_params(self)
  self.in_height = in_height
  self.out_height = out_height
  normalInit = dy.NormalInitializer(0, 0.1)
  self.pW = model.add_parameters(dim=(self.out_height, self.in_height), init=normalInit)
  self.pb = model.add_parameters(dim=self.out_height)
def __init__(self,
             ngram_size,
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)),
             embed_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim")):
  model = ParamManager.my_params(self)
  dim = (1, ngram_size, embed_dim, hidden_dim)
  self.filter = model.add_parameters(dim=dim, init=param_init.initializer(dim))
  self.bias = model.add_parameters(dim=(embed_dim,), init=bias_init.initializer((embed_dim,)))
  self.ngram_size = ngram_size
  self.embed_dim = embed_dim
def __init__(self, in_height, out_height, nonlinearity='linear'):
  """
  Args:
    in_height: input dimension of the affine transform
    out_height: output dimension of the affine transform
    nonlinearity: nonlinear activation function
  """
  model = ParamManager.my_params(self)
  self.in_height = in_height
  self.out_height = out_height
  self.nonlinearity = nonlinearity
  normalInit = dy.NormalInitializer(0, 0.1)
  self.pW = model.add_parameters(dim=(self.out_height, self.in_height), init=normalInit)
  self.pb = model.add_parameters(dim=self.out_height)
def __init__(self,
             dec_layers=1,
             enc_dim=Ref("exp_global.default_layer_dim"),
             dec_dim=Ref("exp_global.default_layer_dim"),
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)),
             projector=None):
  param_col = ParamManager.my_params(self)
  self.dec_layers = dec_layers
  self.enc_dim = enc_dim
  self.dec_dim = dec_dim
  self.projector = self.add_serializable_component(
    "projector", projector,
    lambda: xnmt.linear.Linear(input_dim=self.enc_dim,
                               output_dim=self.dec_dim,
                               param_init=param_init,
                               bias_init=bias_init))
def __init__(self, layers=1, input_dim=512, h=1, dropout=0.0, attn_dropout=False, layer_norm=False, **kwargs):
  dy_model = ParamManager.my_params(self)
  self.layer_names = []
  for i in range(1, layers + 1):
    name = 'l{}'.format(i)
    layer = EncoderLayer(dy_model, input_dim, h, attn_dropout, layer_norm)
    self.layer_names.append((name, layer))
  self.dropout_val = dropout
def __init__(self,
             word_vocab=None,
             src_vocab=Ref(Path("model.src_reader.vocab")),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             vocab_size=25000):
  super().__init__()
  param_collection = ParamManager.my_params(self)
  if word_vocab is None:
    word_vocab = Vocab()
    dict_entry = vocab_size
  else:
    word_vocab.freeze()
    word_vocab.set_unk(word_vocab.UNK_STR)
    dict_entry = len(word_vocab)
  self.src_vocab = src_vocab
  self.word_vocab = word_vocab
  self.embedding = param_collection.add_lookup_parameters((dict_entry, hidden_dim))
def __init__(self,
             vocab=None,
             vocab_size=1000000,
             count_file=None,
             min_count=1,
             embed_dim=Ref("exp_global.default_layer_dim")):
  assert vocab is not None
  self.vocab = vocab
  self.lookup = ParamManager.my_params(self).add_lookup_parameters((vocab_size, embed_dim))
  self.frequent_words = None
  if count_file is not None:
    print("Reading count reference...")
    frequent_words = set()
    with open(count_file, "r") as fp:
      for line in fp:
        line = line.strip().split("\t")
        cnt = int(line[-1])
        substr = "".join(line[0:-1])
        if cnt >= min_count:
          frequent_words.add(substr)
    self.frequent_words = frequent_words
def __init__(self,
             input_dim,
             output_dim,
             bias=True,
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  self.bias = bias
  self.output_dim = output_dim
  model = ParamManager.my_params(self)
  self.W1 = model.add_parameters((output_dim, input_dim),
                                 init=param_init.initializer((output_dim, input_dim)))
  if self.bias:
    self.b1 = model.add_parameters((output_dim,),
                                   init=bias_init.initializer((output_dim,)))
def __init__(self, input_dim=Ref("exp_global.default_layer_dim"), hidden_dim=Ref("exp_global.default_layer_dim"), output_dim=Ref("exp_global.default_layer_dim", default=None), param_init_hidden=Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init_hidden=Ref("exp_global.bias_init", default=bare(ZeroInitializer)), param_init_output=Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init_output=Ref("exp_global.bias_init", default=bare(ZeroInitializer)), hidden_layer=None, output_projector=None, yaml_path=None, vocab_size=None, vocab=None, trg_reader=Ref("model.trg_reader", default=None), decoder_rnn_dim=Ref("exp_global.default_layer_dim", default=None)): model = ParamManager.my_params(self) self.input_dim = input_dim self.hidden_dim = hidden_dim self.output_dim = output_dim if yaml_path is not None and "decoder" in yaml_path: self.input_dim += decoder_rnn_dim self.output_dim = self.choose_vocab_size(vocab_size, vocab, trg_reader) self.save_processed_arg("vocab_size", self.output_dim) self.hidden_layer = self.add_serializable_component( "hidden_layer", hidden_layer, lambda: xnmt.linear.Linear(input_dim=self.input_dim, output_dim=self.hidden_dim, param_init=param_init_hidden, bias_init=bias_init_hidden)) self.output_projector = self.add_serializable_component( "output_projector", output_projector, lambda: output_projector or xnmt.linear.Linear(input_dim=self.hidden_dim, output_dim=self.output_dim, param_init=param_init_output, bias_init=bias_init_output))
def __init__( self, input_dim: int = Ref("exp_global.default_layer_dim"), state_dim: int = Ref("exp_global.default_layer_dim"), param_init: ParamInitializer = Ref("exp_global.param_init", default=bare(GlorotInitializer)), truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False) ) -> None: if truncate_dec_batches: raise NotImplementedError( "truncate_dec_batches not yet implemented for BilinearAttender" ) self.input_dim = input_dim self.state_dim = state_dim param_collection = ParamManager.my_params(self) self.pWa = param_collection.add_parameters((input_dim, state_dim), init=param_init.initializer( (input_dim, state_dim))) self.curr_sent = None
def __init__(self, input_dim: int = Ref("exp_global.default_layer_dim"), output_dim: int = Ref("exp_global.default_layer_dim"), bias: bool = True, activation: str = 'tanh', param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)), bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))): self.bias = bias self.output_dim = output_dim self.input_dim = input_dim if activation == 'tanh': self.activation = dy.tanh elif activation == 'relu': self.activation = dy.rectify elif activation == 'sigmoid': self.activation = dy.sigmoid elif activation == 'elu': self.activation = dy.elu elif activation == 'selu': self.activation = dy.selu elif activation == 'asinh': self.activation = dy.asinh elif activation == 'identity': def identity(x): return x self.activation = identity else: raise ValueError('Unknown activation %s' % activation) model = ParamManager.my_params(self) self.W1 = model.add_parameters((self.output_dim, self.input_dim), init=param_init.initializer( (self.output_dim, self.input_dim))) if self.bias: self.b1 = model.add_parameters((self.output_dim, ), init=bias_init.initializer( (self.output_dim, )))
def __init__(self,
             layers=1,
             input_dim=512,
             h=1,
             dropout=0.0,
             attn_dropout=False,
             layer_norm=False,
             vocab_size=None,
             vocab=None,
             trg_reader=Ref("model.trg_reader")):
  dy_model = ParamManager.my_params(self)
  self.layer_names = []
  for i in range(1, layers + 1):
    name = 'l{}'.format(i)
    layer = DecoderLayer(dy_model, input_dim, h, attn_dropout, layer_norm)
    self.layer_names.append((name, layer))
  self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader)
  self.output_affine = LinearSent(dy_model, input_dim, self.vocab_size)
  self.dropout_val = dropout