def __init__(self, pc, dim_asp, dim_opi):
    self.pc = pc.add_subcollection()
    self.dim_asp = dim_asp
    self.dim_opi = dim_opi
    self._W_A = self.pc.add_parameters((2 * self.dim_opi, 2 * self.dim_asp), init=dy.UniformInitializer(0.2))
    self._W_O = self.pc.add_parameters((2 * self.dim_opi, 2 * self.dim_opi), init=dy.UniformInitializer(0.2))
    self._b = self.pc.add_parameters((2 * self.dim_opi,), init=dy.ConstInitializer(0.0))
def __init__(self, pc, n_in, n_out, dropout_rate):
    self.n_in = n_in
    self.n_out = n_out
    self.dropout_rate = dropout_rate
    self.pc = pc.add_subcollection()
    self._v = self.pc.add_parameters((self.n_out,), init=dy.UniformInitializer(0.2))
    self._W1 = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._W2 = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._bd = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
def __init__(self, pc, n_steps, n_in):
    """
    :param n_steps: number of steps in truncated self-attention
    :param n_in: input dimension
    """
    self.pc = pc.add_subcollection()
    self.n_steps = n_steps
    self.n_in = n_in
    self._v = self.pc.add_parameters((self.n_in,), init=dy.UniformInitializer(0.2))
    self._W1 = self.pc.add_parameters((self.n_in, self.n_in), init=dy.UniformInitializer(0.2))
    self._W2 = self.pc.add_parameters((self.n_in, self.n_in), init=dy.UniformInitializer(0.2))
    self._W3 = self.pc.add_parameters((self.n_in, self.n_in), init=dy.UniformInitializer(0.2))
def __init__(self, pc, n_in, n_out, dropout_rate):
    self.n_in = n_in
    self.n_out = n_out
    self.dropout_rate = dropout_rate
    self.pc = pc.add_subcollection()
    self._WC = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._WP = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._WR = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._UP = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._UR = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._bc = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
    self._bp = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
    self._br = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
def __init__(self):
    self.model = dy.Model()
    # Embeds the five states at each square: empty, blocked, occupied by agent,
    # goal, and * (occupied by both agent and goal).
    self.emb_env_mat = self.model.add_lookup_parameters((5, BLOCK_EMB_SIZE))
    self.num_spots = env.WORLD_SIZE * env.WORLD_SIZE
    tot_size = BLOCK_EMB_SIZE * self.num_spots
    self.l1_weights = self.model.add_parameters((tot_size, int(tot_size / 2)),
                                                init=dy.UniformInitializer(0.1))
    self.l1_biases = self.model.add_parameters((int(tot_size / 2),),
                                               init=dy.UniformInitializer(0.1))
def initializer(self, dim, is_lookup=False, num_shared=1):
    if is_lookup:
        fan_in = dim[0]
    else:
        fan_in = dim[-1]
    s = self.scale * np.sqrt(3. / fan_in)
    return dy.UniformInitializer(s)
def __init__(self, nl, di, dh, du, vs, pc, dr=0.0, pre_embs=None):
    super(BiUserLSTMEncoder, self).__init__(nl, di, dh, du, vs, pc, dr, pre_embs)
    self.dim += dh
    # Backward encoder
    self.rev_lstm = dy.VanillaLSTMBuilder(self.nl, self.di, self.dh, self.pc)
    self.rev_Th_p = self.pc.add_parameters((dh, du),
                                           init=dy.UniformInitializer(1 / np.sqrt(dh)),
                                           name='revTh')
def LeCunUniform(fan_in, scale=1.0):
    """
    Reference: LeCun 98, Efficient Backprop
    http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
    """
    s = scale * np.sqrt(3. / fan_in)
    return dy.UniformInitializer(s)
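# Usage sketch (illustrative, not part of the original source): initialize a
# hypothetical 300 x 100 weight matrix with bounds derived from its fan-in (100).
_sketch_pc = dy.ParameterCollection()
_sketch_W = _sketch_pc.add_parameters((300, 100), init=LeCunUniform(100))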
def Convolution1d(fsz, cmotsz, dsz, pc, strides=(1, 1, 1, 1), name="conv"):
    """1D Convolution.

    :param fsz: int, Size of conv filter.
    :param cmotsz: int, Size of conv output.
    :param dsz: int, Size of the input.
    :param pc: dy.ParameterCollection
    :param strides: Tuple[int, int, int, int]
    """
    conv_pc = pc.add_subcollection(name=name)
    fan_in = dsz * fsz
    fan_out = cmotsz * fsz
    # PyTorch and DyNet have a gain parameter with suggested values based on
    # the nonlinearity type; this defaults to the one for relu for now.
    glorot_bounds = 0.5 * np.sqrt(6 / (fan_in + fan_out))
    weight = conv_pc.add_parameters((1, fsz, dsz, cmotsz),
                                    init=dy.UniformInitializer(glorot_bounds),
                                    name='weight')
    bias = conv_pc.add_parameters((cmotsz,), name="bias")

    def conv(input_):
        """Perform the 1D conv.

        :param input_: dy.Expression ((1, T, dsz), B)

        Returns:
            dy.Expression ((cmotsz,), B)
        """
        c = dy.conv2d_bias(input_, weight, bias, strides, is_valid=False)
        activation = dy.rectify(c)
        mot = dy.reshape(dy.max_dim(activation, 1), (cmotsz,))
        return mot

    return conv
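# Usage sketch (illustrative, not part of the original source): build a filter
# bank over 50-dimensional inputs and apply it to a dummy length-7 sequence in
# the ((1, T, dsz), B) layout described by the docstring above.
_demo_pc = dy.ParameterCollection()
_demo_conv = Convolution1d(fsz=3, cmotsz=100, dsz=50, pc=_demo_pc)
dy.renew_cg()
_demo_input = dy.inputTensor(np.zeros((1, 7, 50)))
_demo_mot = _demo_conv(_demo_input)  # dy.Expression of shape ((100,), 1)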
def evaluate(self, inputs, train=False):
    """
    Apply all MLP layers to concatenated input
    :param inputs: (key, vector) per feature type
    :param train: are we training now?
    :return: output vector of size self.output_dim
    """
    input_keys, inputs = list(map(list, zip(*list(inputs))))
    if self.input_keys:
        assert input_keys == self.input_keys, "Got: %s\nBut expected input keys: %s" % (
            self.input_keys_str(self.input_keys), self.input_keys_str(input_keys))
    else:
        self.input_keys = input_keys
    if self.gated:
        gates = self.params.get("gates")
        if gates is None:  # FIXME attention weights should not be just parameters, but based on biaffine product?
            gates = self.params["gates"] = self.model.add_parameters(
                (len(inputs), self.gated), init=dy.UniformInitializer(1))
        input_dims = [i.dim()[0][0] for i in inputs]
        max_dim = max(input_dims)
        x = dy.concatenate_cols([
            dy.concatenate([i, dy.zeroes(max_dim - d)])  # Pad with zeros to get uniform dim
            if d < max_dim else i
            for i, d in zip(inputs, input_dims)
        ]) * gates
        # Possibly multiple "attention heads" -- concatenate outputs to one vector
        inputs = [dy.reshape(x, (x.dim()[0][0] * x.dim()[0][1],))]
    x = dy.concatenate(inputs)
    assert len(x.dim()[0]) == 1, "Input should be a vector, but has dimension " + str(x.dim()[0])
    dim = x.dim()[0][0]
    if self.input_dim:
        assert dim == self.input_dim, "Input dim mismatch: %d != %d" % (dim, self.input_dim)
    else:
        self.init_params(dim)
    self.config.print(self, level=4)
    if self.total_layers:
        if self.weights is None:
            self.weights = [[self.params[prefix + str(i)] for prefix in ("W", "b")]
                            for i in range(self.total_layers)]
            if self.weights[0][0].dim()[0][1] < dim:  # number of columns in W0
                self.weights[0][0] = dy.concatenate_cols([self.weights[0][0], self.params["W0+"]])
        for i, (W, b) in enumerate(self.weights):
            self.config.print(lambda: x.npvalue().tolist(), level=4)
            try:
                if train and self.dropout:
                    x = dy.dropout(x, self.dropout)
                x = self.activation()(W * x + b)
            except ValueError as e:
                raise ValueError("Error in evaluating layer %d of %d" % (i + 1, self.total_layers)) from e
    self.config.print(lambda: x.npvalue().tolist(), level=4)
    return x
def __init__(self, pc, n_in, n_out, dropout_rate):
    """
    LSTM constructor
    :param pc: parameter collection
    :param n_in: input dimension
    :param n_out: hidden (output) dimension
    :param dropout_rate: dropout rate
    """
    self.n_in = n_in
    self.n_out = n_out
    self.dropout_rate = dropout_rate
    self.pc = pc.add_subcollection()
    self._W = self.pc.add_parameters((4 * self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._U = self.pc.add_parameters((4 * self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._b = self.pc.add_parameters((4 * self.n_out,), init=dy.ConstInitializer(0.0))
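# Sketch (not from the original source) of how parameters shaped like the ones
# above are typically consumed in a hand-rolled LSTM step. The packing of the
# 4 * n_out rows into input/forget/output/candidate blocks is an assumption;
# on older DyNet versions, wrap W, U, b with dy.parameter(...) first.
def _lstm_step_sketch(W, U, b, x_t, h_prev, c_prev, n_out):
    gates = W * x_t + U * h_prev + b
    i = dy.logistic(dy.pick_range(gates, 0, n_out))
    f = dy.logistic(dy.pick_range(gates, n_out, 2 * n_out))
    o = dy.logistic(dy.pick_range(gates, 2 * n_out, 3 * n_out))
    g = dy.tanh(dy.pick_range(gates, 3 * n_out, 4 * n_out))
    c_t = dy.cmult(f, c_prev) + dy.cmult(i, g)
    h_t = dy.cmult(o, dy.tanh(c_t))
    return h_t, c_t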
def __init__(self, pc, n_in, n_out, use_bias=False):
    self.pc = pc.add_subcollection()
    self.n_in = n_in
    self.n_out = n_out
    self.use_bias = use_bias
    self._W = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    if self.use_bias:
        self._b = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
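# Sketch (not from the original source) of the affine transform such a layer
# would typically expose; `layer` is an instance of the class above and `x` is
# any dy.Expression of dimension n_in.
def _linear_forward_sketch(layer, x):
    out = layer._W * x
    if layer.use_bias:
        out = out + layer._b
    return out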
def __init__(self, input_dims, output_dims, model):
    self.input_dims = input_dims
    self.output_dims = output_dims
    self.model = model
    # input, forget, candidate and output gate weights over [x_t; h_{t-1}]
    self.W_i = model.add_parameters((output_dims, input_dims + output_dims),
                                    init=dynet.UniformInitializer(0.01))
    self.b_i = model.add_parameters((output_dims,), init=dynet.ConstInitializer(0))
    self.W_f = model.add_parameters((output_dims, input_dims + output_dims),
                                    init=dynet.UniformInitializer(0.01))
    self.b_f = model.add_parameters((output_dims,), init=dynet.ConstInitializer(0))
    self.W_c = model.add_parameters((output_dims, input_dims + output_dims),
                                    init=dynet.UniformInitializer(0.01))
    self.b_c = model.add_parameters((output_dims,), init=dynet.ConstInitializer(0))
    self.W_o = model.add_parameters((output_dims, input_dims + output_dims),
                                    init=dynet.UniformInitializer(0.01))
    self.b_o = model.add_parameters((output_dims,), init=dynet.ConstInitializer(0))
    # initial cell state
    self.c0 = model.add_parameters((output_dims,), init=dynet.ConstInitializer(0))
    self.W_params = [self.W_i, self.W_f, self.W_c, self.W_o]
    self.b_params = [self.b_i, self.b_f, self.b_c, self.b_o]
    self.params = self.W_params + self.b_params + [self.c0]
def _create_model(self):
    self.logger.info('Creating the model...')
    model = dy.ParameterCollection()
    # context gru encoders
    c_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"], self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    c_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"], self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    # question gru encoders
    q_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"], self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    q_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"], self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    # embedding parameters
    lookup_params = model.add_lookup_parameters(
        (self.model_args["vocab_size"], self.model_args["gru_input_dim"]),
        dy.UniformInitializer(self.model_args["lookup_init_scale"]))
    unk_lookup_params = model.add_lookup_parameters(
        (self.model_args["number_of_unks"], self.model_args["gru_input_dim"]),
        dy.UniformInitializer(self.model_args["lookup_init_scale"]))
    self.logger.info('Done creating the model')
    model_parameters = {"c_fwdRnn": c_fwdRnn,
                        "c_bwdRnn": c_bwdRnn,
                        "q_fwdRnn": q_fwdRnn,
                        "q_bwdRnn": q_bwdRnn,
                        "lookup_params": lookup_params,
                        "unk_lookup_params": unk_lookup_params}
    return model, model_parameters
def initializer(self, dim, is_lookup: bool = False, num_shared: numbers.Integral = 1) -> dy.UniformInitializer:
    if is_lookup:
        fan_in = dim[0]
    else:
        fan_in = dim[-1]
    s = self.scale * np.sqrt(3. / fan_in)
    return dy.UniformInitializer(s)
def __init__(self, model: dy.ParameterCollection, in_dim: int, out_dim: int,
             init: dy.PyInitializer = None, bias: bool = True):
    pc = model.add_subcollection()
    if not init:
        init = dy.UniformInitializer(math.sqrt(in_dim))
    self.W = pc.add_parameters((out_dim, in_dim), init=init)
    if bias:
        self.b = pc.add_parameters((out_dim,), init=init)
    self.pc = pc
    self.bias = bias
def __init__(
    self,
    bigrams_size,
    unigrams_size,
    bigrams_dims,
    unigrams_dims,
    lstm_units,
    hidden_units,
    label_size,
    span_nums,
    droprate=0,
):
    self.bigrams_size = bigrams_size
    self.bigrams_dims = bigrams_dims
    self.unigrams_dims = unigrams_dims
    self.unigrams_size = unigrams_size
    self.lstm_units = lstm_units
    self.hidden_units = hidden_units
    self.span_nums = span_nums
    self.droprate = droprate
    self.label_size = label_size

    self.model = dynet.Model()
    self.trainer = dynet.AdadeltaTrainer(self.model, eps=1e-7, rho=0.99)
    random.seed(1)

    self.activation = dynet.rectify

    self.bigram_embed = self.model.add_lookup_parameters(
        (self.bigrams_size, self.bigrams_dims),
    )
    self.unigram_embed = self.model.add_lookup_parameters(
        (self.unigrams_size, self.unigrams_dims),
    )
    self.fwd_lstm1 = LSTM(self.bigrams_dims + self.unigrams_dims, self.lstm_units, self.model)
    self.back_lstm1 = LSTM(self.bigrams_dims + self.unigrams_dims, self.lstm_units, self.model)
    self.fwd_lstm2 = LSTM(2 * self.lstm_units, self.lstm_units, self.model)
    self.back_lstm2 = LSTM(2 * self.lstm_units, self.lstm_units, self.model)

    self.p_hidden_W = self.model.add_parameters(
        (self.hidden_units, 2 * self.span_nums * self.lstm_units),
        dynet.UniformInitializer(0.01))
    self.p_hidden_b = self.model.add_parameters((self.hidden_units,),
                                                dynet.ConstInitializer(0))
    self.p_output_W = self.model.add_parameters(
        (self.label_size, self.hidden_units), dynet.ConstInitializer(0))
    self.p_output_b = self.model.add_parameters((self.label_size,),
                                                dynet.ConstInitializer(0))
def __init__(self, pc, n_in, n_out, n_steps, dropout_rate):
    self.pc = pc.add_subcollection()
    self.n_in = n_in
    self.n_out = n_out
    self.n_steps = n_steps
    self.dropout_rate = dropout_rate
    # parameters for recurrent step
    self._W_xr = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._W_hr = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._br = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
    self._W_xz = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._W_hz = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._bz = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
    self._W_xh = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._W_hh = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._bh = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
    # for attention modeling
    attention_scale = 1.0 / math.sqrt(1.0)  # i.e. the scale is simply 1.0
    self._u = self.pc.add_parameters((self.n_out,), init=dy.UniformInitializer(attention_scale))
    self._W_h = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._W_x = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._W_htilde = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
def __init__(self,
             model,
             embedding_size,
             name="",
             initializer=dy.UniformInitializer(0.1),
             vocabulary=None,
             num_tokens=-1,
             anonymizer=None):
    if vocabulary:
        assert num_tokens < 0, "Specified a vocabulary but also set number of tokens to " + \
            str(num_tokens)
        self.in_vocabulary = lambda token: token in vocabulary.tokens
        self.vocab_token_lookup = lambda token: vocabulary.token_to_id(token)
        self.unknown_token_id = vocabulary.token_to_id(vocabulary_handler.UNK_TOK)
        self.vocabulary_size = len(vocabulary)
    else:
        def check_vocab(index):
            """ Makes sure the index is in the vocabulary."""
            assert index < num_tokens, "Passed token ID " + \
                str(index) + "; expecting something less than " + str(num_tokens)
            return index < num_tokens

        self.in_vocabulary = check_vocab
        self.vocab_token_lookup = lambda x: x
        self.unknown_token_id = num_tokens  # Deliberately throws an error here,
        # But should crash before this
        self.vocabulary_size = num_tokens

    self.anonymizer = anonymizer

    emb_name = name + "-tokens"
    print("Creating token embedder called " + emb_name + " of size " +
          str(self.vocabulary_size) + " x " + str(embedding_size))
    self.token_embedding_matrix = model.add_lookup_parameters(
        (self.vocabulary_size, embedding_size), init=initializer, name=emb_name)

    if self.anonymizer:
        emb_name = name + "-entities"
        entity_size = len(self.anonymizer.entity_types)
        print("Creating entity embedder called " + emb_name + " of size " +
              str(entity_size) + " x " + str(embedding_size))
        self.entity_embedding_matrix = model.add_lookup_parameters(
            (entity_size, embedding_size), init=initializer, name=emb_name)
def __init__(self, pc, n_chars, dim_char, pretrained_embeddings=None):
    """
    :param pc: parameter collection
    :param n_chars: number of distinct characters
    :param dim_char: dimension of character embedding
    """
    self.pc = pc.add_subcollection()
    self.n_chars = n_chars
    self.dim_char = dim_char
    # network parameters
    # self.W = self.pc.add_lookup_parameters((self.n_chars, self.dim_char),
    #                                        init='uniform', scale=np.sqrt(3.0 / self.dim_char))
    self.W = self.pc.add_lookup_parameters((self.n_chars, self.dim_char),
                                           init=dy.UniformInitializer(np.sqrt(3.0 / self.dim_char)))
    if pretrained_embeddings is not None:
        print("Use pre-trained character embeddings...")
        self.W.init_from_array(pretrained_embeddings)
def initializer(self, dim, is_lookup=False, num_shared=1):
    """
    Args:
        dim (tuple): dimensions of parameter tensor
        is_lookup (bool): Whether the parameter is a lookup parameter
        num_shared (int): If > 1, treat the first dimension as spanning multiple matrices,
                          each of which is initialized individually
    Returns:
        a dynet initializer object
    """
    gain = getattr(self, "gain", 1.0)
    if num_shared == 1:
        return dy.GlorotInitializer(gain=gain, is_lookup=is_lookup)
    else:
        per_param_dims = list(dim)
        assert per_param_dims[0] % num_shared == 0
        per_param_dims[0] //= num_shared
        if is_lookup:
            per_param_dims = per_param_dims[:-1]
        scale = gain * math.sqrt(3.0 * len(per_param_dims)) / math.sqrt(sum(per_param_dims))
        return dy.UniformInitializer(scale=scale)
def __init__(self, pc, n_in, n_out, use_bias=False, nonlinear=None):
    """
    :param pc: parameter collection to hold the parameters
    :param n_in: input dimension
    :param n_out: output dimension
    :param use_bias: whether to add a bias, default NOT
    :param nonlinear: non-linear activation function
    """
    # create a sub-collection of the current parameter collection and return it;
    # the returned sub-collection is simply a ParameterCollection object tied to a parent collection
    self.pc = pc.add_subcollection()
    self.n_in = n_in
    self.n_out = n_out
    self.use_bias = use_bias
    self.nonlinear = nonlinear
    # add a parameter to the ParameterCollection with a given initializer
    self._W = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    if self.use_bias:
        self._b = self.pc.add_parameters((self.n_out,), init=dy.ConstInitializer(0.0))
def __init__(
    self,
    model,
    char_vocab,
    embed_size=30,
    window_size=3,
    filter_size=30,
    dropout=0.33,
):
    self.vocab = char_vocab
    self.model = model
    # (n_chars, 1, 1, embed_size) lookup, so per-character embeddings can be
    # concatenated directly into a conv2d input
    self.char_embeds = self.model.add_lookup_parameters(
        (len(char_vocab), 1, 1, embed_size),
        init=dy.UniformInitializer(np.sqrt(3.0 / embed_size)),
    )
    self.filter_size = filter_size
    self.W_cnn = self.model.add_parameters((1, window_size, embed_size, filter_size))
    self.b_cnn = self.model.add_parameters((filter_size,))
    self.b_cnn.zero()
    self.dropout = dropout
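# Sketch (not from the original source) of how the lookup table and conv
# parameters above could be applied to one word's character ids; the original
# class' forward pass is not shown here, so names and pooling are assumptions.
def _char_cnn_sketch(embedder, char_ids):
    # each lookup is ((1, 1, embed_size), 1); concatenate along the width axis
    embs = dy.concatenate([embedder.char_embeds[c] for c in char_ids], d=1)
    conv = dy.conv2d_bias(embs, embedder.W_cnn, embedder.b_cnn,
                          stride=[1, 1], is_valid=False)
    pooled = dy.max_dim(dy.rectify(conv), d=1)          # max over character positions
    return dy.reshape(pooled, (embedder.filter_size,))  # (filter_size,) word vector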
def Convolution1d(fsz, cmotsz, dsz, pc, strides=(1, 1, 1, 1), name="conv"):
    """1D Convolution.

    :param fsz: int, Size of conv filter.
    :param cmotsz: int, Size of conv output.
    :param dsz: int, Size of the input.
    :param pc: dy.ParameterCollection
    :param strides: Tuple[int, int, int, int]
    """
    conv_pc = pc.add_subcollection(name=name)
    fan_in = dsz * fsz
    fan_out = cmotsz * fsz
    # PyTorch and DyNet have a gain parameter with suggested values based on
    # the nonlinearity type; this defaults to the one for relu for now.
    glorot_bounds = 0.5 * np.sqrt(6.0 / (fan_in + fan_out))
    weight = conv_pc.add_parameters(
        (1, fsz, dsz, cmotsz),
        init=dy.UniformInitializer(glorot_bounds),
        name='weight'
    )
    bias = conv_pc.add_parameters((cmotsz,), name="bias")

    def conv(input_):
        """Perform the 1D conv.

        :param input_: dy.Expression ((1, T, dsz), B)

        Returns:
            dy.Expression ((cmotsz,), B)
        """
        c = dy.conv2d_bias(input_, weight, bias, strides, is_valid=False)
        activation = dy.rectify(c)
        # dy.max_dim(x, d=0) is currently slow (see https://github.com/clab/dynet/issues/1011)
        # so we do the max using max pooling instead.
        ((_, seq_len, _), _) = activation.dim()
        pooled = dy.maxpooling2d(activation, [1, seq_len, 1], strides)
        mot = dy.reshape(pooled, (cmotsz,))
        return mot

    return conv
def add_params(model, size, name=""):
    """ Adds parameters to the model.

    Inputs:
        model (dy.ParameterCollection): The parameter collection for the model.
        size (tuple of int): The size to create.
        name (str, optional): The name of the parameters.
    """
    if len(size) == 1:
        print("vector " + name + ": " + str(size[0]) + "; uniform in [-0.1, 0.1]")
    else:
        print("matrix " + name + ": " + str(size[0]) + " x " + str(size[1]) +
              "; uniform in [-0.1, 0.1]")
    return model.add_parameters(size, init=dy.UniformInitializer(0.1), name=name)
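# Usage sketch (illustrative; the names and sizes below are made up):
_params_pc = dy.ParameterCollection()
_demo_weights = add_params(_params_pc, (128, 64), name="demo-weights")
_demo_bias = add_params(_params_pc, (128,), name="demo-bias")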
def __init__(self, pc, n_in, n_out, n_steps, dropout_rate):
    self.pc = pc.add_subcollection()
    self.n_in = n_in
    self.n_out = n_out
    self.dropout_rate = dropout_rate
    # steps in truncated attention
    self.n_steps = n_steps
    self._W = self.pc.add_parameters((4 * self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._U = self.pc.add_parameters((4 * self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._b = self.pc.add_parameters((4 * self.n_out,), init=dy.ConstInitializer(0.0))

    attention_scale = 1.0 / math.sqrt(1.0)  # i.e. the scale is simply 1.0
    self._u = self.pc.add_parameters((self.n_out,), init=dy.UniformInitializer(attention_scale))
    self._W_h = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
    self._W_x = self.pc.add_parameters((self.n_out, self.n_in), init=dy.UniformInitializer(0.2))
    self._W_htilde = self.pc.add_parameters((self.n_out, self.n_out), init=dy.UniformInitializer(0.2))
import dynet as dy
import json
import numpy
import random
import sys

from tqdm import tqdm

_initializer = dy.UniformInitializer(0.1)
_zero_initializer = dy.ConstInitializer(0.0)


class Ensembler:
    '''
    Learns to choose from outputs of two independent systems with differing
    sources of information
    '''

    def __init__(self, irregularity_model_file, vocab, args):
        self.vocab = vocab
        self.epochs = args['epochs'] if 'epochs' in args else 10
        self.decay_rate = args['decay_rate'] if 'decay_rate' in args else .5
        self.lr = args['lr'] if 'lr' in args else 0.005
        self.batch_size = args['batch-size'] if 'batch-size' in args else 1
        self.choices = args['num-choices'] if 'num-choices' in args else 2
        self.model_file = irregularity_model_file
        self.pc = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.pc, alpha=self.lr)

    def define_params(self, observations):
        self.param_dict = {}
def __init__(
    self,
    word_count,
    tag_count,
    word_dims,
    tag_dims,
    lstm_units,
    hidden_units,
    struct_out,
    label_out,
    droprate=0,
    struct_spans=4,
    label_spans=3,
):
    self.word_count = word_count
    self.tag_count = tag_count
    self.word_dims = word_dims
    self.tag_dims = tag_dims
    self.lstm_units = lstm_units
    self.hidden_units = hidden_units
    self.struct_out = struct_out
    self.label_out = label_out
    self.droprate = droprate

    self.model = dynet.Model()
    self.trainer = dynet.AdadeltaTrainer(self.model, eps=1e-7, rho=0.99)
    random.seed(1)

    self.activation = dynet.rectify

    self.word_embed = self.model.add_lookup_parameters(
        (word_count, word_dims),
    )
    self.tag_embed = self.model.add_lookup_parameters(
        (tag_count, tag_dims),
    )

    self.fwd_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
    self.back_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
    self.fwd_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
    self.back_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)

    self.struct_hidden_W = self.model.add_parameters(
        (hidden_units, 4 * struct_spans * lstm_units),
        dynet.UniformInitializer(0.01),
    )
    self.struct_hidden_b = self.model.add_parameters(
        (hidden_units,),
        dynet.ConstInitializer(0),
    )
    self.struct_output_W = self.model.add_parameters(
        (struct_out, hidden_units),
        dynet.ConstInitializer(0),
    )
    self.struct_output_b = self.model.add_parameters(
        (struct_out,),
        dynet.ConstInitializer(0),
    )

    self.label_hidden_W = self.model.add_parameters(
        (hidden_units, 4 * label_spans * lstm_units),
        dynet.UniformInitializer(0.01),
    )
    self.label_hidden_b = self.model.add_parameters(
        (hidden_units,),
        dynet.ConstInitializer(0),
    )
    self.label_output_W = self.model.add_parameters(
        (label_out, hidden_units),
        dynet.ConstInitializer(0),
    )
    self.label_output_b = self.model.add_parameters(
        (label_out,),
        dynet.ConstInitializer(0),
    )
from tupa.features.feature_params import MISSING_VALUE

TRAINERS = {
    "sgd": (dy.SimpleSGDTrainer, "e0"),
    "cyclic": (dy.CyclicalSGDTrainer, "e0_min"),
    "momentum": (dy.MomentumSGDTrainer, "e0"),
    "adagrad": (dy.AdagradTrainer, "e0"),
    "adadelta": (dy.AdadeltaTrainer, None),
    "rmsprop": (dy.RMSPropTrainer, "e0"),
    "adam": (partial(dy.AdamTrainer, beta_2=0.9), "alpha"),
}

INITIALIZERS = {
    "glorot_uniform": dy.GlorotInitializer(),
    "normal": dy.NormalInitializer(),
    "uniform": dy.UniformInitializer(1),
    "const": dy.ConstInitializer(0),
}

ACTIVATIONS = {
    "square": dy.square,
    "cube": dy.cube,
    "tanh": dy.tanh,
    "sigmoid": dy.logistic,
    "relu": dy.rectify,
}


class NeuralNetwork(Classifier):
    """
    Neural network to be used by the parser for action classification.
    Uses dense features.
    """
def initializer(self, dim, is_lookup: bool = False, num_shared: numbers.Integral = 1) -> dy.UniformInitializer:
    return dy.UniformInitializer(scale=self.scale)