def __init__(self, params, source_alphabet_size, embedding_size,
             hidden_units, stack_embedding_size):
    input_size = source_alphabet_size + 2
    output_size = source_alphabet_size + 1
    self.stack_embedding_size = stack_embedding_size
    self.input_embeddings = params.add_lookup_parameters(
        (input_size, embedding_size), name='input-embeddings')
    self.output_embeddings = params.add_lookup_parameters(
        (output_size, embedding_size), name='output-embeddings')
    self.controller = dy.CoupledLSTMBuilder(
        1, embedding_size + stack_embedding_size, hidden_units, params)
    # Intentionally set the gain for the sigmoid layers low, since this
    # seems to work better
    gain = 0.5
    self.pop_strength_layer = add_layer(
        params, hidden_units, 1, sigmoid,
        weights_initializer=dy.GlorotInitializer(False, gain=gain),
        # Initialize the pop bias to -1 to allow information to propagate
        # through the stack
        bias_initializer=dy.ConstInitializer(-1.0),
        name='pop-strength')
    self.push_strength_layer = add_layer(
        params, hidden_units, 1, sigmoid,
        weights_initializer=dy.GlorotInitializer(False, gain=gain),
        bias_initializer=dy.GlorotInitializer(False, gain=gain),
        name='push-strength')
    self.push_value_layer = add_layer(
        params, hidden_units, stack_embedding_size, tanh, name='push-value')
    self.output_layer = combine_layers([
        add_layer(params, hidden_units, hidden_units, tanh, name='output'),
        # This adds an extra affine layer between the tanh and the softmax
        add_layer(params, hidden_units, output_size, linear, name='softmax')
    ])

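# `combine_layers` is not defined in this snippet; a minimal sketch, assuming
# it simply chains the given layers so each layer's output feeds the next:
def combine_layers(layers):
    def combined(x):
        # Apply each layer in order.
        for layer in layers:
            x = layer(x)
        return x
    return combined
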
def add_layer(params, input_size, output_size, activation_function,
              weights_initializer=None, bias_initializer=None, name=None):
    params = params.add_subcollection(name)
    if weights_initializer is None:
        weights_initializer = dynet.GlorotInitializer(
            False, activation_function.glorot_gain)
    if bias_initializer is None:
        bias_initializer = dynet.GlorotInitializer(
            False, activation_function.glorot_gain)
    return Layer(
        params.add_parameters((output_size, input_size),
                              weights_initializer, 'weights'),
        params.add_parameters(output_size, bias_initializer, 'bias'),
        activation_function)

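# For context, a minimal sketch of the `Layer` container the helper above
# returns; the field names and the forward pass are assumptions, since the
# snippet only shows the constructor call:
from collections import namedtuple

import dynet

class Layer(namedtuple('Layer', ['weights', 'bias', 'activation'])):
    def __call__(self, x):
        # Affine transform followed by the stored activation.
        W = dynet.parameter(self.weights)
        b = dynet.parameter(self.bias)
        return self.activation(dynet.affine_transform([b, W, x]))
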
def __init__(self, model, input_dim, output_dim, act=dy.rectify,
             init_gain=math.sqrt(2.), ln=False):
    self.pc = model.add_subcollection()
    self.act = act
    self.ln = ln
    self.W = self.pc.add_parameters((output_dim, input_dim),
                                    init=dy.GlorotInitializer(gain=init_gain))
    self.b = self.pc.add_parameters(output_dim, init=dy.ConstInitializer(0.))
    if ln:
        self.g = self.pc.add_parameters(output_dim, init=dy.ConstInitializer(1.))
    self.spec = (input_dim, output_dim, act, init_gain, ln)

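# Only the constructor appears above. A plausible forward pass, assuming `ln`
# selects DyNet's layer_norm op with gain `g` and bias `b` (a sketch, not the
# original code):
def __call__(self, x):
    W = dy.parameter(self.W)
    b = dy.parameter(self.b)
    if self.ln:
        g = dy.parameter(self.g)
        # dy.layer_norm(x, g, b): normalize x, then rescale by g and shift by b
        return self.act(dy.layer_norm(W * x, g, b))
    return self.act(W * x + b)
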
def __init__(self, input_size, hidden_size=200, output_size=10):
    self.model = dynet.ParameterCollection()
    self.init = dynet.GlorotInitializer(gain=4.0)  # 4.0 for logistic
    ####
    # add layers
    self.layers = []
    # first layer
    self.layers.append(self.add_layer(input_size, hidden_size))
    # output layer
    self.layers.append(self.add_layer(hidden_size, output_size))
    self.trainer = dynet.SimpleSGDTrainer(m=self.model, learning_rate=.001)

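# The `add_layer` method is not shown in this snippet. A minimal sketch of
# what it presumably does, reusing the stored Glorot initializer for the
# weights; the (W, b) tuple layout is an assumption:
def add_layer(self, input_size, output_size):
    W = self.model.add_parameters((input_size, output_size), init=self.init)
    b = self.model.add_parameters(output_size, init=dynet.ConstInitializer(0.))
    return (W, b)
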
def __init__(self, pc, vocab_size, dim_embedding, dim_params):
    self.params = []
    for first_layer, sec_layer in zip(dim_params, dim_params[1:]):
        pW = pc.add_parameters((sec_layer, first_layer),
                               init=dy.GlorotInitializer())
        pb = pc.add_parameters(sec_layer)
        self.params.append(pW)  # pW with Xavier initialization
        self.params.append(pb)  # pb
    # embedding matrix, dimensions: vocab_size x dim_embedding
    self.lookup = pc.add_lookup_parameters((vocab_size, dim_embedding))

def __init__(self, model, n_char, char_dim, n_filter, win_sizes):
    pc = model.add_subcollection()
    self.clookup = pc.add_lookup_parameters((n_char, char_dim))
    self.Ws = [pc.add_parameters((char_dim, size, 1, n_filter),
                                 init=dy.GlorotInitializer(gain=0.5))
               for size in win_sizes]
    self.bs = [pc.add_parameters((n_filter), init=dy.ConstInitializer(0))
               for _ in win_sizes]
    self.win_sizes = win_sizes
    self.pc = pc
    self.spec = (n_char, char_dim, n_filter, win_sizes)

def __init__(self, model: dy.ParameterCollection, in_dim: int, out_dim: int,
             bias: bool = True,
             init: dy.PyInitializer = dy.GlorotInitializer()):
    pc = model.add_subcollection()
    init = init_wrap(init, (out_dim, in_dim))
    self.W = pc.add_parameters((out_dim, in_dim), init=init)
    if bias:
        self.b = pc.add_parameters((out_dim, ), init=0)
    self.pc = pc
    self.bias = bias
    self.spec = (in_dim, out_dim, bias, init)

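# Only the constructor is shown; a plausible forward pass for this linear
# layer (a sketch, assuming column-vector inputs):
def __call__(self, x):
    W = dy.parameter(self.W)
    if self.bias:
        return W * x + dy.parameter(self.b)
    return W * x
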
def __init__(self, m, in_dim, hid_dim, out_dim, non_lin=dy.tanh,
             opt=dy.AdamTrainer, n_iter=10):
    self._pw1 = m.add_parameters((in_dim, hid_dim), init=dy.GlorotInitializer())
    self._pw2 = m.add_parameters((hid_dim, hid_dim), init=dy.GlorotInitializer())
    self._pw3 = m.add_parameters((hid_dim, out_dim), init=dy.GlorotInitializer())
    self._pb1 = m.add_parameters((hid_dim,), init=dy.GlorotInitializer())
    self._pb2 = m.add_parameters((hid_dim,), init=dy.GlorotInitializer())
    self._pb3 = m.add_parameters((out_dim,), init=dy.GlorotInitializer())
    self.non_lin = non_lin
    self.opt = opt(m)
    self.n_iter = n_iter

def __init__(
        self, model: dy.ParameterCollection, h_dim: int, d_dim: int,
        f=dy.tanh, init: dy.PyInitializer = dy.GlorotInitializer()):
    pc = model.add_subcollection()
    init_W = init_wrap(init, (h_dim, d_dim))
    self.W = pc.add_parameters((h_dim, d_dim), init=init_W)
    init_B = init_wrap(init, (h_dim, h_dim))
    self.B = pc.add_parameters((h_dim, h_dim), init=init_B)
    self.pc, self.f = pc, f
    self.spec = (h_dim, d_dim, f, init)

def __init__(self, model: dy.ParameterCollection, sizes: List[int],
             f: 'nonlinear' = dy.tanh, p: float = 0.0, bias: bool = True,
             init: dy.PyInitializer = dy.GlorotInitializer()):
    pc = model.add_subcollection()
    self.W = [
        pc.add_parameters((x, y), init=init_wrap(init, (x, y)))
        for x, y in zip(sizes[1:], sizes[:-1])
    ]
    if bias:
        self.b = [pc.add_parameters((y, ), init=0) for y in sizes[1:]]
    self.pc, self.f, self.p, self.bias = pc, f, p, bias
    self.spec = (sizes, f, p, bias, init)

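# The forward pass is not shown. A sketch, under the assumption that `p` is a
# dropout rate applied after each hidden nonlinearity during training:
def __call__(self, x, train=False):
    h = x
    for i, W in enumerate(self.W):
        h = dy.parameter(W) * h
        if self.bias:
            h = h + dy.parameter(self.b[i])
        if i < len(self.W) - 1:  # nonlinearity on hidden layers only
            h = self.f(h)
            if train and self.p > 0.0:
                h = dy.dropout(h, self.p)
    return h
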
def initializer(self, dim, is_lookup=False, num_shared=1):
    """
    Args:
      dim (tuple): dimensions of parameter tensor
      is_lookup (bool): Whether the parameter is a lookup parameter
      num_shared (int): If > 1, treat the first dimension as spanning multiple
        matrices, each of which is initialized individually
    Returns:
      a dynet initializer object
    """
    gain = getattr(self, "gain", 1.0)
    if num_shared == 1:
        return dy.GlorotInitializer(gain=gain, is_lookup=is_lookup)
    else:
        per_param_dims = list(dim)
        assert per_param_dims[0] % num_shared == 0
        per_param_dims[0] //= num_shared
        if is_lookup:
            per_param_dims = per_param_dims[:-1]
        scale = gain * math.sqrt(3.0 * len(per_param_dims)) / math.sqrt(
            sum(per_param_dims))
        return dy.UniformInitializer(scale=scale)

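# A worked example of the shared branch (the numbers are illustrative, not
# from the original): a (1024, 256) parameter holding num_shared=4 stacked
# 256x256 matrices gets the per-matrix Glorot uniform bound:
import math

per_param_dims = [1024 // 4, 256]  # -> [256, 256]
scale = 1.0 * math.sqrt(3.0 * len(per_param_dims)) / math.sqrt(sum(per_param_dims))
print(scale)  # sqrt(6)/sqrt(512) ~= 0.108, i.e. U(-sqrt(6/(fan_in+fan_out)), +...)
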
def __init__(self, model, input_dim, output_dim, init_gain=math.sqrt(2.)):
    self.pc = model.add_subcollection()
    self.W = self.pc.add_parameters((output_dim, input_dim),
                                    init=dy.GlorotInitializer(gain=init_gain))
    self.spec = (input_dim, output_dim, init_gain)

from functools import partial

import dynet as dy

from tupa.classifiers.classifier import ClassifierProperty
from tupa.config import Config
from tupa.features.feature_params import MISSING_VALUE

TRAINERS = {
    "sgd": (dy.SimpleSGDTrainer, "e0"),
    "cyclic": (dy.CyclicalSGDTrainer, "e0_min"),
    "momentum": (dy.MomentumSGDTrainer, "e0"),
    "adagrad": (dy.AdagradTrainer, "e0"),
    "adadelta": (dy.AdadeltaTrainer, None),
    "rmsprop": (dy.RMSPropTrainer, "e0"),
    "adam": (partial(dy.AdamTrainer, beta_2=0.9), "alpha"),
}

INITIALIZERS = {
    "glorot_uniform": dy.GlorotInitializer(),
    "normal": dy.NormalInitializer(),
    "uniform": dy.UniformInitializer(1),
    "const": dy.ConstInitializer(0),
}

ACTIVATIONS = {
    "square": dy.square,
    "cube": dy.cube,
    "tanh": dy.tanh,
    "sigmoid": dy.logistic,
    "relu": dy.rectify,
}

class NeuralNetwork(Classifier):

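# The second element of each TRAINERS entry names the keyword argument that
# carries the learning rate for that optimizer (None means it takes none).
# A hedged usage sketch; `model` and the 0.001 rate are illustrative:
trainer_cls, lr_arg = TRAINERS["adam"]
kwargs = {lr_arg: 0.001} if lr_arg else {}
trainer = trainer_cls(model, **kwargs)  # e.g. dy.AdamTrainer(model, alpha=0.001)
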
vt = Vocab.from_corpus([tags])
nwords = vw.size()
ntags = vt.size()

# DyNet starts
model = dy.Model()
trainer = dy.SimpleSGDTrainer(model)
NUM_LAYERS = 1

embeddings, emb_dim = load_embeddings_file(embedding)

# init model parameters and initialize them
WORDS_LOOKUP = model.add_lookup_parameters((nwords, emb_dim),
                                           init=dy.GlorotInitializer())
init = 0
UNK_vec = np.random.rand(emb_dim)
# for words already in w2i, copy in the pretrained vector; otherwise fall
# back to the shared random UNK vector (data is kept as integer indices)
for word in vw.w2i.keys():
    if word in embeddings.keys():
        # print("found [" + word + "] in w2i")
        WORDS_LOOKUP.init_row(vw.w2i[word], embeddings[word])
    else:
        WORDS_LOOKUP.init_row(vw.w2i[word], UNK_vec)

p_t1 = model.add_lookup_parameters((ntags, 30))

# MLP on top of biLSTM outputs 100 -> 32 -> ntags
pH = model.add_parameters((75, 50 + 50))

# size of hidden layer
hidden_size = 200

# #### parameter `initializer`
# See http://dynet.readthedocs.io/en/latest/python_ref.html#parameters-initializers
#
# Next we need to "initialize" the parameter values. `GlorotInitializer` is a
# pretty standard approach; *however*, the `gain` parameter depends on the type
# of `activation` being used.

# In[13]:

################
# HYPERPARAMETER
################
initializer = dy.GlorotInitializer(gain=4.0)

# You'll notice that the objects are `_dynet.Parameters` and *not* `expressions`
# until you "wrap" them with `dy.parameter()`

# In[14]:

# W_1 (input x hidden) as a Parameters object
pW_1 = feed_forward_model.add_parameters(
    (input_size, hidden_size),
    init=initializer
)
type(pW_1), type(dy.parameter(pW_1))

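# For context (not in the original notebook): the logistic sigmoid scales its
# input by roughly 1/4 around zero, so gain=4.0 is the usual Glorot correction
# for it. A quick check of the resulting uniform bound, with illustrative sizes:
import math

fan_in, fan_out = 784, 200  # example layer sizes, not from the notebook
bound = 4.0 * math.sqrt(6.0 / (fan_in + fan_out))
print(bound)  # weights drawn from U(-bound, bound), ~0.312 here
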
import dynet

"""
various helper mappings
"""

## DyNet adds init option to choose initializer:
## https://github.com/clab/dynet/blob/master/python/CHANGES.md
INITIALIZER_MAP = {
    'glorot': dynet.GlorotInitializer(),
    'constant': dynet.ConstInitializer(0.01),
    'uniform': dynet.UniformInitializer(0.1),
    'normal': dynet.NormalInitializer(mean=0, var=1)
}

TRAINER_MAP = {
    "sgd": dynet.SimpleSGDTrainer,
    "adam": dynet.AdamTrainer,
    "adadelta": dynet.AdadeltaTrainer,
    "adagrad": dynet.AdagradTrainer,
    "momentum": dynet.MomentumSGDTrainer
}

ACTIVATION_MAP = {
    "tanh": dynet.tanh,
    "rectify": dynet.rectify
}

BUILDERS = {
    # dynet.LSTMBuilder is dynet.VanillaLSTMBuilder
    # (cf. https://github.com/clab/dynet/issues/474)
    "lstm": dynet.LSTMBuilder,
    "lstmc": dynet.CoupledLSTMBuilder,
    "gru": dynet.GRUBuilder,
    "rnn": dynet.SimpleRNNBuilder
}

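# A hedged usage sketch of the maps above (option strings and shapes are
# illustrative, not from the original module):
model = dynet.ParameterCollection()
trainer = TRAINER_MAP["sgd"](model)
W = model.add_parameters((100, 50), init=INITIALIZER_MAP["glorot"])
act = ACTIVATION_MAP["tanh"]
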
from encoder import Encoder
from decoder import Decoder
from parser import Parser
from baseline import *
from language_model import LanguageModel
from util import Reader
import dynet as dy
from misc import compute_eval_score, compute_perplexity
import os

initializers = {
    'glorot': dy.GlorotInitializer(),
    'constant': dy.ConstInitializer(0.01),
    'uniform': dy.UniformInitializer(0.1),
    'normal': dy.NormalInitializer(mean=0, var=1)
}

optimizers = {
    "sgd": dy.SimpleSGDTrainer,
    "adam": dy.AdamTrainer,
    "adadelta": dy.AdadeltaTrainer,
    "adagrad": dy.AdagradTrainer
}

class Session(object):
    def __init__(self, options):
        self.reader = Reader(options.data_dir, options.data_augment)
        self.options = options