def __init__(self, name, input_dim, hidden_dim, opts, update_fn, h0,
             inputs, context=None, context_dim=None):
    self.name_ = name
    self.update_fn = update_fn
    self.h0 = h0
    self.inputs = inputs    # input sequence
    self.context = context  # additional context to add at each timestep of input

    # params for standard recurrent step
    self.Uh = util.sharedMatrix(hidden_dim, hidden_dim, 'Uh', orthogonal_init=True)
    self.Wh = util.sharedMatrix(hidden_dim, input_dim, 'Wh', orthogonal_init=True)
    self.bh = util.shared(util.zeros((hidden_dim,)), 'bh')

    # params for context, if applicable
    if self.context is not None:
        self.Wch = util.sharedMatrix(hidden_dim, context_dim, 'Wch', orthogonal_init=True)

    # params for reset gate; initial bias to not reset
    self.Ur = util.sharedMatrix(hidden_dim, hidden_dim, 'Ur', orthogonal_init=True)
    self.Wr = util.sharedMatrix(hidden_dim, input_dim, 'Wr', orthogonal_init=True)
    self.br = util.shared(np.asarray([opts.gru_initial_bias] * hidden_dim), 'br')

    # params for carry gate; initial bias to never carry h_t_minus_1
    self.Uz = util.sharedMatrix(hidden_dim, hidden_dim, 'Uz', orthogonal_init=True)
    self.Wz = util.sharedMatrix(hidden_dim, input_dim, 'Wz', orthogonal_init=True)
    self.bz = util.shared(np.asarray([opts.gru_initial_bias] * hidden_dim), 'bz')
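# Illustrative sketch (not part of the class above): the GRU step these
# parameters imply, written in plain numpy for clarity. The gate convention is
# an assumption inferred from the comments above -- standard Cho-style gating,
# where a positive gru_initial_bias pushes r toward 1 ("not reset") and z
# toward 1 ("never carry h_{t-1}", i.e. mostly take the candidate). Where the
# context term enters and the tanh nonlinearity are also assumptions; all names
# below are local to this sketch.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x_t, h_tm1, Wh, Uh, bh, Wr, Ur, br, Wz, Uz, bz, Wch=None, c=None):
    # reset gate: how much of h_{t-1} feeds the candidate state
    r = sigmoid(np.dot(Wr, x_t) + np.dot(Ur, h_tm1) + br)
    # carry gate: how much of h_{t-1} is kept vs. replaced by the candidate
    z = sigmoid(np.dot(Wz, x_t) + np.dot(Uz, h_tm1) + bz)
    # candidate hidden state; optional context term mirrors self.Wch above
    pre = np.dot(Wh, x_t) + np.dot(Uh, r * h_tm1) + bh
    if Wch is not None and c is not None:
        pre += np.dot(Wch, c)
    h_candidate = np.tanh(pre)
    # z near 1 (the biased initial state) mostly takes the candidate
    return (1.0 - z) * h_tm1 + z * h_candidate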
def __init__(self, vocab_size, embedding_dim, initial_embeddings_file=None,
             train_embeddings=True):
    if not train_embeddings and initial_embeddings_file is None:
        print >>sys.stderr, "WARNING: not training embeddings and no initial embeddings provided"
    self.train_embeddings = train_embeddings
    if initial_embeddings_file:
        e = np.load(initial_embeddings_file)
        assert e.shape[0] == vocab_size, \
            "vocab size mismatch? loaded=%s expected=%s" % (e.shape[0], vocab_size)
        # TODO: code could handle this but just not wanting --embedding-dim set
        # when using init embeddings
        assert e.shape[1] == embedding_dim, \
            "dimensionality config error: loaded embeddings are %sd but --embedding-dim is set to %sd" % (e.shape[1], embedding_dim)
        assert e.dtype == np.float32, "expected float32 embeddings, got %s" % e.dtype
        self.shared_embeddings = util.shared(e, 'tied_embeddings')
    else:
        self.shared_embeddings = util.sharedMatrix(vocab_size, embedding_dim,
                                                   'tied_embeddings',
                                                   orthogonal_init=True)
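# Illustrative sketch (not part of the class above): how a tied embedding
# matrix like shared_embeddings maps a sequence of token ids to a sequence of
# embedding rows. Plain numpy row indexing is used here; the real class wraps
# the matrix in a shared variable, and the embed() helper below is
# hypothetical, local to this sketch.
import numpy as np

def embed(embedding_matrix, token_ids):
    # one row of the (vocab_size, embedding_dim) matrix per token id
    return embedding_matrix[token_ids]

# e.g. a toy vocab of 5 tokens with 3-d embeddings
E = np.random.randn(5, 3).astype(np.float32)
print embed(E, [0, 3, 3, 1]).shape  # (4, 3)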
def __init__(self, inp, n_labels, n_hidden_previous, update_fn,
             training=None, keep_prob=None):
    if isinstance(inp, list):
        self.input = T.concatenate(inp)
        input_size = len(inp) * n_hidden_previous
    else:
        self.input = inp
        input_size = n_hidden_previous
    if training is not None:
        assert keep_prob is not None
        self.input = dropout(self.input, training, keep_prob)

    self.update_fn = update_fn

    # input -> hidden (sized somewhere between the input size and the softmax size)
    n_hidden = int(math.sqrt(input_size * n_labels))
    print "concat sizing %s -> %s -> %s" % (input_size, n_hidden, n_labels)
    self.Wih = util.sharedMatrix(input_size, n_hidden, 'Wih')
    self.bh = util.shared(util.zeros((1, n_hidden)), 'bh')

    # hidden -> softmax
    self.Whs = util.sharedMatrix(n_hidden, n_labels, 'Whs')
    self.bs = util.shared(util.zeros((1, n_labels)), 'bs')
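# Illustrative sketch (not part of the class above): the forward pass these
# parameters describe -- concatenated upstream states -> tanh hidden layer
# sized at the geometric mean of input_size and n_labels -> softmax over
# labels. Plain numpy, dropout omitted, 1-d biases for simplicity; all names
# are local to this sketch.
import math
import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / np.sum(e)

def concat_with_softmax(inputs, Wih, bh, Whs, bs):
    x = np.concatenate(inputs)            # mirrors T.concatenate(inp)
    h = np.tanh(np.dot(x, Wih) + bh)      # input -> hidden
    return softmax(np.dot(h, Whs) + bs)   # hidden -> label distribution

# e.g. two upstream hidden states of size 4, 3 labels
input_size, n_labels = 2 * 4, 3
n_hidden = int(math.sqrt(input_size * n_labels))
probs = concat_with_softmax([np.ones(4), np.ones(4)],
                            np.zeros((input_size, n_hidden)), np.zeros(n_hidden),
                            np.zeros((n_hidden, n_labels)), np.zeros(n_labels))
print probs  # uniform over the 3 labels with all-zero weights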
def __init__(self, name, input_dim, hidden_dim, opts, update_fn, h0,
             inputs, context=None, context_dim=None):
    self.name_ = name
    self.update_fn = update_fn
    self.h0 = h0
    self.inputs = inputs    # input sequence
    self.context = context  # additional context to add at each timestep of input

    # hidden -> hidden
    self.Uh = util.sharedMatrix(hidden_dim, hidden_dim, 'Uh', orthogonal_init=True)
    # embedded input -> hidden
    self.Wh = util.sharedMatrix(hidden_dim, input_dim, 'Wh', orthogonal_init=True)
    # context -> hidden (if applicable)
    if self.context is not None:
        self.Whc = util.sharedMatrix(hidden_dim, context_dim, 'Whc', orthogonal_init=True)
    # bias
    self.bh = util.shared(util.zeros((hidden_dim,)), 'bh')
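# Illustrative sketch (not part of the class above): the vanilla recurrent step
# these parameters imply, in plain numpy. The optional context term mirrors
# self.Whc, with the same context vector added at every timestep; the tanh
# nonlinearity is an assumption, and all names below are local to this sketch.
import numpy as np

def rnn_step(x_t, h_tm1, Wh, Uh, bh, Whc=None, c=None):
    pre = np.dot(Wh, x_t) + np.dot(Uh, h_tm1) + bh
    if Whc is not None and c is not None:
        pre += np.dot(Whc, c)  # context contribution, identical each step
    return np.tanh(pre)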