def _init_nn(self): """Initialize neural network. """ self.intm_dim = max(MIN_DIM, self.ndim - (self.ndim - self.n_y) / 2) # indices of word embeddings self.W_INDICES_ARG1 = TT.ivector(name="W_INDICES_ARG1") self.W_INDICES_ARG2 = TT.ivector(name="W_INDICES_ARG2") # connective's index self.CONN_INDEX = TT.iscalar(name="CONN_INDEX") # initialize the matrix of word embeddings self.init_w_emb() # word embeddings of the arguments self.EMB_ARG1 = self.W_EMB[self.W_INDICES_ARG1] self.EMB_ARG2 = self.W_EMB[self.W_INDICES_ARG2] # connective's embedding self._init_conn_emb() self.EMB_CONN = self.CONN_EMB[self.CONN_INDEX] # perform matrix decomposition _, _, self.ARG1 = TT.nlinalg.svd(self.EMB_ARG1, full_matrices=True) _, _, self.ARG2 = TT.nlinalg.svd(self.EMB_ARG2, full_matrices=True) self.ARG_DIFF = self.ARG1 - self.ARG2 # map decomposed matrices to the intermediate level self.ARG_DIFF2I = theano.shared(value=HE_UNIFORM((self.ndim, 1)), name="ARG_DIFF2I") self.arg_diff_bias = theano.shared(value=HE_UNIFORM((1, self.ndim)), name="arg_diff_bias") self._params.extend([self.ARG_DIFF2I, self.arg_diff_bias]) self.ARGS = (TT.dot(self.ARG_DIFF, self.ARG_DIFF2I).T + self.arg_diff_bias).flatten() # define final units self.I = TT.concatenate((self.ARGS, self.EMB_CONN)) self.I2Y = theano.shared(value=HE_UNIFORM((self.n_y, self.ndim + self.intm_dim)), name="I2Y") self.y_bias = theano.shared(value=HE_UNIFORM((1, self.n_y)), name="y_bias") self._params.extend([self.I2Y, self.y_bias]) self.Y_pred = TT.nnet.softmax(TT.dot(self.I2Y, self.I).T + self.y_bias) # initialize cost and optimization functions self.Y_gold = TT.vector(name="Y_gold") self._cost = TT.sum((self.Y_pred - self.Y_gold) ** 2) self._dev_cost = TT.sum((self.Y_pred - self.Y_gold) ** 2) self._pred_class = TT.argmax(self.Y_pred) grads = TT.grad(self._cost, wrt=self._params) self._init_funcs(grads)
def _init_w_emb(self): """Initialize task-specific word embeddings. """ self.W_EMB = theano.shared( value=HE_UNIFORM((self.w_i, self.ndim)), name="W_EMB") self._params.append(self.W_EMB)
def _init_conn_emb(self):
    """Initialize task-specific connective embeddings.

    """
    self.CONN_EMB = theano.shared(value=HE_UNIFORM((self.c_i,
                                                    self.intm_dim)),
                                  name="CONN_EMB")
    self._params.append(self.CONN_EMB)
def _init_nn(self): """Initialize neural network. """ self.intm_dim = max(100, self.ndim - (self.ndim - self.n_y) / 2) # indices of word embeddings self.W_INDICES_ARG1 = TT.ivector(name="W_INDICES_ARG1") self.W_INDICES_ARG2 = TT.ivector(name="W_INDICES_ARG2") # connective's index self.CONN_INDEX = TT.iscalar(name="CONN_INDEX") # initialize the matrix of word embeddings self.init_w_emb() # word embeddings of the arguments self.EMB_ARG1 = self.W_EMB[self.W_INDICES_ARG1] self.EMB_ARG2 = self.W_EMB[self.W_INDICES_ARG2] # connective's embedding self._init_conn_emb() self.EMB_CONN = self.CONN_EMB[self.CONN_INDEX] # initialize forward LSTM unit invars = ((self.EMB_ARG1, False), (self.EMB_ARG2, False)) params, outvars = self._init_lstm(invars) self._params.extend(params) self.F_OUT_ARG1, self.F_OUT_ARG2 = outvars self.F_ARG1 = TT.mean(self.F_OUT_ARG1, axis=0) self.F_ARG2 = TT.mean(self.F_OUT_ARG2, axis=0) # define final units self.I = TT.concatenate((self.F_ARG1, self.F_ARG2, self.EMB_CONN)) self.I2Y = theano.shared(value=HE_UNIFORM( (self.n_y, self.intm_dim * 3)), name="I2Y") self.y_bias = theano.shared(value=HE_UNIFORM((1, self.n_y)), name="y_bias") self._params.extend([self.I2Y, self.y_bias]) self.Y_pred = TT.nnet.softmax(TT.dot(self.I2Y, self.I).T + self.y_bias) # initialize cost and optimization functions self.Y_gold = TT.vector(name="Y_gold") self._cost = TT.sum((self.Y_pred - self.Y_gold)**2) self._dev_cost = TT.sum((self.Y_pred - self.Y_gold)**2) self._pred_class = TT.argmax(self.Y_pred) grads = TT.grad(self._cost, wrt=self._params) self._init_funcs(grads)
def _init_w2v_emb(self):
    """Initialize word2vec embedding matrix.

    """
    w_emb = np.empty((self.w_i, self.ndim))
    w_emb[self.unk_w_i, :] = 1e-2   # prevent zeros in this row
    for w, i in self.w2emb_i.iteritems():
        if i == self.unk_w_i:
            continue
        w_emb[i] = self.w2v[w]
    self.W_EMB = theano.shared(value=floatX(w_emb), name="W_EMB")
    # We unload the embeddings every time before training to free more
    # memory.  Feel free to comment out the line below if you have
    # plenty of RAM.
    self.w2v.unload()
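
# The ``a_lstsq`` option of the constructor below derives embeddings for
# unknown words by learning a linear map from generic word2vec vectors
# to the task-specific embedding space.  A minimal numpy sketch of that
# idea; the names and signature are illustrative assumptions, not the
# project's actual implementation.
def _lstsq_mapping_sketch(w2v_vecs, task_vecs, w2v_unk):
    """Map the word2vec vector of an unseen word into task space.

    Args:
      w2v_vecs (np.ndarray): word2vec vectors of words seen in training
      task_vecs (np.ndarray): trained task-specific vectors of those words
      w2v_unk (np.ndarray): word2vec vector of an unknown word

    """
    import numpy as np
    # solve w2v_vecs . M ~= task_vecs in the least-squares sense
    M, _, _, _ = np.linalg.lstsq(w2v_vecs, task_vecs, rcond=None)
    return w2v_unk.dot(M)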
def __init__(self, a_w2v=False, a_lstsq=False, a_max_iters=MAX_ITERS):
    """Class constructor.

    Args:
      a_w2v (bool): use pre-trained word2vec instance
      a_lstsq (bool): pre-train task-specific word embeddings, but use
        the least-squares method to generate embeddings for unknown
        words from generic word2vec vectors
      a_max_iters (int): maximum number of iterations

    """
    # access to the original word2vec resource
    if a_lstsq:
        a_w2v = True
    if a_w2v:
        self.w2v = Word2Vec     # singleton object
    else:
        self.w2v = None
    self.lstsq = a_lstsq
    self._plain_w2v = self.w2v and not self.lstsq
    self.max_iters = a_max_iters
    # matrix mapping word2vec to task-specific embeddings
    self.w2emb = None
    self.ndim = -1      # vector dimensionality will be initialized later
    self.intm_dim = -1
    # mapping from word to its embedding index
    self.unk_w_i = 0
    self._aux_keys = set((0,))
    self.w_i = 1
    self.w2emb_i = dict()
    # mapping from connective to its embedding index
    self.unk_c_i = 0
    self.c_i = 1
    self.c2emb_i = dict()
    # variables needed for training
    self._trained = False
    self._params = []
    self._w_stat = self._pred_class = None
    self.use_dropout = theano.shared(floatX(0.))
    self.W_EMB = self.CONN_EMB = self._cost = self._dev_cost = None
    # initialize theano functions to None
    self._reset_funcs()
    # set up functions for obtaining word embeddings at train and test
    # times
    self._init_wemb_funcs()
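
# The three meaningful flag combinations of the constructor above
# (inferred from its assignments, not from external documentation):
#
#   a_w2v  a_lstsq  resulting mode
#   -----  -------  ---------------------------------------------------
#   False  False    task-specific embeddings trained from scratch
#   True   False    pre-trained word2vec embeddings (_plain_w2v is True)
#   any    True     task-specific embeddings plus a least-squares
#                   fallback for unknown words (a_w2v is forced to True)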
def _init_lstm(self, a_invars, a_sfx="-forward"):
    """Initialize LSTM layer.

    Args:
      a_invars (iterable of 2-tuples): pairs of a symbolic input
        sequence and a ``go_backwards`` flag for ``theano.scan``
      a_sfx (str): suffix to use for function and parameter names

    Returns:
      (2-tuple): parameters to be optimized and list of symbolic
        outputs, one per input sequence

    """
    intm_dim = self.intm_dim
    # initialize transformation matrices and bias term (the four
    # stacked blocks correspond to the input, forget, cell, and output
    # gates)
    W_dim = (intm_dim, self.ndim)
    W = np.concatenate([ORTHOGONAL(W_dim), ORTHOGONAL(W_dim),
                        ORTHOGONAL(W_dim), ORTHOGONAL(W_dim)], axis=0)
    W = theano.shared(value=W, name="W" + a_sfx)
    U_dim = (intm_dim, intm_dim)
    U = np.concatenate([ORTHOGONAL(U_dim), ORTHOGONAL(U_dim),
                        ORTHOGONAL(U_dim), ORTHOGONAL(U_dim)], axis=0)
    U = theano.shared(value=U, name="U" + a_sfx)
    V = ORTHOGONAL(U_dim)       # V for vendetta
    V = theano.shared(value=V, name="V" + a_sfx)
    b_dim = (1, intm_dim * 4)
    b = theano.shared(value=HE_UNIFORM(b_dim), name="b" + a_sfx)
    params = [W, U, V, b]
    # initialize dropout units
    w_do = theano.shared(value=floatX(np.ones((4 * intm_dim,))),
                         name="w_do")
    w_do = self._init_dropout(w_do)
    u_do = theano.shared(value=floatX(np.ones((4 * intm_dim,))),
                         name="u_do")
    u_do = self._init_dropout(u_do)

    # custom function for splitting up the concatenated gate matrices
    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    # define recurrent LSTM unit
    def _step(x_, h_, c_, W, U, V, b, w_do, u_do):
        """Recurrent LSTM unit.

        Note:
          The general order of function parameters to fn is: sequences
          (if any), prior result(s) (if needed), non-sequences (if any).

        Args:
          x_ (theano.shared): input vector
          h_ (theano.shared): output vector
          c_ (theano.shared): memory state
          W (theano.shared): input transform matrix
          U (theano.shared): inner-state transform matrix
          V (theano.shared): output transform matrix
          b (theano.shared): bias vector
          w_do (TT.col): dropout unit for the W matrix
          u_do (TT.col): dropout unit for the U matrix

        Returns:
          (2-tuple(h, c)): new hidden and memory states

        """
        # pre-compute the common term (example dimensions are given for
        # ndim = 100 and intm_dim = 59, hence 4 * 59 = 236):
        # W \in R^{236 x 100}, x \in R^{1 x 100},
        # U \in R^{236 x 59}, h \in R^{1 x 59}, b \in R^{1 x 236},
        # w_do, u_do \in R^{236 x 1}, xhb \in R^{1 x 236}
        xhb = (TT.dot(W * w_do.dimshuffle((0, 'x')), x_.T)
               + TT.dot(U * u_do.dimshuffle((0, 'x')), h_.T)).T + b
        # i \in R^{1 x 59}
        i = TT.nnet.sigmoid(_slice(xhb, 0, intm_dim))
        # f \in R^{1 x 59}
        f = TT.nnet.sigmoid(_slice(xhb, 1, intm_dim))
        # c \in R^{1 x 59}
        c = TT.tanh(_slice(xhb, 2, intm_dim))
        c = i * c + f * c_
        # V \in R^{59 x 59}, o \in R^{1 x 59}
        o = TT.nnet.sigmoid(_slice(xhb, 3, intm_dim) + TT.dot(V, c.T).T)
        # h \in R^{1 x 59}
        h = o * TT.tanh(c)
        # return the current output and memory state
        return h.flatten(), c.flatten()

    n = intm_dim
    outvars = []
    for iv, igbw in a_invars:
        m = iv.shape[0]
        ret, _ = theano.scan(_step,
                             sequences=[iv],
                             outputs_info=[floatX(np.zeros((n,))),
                                           floatX(np.zeros((n,)))],
                             non_sequences=[W, U, V, b, w_do, u_do],
                             name="LSTM" + str(iv) + a_sfx,
                             n_steps=m,
                             truncate_gradient=TRUNCATE_GRADIENT,
                             go_backwards=igbw)
        outvars.append(ret[0])
    return params, outvars
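
# A minimal numpy reference of the recurrence computed by ``_step``
# above (dropout omitted; names and dimensions are illustrative
# assumptions).  The gates follow the usual LSTM definitions, with the
# V matrix applied to the updated cell state inside the output gate.
def _lstm_step_sketch(x, h_prev, c_prev, W, U, V, b, d):
    import numpy as np

    def sigmoid(z):
        return 1. / (1. + np.exp(-z))

    xhb = W.dot(x) + U.dot(h_prev) + b      # shape: (4 * d,)
    i = sigmoid(xhb[0 * d:1 * d])           # input gate
    f = sigmoid(xhb[1 * d:2 * d])           # forget gate
    c = i * np.tanh(xhb[2 * d:3 * d]) + f * c_prev   # new memory state
    o = sigmoid(xhb[3 * d:4 * d] + V.dot(c))         # output gate
    h = o * np.tanh(c)                      # new hidden state
    return h, c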