def LSTMCell(xt, h, c):
    """One LSTM step in the edf computation graph.

    Args:
        xt: graph node for the input at the current timestep.
        h:  graph node for the previous hidden state.
        c:  graph node for the previous cell state.

    Returns:
        (h_next, c_next): graph nodes for the updated hidden and cell state.

    Uses the module-level weights Wf/Wi/Wo/Wc and biases bf/bi/bo/bc.
    """
    # Build the shared [xt, h] concatenation ONCE instead of once per gate.
    # The original created four identical ConCat nodes; all four gate
    # projections can read the same node — same math, three fewer
    # components in the edf graph to forward/backward through.
    xh = edf.ConCat(xt, h)
    f = edf.Sigmoid(edf.Add(edf.VDot(xh, Wf), bf))   # forget gate
    i = edf.Sigmoid(edf.Add(edf.VDot(xh, Wi), bi))   # input gate
    o = edf.Sigmoid(edf.Add(edf.VDot(xh, Wo), bo))   # output gate
    c_hat = edf.Tanh(edf.Add(edf.VDot(xh, Wc), bc))  # candidate cell state
    # c' = f * c + i * c_hat ;  h' = o * tanh(c')
    c_next = edf.Add(edf.Mul(f, c), edf.Mul(i, c_hat))
    h_next = edf.Mul(o, edf.Tanh(c_next))
    return h_next, c_next
def LSTMCell(xt, h, c, layer):
    """One LSTM step for a given layer of a stacked LSTM.

    Args:
        xt:    graph node for this layer's input at the current timestep.
        h:     graph node for this layer's previous hidden state.
        c:     graph node for this layer's previous cell state.
        layer: index into the per-layer weight/bias lists
               (Wf/Wi/Wo/Wc and bf/bi/bo/bc).

    Returns:
        (h_next, c_next): graph nodes for the updated hidden and cell state.
    """
    # Build the shared [xt, h] concatenation ONCE instead of once per gate.
    # The original created four identical ConCat nodes; all four gate
    # projections can read the same node — same math, fewer graph components.
    xh = edf.ConCat(xt, h)
    f = edf.Sigmoid(edf.Add(edf.VDot(xh, Wf[layer]), bf[layer]))   # forget gate
    i = edf.Sigmoid(edf.Add(edf.VDot(xh, Wi[layer]), bi[layer]))   # input gate
    o = edf.Sigmoid(edf.Add(edf.VDot(xh, Wo[layer]), bo[layer]))   # output gate
    c_hat = edf.Tanh(edf.Add(edf.VDot(xh, Wc[layer]), bc[layer]))  # candidate cell
    # c' = f * c + i * c_hat ;  h' = o * tanh(c')
    c_next = edf.Add(edf.Mul(f, c), edf.Mul(i, c_hat))
    h_next = edf.Mul(o, edf.Tanh(c_next))
    return h_next, c_next
def BuildModel():
    """Unroll a single-layer LSTM language model over the batch in `inp`.

    Reads the module-level input node `inp` (value of shape (B, T) holding
    token ids; id 0 is treated as padding), the embedding matrix C2V, the
    output projection V, and `hidden_dim`.

    Returns:
        loss:  masked mean log-loss node over timesteps 0..T-2 predicting
               the token at t+1.
        score: list of per-timestep softmax nodes.
    """
    edf.components = []
    B, T = inp.value.shape[0], inp.value.shape[1]

    # Rolling recurrent state, rebound each timestep.
    h = edf.Value(np.zeros((B, hidden_dim)))
    c = edf.Value(np.zeros((B, hidden_dim)))

    score = []
    loss = None
    for t in range(T - 1):
        # Embed the current token and feed it through the LSTM cell.
        wordvec = edf.Embed(edf.Value(inp.value[:, t]), C2V)
        xt = edf.Reshape(wordvec, [-1, hidden_dim])
        h, c = LSTMCell(xt, h, c)

        # Predict the next token and score it against the ground truth.
        p = edf.SoftMax(edf.VDot(h, V))
        step_loss = edf.Reshape(
            edf.LogLoss(edf.Aref(p, edf.Value(inp.value[:, t + 1]))), (B, 1))
        loss = step_loss if loss is None else edf.ConCat(loss, step_loss)
        score.append(p)

    # Mask out padding positions (target id 0) before averaging the loss.
    masks = np.zeros((B, T - 1), dtype=np.int32)
    masks[inp.value[:, 1:] != 0] = 1
    loss = edf.MeanwithMask(loss, edf.Value(masks))
    return loss, score
def LSTMCell(x, h, c):
    """Single LSTM step in the edf graph.

    Args:
        x: graph node for the current input.
        h: graph node for the previous hidden state.
        c: graph node for the previous cell state.

    Returns:
        (h_next, c_next): updated hidden and cell state nodes.

    Uses the module-level weights Wf/Wi/Wo/Wc and biases bf/bi/bo/bc.
    """
    # NOTE: concatenation order here is (h, x) — the weights are laid out
    # to match; do not swap.
    joined = edf.ConCat(h, x)

    def _affine(W, b):
        # Shared gate pre-activation: joined @ W + b.
        return edf.Add(edf.VDot(joined, W), b)

    # Gates and candidate, created in the same order as before so the
    # edf.components registration order is unchanged.
    f_gate = edf.Sigmoid(_affine(Wf, bf))   # forget gate
    i_gate = edf.Sigmoid(_affine(Wi, bi))   # input gate
    c_cand = edf.Tanh(_affine(Wc, bc))      # candidate cell state
    o_gate = edf.Sigmoid(_affine(Wo, bo))   # output gate

    # c' = f * c + i * c_cand ;  h' = o * tanh(c')
    c_next = edf.Add(edf.Mul(f_gate, c), edf.Mul(i_gate, c_cand))
    h_next = edf.Mul(o_gate, edf.Tanh(c_next))
    return h_next, c_next
def BuildModel():
    """Unroll a stacked (multi-layer) LSTM language model over `inp`.

    Reads the module-level input node `inp` (value of shape (B, T) holding
    token ids; id 0 is treated as padding), the layer count `layer`, the
    embedding matrix C2V, the output projection V, and `hidden_dim`.

    Returns:
        loss:  masked mean log-loss node over timesteps 0..T-2 predicting
               the token at t+1.
        score: list of per-timestep softmax nodes.
    """
    edf.components = []
    B = inp.value.shape[0]
    T = inp.value.shape[1]

    # BUG FIX: the original built the state tables with
    # `[[None] * layer] * T`, which replicates ONE inner list T times, so
    # every timestep row aliased the same list and each write clobbered all
    # rows. The recurrence only worked by accident of that aliasing (each
    # row always held the most recent state). Keep explicit per-layer
    # rolling state instead — same behavior, correct by construction.
    h = [edf.Value(np.zeros((B, hidden_dim))) for _ in range(layer)]
    c = [edf.Value(np.zeros((B, hidden_dim))) for _ in range(layer)]

    score = []
    loss = None
    for t in range(T - 1):
        # Embed the current token; it is the bottom layer's input.
        wordvec = edf.Embed(edf.Value(inp.value[:, t]), C2V)
        xt = edf.Reshape(wordvec, [-1, hidden_dim])
        for i in range(layer):
            h[i], c[i] = LSTMCell(xt, h[i], c[i], i)
            xt = h[i]  # this layer's hidden state feeds the next layer

        # Predict the next token from the top layer's hidden state.
        p = edf.SoftMax(edf.VDot(xt, V))
        logloss = edf.Reshape(
            edf.LogLoss(edf.Aref(p, edf.Value(inp.value[:, t + 1]))), (B, 1))
        loss = logloss if loss is None else edf.ConCat(loss, logloss)
        score.append(p)

    # Mask out padding positions (target id 0) before averaging the loss.
    masks = np.zeros((B, T - 1), dtype=np.int32)
    masks[inp.value[:, 1:] != 0] = 1
    loss = edf.MeanwithMask(loss, edf.Value(masks))
    return loss, score