def _step(self, loader, update, log, reporting_fns, verbose=None):
    """Run one pass over `loader` and return the resulting metrics.

    For every batch a fresh DyNet graph is created, the model's batch-mean
    loss is computed, the confusion matrix is updated from the predictions,
    and then `update(loss)` and `log(...)` are invoked in that order.

    :param loader: Sized iterable of batch dictionaries.
    :param update: Callable applied to the batch loss expression.
    :param log: Callable receiving (global_step, loss value, batch size, reporting_fns).
    :param reporting_fns: Passed through to `log`.
    :param verbose: Passed through to `verbose_output`.
    :returns: The confusion-matrix metrics plus an `'avg_loss'` entry
        (summed loss divided by the number of examples seen).
    """
    progress = create_progress_bar(len(loader))
    confusion = ConfusionMatrix(self.labels)
    total_loss = 0
    total_examples = 0

    for batch_dict in progress(loader):
        dy.renew_cg()
        inputs = self.model.make_input(batch_dict)
        gold = inputs.pop('y')
        preds = self.model.forward(inputs)
        loss = dy.mean_batches(self.model.loss(preds, gold))

        batchsz = self._get_batchsz(batch_dict)
        # The loss is a batch mean; scale it back up so the epoch average is per example.
        weighted_loss = loss.npvalue().item() * batchsz
        total_loss += weighted_loss
        total_examples += batchsz

        _add_to_cm(confusion, gold, preds.npvalue())
        update(loss)
        log(self.optimizer.global_step, weighted_loss, batchsz, reporting_fns)

    metrics = confusion.get_all_metrics()
    metrics['avg_loss'] = total_loss / float(total_examples)
    verbose_output(verbose, confusion)
    return metrics
def act(self, obs):
    """Return the actor's action for `obs`, clipped to [-1, 1].

    The actor is evaluated on a fresh computation graph. When
    `self.noise_stddev` is positive, Gaussian noise of that standard
    deviation (one sample per action dimension) is added before clipping.
    """
    dy.renew_cg()
    action = self.actor(obs).npvalue()
    if self.noise_stddev > 0:
        action += self.noise_stddev * np.random.randn(self.action_dim)
    return np.clip(action, -1, 1)
def predict(self, batch_dict):
    """Decode the best tag path for a batch and return one array per sentence.

    Decoding uses the CRF when `self.do_crf is True`, constrained Viterbi when
    `self.constraint` is set, and a per-step argmax otherwise. Each returned
    path is truncated to the corresponding entry of `inputs['lengths']`.
    """
    dy.renew_cg()
    inputs = self.make_input(batch_dict)
    lengths = inputs['lengths']
    unaries = self.compute_unaries(inputs)

    if self.do_crf is True:
        best_path, _ = self.crf.decode(unaries)
    elif self.constraint is not None:
        best_path, _ = viterbi(
            unaries,
            dy.log_softmax(dy.inputTensor(self.constraint[1] * -1e4)),
            Offsets.GO,
            Offsets.EOS,
            norm=True,
        )
    else:
        best_path = [np.argmax(unary.npvalue(), axis=0) for unary in unaries]

    # TODO: autobatching is used right now, so the batch axis is always 1 here.
    # Real batching would need a loop over the batch, or this code simplified.
    paths = np.stack(best_path).reshape(-1, 1).transpose(1, 0)  # (T, B) -> (B, T)
    return [paths[b, :lengths[b]] for b in range(paths.shape[0])]
def generate(sent):
    """Greedily decode a target sentence for the source word ids in `sent`.

    The source is encoded with the source LSTM; its final output seeds the
    target LSTM state. Decoding starts from `sos_trg` and stops when
    `eos_trg` is produced or after `MAX_SENT_SIZE` steps.

    :param sent: Sequence of source word ids.
    :returns: List of target words (looked up through `i2w_trg`), without the
        end-of-sentence token.
    """
    dy.renew_cg()
    src = sent

    # Encode the source sentence and keep the last LSTM output.
    init_state_src = LSTM_SRC_BUILDER.initial_state()
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()

    # Seed the decoder with the encoder summary.
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_word = sos_trg
    trg_sent = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for _ in range(MAX_SENT_SIZE):
        # Feed the previous word, score the vocabulary, and take the most likely word.
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        # The argmax must be taken over log-probabilities. Negating them first
        # (as the code previously did) selects the *least* likely word.
        log_probs = dy.log_softmax(s).value()
        next_word = np.argmax(log_probs)
        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent
def calc_scores(words):
    """Score a bag of words: the sum of the per-word rows of `W`, plus bias `b`."""
    # Every call starts from a fresh computation graph.
    dy.renew_cg()
    word_vectors = [dy.lookup(W, word_id) for word_id in words]
    return dy.esum(word_vectors) + b
def test_update(self):
    """Check gradients and one trainer step for dense and lookup parameters.

    Builds `dot(p1 * lp1[1], p2 * lp2[1]) / 100`, asserts that the loss is 10,
    that the dense gradients are 0.1 everywhere and row 1 of each lookup
    gradient is all ones, then asserts the values after `trainer.update()`.
    """
    ones = np.ones((10, 10))
    dy.renew_cg()
    pp1 = dy.parameter(self.p1)
    pp2 = dy.parameter(self.p2)
    a = pp1 * self.lp1[1]
    b = pp2 * self.lp2[1]
    loss = dy.dot_product(a, b) / 100
    self.assertEqual(loss.scalar_value(), 10, msg=str(loss.scalar_value()))
    loss.backward()

    # Gradients after the backward pass.
    self.assertTrue(np.allclose(self.p1.grad_as_array(), 0.1 * ones),
                    msg=np.array_str(self.p1.grad_as_array()))
    self.assertTrue(np.allclose(self.p2.grad_as_array(), 0.1 * ones),
                    msg=np.array_str(self.p2.grad_as_array()))
    self.assertTrue(np.allclose(self.lp1.grad_as_array()[1], ones[0]),
                    msg=np.array_str(self.lp1.grad_as_array()))
    self.assertTrue(np.allclose(self.lp2.grad_as_array()[1], ones[0]),
                    msg=np.array_str(self.lp2.grad_as_array()))

    self.trainer.update()

    # Parameter values after one update.
    self.assertTrue(np.allclose(self.p1.as_array(), ones * 0.99),
                    msg=np.array_str(self.p1.as_array()))
    self.assertTrue(np.allclose(self.p2.as_array(), ones * 0.99),
                    msg=np.array_str(self.p2.as_array()))
    self.assertTrue(np.allclose(self.lp1.as_array()[1], ones[0] * 0.9),
                    msg=np.array_str(self.lp1.as_array()[1]))
    self.assertTrue(np.allclose(self.lp2.as_array()[1], ones[0] * 0.9),
                    msg=np.array_str(self.lp2.as_array()))
def calc_predict_and_activations(wids, tag, words):
    """Run the CNN classifier on one example and print diagnostic output.

    Prints the tag and words, the output of `display_activations`, the class
    scores and predicted class, the softmax bias, and for each of the five
    classes its score together with `W * features` for that row.

    :param wids: List of word ids; padded with 0 up to `WIN_SIZE` on a copy,
        so the caller's list is left untouched.
    :param tag: Integer tag printed alongside the sentence.
    :param words: Tokens of the sentence, used for display only.
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        # Build a new list rather than using `+=`, which would pad the caller's data in place.
        wids = wids + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)

    # For each filter, the position along the sentence with the strongest response.
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    scores = (W_sm * pool_out + b_sm).npvalue()

    print ('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print (display_activations(words, activations))
    print ('scores=%s, predict: %d' % (scores, predict))

    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print (' bias=%s' % bias)
    # Row i holds the element-wise weight * feature products for class i.
    contributions = W * features
    print (' very bad (%.4f): %s' % (scores[0], contributions[0]))
    print (' bad (%.4f): %s' % (scores[1], contributions[1]))
    print (' neutral (%.4f): %s' % (scores[2], contributions[2]))
    print (' good (%.4f): %s' % (scores[3], contributions[3]))
    print ('very good (%.4f): %s' % (scores[4], contributions[4]))
def test_gradient_sanity(self):
    """`gradient_callable` must raise RuntimeError after a forward-only pass."""
    dy.renew_cg()
    lhs = dy.inputTensor(self.v1)
    rhs = dy.inputTensor(self.v2)
    product = dy.dot_product(lhs, rhs)
    product.forward()
    with self.assertRaises(RuntimeError):
        gradient_callable(lhs)
def test_update(self):
    """Verify gradients and the result of one trainer update.

    The loss is `dot(p1 * lp1[1], p2 * lp2[1]) / 100`. The test asserts its
    value, the gradients it produces, and the parameter values after
    `self.trainer.update()`.
    """
    ones = np.ones((10, 10))
    first_row = ones[0]

    def expect_close(actual, expected, reported):
        # `reported` is what gets printed if the comparison fails.
        self.assertTrue(np.allclose(actual, expected), msg=np.array_str(reported))

    dy.renew_cg()
    left = self.p1 * self.lp1[1]
    right = self.p2 * self.lp2[1]
    loss = dy.dot_product(left, right) / 100
    self.assertEqual(loss.scalar_value(), 10, msg=str(loss.scalar_value()))
    loss.backward()

    # Gradients.
    for dense in (self.p1, self.p2):
        grad = dense.grad_as_array()
        expect_close(grad, 0.1 * ones, grad)
    for table in (self.lp1, self.lp2):
        grad = table.grad_as_array()
        expect_close(grad[1], first_row, grad)

    self.trainer.update()

    # Updated parameter values.
    for dense in (self.p1, self.p2):
        value = dense.as_array()
        expect_close(value, ones * 0.99, value)
    lp1_value = self.lp1.as_array()
    expect_close(lp1_value[1], first_row * 0.9, lp1_value[1])
    lp2_value = self.lp2.as_array()
    expect_close(lp2_value[1], first_row * 0.9, lp2_value)
def train(epoch):
    """Train the model for one epoch over `train_data`, printing progress.

    A status line is printed every `args.log_interval` batches, and the
    summed loss divided by `len(train_data)` is printed at the end.
    """
    model.training = True
    running_loss = 0
    batches = generate_batch_loader(train_data, batch_size=batch_size)

    for batch_idx, data in enumerate(batches):
        # A new graph is built for every batch.
        dy.renew_cg()
        x = dy.inputTensor(data.reshape(-1, 784).T)
        recon_x, mu, logvar = model.forward(x)
        loss = loss_function(recon_x, x, mu, logvar)

        # Forward pass.
        loss_value = loss.value()
        running_loss += loss_value

        # Backward pass and parameter update.
        loss.backward()
        optimizer.update()

        if batch_idx % args.log_interval != 0:
            continue
        percent_done = 100. * batch_idx / (len(train_data) / batch_size)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_data),
            percent_done, loss_value / len(data)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, running_loss / len(train_data)))
def test(epoch):
    """Evaluate the model on `test_data` and save a reconstruction image.

    For the first batch, up to 8 inputs and their reconstructions are
    concatenated and written to `results/reconstruction_<epoch>.png`.
    The summed loss divided by `len(test_data)` is printed at the end.
    """
    model.training = False
    test_loss = 0
    test_loader = generate_batch_loader(test_data, batch_size=batch_size)
    for i, data in enumerate(test_loader):
        # A new graph is built for every batch.
        dy.renew_cg()
        x = dy.inputTensor(data.reshape(-1, 784).T)
        recon_x, mu, logvar = model.forward(x)
        loss = loss_function(recon_x, x, mu, logvar)

        # Forward pass.
        loss_value = loss.value()
        test_loss += loss_value

        if i == 0:
            n = min(data.shape[0], 8)
            # Infer the batch axis with -1: hard-coding `batch_size` breaks when
            # the first batch holds fewer than `batch_size` examples.
            recon_images = recon_x.npvalue().T.reshape(-1, 1, 28, 28)
            comparison = np.concatenate([data[:n], recon_images[:n]])
            save_image(comparison,
                       'results/reconstruction_' + str(epoch) + '.png', nrow=n)

    test_loss /= len(test_data)
    print('====> Test set loss: {:.4f}'.format(test_loss))
def test(self, loader, reporting_fns, phase, **kwargs):
    """Evaluate the model over `loader` and report the resulting metrics.

    The recurrent state returned by the model is carried from batch to batch
    as plain numpy values. When `phase == 'Valid'`, `self.valid_epochs` is
    incremented and used as the reported epoch number; otherwise 0 is used.

    :returns: The metrics from `self.calc_metrics(total_loss, total_toks)`.
    """
    loss_sum = 0.0
    tok_count = 0
    carried_state = None
    start = time.time()

    for batch_dict in loader:
        dy.renew_cg()
        inputs = self.model.make_input(batch_dict)
        y = inputs.pop('y')
        output, carried_state = self.model.forward(inputs, carried_state, train=False)
        loss = self._loss(output, y)
        toks = self._num_toks(batch_dict)
        loss_sum += loss.npvalue().item() * toks
        tok_count += toks
        if carried_state is not None:
            # Convert to numpy so the state survives the next renew_cg().
            carried_state = [h.npvalue() for h in carried_state]

    epochs = 0
    if phase == 'Valid':
        self.valid_epochs += 1
        epochs = self.valid_epochs

    metrics = self.calc_metrics(loss_sum, tok_count)
    self.report(epochs, metrics, start, phase, 'EPOCH', reporting_fns)
    return metrics
def test_pick_batch_elems(self):
    """`pick_batch_elems` returns the requested batch elements."""
    dy.renew_cg()
    batch = dy.lookup_batch(self.p, [0, 1])

    first_only = dy.pick_batch_elems(batch, [0])
    self.assertTrue(np.allclose(first_only.npvalue(), self.pval[0]))

    both = dy.pick_batch_elems(batch, [0, 1])
    self.assertTrue(np.allclose(both.npvalue(), self.pval.T))
def test_concatenate_to_batch(self):
    """Splitting a batch into elements and re-concatenating restores it."""
    dy.renew_cg()
    batch = dy.lookup_batch(self.p, [0, 1])
    elements = [dy.pick_batch_elem(batch, 0), dy.pick_batch_elem(batch, 1)]
    rebuilt = dy.concatenate_to_batch(elements)
    self.assertTrue(np.allclose(rebuilt.npvalue(), self.pval.T))
def test_inputTensor_batched_list(self):
    """`inputTensor` on a list of arrays gives a batched expression.

    For each of the first four entries of `self.shapes`, checks the element
    dimensions, the batch size, the round-tripped value and the summed
    squared norm.
    """
    for i in range(4):
        dy.renew_cg()
        shape = self.shapes[i]
        input_tensor = self.input_vals.reshape(shape)
        batch_items = [np.asarray(x).transpose() for x in input_tensor.transpose()]
        xb = dy.inputTensor(batch_items)

        expected_elem_dim = shape[:-1] if i > 0 else (1,)
        self.assertEqual(xb.dim()[0], expected_elem_dim, msg="Dimension mismatch")
        self.assertEqual(xb.dim()[1], shape[-1], msg="Dimension mismatch")
        self.assertTrue(
            np.allclose(xb.npvalue(), input_tensor),
            msg="Expression value different from initial value",
        )
        total_sq_norm = dy.sum_batches(dy.squared_norm(xb)).scalar_value()
        self.assertEqual(total_sq_norm, self.squared_norm, msg="Value mismatch")
def calc_loss(sent):
    """Return the summed negative log-likelihood of the target given the source.

    :param sent: Pair `(src, trg)` of word-id sequences.
    :returns: DyNet expression, the sum of per-step losses for `trg[1:]`.
    """
    dy.renew_cg()
    src, trg = sent[0], sent[1]

    # Encode the source and keep the final LSTM output.
    encoder_start = LSTM_SRC_BUILDER.initial_state()
    src_output = encoder_start.add_inputs([LOOKUP_SRC[w] for w in src])[-1].output()

    # Walk the target sentence, predicting each word from the one before it.
    step_losses = []
    decoder = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)
    for next_word in trg[1:]:
        decoder = decoder.add_input(LOOKUP_TRG[prev_word])
        logits = dy.affine_transform([b_sm, W_sm, decoder.output()])
        step_losses.append(dy.pickneglogsoftmax(logits, next_word))
        prev_word = next_word
    return dy.esum(step_losses)
def calc_scores(words):
    """Score a sentence from the final outputs of a forward and a backward LSTM."""
    dy.renew_cg()
    embeddings = [dy.lookup(W_emb, word_id) for word_id in words]
    forward_outputs = fwdLSTM.initial_state().transduce(embeddings)
    backward_outputs = bwdLSTM.initial_state().transduce(reversed(embeddings))
    sentence_rep = dy.concatenate([forward_outputs[-1], backward_outputs[-1]])
    return W_sm * sentence_rep + b_sm
def test_gradient(self):
    """After a full backward pass, d(dot(x, y))/dx must equal `self.v2`."""
    dy.renew_cg()
    x = dy.inputTensor(self.v1)
    y = dy.inputTensor(self.v2)
    l = dy.dot_product(x, y)
    l.forward()
    l.backward(full=True)
    # The message used to have three placeholders for five arguments, so the
    # last two values were silently dropped from the failure output.
    self.assertTrue(
        np.allclose(x.gradient(), self.v2),
        msg="{}\n{}\n{}\n{}\n{}".format(
            l.value(), x.gradient(), self.v2, y.gradient(), self.v2))
def test_get_parameters(self):
    """Parameter expressions of the RNN match the stored parameter arrays."""
    dy.renew_cg()
    self.rnn.initial_state()
    layer_params = self.rnn.get_parameters()
    layer_exprs = self.rnn.get_parameter_expressions()
    for params, exprs in zip(layer_params, layer_exprs):
        for param, expr in zip(params, exprs):
            self.assertTrue(np.allclose(expr.npvalue(), param.as_array()))
def calc_loss(sents):
    """Compute the masked, batched translation loss for a list of sentence pairs.

    :param sents: List of `(src, trg)` pairs of word-id sequences. Source
        sentences are indexed position by position without padding, so they
        are assumed to share one length -- TODO confirm against the batching code.
    :returns: `(loss, num_words)`, where `loss` is the DyNet expression summing
        the masked per-step losses over the batch and `num_words` is the
        number of unmasked target positions.
    """
    dy.renew_cg()

    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]
    max_src_len = np.max(src_len)
    num_words = 0

    # Transpose the source batch: one list of word ids per time step.
    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])

    # Encode the batch and keep the final LSTM output.
    init_state_src = LSTM_SRC_BUILDER.initial_state()
    src_output = init_state_src.add_inputs(
        [dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])[-1].output()

    # Decoder: pad short targets with eos_trg and mask the padding.
    all_losses = []
    tgt_cws = []
    # Lengths must come from the target sentences. Measuring `sents` gives the
    # length of each (src, trg) pair -- always 2 -- which truncated decoding
    # to the first two target positions.
    tgt_len = [len(sent) for sent in tgt_sents]
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    # NOTE(review): `masks` is zipped from index 0 while `next_words` starts at
    # index 1, so each step is masked by whether the *previous* position is a
    # real word. Confirm this alignment is intended.
    for next_words, mask in zip(tgt_cws[1:], masks):
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        loss = (dy.pickneglogsoftmax_batch(s, next_words))
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,), len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words
def test_param_change_after_update(self):
    """Adding parameters between updates must not break the trainer.

    There are no assertions; the test passes if no call raises.
    """
    for make_trainer in (dy.SimpleSGDTrainer, dy.AdamTrainer):
        trainer = make_trainer(self.m)
        for _ in range(100):
            new_param = self.m.add_parameters((1,))
            dy.renew_cg()
            new_param.forward()
            new_param.backward()
            trainer.update()
def calc_loss(words, labels, heads):
    """Encode the sentence with a BiLSTM and return the biaffine parser's loss."""
    dy.renew_cg()
    embeddings = [dy.lookup(W_emb, word_id) for word_id in words]
    forward_outputs = fwdLSTM.initial_state().transduce(embeddings)
    backward_outputs = bwdLSTM.initial_state().transduce(reversed(embeddings))

    # Pair each forward output with the backward output for the same token.
    src_encodings = []
    for fwd, bwd in zip(forward_outputs, reversed(backward_outputs)):
        token_rep = dy.concatenate([fwd, bwd])
        src_encodings.append(dy.reshape(token_rep, (HID_SIZE * 2, 1)))

    return biaffineParser.decode_loss(src_encodings, ([heads], [labels]))
def calc_acc(words, labels, heads):
    """Encode the sentence, decode heads and labels, and return the parser's accuracy."""
    dy.renew_cg()
    embeddings = [dy.lookup(W_emb, word_id) for word_id in words]
    forward_outputs = fwdLSTM.initial_state().transduce(embeddings)
    backward_outputs = bwdLSTM.initial_state().transduce(reversed(embeddings))

    # Pair each forward output with the backward output for the same token.
    src_encodings = []
    for fwd, bwd in zip(forward_outputs, reversed(backward_outputs)):
        token_rep = dy.concatenate([fwd, bwd])
        src_encodings.append(dy.reshape(token_rep, (HID_SIZE * 2, 1)))

    pred_heads, pred_labels = biaffineParser.decoding(src_encodings)
    return biaffineParser.cal_accuracy(pred_heads, pred_labels, heads, labels)
def test_save_load(self):
    """A parameter saved after an update loads back with the same value."""
    self.p.forward()
    self.p.backward()
    self.t.update()
    dy.renew_cg()

    saved_value = self.p.value()
    dy.save(self.file, [self.p])
    (restored,) = dy.load(self.file, self.m2)
    loaded_value = restored.value()
    self.assertTrue(np.allclose(saved_value, loaded_value))
def renew_cg(self):
    """Start a fresh computation graph and rebuild `self.param_exprs`.

    Called for every single instance; the expressions are re-created because
    they belong to the graph that was just replaced.
    """
    dy.renew_cg()
    # (key in self.param_exprs, key in self.params)
    aliases = (
        ('U', 'word_score_U'),
        ('pW', 'predict_W'),
        ('pb', 'predict_b'),
        ('<bos>', '<BoS>'),
    )
    self.param_exprs = {
        short: dy.parameter(self.params[full]) for short, full in aliases
    }
def calc_loss(sent):
    """Return the two VAE loss terms for one `(src, trg)` sentence pair.

    :returns: `(kl_loss, softmax_loss)` -- the KL term computed from the
        encoder's mean and log-variance, and the summed per-word negative
        log-likelihood of `trg[1:]`.
    """
    dy.renew_cg()
    src, trg = sent[0], sent[1]

    # Encode the source and keep the final LSTM output.
    encoder_start = LSTM_SRC_BUILDER.initial_state()
    src_output = encoder_start.add_inputs([LOOKUP_SRC[w] for w in src])[-1].output()

    # Parameters of the two MLPs producing the mean and the log-variance.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)
    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    mu = mlp(src_output, W_mean, V_mean, b_mean)
    # Working with the log of the diagonal variance keeps later steps simpler.
    log_var = mlp(src_output, W_var, V_var, b_var)

    # KL[N(mu, sigma) || N(0, I)] = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(
        1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # Decode the target sentence from the sampled latent vector.
    step_losses = []
    decoder = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)
    for next_word in trg[1:]:
        decoder = decoder.add_input(LOOKUP_TRG[prev_word])
        logits = dy.affine_transform([b_sm, W_sm, decoder.output()])
        step_losses.append(dy.pickneglogsoftmax(logits, next_word))
        prev_word = next_word

    softmax_loss = dy.esum(step_losses)
    return kl_loss, softmax_loss
def calc_scores(wids):
    """Return the class-score expression of the CNN classifier for one sentence.

    :param wids: List of word ids; padded with 0 up to `WIN_SIZE` on a copy,
        so the caller's list is left untouched.
    :returns: DyNet expression `W_sm * pooled + b_sm`.
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        # Build a new list rather than using `+=`, which would pad the caller's data in place.
        wids = wids + [0] * (WIN_SIZE - len(wids))
    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    # Max over dimension 1, flatten to FILTER_SIZE values, then ReLU.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
def calc_scores(words):
    """Return one score expression per word, computed from the LSTM outputs."""
    dy.renew_cg()
    # Run the LSTM over the embedded sentence.
    hidden_states = LSTM.transduce([LOOKUP[word_id] for word_id in words])
    # Affine output layer applied to every position.
    weights = dy.parameter(W_sm)
    bias = dy.parameter(b_sm)
    return [dy.affine_transform([bias, weights, h]) for h in hidden_states]
def generate_sent():
    """Sample a sentence word by word from the n-gram model.

    Sampling stops when the symbol `S` is drawn or when `MAX_LEN` words have
    already been collected.
    """
    dy.renew_cg()
    history = [S] * N
    generated = []
    while True:
        scores = calc_score_of_history(history)
        probs = dy.softmax(scores).npvalue()
        # Renormalise so the probabilities sum to exactly 1 for np.random.choice.
        sampled = np.random.choice(nwords, p=probs / probs.sum())
        if sampled == S or len(generated) == MAX_LEN:
            break
        generated.append(sampled)
        history = history[1:] + [sampled]
    return generated
def calc_sent_loss(sent):
    """Return the summed negative log-likelihood of `sent` followed by `S`."""
    dy.renew_cg()
    # The history starts out filled with the sentence-boundary symbol.
    history = [S] * N
    step_losses = []
    # The boundary symbol is appended so that ending the sentence is scored too.
    for target in sent + [S]:
        scores = calc_score_of_history(history)
        step_losses.append(dy.pickneglogsoftmax(scores, target))
        history = history[1:] + [target]
    return dy.esum(step_losses)
def calc_scores(tree):
    """Return the score expression for `tree`: an affine map of its embedding."""
    dy.renew_cg()
    tree_embedding = builder.expr_for_tree(tree)
    weights = dy.parameter(W_sm)
    bias = dy.parameter(b_sm)
    return weights * tree_embedding + bias
def _train(self, sentences, transition_system, evaluate, relations, triggers=None):
    """Run one training pass over `sentences` with a hinge loss on transitions.

    For every sentence longer than 2 items, the parser state is advanced with
    the best-scoring *correct* transition at each step, and a hinge loss over
    all legal transitions is collected. Losses are summed, back-propagated and
    applied through `self.trainer` once more than 50 have accumulated, and
    once more at the end for any remainder. Progress is printed every 100
    sentences and at the end of the pass.

    Args:
        sentences: Iterable of sentences; each item's `children` is reset.
        transition_system: Callable building the parse state for a sentence.
        evaluate: Callable `(stack, buffer, features)` returning two score
            expressions, or three when `triggers` is given.
        relations: Sequence of arc labels; label `j` uses score index
            `1 + 2*j` (left) or `2 + 2*j` (right).
        triggers: Optional sequence of trigger labels; a truthy value selects
            the trigger-aware branch, which builds `new_Transition` objects.
    """
    start_chunk = time.time()
    start_all = time.time()
    loss_chunk = 0
    loss_all = 0
    total_chunk = 0
    total_all = 0
    losses = []
    self.set_empty_vector()
    for i, sentence in enumerate(sentences):
        if i != 0 and i % 100 == 0:
            end = time.time()
            # NOTE(review): total_chunk is 0 here if no loss chunk was
            # processed in the last 100 sentences -> ZeroDivisionError.
            print(
                f'count: {i}\tloss: {loss_chunk/total_chunk:.4f}\ttime: {end-start_chunk:,.2f} secs'
            )
            start_chunk = end
            loss_chunk = 0
            total_chunk = 0
        if len(sentence) > 2:
            for e in sentence:
                e.children = []
            # assign embedding to each word
            features = self.extract_features(sentence, drop_word=True)
            # initialize sentence parse
            state = transition_system(sentence)
            # parse sentence
            while not state.is_terminal():
                outputs = evaluate(state.stack, state.buffer, features)
                if triggers:
                    dy_op_scores, dy_lbl_scores, dy_tg_scores = outputs
                    np_tg_scores = dy_tg_scores.npvalue()
                else:
                    dy_op_scores, dy_lbl_scores = outputs
                # get scores in numpy arrays
                np_op_scores = dy_op_scores.npvalue()
                np_lbl_scores = dy_lbl_scores.npvalue()
                # collect all legal transitions
                legal_transitions = []
                if triggers:
                    for lt in state.all_legal():
                        ix = state.t2i[lt]
                        if lt == "shift":
                            # NOTE(review): enumerate starts at 2, so the
                            # `j == 1` test below can never be true.
                            for j, tg in enumerate(triggers[1:], start=2):
                                if (hasattr(state.buffer[0], 'is_parent')
                                        and state.buffer[0].is_parent
                                        and j == 1):
                                    continue
                                t = new_Transition(
                                    lt, None, tg, np_op_scores[ix] +
                                    np_lbl_scores[0] + np_tg_scores[j],
                                    dy_op_scores[ix] + dy_lbl_scores[0] +
                                    dy_tg_scores[j])
                                legal_transitions.append(t)
                        if lt == "drop":
                            # "drop" yields two candidates, with trigger
                            # score indices 1 ("O") and 4 ("Protein").
                            t = new_Transition(
                                lt, None, "O", np_op_scores[ix] +
                                np_lbl_scores[0] + np_tg_scores[1],
                                dy_op_scores[ix] + dy_lbl_scores[0] +
                                dy_tg_scores[1])
                            legal_transitions.append(t)
                            t = new_Transition(
                                lt, None, "Protein", np_op_scores[ix] +
                                np_lbl_scores[0] + np_tg_scores[4],
                                dy_op_scores[ix] + dy_lbl_scores[0] +
                                dy_tg_scores[4])
                            legal_transitions.append(t)
                        if lt in ['left_reduce', 'left_attach']:
                            for j, r in enumerate(relations):
                                k = 1 + 2 * j
                                t = new_Transition(
                                    lt, r, None, np_op_scores[ix] +
                                    np_lbl_scores[k] + np_tg_scores[0],
                                    dy_op_scores[ix] + dy_lbl_scores[k] +
                                    dy_tg_scores[0])
                                legal_transitions.append(t)
                        if lt in ['right_reduce', 'right_attach']:
                            for j, r in enumerate(relations):
                                k = 2 + 2 * j
                                t = new_Transition(
                                    lt, r, None, np_op_scores[ix] +
                                    np_lbl_scores[k] + np_tg_scores[0],
                                    dy_op_scores[ix] + dy_lbl_scores[k] +
                                    dy_tg_scores[0])
                                legal_transitions.append(t)
                        if lt == "swap":
                            t = new_Transition(
                                lt, None, None, np_op_scores[ix] +
                                np_lbl_scores[0] + np_tg_scores[0],
                                dy_op_scores[ix] + dy_lbl_scores[0] +
                                dy_tg_scores[0])
                            legal_transitions.append(t)
                    # collect all correct transitions
                    correct_transitions = []
                    for t in legal_transitions:
                        if state.is_correct(t[0]):
                            relation = state.get_arc_label_for_transition(
                                t[0])
                            label = state.get_token_label_for_transition(
                                t[0])
                            if t[1] == relation and t[2] == label:
                                correct_transitions.append(t)
                else:
                    if state.is_legal('shift'):
                        ix = state.t2i['shift']
                        t = Transition('shift', None, None,
                                       np_op_scores[ix] + np_lbl_scores[0],
                                       dy_op_scores[ix] + dy_lbl_scores[0])
                        legal_transitions.append(t)
                    if state.is_legal('left_arc'):
                        ix = state.t2i['left_arc']
                        for j, r in enumerate(relations):
                            k = 1 + 2 * j
                            t = Transition(
                                'left_arc', r, None,
                                np_op_scores[ix] + np_lbl_scores[k],
                                dy_op_scores[ix] + dy_lbl_scores[k])
                            legal_transitions.append(t)
                    if state.is_legal('right_arc'):
                        ix = state.t2i['right_arc']
                        for j, r in enumerate(relations):
                            k = 2 + 2 * j
                            t = Transition(
                                'right_arc', r, None,
                                np_op_scores[ix] + np_lbl_scores[k],
                                dy_op_scores[ix] + dy_lbl_scores[k])
                            legal_transitions.append(t)
                    if state.is_legal('drop'):
                        ix = state.t2i['drop']
                        t = Transition('drop', None, None,
                                       np_op_scores[ix] + np_lbl_scores[0],
                                       dy_op_scores[ix] + dy_lbl_scores[0])
                        legal_transitions.append(t)
                    # collect all correct transitions
                    correct_transitions = []
                    for t in legal_transitions:
                        if state.is_correct(t):
                            if t.op in [
                                    'shift', 'drop'
                            ] or t.label in state.stack[-1].relation:
                                correct_transitions.append(t)
                # select transition
                # NOTE(review): max() raises ValueError if no legal
                # transition was judged correct.
                best_correct = max(correct_transitions,
                                   key=attrgetter('score'))
                i_correct = legal_transitions.index(best_correct)
                legal_scores = dy.concatenate(
                    [t.dy_score for t in legal_transitions])
                loss = dy.hinge(legal_scores, i_correct)
                # loss = dy.pickneglogsoftmax(legal_scores, i_correct)
                losses.append(loss)
                # perform transition
                selected = best_correct
                state.perform_transition(selected.op, selected.label,
                                         selected.trigger)
            # process losses in chunks
            if len(losses) > 50:
                # NOTE(review): the bare except hides every failure. If the
                # try block fails before `l` is assigned, the `+= l` lines
                # below use a stale value or raise NameError.
                try:
                    loss = dy.esum(losses)
                    l = loss.scalar_value()
                    loss.backward()
                    self.trainer.update()
                except:
                    pass
                dy.renew_cg()
                self.set_empty_vector()
                losses = []
                loss_chunk += l
                loss_all += l
                total_chunk += 1
                total_all += 1
    # consider any remaining losses
    if len(losses) > 0:
        # The remainder is applied to the model but not added to the
        # loss_all / total_all statistics printed below.
        try:
            loss = dy.esum(losses)
            loss.scalar_value()
            loss.backward()
            self.trainer.update()
        except:
            pass
        dy.renew_cg()
        self.set_empty_vector()
    end = time.time()
    print('\nend of epoch')
    # NOTE(review): total_all is 0 if no chunk ever exceeded 50 losses ->
    # ZeroDivisionError.
    print(
        f'count: {i}\tloss: {loss_all/total_all:.4f}\ttime: {end-start_all:,.2f} secs'
    )
def translate_sentence(self, sent):
    """Greedily translate the source word-id sequence `sent`.

    The source is encoded with a left-to-right and a right-to-left RNN; the
    decoder attends over the concatenated states and picks the argmax word at
    each step until `</s>` is produced or `self.max_len` is reached.

    :returns: The translated tokens without the leading `<s>`.
    """
    dy.renew_cg()

    W_y = dy.parameter(self.W_y)
    b_y = dy.parameter(self.b_y)
    W1_att_e = dy.parameter(self.W1_att_e)
    W1_att_f = dy.parameter(self.W1_att_f)
    w2_att = dy.parameter(self.w2_att)
    W1_att_lang = dy.parameter(self.W1_att_lang)

    src_table = self.src_lookup
    tgt_table = self.tgt_lookup
    reversed_sent = list(reversed(sent))

    # Bidirectional encoding.
    fwd_state = self.l2r_builder.initial_state()
    bwd_state = self.r2l_builder.initial_state()
    fwd_contexts = []
    bwd_contexts = []
    for fwd_word, bwd_word in zip(sent, reversed_sent):
        fwd_state = fwd_state.add_input(src_table[fwd_word])
        bwd_state = bwd_state.add_input(src_table[bwd_word])
        fwd_contexts.append(fwd_state.output())  # [<S>, x_1, x_2, ..., </S>]
        bwd_contexts.append(bwd_state.output())  # [</S>, x_n, x_{n-1}, ..., <S>]
    bwd_contexts.reverse()  # back to source order

    # One combined vector per source position.
    h_fs = [dy.concatenate([fwd, bwd])
            for fwd, bwd in zip(fwd_contexts, bwd_contexts)]
    encoded_h = h_fs[-1]
    h_fs_matrix = dy.concatenate_cols(h_fs)

    # Decoder set-up.
    trans_sentence = [u'<s>']
    cw = self.tgt_vocab[u'<s>']
    c_t = dy.vecInput(self.hidden_size * 2)
    c_t.set([0] * self.contextsize)
    dec_state = self.dec_builder.initial_state([encoded_h])
    langid = self.lang_li.index(self.rsrc_vocab[sent[1]])
    langeb = dy.lookup(self.langeb_lookup, langid)

    while len(trans_sentence) < self.max_len:
        embed = dy.lookup(tgt_table, cw)
        dec_state = dec_state.add_input(dy.concatenate([embed, c_t]))
        h_e = dec_state.output()
        # MLP attention conditioned on the language embedding. The plain
        # dot-product attention previously sketched here is not used.
        c_t = self.__attention_mlp(h_fs_matrix, h_e, W1_att_e, W1_att_f,
                                   w2_att, W1_att_lang, langeb)
        logits = W_y * dy.concatenate([h_e, c_t]) + b_y
        word_probs = dy.softmax(logits).npvalue()
        cw = np.argmax(word_probs)
        if cw == self.tgt_vocab[u'</s>']:
            break
        trans_sentence.append(self.rtgt_vocab[cw])

    return trans_sentence[1:]
def do_one_example(model, encoder, revcoder, decoder, encoder_params,
                   decoder_params, sentence_de, sentence_en, downstream=False,
                   GRU=False):
    """Compute the attention-decoder loss for one (source, target) pair.

    :param sentence_de: Source word ids, encoded forwards and in reverse.
    :param sentence_en: Target word ids; one loss term is produced per id.
    :param downstream: When true, the attention context is concatenated to the
        decoder output before scoring.
    :param GRU: Selects how the (currently unused) combined encoder state is built.
    :returns: `(total_loss, total_words)` -- the summed loss expression and
        `len(sentence_en)`.
    """
    dy.renew_cg()
    total_words = len(sentence_en)
    encoder_lookup = encoder_params["lookup"]
    decoder_lookup = decoder_params["lookup"]
    R = dy.parameter(decoder_params["R"])
    bias = dy.parameter(decoder_params["bias"])

    # Forward encoder.
    fwd_start = encoder.initial_state()
    fwd_inputs = [dy.lookup(encoder_lookup, de) for de in sentence_de]
    fwd_states = fwd_start.add_inputs(fwd_inputs)
    encoder_outputs = [state.output() for state in fwd_states]

    # Reverse encoder.
    bwd_start = revcoder.initial_state()
    bwd_inputs = [dy.lookup(encoder_lookup, de) for de in sentence_de[::-1]]
    bwd_states = bwd_start.add_inputs(bwd_inputs)
    revcoder_outputs = [state.output() for state in bwd_states]

    # NOTE(review): final_coding_output and final_coding_state are computed but
    # never used below; the decoder is seeded with final_state_reverse only.
    final_coding_output = encoder_outputs[-1] + revcoder_outputs[-1]
    final_state = fwd_states[-1].s()
    final_state_reverse = bwd_states[-1].s()
    if GRU:
        final_coding_state = final_state_reverse + final_state
    else:
        final_coding_state = ((final_state_reverse[0] + final_state[0]),
                              (final_state_reverse[1] + final_state[1]))

    # Per-position sum of the forward output and the matching reverse output.
    final_combined_outputs = [
        rev_out + fwd_out
        for rev_out, fwd_out in zip(revcoder_outputs[::-1], encoder_outputs)
    ]

    def attend(query):
        # Dot-product attention of `query` over the combined encoder outputs.
        weights = dy.softmax(dy.concatenate([
            dy.dot_product(query, combined)
            for combined in final_combined_outputs
        ]))
        return attend_vector(final_combined_outputs, weights)

    dec_state = decoder.initial_state().set_s(final_state_reverse)
    dec_output = dec_state.output()
    context = attend(dec_output)

    losses = []
    for en in sentence_en:
        # Score the current target word from the decoder output.
        if downstream:
            scores = R * dy.concatenate([dec_output, context]) + bias
        else:
            scores = R * dec_output + bias
        losses.append(dy.pickneglogsoftmax(scores, en))

        # Feed the word and the current context, then refresh the attention.
        step_input = dy.concatenate([dy.lookup(decoder_lookup, en), context])
        dec_state = dec_state.add_input(step_input)
        dec_output = dec_state.output()
        context = attend(dec_output)

    total_loss = dy.esum(losses)
    return total_loss, total_words
def get_loss(self, input, output):
    """Embed and encode `input` on a fresh graph, then return the decoder's loss against `output`."""
    dynet.renew_cg()
    encoded = self.encode_seq(self.embed_seq(input))
    return self.decode(encoded, output)
def beam_search_generate(self, src_seq, beam_n=5):
    """Generate up to `beam_n` output sequences for `src_seq` by beam search.

    A hypothesis is complete when its last token equals
    `self.tgt_vocab.END_TOK` or it is longer than `5 * len(src_seq)`.

    :returns: The token lists of the completed hypotheses, ordered by
        accumulated negative log-probability (lowest first).
    """
    dynet.renew_cg()
    embedded = self.embed_seq(src_seq)
    input_vectors = self.encode_seq(embedded)

    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)

    s = self.dec_lstm.initial_state()
    s = s.add_input(
        dynet.concatenate([
            input_vectors[-1],
            dynet.vecInput(self.args.hidden_dim * 2),
            dynet.vecInput(self.pronouncer.args.hidden_dim * 2),
        ]))
    beams = [{"state": s, "out": [], "err": 0}]
    completed_beams = []

    while len(completed_beams) < beam_n:
        candidates = []
        for beam in beams:
            if beam["out"]:
                # Advance this hypothesis with its last emitted token.
                last_token = beam["out"][-1]
                attn_vector = self.attend(input_vectors, beam["state"])
                embed_vector = self.tgt_lookup[last_token.i]
                spelling = [
                    self.pronouncer.src_vocab[letter]
                    for letter in last_token.s.upper()
                ]
                embedded_spelling = self.pronouncer.embed_seq(spelling)
                pron_vector = self.pronouncer.encode_seq(embedded_spelling)[-1]
                fpv = dynet.nobackprop(pron_vector)
                inp = dynet.concatenate([embed_vector, attn_vector, fpv])
                s = beam["state"].add_input(inp)
            # The initial hypothesis (no output yet) reuses the start state in `s`.
            probs = dynet.softmax(w * s.output() + b).vec_value()
            for next_i, prob in enumerate(probs):
                candidates.append({
                    "state": s,
                    "out": beam["out"] + [self.tgt_vocab[next_i]],
                    "err": beam["err"] - math.log(prob),
                })

        candidates.sort(key=lambda cand: cand["err"])
        beams = candidates[:beam_n - len(completed_beams)]

        finished = [
            beam for beam in beams
            if beam["out"][-1] == self.tgt_vocab.END_TOK
            or len(beam["out"]) > 5 * len(src_seq)
        ]
        completed_beams = completed_beams + finished
        beams = [
            beam for beam in beams
            if beam["out"][-1] != self.tgt_vocab.END_TOK
            and len(beam["out"]) <= 5 * len(src_seq)
        ]

    completed_beams.sort(key=lambda cand: cand["err"])
    return [beam["out"] for beam in completed_beams]
def start_batch(self):
    """Begin a new batch: renew the DyNet graph and clear `self.batch_loss`."""
    dy.renew_cg()
    self.batch_loss = list()
def test_value_sanity(self):
    """Reading an expression after the graph was renewed raises RuntimeError."""
    dy.renew_cg()
    stale = dy.inputTensor(self.v1)
    dy.renew_cg()
    with self.assertRaises(RuntimeError):
        npvalue_callable(stale)
def test_inputTensor_except(self):
    """Calling `inputTensor` without data raises TypeError."""
    dy.renew_cg()
    with self.assertRaises(TypeError):
        dy.inputTensor(batched=True)
def parse(self, t, oracle_actions=None):
    """Parse token ids `t` with a shift-reduce parser.

    With `oracle_actions` the given actions are followed and the loss is
    accumulated; without them the highest-scoring valid action is taken and
    the resulting attachments are printed.

    :returns: The negated sum of the log-probabilities of the chosen actions,
        or `None` when no step required a choice.
    """
    dy.renew_cg()
    if oracle_actions:
        # Reversed so that pop() yields the actions in their original order.
        oracle_actions = list(oracle_actions)[::-1]

    stack_top = self.stackRNN.initial_state()
    toks = list(t)[::-1]
    stack = []
    buffer_state = self.buffRNN.initial_state()
    buffer = []

    empty_buffer_emb = dy.parameter(self.pempty_buffer_emb)
    W_comp = dy.parameter(self.pW_comp)
    b_comp = dy.parameter(self.pb_comp)
    W_s2h = dy.parameter(self.pW_s2h)
    b_s2h = dy.parameter(self.pb_s2h)
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)
    losses = []

    # Fill the buffer; tokens were reversed, so the first word ends up on top.
    for tok in toks:
        tok_embedding = self.WORDS_LOOKUP[tok]
        buffer_state = buffer_state.add_input(tok_embedding)
        buffer.append((buffer_state.output(), tok_embedding, self.vocab.i2w[tok]))

    while len(stack) != 1 or len(buffer) != 0:
        # Work out which actions the current parser state allows.
        valid_actions = []
        if len(buffer) > 0:  # shifting needs something in the buffer
            valid_actions.append(SHIFT)
        if len(stack) >= 2:  # reducing needs two items on the stack
            valid_actions.extend([REDUCE_L, REDUCE_R])

        # With a single valid action there is nothing to score.
        action = valid_actions[0]
        log_probs = None
        if len(valid_actions) > 1:
            buffer_embedding = buffer[-1][0] if buffer else empty_buffer_emb
            stack_embedding = stack[-1][0].output()  # stack is non-empty here
            parser_state = dy.concatenate([buffer_embedding, stack_embedding])
            hidden = dy.tanh(W_s2h * parser_state + b_s2h)
            logits = W_act * hidden + b_act
            log_probs = dy.log_softmax(logits, valid_actions)
            if oracle_actions is None:
                scored = enumerate(log_probs.vec_value())
                action = max(scored, key=itemgetter(1))[0]

        if oracle_actions is not None:
            action = oracle_actions.pop()
        if log_probs is not None:
            # Record the log-probability of the action actually taken.
            losses.append(dy.pick(log_probs, action))

        # Apply the action to the parser state.
        if action == SHIFT:
            _, tok_embedding, token = buffer.pop()
            stack_state, _ = stack[-1] if stack else (stack_top, '<TOP>')
            stack_state = stack_state.add_input(tok_embedding)
            stack.append((stack_state, token))
        else:
            # One of the reduce actions: combine the top two stack items.
            right = stack.pop()
            left = stack.pop()
            if action == REDUCE_R:
                head, modifier = left, right
            else:
                head, modifier = right, left
            top_stack_state, _ = stack[-1] if stack else (stack_top, '<TOP>')
            head_rep, head_tok = head[0].output(), head[1]
            mod_rep, mod_tok = modifier[0].output(), modifier[1]
            composed_rep = dy.rectify(
                W_comp * dy.concatenate([head_rep, mod_rep]) + b_comp)
            top_stack_state = top_stack_state.add_input(composed_rep)
            stack.append((top_stack_state, head_tok))
            if oracle_actions is None:
                print('{0} --> {1}'.format(head_tok, mod_tok))

    # Whatever remains on the stack is the root of the tree.
    if oracle_actions is None:
        head = stack.pop()[1]
        print('ROOT --> {0}'.format(head))

    return -dy.esum(losses) if losses else None
history = lambda x, y: open(os.path.join(config.save_dir, 'valid_history'), 'a').write('%.2f %.2f\n' % (x, y)) while global_step < config.train_iters: print time.strftime( "%Y-%m-%d %H:%M:%S", time.localtime()), '\nStart training epoch #%d' % (epoch, ) epoch += 1 lamb = (global_step * 1.0) / config.train_iters for words, tags, arcs, rels, domain_flag in data: num = int(words.shape[1] / 2) words_ = [words[:, :num], words[:, num:]] tags_ = [tags[:, :num], tags[:, num:]] arcs_ = [arcs[:, :num], arcs[:, num:]] rels_ = [rels[:, :num], rels[:, num:]] for step in xrange(2): dy.renew_cg() common_top_recur, private_top_recur, p_fs, p_bs = parser.run_lstm( words_[step], tags_[step]) if domain_flag == 0: arc_accuracy, rel_accuracy, overall_accuracy, parser_loss = parser.run_parser( words_[step], common_top_recur, private_top_recur, arc_targets=arcs_[step], rel_targets=rels_[step]) parser_loss = parser_loss * 0.5 parser_loss.backward() class_loss, class_accurate = parser.run_classifier( common_top_recur, words_[step], domain_flag) class_loss = lamb * class_loss * 0.5 class_loss.backward()
def predict(self, word_indices, char_indices, train=False):
    """Predict tags for a sentence represented as char+word embeddings.

    Args:
        word_indices: indices into ``self.wembeds``, one per token.
        char_indices: per-token sequences of indices into ``self.cembeds``;
            only read when ``self.c_in_dim > 0``.
        train: when true, noise is added to the input features and, if
            ``self.noise_sigma > 0``, to the final concatenated layer.

    Returns:
        The result of the output predictor's ``predict_sequence`` on the
        concatenated forward/backward states of the last layer.

    Raises:
        RuntimeError: if no layer produced an output (for example when
            ``self.h_layers`` is 0).
    """
    dynet.renew_cg()  # new graph

    wfeatures = [self.wembeds[w] for w in word_indices]

    if self.c_in_dim > 0:
        # get representation for words
        char_emb = []
        rev_char_emb = []
        for chars_of_token in char_indices:
            # use last state as word representation
            last_state = self.char_rnn.predict_sequence(
                [self.cembeds[c] for c in chars_of_token])[-1]
            rev_last_state = self.char_rnn.predict_sequence(
                [self.cembeds[c] for c in reversed(chars_of_token)])[-1]
            char_emb.append(last_state)
            rev_char_emb.append(rev_last_state)
        # NOTE(review): ``reversed(rev_char_emb)`` reverses the order of the
        # tokens, not of the characters, so token i is paired with the
        # reversed-char state of token n-1-i.  Kept unchanged because
        # already-trained models depend on it; confirm whether it is intended.
        features = [
            dynet.concatenate([w, c, rev_c])
            for w, c, rev_c in zip(wfeatures, char_emb, reversed(rev_char_emb))
        ]
    else:
        features = wfeatures

    if train:  # only do at training time
        features = [dynet.noise(fe, self.noise_sigma) for fe in features]

    num_layers = self.h_layers
    output_expected_at_layer = num_layers - 1

    # go through layers
    # input is now combination of w + char emb
    prev = features
    prev_rev = features
    for i in range(num_layers):
        predictor = self.predictors["inner"][i]
        forward_sequence, backward_sequence = predictor.predict_sequence(
            prev, prev_rev)
        if i > 0 and self.activation:
            # activation between LSTM layers
            forward_sequence = [self.activation(s) for s in forward_sequence]
            backward_sequence = [self.activation(s) for s in backward_sequence]

        if i == output_expected_at_layer:
            output_predictor = self.predictors["output_layers_dict"]
            # The backward sequence is reversed before pairing it with the
            # forward one.
            concat_layer = [
                dynet.concatenate([f, b])
                for f, b in zip(forward_sequence, reversed(backward_sequence))
            ]
            if train and self.noise_sigma > 0.0:
                concat_layer = [
                    dynet.noise(fe, self.noise_sigma) for fe in concat_layer
                ]
            return output_predictor.predict_sequence(concat_layer)

        prev = forward_sequence
        prev_rev = backward_sequence

    # Only reached when the loop never hit the output layer.
    raise RuntimeError("oops should not be here")
def train(corpus, bigrams_dims, unigrams_dims, lstm_units, hidden_units,
          epochs, batch_size, train_data_file, dev_data_file,
          model_save_file, droprate, unk_params, alpha, beta):
    """Train a ``Network`` on the gold data in ``train_data_file``.

    Each epoch shuffles the training data and processes it in batches of
    ``batch_size``.  Four times per epoch (and after the last batch) the
    network is scored on ``dev_data_file``; whenever the dev F-score
    improves, the network is saved to ``model_save_file``.

    ``unk_params`` drives the random UNKing: an n-gram id above 2 is replaced
    by 0 with probability ``unk_params / (unk_params + frequency)``.
    ``alpha`` and ``beta`` are passed through to ``Segmenter.exploration``.

    Returns:
        The trained ``Network``.
    """
    start_time = time.time()
    fm = corpus
    bigrams_size = corpus.total_bigrams()
    unigrams_size = corpus.total_unigrams()
    network = Network(
        bigrams_size=bigrams_size,
        unigrams_size=unigrams_size,
        bigrams_dims=bigrams_dims,
        unigrams_dims=unigrams_dims,
        lstm_units=lstm_units,
        hidden_units=hidden_units,
        label_size=fm.total_labels(),
        span_nums=fm.total_span_nums(),
        droprate=droprate,
    )
    network.init_params()
    print('Hidden units : {} ,per LSTM units : {}'.format(
        hidden_units,
        lstm_units,
    ))
    print('Embeddings: bigrams = {}, unigrams = {}'.format(
        (bigrams_size, bigrams_dims), (unigrams_size, unigrams_dims)))
    print('Dropout rate : {}'.format(droprate))
    print('Parameters initialized in [-0.01,0.01]')
    print('Random UNKing parameter z = {}'.format(unk_params))
    training_data = corpus.gold_data_from_file(train_data_file)
    # Ceiling division: number of batches needed to cover the data.
    num_batched = -(-len(training_data) // batch_size)
    print('Loaded {} training sentences ({} batches of size {})!'.format(
        len(training_data),
        num_batched,
        batch_size,
    ))
    # Evaluate on the dev set every quarter of an epoch (rounded up).
    parse_every = -(-num_batched // 4)
    dev_sentences = SegSentence.load_sentence_file(dev_data_file)
    print('Loaded {} validation sentences!'.format(len(dev_sentences)))
    best_acc = FScore()
    for epoch in xrange(1, epochs + 1):
        print('............ epoch {} ............'.format(epoch))
        total_cost = 0.0
        total_states = 0
        training_acc = FScore()
        np.random.shuffle(training_data)
        for b in xrange(num_batched):
            batch = training_data[(b * batch_size):(b + 1) * batch_size]
            # exploration() returns (example, accuracy) pairs; the examples
            # it returns replace the raw batch below.
            explore = [
                Segmenter.exploration(example,
                                      fm,
                                      network,
                                      alpha=alpha,
                                      beta=beta) for example in batch
            ]
            for (_, acc) in explore:
                training_acc += acc
            batch = [example for (example, _) in explore]
            dynet.renew_cg()
            network.prep_params()
            errors = []
            for example in batch:
                ## random UNKing ##
                # Ids <= 2 are never replaced.  The replacement is written
                # back into ``example`` in place.
                for (i, uni) in enumerate(example['unigrams']):
                    if uni <= 2:
                        continue
                    u_freq = fm.unigrams_freq_list[uni]
                    drop_prob = unk_params / (unk_params + u_freq)
                    r = np.random.random()
                    if r < drop_prob:
                        example['unigrams'][i] = 0
                for (i, bi) in enumerate(example['fwd_bigrams']):
                    if bi <= 2:
                        continue
                    b_freq = fm.bigrams_freq_list[bi]
                    drop_prob = unk_params / (unk_params + b_freq)
                    r = np.random.random()
                    if r < drop_prob:
                        example['fwd_bigrams'][i] = 0
                fwd, back = network.evaluate_recurrent(
                    example['fwd_bigrams'],
                    example['unigrams'],
                )
                # One negative-log-likelihood term per labelled span.
                for (left, right), correct in example['label_data'].items():
                    scores = network.evaluate_labels(
                        fwd, back, left, right)
                    probs = dynet.softmax(scores)
                    loss = -dynet.log(dynet.pick(probs, correct))
                    errors.append(loss)
                total_states += len(example['label_data'])
            batch_error = dynet.esum(errors)
            total_cost += batch_error.scalar_value()
            batch_error.backward()
            network.trainer.update()
            # Running mean over all states seen so far in this epoch.
            # NOTE(review): raises ZeroDivisionError if no example seen so
            # far carried any label_data.
            mean_cost = total_cost / total_states
            print(
                '\rBatch {} Mean Cost {:.4f} [Train: {}]'.format(
                    b,
                    mean_cost,
                    training_acc,
                ),
                end='',
            )
            sys.stdout.flush()
            if ((b + 1) % parse_every) == 0 or b == (num_batched - 1):
                dev_acc = Segmenter.evaluate_corpus(
                    dev_sentences,
                    fm,
                    network,
                )
                print(' [Val: {}]'.format(dev_acc))
                if dev_acc.fscore() > best_acc.fscore():
                    best_acc = dev_acc
                    network.save(model_save_file)
                    print(' [saved model : {}]'.format(model_save_file))
        current_time = time.time()
        runmins = (current_time - start_time) / 60
        print(' Elapsed time: {:.2f}m'.format(runmins))
    return network
def CalculateLossForDaf(daf, fValidation=False):
    """Tag every word of one daf with a POS and compute loss or precision.

    ``daf`` is a dict with a ``"file"`` entry and a ``"words"`` list of
    ``(word, gold_word_class, gold_word_pos)`` triples.  Only words whose
    ``gold_word_class`` equals 1 count towards the loss and the precision.

    Returns:
        ``(pos_prec, tagged_daf)`` when ``fValidation`` is true, otherwise
        ``(total_loss, pos_prec)``.  ``pos_prec`` is ``None`` when no word
        was counted; ``total_loss`` is ``None`` when no loss was collected.
    """
    dy.renew_cg()
    tagged_daf = {"words": [], "file": daf["file"]}
    daf = daf["words"]
    # add a bos before and after
    seq = ['*BOS*'] + list(' '.join([word for word, _, _ in daf])) + ['*BOS*']
    # get all the char encodings for the daf
    char_embeds = [let_enc(let) for let in seq]
    # run it through the bilstm
    char_bilstm_outputs = bilstm(char_embeds)
    # now iterate and get all the separate word representations by concatenating the bilstm output
    # before and after the word
    word_bilstm_outputs = []
    iLet_start = 0
    for iLet, char in enumerate(seq):
        # if it is a bos, check if it's at the end of the sequence
        if char == '*BOS*':
            if iLet + 1 == len(seq):
                # Treat the closing BOS as a space so the last word is emitted.
                char = ' '
            else:
                continue
        # if we are at a space, take this bilstm output and the one at the letter start
        if char == ' ':
            cur_word_bilstm_output = dy.concatenate(
                [char_bilstm_outputs[iLet_start], char_bilstm_outputs[iLet]])
            # add it in
            word_bilstm_outputs.append(cur_word_bilstm_output)
            # set the iLet_start counter to here
            iLet_start = iLet
    # safe-check, make sure word bilstm outputs length is the same as the daf
    # (a mismatch is only logged; the zip() below then stops at the shorter one)
    if len(word_bilstm_outputs) != len(daf):
        log_message('Size mismatch!! word_bilstm_outputs: ' +
                    str(len(word_bilstm_outputs)) + ', daf: ' + str(len(daf)))
    prev_pos_lstm_state = prev_pos_lstm.initial_state().add_input(
        pos_enc('*BOS*'))
    all_losses = []
    pos_prec = 0.0
    pos_items = 0
    # now iterate through the bilstm outputs, and each word in the daf
    for (word, gold_word_class, gold_word_pos), bilstm_output in zip(daf, word_bilstm_outputs):
        should_backprop = gold_word_class == 1
        # create the mlp input, a concatenate of the bilstm output and of the prev pos output
        mlp_input = dy.concatenate(
            [bilstm_output, prev_pos_lstm_state.output()])
        # run through the class mlp
        pos_mlp_output = pos_mlp(mlp_input)
        try:
            # Keep only the scores of the POS tags listed for this word in
            # pos_hashtable; every other entry stays 0.
            temp_pos_array = pos_mlp_output.npvalue()
            possible_pos_array = np.zeros(temp_pos_array.shape)
            pos_list = pos_hashtable[word]
            possible_pos_indices = [
                pos_vocab[temp_pos] for temp_pos in pos_list
            ]
            possible_pos_array[possible_pos_indices] = temp_pos_array[
                possible_pos_indices]
        except KeyError:
            # A lookup above failed (word or tag unknown): use the unmasked scores.
            possible_pos_array = pos_mlp_output.npvalue()
        predicted_word_pos = pos_vocab.getItem(np.argmax(possible_pos_array))
        # Share of the top score in the sum of all candidate scores.
        confidence = np.max(possible_pos_array) / np.sum(possible_pos_array)
        if should_backprop:
            pos_prec += 1 if predicted_word_pos == gold_word_pos else 0
            pos_items += 1
        # if we aren't doing validation, calculate the loss
        if not fValidation:
            if should_backprop:
                all_losses.append(
                    -dy.log(dy.pick(pos_mlp_output, pos_vocab[gold_word_pos])))
            # During training the gold POS is fed to the prev-pos LSTM.
            word_pos_ans = gold_word_pos
        # otherwise, set the answer to be the argmax
        else:
            if should_backprop:
                pos_conf_matrix(pos_vocab[predicted_word_pos],
                                pos_vocab[gold_word_pos])
            word_pos_ans = predicted_word_pos
        # run through the prev-pos-mlp
        prev_pos_lstm_state = prev_pos_lstm_state.add_input(
            pos_enc(word_pos_ans))
        tagged_daf["words"].append({
            "word": word,
            "gold_pos": gold_word_pos,
            "predicted_pos": predicted_word_pos,
            "confidence": confidence
        })
    pos_prec = pos_prec / pos_items if pos_items > 0 else None
    if fValidation:
        return pos_prec, tagged_daf
    total_loss = dy.esum(all_losses) if len(all_losses) > 0 else None
    return total_loss, pos_prec
def propogate(self, sentence):
    """Build root and POS-tag score expressions for every word of ``sentence``.

    Each word's surface form is encoded by a pair of character RNNs; the
    resulting word vectors go through a pair of context RNNs.  When
    ``self.stemming`` is set, every candidate (root, suffix) pair of the word
    is encoded too and multiplied with the word's context vector.  When
    ``self.postagging`` is set, the context vector is projected with
    ``pW`` / ``pb``.

    Returns:
        ``(root_scores, postag_scores)``: two lists with one entry per word
        for each enabled task (empty for a disabled task).
    """
    dy.renew_cg()

    surface_fwd = self.fwdRNN_surface.initial_state()
    surface_bwd = self.bwdRNN_surface.initial_state()
    if self.stemming:
        root_fwd = self.fwdRNN_root.initial_state()
        root_bwd = self.bwdRNN_root.initial_state()
        suffix_fwd = self.fwdRNN_suffix.initial_state()
        suffix_bwd = self.bwdRNN_suffix.initial_state()
    context_fwd = self.fwdRNN_context.initial_state()
    context_bwd = self.bwdRNN_context.initial_state()
    if self.postagging:
        W = dy.parameter(self.pW)
        b = dy.parameter(self.pb)

    def _as_row(vec):
        # View a column vector as a 1 x d matrix.
        return dy.reshape(vec, (1, vec.dim()[0][0]))

    def _last_both_ways(fwd_init, bwd_init, embeddings):
        # Final forward and backward RNN outputs over ``embeddings``, concatenated.
        forward = fwd_init.transduce(embeddings)
        backward = bwd_init.transduce(reversed(embeddings))
        return dy.concatenate([forward[-1], backward[-1]])

    # CONTEXT REPRESENTATIONS
    surface_words_rep = []
    for word in sentence:
        char_ids = self._encode(word.surface_word, self.char2id)
        char_embs = self._embed(char_ids, self.SURFACE_CHARS_LOOKUP)
        surface_words_rep.append(
            _last_both_ways(surface_fwd, surface_bwd, char_embs))

    left_to_right = context_fwd.transduce(surface_words_rep)
    right_to_left = context_bwd.transduce(reversed(surface_words_rep))

    root_scores = []
    postag_scores = []

    # Stem and POS REPRESENTATIONS
    for position, word in enumerate(sentence):
        if self.stemming:
            root_ids = [self._encode(root, self.char2id) for root in word.roots]
            suffix_ids = [
                self._encode(suffix, self.char2id) for suffix in word.suffixes
            ]
            root_embs = [
                self._embed(ids, self.ROOT_CHARS_LOOKUP) for ids in root_ids
            ]
            suffix_embs = [
                self._embed(ids, self.ROOT_CHARS_LOOKUP) for ids in suffix_ids
            ]

            root_stem_representations = []
            for root_emb, suffix_emb in zip(root_embs, suffix_embs):
                root_vec = dy.rectify(
                    _last_both_ways(root_fwd, root_bwd, root_emb))
                if len(suffix_emb) == 0:
                    # No suffix: the root alone represents the candidate.
                    root_stem_representations.append(root_vec)
                    continue
                suffix_vec = dy.rectify(
                    _last_both_ways(suffix_fwd, suffix_bwd, suffix_emb))
                root_stem_representations.append(
                    dy.rectify(dy.esum([root_vec, suffix_vec])))

        # The backward context outputs are in reversed word order.
        left_context = left_to_right[position]
        right_context = right_to_left[len(sentence) - position - 1]
        context_rep = dy.tanh(dy.esum([left_context, right_context]))

        if self.stemming:
            candidates = dy.concatenate(root_stem_representations, 1)
            root_scores.append((_as_row(context_rep) * candidates)[0])
        if self.postagging:
            postag_scores.append((_as_row(context_rep) * W + b)[0])

    return root_scores, postag_scores
def test_value(self):
    """An input tensor's ``npvalue()`` is close to the array it was built from."""
    dy.renew_cg()
    tensor = dy.inputTensor(self.v1)
    round_trip = tensor.npvalue()
    self.assertTrue(np.allclose(round_trip, self.v1))
def train(self, train_data, dev_data, num_epochs=120, batch_size=10):
    """Train on ``train_data`` with periodic updates and early stopping.

    ``train_data`` is a list of ``(x, y)`` pairs and is shuffled in place at
    the start of every epoch.  Losses are accumulated per example and one
    update is applied each time ``batch_size`` examples have been collected.
    Training-set accuracy is measured on the examples of each completed batch.

    After every epoch the model is scored with ``self.evaluate(dev_data)``.
    It is saved to ``"model.m.<id>"`` when the dev accuracy beats
    ``self.best_acc``.  Training stops early once the dev accuracies of the
    last ``patience`` epochs span less than 0.01.

    NOTE(review): the examples after the last full batch of an epoch are
    encoded but never used for an update; confirm whether that is intended.

    Returns:
        Always 0.
    """
    patience = 8
    for I in range(num_epochs):
        print("EPOCH NUMBER {}".format(I))
        random.shuffle(train_data)
        good, bad = 0., 0.
        losses = []
        preds = []
        for i, (x, y) in enumerate(train_data):
            if i % batch_size == 0 and i > 0:
                # A full batch has been collected: update, then score it.
                loss_sum = dy.esum(losses)
                loss_sum.forward()
                loss_sum.backward()
                self.trainer.update()
                losses = []

                # evaluate trainset accuracy
                for (word_probs, y_true) in preds:
                    generated_string = "".join(
                        self.I2C[np.argmax(char_probs.npvalue())]
                        for char_probs in word_probs)
                    if generated_string == y_true:
                        good += 1
                    else:
                        bad += 1
                preds = []
                # The expressions of the finished batch are no longer
                # needed; start a fresh graph for the next one.
                dy.renew_cg()

            encoded_state, encoded_x = self.encode(x)
            loss, probs = self.decode(encoded_state, y, encoded_x, train=True)
            preds.append((probs, y))
            losses.append(loss)

            if i % 2000 == 0 and i > 0:
                print(i)

        # No batch is scored when the data holds at most ``batch_size``
        # examples; report 0.0 instead of dividing by zero.
        scored = good + bad
        print("training accuracy: {}".format(good / scored if scored else 0.0))

        acc, _ = self.evaluate(dev_data)
        self.accs.append(acc)

        recent = self.accs[-patience:]
        if I > patience and abs(min(recent) - max(recent)) < 0.01:
            return 0

        if acc > self.best_acc:
            self.best_acc = acc
            self.model.save("model.m." + str(self.id))
    return 0
def test_pick_batch_elem(self):
    """Picking batch element 1 of a two-item lookup yields ``self.pval[1]``."""
    dy.renew_cg()
    batch = dy.lookup_batch(self.p, [0, 1])
    second = dy.pick_batch_elem(batch, 1)
    expected = self.pval[1]
    self.assertTrue(np.allclose(second.npvalue(), expected))
def get_perplexity(self, input, output):
    """Return ``exp(loss / (len(output) - 1))`` for one input/output pair.

    The loss is what ``self.decode`` returns for the encoded ``input`` and
    the given ``output``.
    """
    dynet.renew_cg()
    encoded = self.encode_seq(self.embed_seq(input))
    loss_value = self.decode(encoded, output).value()
    return math.exp(loss_value / (len(output) - 1))
def test_lookup_batch(self):
    """A batched lookup of rows 0 and 1 is close to ``self.pval`` transposed."""
    dy.renew_cg()
    batch = dy.lookup_batch(self.p, [0, 1])
    expected = self.pval.T
    self.assertTrue(np.allclose(batch.npvalue(), expected))
def beamDecode(model,
               encoder,
               revcoder,
               decoder,
               encoder_params,
               decoder_params,
               sentence_de,
               downstream=False,
               k=10):
    """Beam-search decode ``sentence_de`` with an attentional encoder-decoder.

    Args:
        model: unused here; kept for interface compatibility.
        encoder, revcoder: RNN builders run over the source sentence and its
            reverse.
        decoder: RNN builder for the target side; starts from the final
            state of ``revcoder``.
        encoder_params, decoder_params: dicts providing ``"lookup"`` (both)
            and ``"R"`` / ``"bias"`` (decoder).
        sentence_de: source token ids.
        downstream: when true, the attention context is concatenated to the
            decoder output before scoring.
        k: beam width; the search stops once ``2 * k`` hypotheses finished.

    Returns:
        ``(loss, sentence_en)``: the accumulated negative log-likelihood of
        the best hypothesis and its token ids.  If the beam runs dry before
        any hypothesis finishes, the best hypothesis of the last non-empty
        beam is returned.
    """
    dy.renew_cg()
    encoder_lookup = encoder_params["lookup"]
    decoder_lookup = decoder_params["lookup"]
    R = dy.parameter(decoder_params["R"])
    bias = dy.parameter(decoder_params["bias"])

    # Encode the sentence left-to-right and right-to-left.
    forward_start = encoder.initial_state()
    forward_inputs = [dy.lookup(encoder_lookup, de) for de in sentence_de]
    forward_states = forward_start.add_inputs(forward_inputs)
    encoder_outputs = [state.output() for state in forward_states]

    reverse_start = revcoder.initial_state()
    reverse_inputs = [dy.lookup(encoder_lookup, de) for de in sentence_de[::-1]]
    reverse_states = reverse_start.add_inputs(reverse_inputs)
    revcoder_outputs = [state.output() for state in reverse_states]

    final_state_reverse = reverse_states[-1].s()

    # Per-position sum of both directions, aligned to the forward order.
    final_combined_outputs = [
        revcoder_output + encoder_output
        for revcoder_output, encoder_output in zip(revcoder_outputs[::-1],
                                                   encoder_outputs)
    ]

    def _attend(decoder_output):
        # Dot-product attention of ``decoder_output`` over the source positions.
        alpha = dy.softmax(
            dy.concatenate([
                dy.dot_product(decoder_output, combined)
                for combined in final_combined_outputs
            ]))
        return attend_vector(final_combined_outputs, alpha)

    s_0 = decoder.initial_state().set_s(final_state_reverse)
    o_0 = s_0.output()
    c_0 = _attend(o_0)

    # A hypothesis is (decoder state, context, output, tokens, total loss).
    finishedSequences = []
    currentSequences = [(s_0, c_0, o_0, [], 0.0)]
    max_len = len(sentence_de) + 10

    while len(finishedSequences) < 2 * k:
        candidates = []
        for state, context, output, tokens, total in currentSequences:
            if downstream:
                scores = dy.affine_transform(
                    [bias, R, dy.concatenate([output, context])])
            else:
                scores = dy.affine_transform([bias, R, output])

            for token in topk(scores.npvalue(), k):
                token_loss = dy.pickneglogsoftmax(scores, token).value()
                step_input = dy.concatenate(
                    [dy.lookup(decoder_lookup, token), context])
                next_state = state.add_input(step_input)
                next_output = next_state.output()
                next_context = _attend(next_output)
                next_tokens = tokens + [token]
                candidate = (next_state, next_context, next_output,
                             next_tokens, total + token_loss)

                if token == STOP or len(next_tokens) > max_len:
                    # Ended hypotheses are kept only if they are not
                    # trivially short; the short ones are dropped.
                    if len(next_tokens) > 3 or len(next_tokens) >= len(
                            sentence_de):
                        finishedSequences.append(candidate)
                else:
                    candidates.append(candidate)

        if not candidates:
            # Nothing left to expand; without this exit the loop would
            # spin forever on an empty beam.
            break

        # Sort candidates by loss, lesser loss is better
        candidates.sort(key=lambda hyp: hyp[4])
        currentSequences = candidates[:k]

    # Prefer finished hypotheses; otherwise use the last live beam.
    pool = finishedSequences if finishedSequences else currentSequences
    best = min(pool, key=lambda hyp: hyp[4])
    # Return the score of the chosen hypothesis, not the loss of whichever
    # token happened to be expanded last.
    return best[4], best[3]
def total_loss(model, data):
    """Sum the ``nll`` values of every instance in ``data``.

    The computation graph is renewed after each instance has been evaluated.
    """
    running_total = 0
    for instance in tqdm(data, desc='Computing loss'):
        running_total += nll(model, instance).value()
        dy.renew_cg()
    return running_total
def step_batch(self, batch):
    """Build the batched loss expression for one minibatch.

    ``batch`` is a sequence of ``(source, target)`` pairs.  Both sides are
    transposed so that each step of the loops below handles one position
    across the whole batch.

    NOTE(review): the zip() results are indexed and sliced, and ``xrange``
    is used, so this relies on Python 2 semantics.

    Returns:
        ``(loss, num_words)``: the per-step losses summed over steps and over
        the batch, and the number of decoder steps taken.
    """
    dy.renew_cg()
    W_y = dy.parameter(self.W_y)
    b_y = dy.parameter(self.b_y)
    W1_att_e = dy.parameter(self.W1_att_e)
    W1_att_f = dy.parameter(self.W1_att_f)
    w2_att = dy.parameter(self.w2_att)
    W1_att_lang = dy.parameter(self.W1_att_lang)
    M_s = self.src_lookup
    M_t = self.tgt_lookup
    src_sent, tgt_sent = zip(*batch)
    # Transpose: one tuple per position, holding that position's token for
    # every sentence in the batch.
    src_sent = zip(*src_sent)
    tgt_sent = zip(*tgt_sent)
    src_sent_rev = list(reversed(src_sent))
    # Bidirectional representations
    l2r_state = self.l2r_builder.initial_state()
    r2l_state = self.r2l_builder.initial_state()
    l2r_contexts = []
    r2l_contexts = []
    for (cw_l2r, cw_r2l) in zip(src_sent, src_sent_rev):
        l2r_state = l2r_state.add_input(dy.lookup_batch(M_s, cw_l2r))
        r2l_state = r2l_state.add_input(dy.lookup_batch(M_s, cw_r2l))
        l2r_contexts.append(
            l2r_state.output())  # [<S>, x_1, x_2, ..., </S>]
        r2l_contexts.append(
            r2l_state.output())  # [</S> x_n, x_{n-1}, ... <S>]
    r2l_contexts.reverse()  # [<S>, x_1, x_2, ..., </S>]
    # Combine the left and right representations for every word
    h_fs = []
    for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
        h_fs.append(dy.concatenate([l2r_i, r2l_i]))
    # The representation of the last source position seeds the decoder.
    encoded_h = h_fs[-1]
    h_fs_matrix = dy.concatenate_cols(h_fs)
    losses = []
    num_words = 0
    # Decoder
    # Initial context vector: all zeros.
    # NOTE(review): created with size hidden_size * 2 but filled with
    # ``contextsize`` zeros; presumably the two are equal, confirm.
    c_t = dy.vecInput(self.hidden_size * 2)
    c_t.set([0 for i in xrange(self.contextsize)])
    encoded_h = dy.concatenate([encoded_h])
    dec_state = self.dec_builder.initial_state([encoded_h])
    # One language embedding per sentence, looked up from the token at
    # source position 1 (presumably a language tag; verify against caller).
    langeb = dy.lookup_batch(self.langeb_lookup, [
        self.lang_li.index(self.rsrc_vocab[item]) for item in src_sent[1]
    ])
    # cw is the current target position, nw the next one to be predicted.
    for (cw, nw) in zip(tgt_sent[0:-1], tgt_sent[1:]):
        embed = dy.lookup_batch(M_t, cw)
        dec_state = dec_state.add_input(dy.concatenate([embed, c_t]))
        h_e = dec_state.output()
        # calculate attention (the string below is a disabled alternative)
        '''
        a_t = h_fs_matrix_t * h_e
        alignment = dy.softmax(a_t)
        c_t = h_fs_matrix * alignment'''
        c_t = self.__attention_mlp_batch(h_fs_matrix, h_e, W1_att_e,
                                         W1_att_f, w2_att, W1_att_lang,
                                         langeb)
        ind_tem = dy.concatenate([h_e, c_t])
        ind_tem1 = W_y * ind_tem
        ind_tem2 = ind_tem1 + b_y
        loss = dy.pickneglogsoftmax_batch(ind_tem2, nw)  # to modify
        losses.append(loss)
        num_words += 1
    return dy.sum_batches(dy.esum(losses)), num_words
def train(model,
          train,
          dev,
          trainer,
          epochs,
          vec_drop,
          batch_size,
          logger,
          truth_ind=2,
          print_every=20000):
    """
    Train the attacker model and keep the checkpoint with the best dev accuracy.

    Every epoch runs one training pass over ``train`` and then scores both
    ``train`` and ``dev`` without training.  The model is saved to
    ``models_dir + task + '/best_attacker'`` whenever the dev accuracy
    improves.  The per-epoch accuracies and losses are logged at the end.

    :param model: attacker model
    :param train: training set
    :param dev: development/test set
    :param trainer: optimizer
    :param epochs: number of epochs
    :param vec_drop: representation vector (of the sentence) dropout
    :param batch_size: size of batch
    :param logger: logger that receives the progress and summary messages
    :param truth_ind: index of the truth in the train/dev set
    :param print_every: print every x examples in each epoch
    :return: None
    """

    def run_pass(dataset, do_update):
        # One pass over ``dataset``; only the update flag varies per call.
        return epoch_pass(dataset, model, trainer, do_update, batch_size,
                          vec_drop, truth_ind, logger, print_every)

    train_accs, train_losses = [], []
    dev_accs, dev_losses = [], []
    top_epoch = 1
    top_score = 0.0

    logger.debug('training started')
    for epoch in xrange(1, epochs + 1):
        dy.renew_cg()

        # train
        run_pass(train, True)
        train_acc, train_loss = run_pass(train, False)
        train_accs.append(train_acc)
        train_losses.append(train_loss)
        logger.debug('train, {0}, adv acc: {1}'.format(epoch, train_acc))

        # dev
        dev_acc, dev_loss = run_pass(dev, False)
        dev_accs.append(dev_acc)
        dev_losses.append(dev_loss)
        logger.debug('dev, {0}, adv acc: {1}'.format(epoch, dev_acc))
        log_value('attacker-acc', dev_acc, epoch)

        if dev_acc > top_score:
            top_score, top_epoch = dev_acc, epoch
            model.save(models_dir + task + '/best_attacker')

    summary = (
        ('best_score:', top_score),
        ('best_epoch:', top_epoch),
        ('train_task_acc:', train_accs),
        ('train_loss:', train_losses),
        ('dev_task_acc:', dev_accs),
        ('dev_loss:', dev_losses),
    )
    for label, value in summary:
        logger.info(label + str(value))
def generate(self, s_sentence, orig_sent, max_len=150):
    """Beam-search a target sentence for the source ids in ``s_sentence``.

    ``orig_sent`` holds the source tokens as strings.  When the model emits
    the unknown-word symbol, the source token with the highest attention
    weight is copied into the output instead.

    ``max_len`` is not used in this body; the search runs for at most
    ``2 * len(s_sentence)`` steps.  The beam width is read from the
    module-level ``beam_size``.

    Returns:
        The best hypothesis as a whitespace-normalised string with the
        ``<EOS>`` and unknown-word markers removed.
    """
    dy.renew_cg()
    global beam_size
    W_y = dy.parameter(self.params["W_y"])
    b_y = dy.parameter(self.params["b_y"])
    s_lookup = self.params["s_lookup"]
    t_lookup = self.params["t_lookup"]
    s_sentence_rev = list(reversed(s_sentence))
    l2r_state = self.l2r_builder.initial_state()
    r2l_state = self.r2l_builder.initial_state()
    l2r_contexts = []
    r2l_contexts = []
    for cw_l2r in s_sentence:
        l2r_state = l2r_state.add_input(s_lookup[cw_l2r])
        l2r_contexts.append(l2r_state.output())
    for cw_r2l in s_sentence_rev:
        r2l_state = r2l_state.add_input(s_lookup[cw_r2l])
        r2l_contexts.append(r2l_state.output())
    # Bring the right-to-left outputs back into source order.
    r2l_contexts.reverse()
    H_f = []
    H_f = [dy.concatenate(list(p)) for p in zip(l2r_contexts, r2l_contexts)]
    H_f_mat = dy.concatenate_cols(H_f)
    W1_att = dy.parameter(self.params["W1_att"])
    # Source-side attention projection, computed once for all decoder steps.
    w1dt = W1_att * H_f_mat
    c_t_init = dy.vecInput(2*self.HIDDEN_DIM)
    dec_state_init = self.dec_builder.initial_state()
    # Beam: maps (sentence so far, decoder state, context) to the
    # accumulated log-probability.
    possible_list = {("<EOS>", dec_state_init, c_t_init): 0.0}
    for i in range(len(s_sentence)*2):
        t_list = {}
        count_eos = 0
        for (poss, dec_state, c_t), prob in possible_list.iteritems():
            spl_poss = poss.split(' ')
            # Hypotheses that already ended are carried over unchanged.
            if i > 1 and spl_poss[-1] == "<EOS>":
                count_eos += 1
                t_list[(poss, dec_state, c_t)] = prob
                continue
            # A copied word carries the unk marker; feed plain unk to the decoder.
            if unk in spl_poss[-1]:
                word_to_lookup = unk
            else:
                word_to_lookup = spl_poss[-1]
            embedding = t_lookup[self.t_vocab[word_to_lookup]]
            x_t = dy.concatenate([c_t, embedding])
            dec_state = dec_state.add_input(x_t)
            c_t, a_t = self.attend(H_f_mat, dec_state, w1dt, len(s_sentence), 1)
            probs = dy.softmax(W_y*dy.concatenate([c_t, dec_state.output()]) + b_y).vec_value()
            inds = self.list_nlargest(probs, beam_size)
            # The attention weights must line up with the source tokens.
            # NOTE(review): a mismatch terminates the whole process via exit().
            if len(a_t) != len(orig_sent):
                print len(a_t)
                print orig_sent
                exit()
            for ind in inds:
                word_to_add = self.t_id_lookup[ind]
                if word_to_add == unk:
                    # Copy the most-attended source token.
                    max_att_ind = a_t.index(max(a_t))
                    att_word = orig_sent[max_att_ind]
                    # NOTE(review): word_to_lookup is either unk or a word
                    # without the marker, so replace() yields '' or the word
                    # itself; when this matches, the next source token is
                    # taken, which raises IndexError if max_att_ind is the
                    # last position.
                    if att_word == word_to_lookup.replace(unk, ""):
                        att_word = orig_sent[max_att_ind + 1]
                    word_to_add = att_word + unk
                sent = poss + " " + word_to_add
                # NOTE(review): math.log raises ValueError if probs[ind] is 0.
                sent_prob = prob + math.log(probs[ind])
                t_list[(sent, dec_state, c_t)] = sent_prob
        # Stop once the number of ended hypotheses equals beam_size.
        if count_eos == beam_size:
            break
        possible_list = {}
        for tup in self.dict_nlargest(t_list, beam_size*2):
            possible_list[tup] = t_list[tup]
    final_sent = self.dict_nlargest(possible_list, 1)[0][0]
    return " ".join(final_sent.replace("<EOS>", " ").replace(unk, " ").strip().split())
def predict_beamsearch(self, encoder, input_seq):
    """Beam-search decode ``input_seq`` with attention over the encoder outputs.

    Returns:
        ``(nbest_seqs, alphas_mtx)``: ``nbest_seqs`` is a list of
        ``(sequence, probability)`` pairs picked from the completed
        hypotheses, and ``alphas_mtx`` holds the attention weights recorded
        when ``self.plot`` is set.

        NOTE(review): an empty ``input_seq`` returns a bare ``[]`` rather
        than a pair; callers that unpack the result must handle that case.
    """
    if len(input_seq) == 0:
        return []
    dn.renew_cg()
    # Parameters are stored on self, presumably so that self.attend can
    # reach them (TODO confirm).
    self.readout = dn.parameter(self.params['readout'])
    self.bias = dn.parameter(self.params['bias'])
    self.w_c = dn.parameter(self.params['w_c'])
    self.u_a = dn.parameter(self.params['u_a'])
    self.v_a = dn.parameter(self.params['v_a'])
    self.w_a = dn.parameter(self.params['w_a'])
    alphas_mtx = []
    # encode input sequence
    blstm_outputs, input_masks = encoder.encode_batch([input_seq])
    # complete sequences and their probabilities
    final_states = []
    # initialize the decoder rnn
    s_0 = self.decoder_rnn.initial_state()
    # holds beam step index mapped to (sequence, probability, decoder state, attn_vector) tuples
    beam = {-1: [([common.BEGIN_SEQ], 1.0, s_0, self.init_lookup[0])]}
    i = 0
    # expand another step if didn't reach max length and there's still beams to expand
    while i < self.max_prediction_len and len(beam[i - 1]) > 0:
        # create all expansions from the previous beam:
        new_hypos = []
        for hypothesis in beam[i - 1]:
            prefix_seq, prefix_prob, prefix_decoder, prefix_attn = hypothesis
            last_hypo_symbol = prefix_seq[-1]
            # cant expand finished sequences
            if last_hypo_symbol == common.END_SEQ:
                continue
            # expand from the last symbol of the hypothesis
            try:
                prev_output_vec = self.output_lookup[self.y2int[last_hypo_symbol]]
            except KeyError:
                # not a known symbol
                print 'impossible to expand, key error: ' + str(last_hypo_symbol)
                continue
            decoder_input = dn.concatenate([prev_output_vec, prefix_attn])
            s = prefix_decoder.add_input(decoder_input)
            decoder_rnn_output = s.output()
            # perform attention step
            attention_output_vector, alphas = self.attend(blstm_outputs, decoder_rnn_output)
            # save attention weights for plotting
            # TODO: add attention weights properly to allow building the attention matrix for the best path
            if self.plot:
                val = alphas.vec_value()
                alphas_mtx.append(val)
            # compute output probabilities
            # h = readout * attention_output_vector + bias
            h = dn.affine_transform([self.bias, self.readout, attention_output_vector])
            probs = dn.softmax(h)
            probs_val = probs.npvalue()
            # TODO: maybe should choose nbest from all expansions and not only from nbest of each hypothesis?
            # find best candidate outputs
            n_best_indices = common.argmax(probs_val, self.beam_size)
            for index in n_best_indices:
                p = probs_val[index]
                new_seq = prefix_seq + [self.int2y[index]]
                # Hypothesis probability is the product of its step probabilities.
                new_prob = prefix_prob * p
                if new_seq[-1] == common.END_SEQ or i == self.max_prediction_len - 1:
                    # TODO: add to final states only if fits in k best?
                    # if found a complete sequence or max length - add to final states
                    # (the first and last symbols are stripped from the stored sequence)
                    final_states.append((new_seq[1:-1], new_prob))
                else:
                    new_hypos.append((new_seq, new_prob, s, attention_output_vector))
        # add the most probable expansions from all hypotheses to the beam
        new_probs = np.array([p for (s, p, r, a) in new_hypos])
        argmax_indices = common.argmax(new_probs, self.beam_size)
        beam[i] = [new_hypos[l] for l in argmax_indices]
        i += 1
    # get nbest results from final states found in search
    final_probs = np.array([p for (s, p) in final_states])
    argmax_indices = common.argmax(final_probs, self.beam_size)
    nbest_seqs = [final_states[l] for l in argmax_indices]
    return nbest_seqs, alphas_mtx
def start_batch(self):
    """Begin a new batch: clear ``self.losses`` and renew the DyNet graph."""
    self.losses = list()
    dy.renew_cg()
def train(self, train_file, epochs):
    """Train on ``train_file`` for ``epochs`` epochs, plotting the loss live.

    Each non-blank line of ``train_file`` holds whitespace-separated
    features followed by the gold action label as its last field.  The file
    is re-read and shuffled at the start of every epoch.  One update is
    applied per ``self.properties.minibatch_size`` examples; the examples
    left over at the end of an epoch are discarded.
    """
    # matplotlib config
    loss_values = []
    plt.ion()
    ax = plt.gca()
    ax.set_xlim([0, 10])
    ax.set_ylim([0, 3])
    plt.title("Loss over time")
    plt.xlabel("Minibatch")
    plt.ylabel("Loss")

    for i in range(epochs):
        print('started epoch', (i + 1))
        losses = []
        # Close the file as soon as it has been read instead of leaking one
        # handle per epoch.  Blank lines are skipped: they carry no label
        # and would fail on ``fields[-1]`` below.
        with open(train_file, 'r') as handle:
            train_data = [
                line for line in handle.read().strip().split('\n')
                if line.strip()
            ]

        # shuffle the training data.
        random.shuffle(train_data)

        step = 0
        for line in train_data:
            fields = line.strip().split()
            features, label = fields[:-1], fields[-1]
            gold_label = self.vocab.action2id(label)
            result = self.build_graph(features)

            # getting loss with respect to negative log softmax function and the gold label.
            loss = dynet.pickneglogsoftmax(result, gold_label)

            # appending to the minibatch losses
            losses.append(loss)
            step += 1

            if len(losses) >= self.properties.minibatch_size:
                # now we have enough loss values to get loss for minibatch
                minibatch_loss = dynet.esum(losses) / len(losses)

                # calling dynet to run forward computation for all minibatch items
                minibatch_loss.forward()

                # getting float value of the loss for current minibatch
                minibatch_loss_value = minibatch_loss.value()

                # printing info and plotting
                loss_values.append(minibatch_loss_value)
                if len(loss_values) % 10 == 0:
                    ax.set_xlim([0, len(loss_values) + 10])
                    ax.plot(loss_values)
                    plt.draw()
                    plt.pause(0.0001)
                    progress = round(100 * float(step) / len(train_data), 2)
                    print('current minibatch loss', minibatch_loss_value,
                          'progress:', progress, '%')

                # calling dynet to run backpropagation
                minibatch_loss.backward()

                # calling dynet to change parameter values with respect to current backpropagation
                self.updater.update()

                # empty the loss vector
                losses = []

                # refresh the memory of dynet
                dynet.renew_cg()

        # there are still some minibatch items in the memory but they are smaller than the minibatch size
        # so we ask dynet to forget them
        dynet.renew_cg()
# Running counters.  ``this_*`` cover the window since the last status report.
epoch = all_sents = dev_time = all_words = this_words = this_loss = 0
random_training_instance = shuffled_infinite_list(train)
# NOTE(review): the loop variable reuses the name of its own upper bound
# ``updates``; the bound is read once, before the first rebinding.
for updates in xrange(1, updates):
    # Periodic training status report.
    if updates % int(500 / FLAGS_batch_size) == 0:
        trainer.status()
        # Wall-clock training time, excluding time spent on validation.
        train_time = time.time() - start - dev_time
        all_words += this_words
        print("loss=%.4f, words per second=%.4f" %
              (this_loss / this_words, all_words / train_time))
        this_loss = this_words = 0

    # Periodic evaluation on the validation set.
    if updates % int(10000 / FLAGS_batch_size) == 0:
        dev_start = time.time()
        dev_loss = dev_words = 0
        for i in xrange(0, len(valid), FLAGS_batch_size):
            valid_minibatch = valid[i:i + FLAGS_batch_size]
            dy.renew_cg()  # Clear existing computation graph.
            loss_exp, mb_words = lm.minibatch_lm_loss(valid_minibatch)
            dev_loss += loss_exp.scalar_value()
            dev_words += mb_words
        print("nll=%.4f, ppl=%.4f, words=%r, time=%.4f, word_per_sec=%.4f" %
              (dev_loss / dev_words, math.exp(dev_loss / dev_words),
               dev_words, train_time, all_words / train_time))
        # Record the validation time so that later train_time values really
        # exclude it; previously dev_time stayed 0 and dev_start was unused.
        dev_time += time.time() - dev_start

    # Compute loss for one training minibatch.
    minibatch = [
        next(random_training_instance) for _ in xrange(FLAGS_batch_size)
    ]
    dy.renew_cg()  # Clear existing computation graph.
    loss_exp, mb_words = lm.minibatch_lm_loss(minibatch)
    this_loss += loss_exp.scalar_value()
    this_words += mb_words
def parse_event(self, sentence):
    """Greedily parse ``sentence`` with the event transition system.

    Each entry's ``children`` list is reset, then a ``CustomTransitionSystem``
    is driven until it reports a terminal state. At every step the operation,
    label and trigger scores from ``self.evaluate_events`` are combined into
    one score per legal transition, and the highest-scoring transition is
    performed.

    Args:
        sentence: iterable of entries; each gets its ``children`` attribute
            reset to an empty list.

    Returns:
        The same ``sentence`` object, after all transitions have been applied
        to the transition system built from it.
    """
    # Discard any children attached by an earlier parse.
    for token in sentence:
        token.children = []
    self.set_empty_vector()

    # Per-sentence features (word embeddings, per the original author's note).
    features = self.extract_features(sentence)
    state = CustomTransitionSystem(sentence)

    while not state.is_terminal():
        op_expr, lbl_expr, tg_expr = self.evaluate_events(
            state.stack, state.buffer, features)

        # Convert the score expressions to numpy arrays.
        op_scores = op_expr.npvalue()
        lbl_scores = lbl_expr.npvalue()
        tg_scores = tg_expr.npvalue()

        # Best label for each arc direction: odd indices score left arcs,
        # even indices (from 2) score right arcs; index 0 is used below for
        # transitions that carry no label.
        left_lbl_score, left_lbl = max(
            zip(lbl_scores[1::2], self.ev_relations))
        right_lbl_score, right_lbl = max(
            zip(lbl_scores[2::2], self.ev_relations))
        # Best trigger type among tg_scores[2:], paired with self.i2tg[1:].
        trigger_score, trigger = max(zip(tg_scores[2:], self.i2tg[1:]))

        # Score every legal transition as (action, label, trigger, score).
        candidates = []
        for action in state.all_legal():
            ix = state.t2i[action]
            if action == "shift":
                candidates.append(
                    (action, None, trigger,
                     op_scores[ix] + lbl_scores[0] + trigger_score))
            elif action == "drop":
                # A dropped entry is tagged either "O" or "Protein".
                candidates.append(
                    (action, None, "O",
                     op_scores[ix] + lbl_scores[0] + tg_scores[1]))
                candidates.append(
                    (action, None, "Protein",
                     op_scores[ix] + lbl_scores[0] + tg_scores[4]))
            elif action in ('left_reduce', 'left_attach'):
                candidates.append(
                    (action, left_lbl, None,
                     op_scores[ix] + left_lbl_score + tg_scores[0]))
            elif action in ('right_reduce', 'right_attach'):
                candidates.append(
                    (action, right_lbl, None,
                     op_scores[ix] + right_lbl_score + tg_scores[0]))
            elif action == "swap":
                candidates.append(
                    (action, None, None,
                     op_scores[ix] + lbl_scores[0] + tg_scores[0]))

        # Pick the highest-scoring legal transition and apply it.
        best_act, best_lbl, best_tg, _ = max(candidates, key=itemgetter(3))
        state.perform_transition(best_act, best_lbl, best_tg)

    dy.renew_cg()
    return sentence