Пример #1
0
    def _step(self, loader, update, log, reporting_fns, verbose=None):
        """Run one pass over `loader`, tracking loss and a confusion matrix.

        A fresh DyNet graph is built per batch. `update` receives the
        batch-mean loss expression and `log` receives the optimizer's global
        step, the batch loss total, the batch size and `reporting_fns`.

        :param loader: Iterable of batch dicts; must support `len()`.
        :param update: Callable taking the batch-mean loss expression.
        :param log: Callable `(global_step, loss, batch_size, reporting_fns)`.
        :param reporting_fns: Passed through to `log` unchanged.
        :param verbose: Passed through to `verbose_output`.
        :returns: Confusion-matrix metrics plus an `avg_loss` entry.
        """
        progress = create_progress_bar(len(loader))
        confusion = ConfusionMatrix(self.labels)
        total_loss = 0
        total_examples = 0

        for batch_dict in progress(loader):
            dy.renew_cg()
            inputs = self.model.make_input(batch_dict)
            gold = inputs.pop('y')
            predictions = self.model.forward(inputs)
            mean_loss = dy.mean_batches(self.model.loss(predictions, gold))
            n_examples = self._get_batchsz(batch_dict)
            # Scale the batch mean back up so the epoch average is weighted
            # by batch size.
            batch_loss = mean_loss.npvalue().item() * n_examples
            total_loss += batch_loss
            total_examples += n_examples
            _add_to_cm(confusion, gold, predictions.npvalue())
            update(mean_loss)
            log(self.optimizer.global_step, batch_loss, n_examples, reporting_fns)

        metrics = confusion.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_examples)
        verbose_output(verbose, confusion)
        return metrics
Пример #2
0
 def act(self, obs):
     """Return the actor's action for `obs`, clipped to [-1, 1].

     When `self.noise_stddev` is positive, Gaussian noise with that standard
     deviation is added to the action before clipping.
     """
     dy.renew_cg()
     action = self.actor(obs).npvalue()
     add_noise = self.noise_stddev > 0
     if add_noise:
         # In-place add, so the action keeps its original shape and dtype.
         action += np.random.randn(self.action_dim) * self.noise_stddev
     return np.clip(action, -1, 1)
Пример #3
0
    def predict(self, batch_dict):
        """Decode the best label path for the input described by `batch_dict`.

        Decoding uses the CRF when `self.do_crf` is True, constrained Viterbi
        when `self.constraint` is set, and a per-step argmax otherwise.

        :param batch_dict: Batch passed to `self.make_input`.
        :returns: A list with one array per row of the decoded path, each
            truncated to the matching entry of `inputs['lengths']`.
        """
        dy.renew_cg()
        inputs = self.make_input(batch_dict)
        lengths = inputs['lengths']
        unaries = self.compute_unaries(inputs)

        if self.do_crf is True:
            best_path, _ = self.crf.decode(unaries)
        elif self.constraint is not None:
            transitions = dy.log_softmax(dy.inputTensor(self.constraint[1] * -1e4))
            best_path, _ = viterbi(unaries, transitions, Offsets.GO, Offsets.EOS, norm=True)
        else:
            best_path = [np.argmax(x.npvalue(), axis=0) for x in unaries]

        # TODO: RN using autobatching, so none of this is really useful
        # If we want to support batching in this function we have to either loop over the batch
        # or we can just simplify all this code here
        # Stack to (T, B) with B forced to 1, then flip to (B, T).
        best_path = np.stack(best_path).reshape(-1, 1).transpose(1, 0)

        return [best_path[b, :lengths[b]] for b in range(best_path.shape[0])]
Пример #4
0
def generate(sent):
    """Greedily decode a target sentence for the source word ids in `sent`.

    The source is encoded with `LSTM_SRC_BUILDER`; its final output seeds the
    decoder state. Decoding starts from `sos_trg` and stops at `eos_trg` or
    after `MAX_SENT_SIZE` steps.

    :param sent: Sequence of source word ids (indices into `LOOKUP_SRC`).
    :returns: List of target words (looked up in `i2w_trg`), without the
        end-of-sentence token.
    """
    dy.renew_cg()

    # Encode the source sentence and keep the output of the last step.
    init_state_src = LSTM_SRC_BUILDER.initial_state()
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in sent])[-1].output()

    # Seed the decoder with the encoder output.
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])

    prev_word = sos_trg
    trg_sent = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for _ in range(MAX_SENT_SIZE):
        # Feed the previous word into the LSTM and score the vocabulary.
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        s = dy.affine_transform([b_sm, W_sm, output_embedding])

        # The most likely word maximizes the log-probability. The argmax
        # must be over the log-softmax itself: taking it over the negated
        # values would select the least likely word.
        log_probs = dy.log_softmax(s).value()
        next_word = np.argmax(log_probs)

        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent
Пример #5
0
def calc_scores(words):
  """Return the score expression for `words`: summed lookups in W plus b."""
  # Every call starts from a fresh computation graph.
  dy.renew_cg()
  # One looked-up vector per word id, summed element-wise.
  word_vectors = [dy.lookup(W, word_id) for word_id in words]
  summed = dy.esum(word_vectors)
  # The bias is added last.
  return summed + b
Пример #6
0
    def test_update(self):
        """Check gradients after backward and values after one trainer update."""
        ones = np.ones((10, 10))
        expected_grad = 0.1 * ones
        expected_params = ones * 0.99
        expected_row = ones[0] * 0.9

        dy.renew_cg()
        pp1 = dy.parameter(self.p1)
        pp2 = dy.parameter(self.p2)

        loss = dy.dot_product(pp1 * self.lp1[1], pp2 * self.lp2[1]) / 100
        self.assertEqual(loss.scalar_value(), 10, msg=str(loss.scalar_value()))
        loss.backward()

        # Gradients of the dense parameters and of the looked-up row 1.
        self.assertTrue(np.allclose(self.p1.grad_as_array(), expected_grad),
                        msg=np.array_str(self.p1.grad_as_array()))
        self.assertTrue(np.allclose(self.p2.grad_as_array(), expected_grad),
                        msg=np.array_str(self.p2.grad_as_array()))
        self.assertTrue(np.allclose(self.lp1.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp1.grad_as_array()))
        self.assertTrue(np.allclose(self.lp2.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp2.grad_as_array()))

        self.trainer.update()

        # Parameter values after the update step.
        self.assertTrue(np.allclose(self.p1.as_array(), expected_params),
                        msg=np.array_str(self.p1.as_array()))
        self.assertTrue(np.allclose(self.p2.as_array(), expected_params),
                        msg=np.array_str(self.p2.as_array()))
        self.assertTrue(np.allclose(self.lp1.as_array()[1], expected_row),
                        msg=np.array_str(self.lp1.as_array()[1]))
        self.assertTrue(np.allclose(self.lp2.as_array()[1], expected_row),
                        msg=np.array_str(self.lp2.as_array()))
Пример #7
0
def calc_predict_and_activations(wids, tag, words):
    """Print the CNN prediction for one sentence with per-filter details.

    Prints the gold tag and words, the positions where each filter activates
    most strongly, the class scores with the predicted class, the bias, and
    each class's per-feature contributions (softmax weights times pooled
    features).

    :param wids: Word ids of the sentence; not modified.
    :param tag: Gold tag, printed with `%d`.
    :param words: Sentence tokens, used for display only.
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        # Pad a copy: `wids += ...` would extend the caller's list in place.
        wids = list(wids) + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    # For every filter, the position with the largest response.
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print('  bias=%s' % bias)
    # Row i holds how much each pooled feature contributes to class i.
    contributions = W * features
    row_formats = (
        ' very bad (%.4f): %s',
        '      bad (%.4f): %s',
        '  neutral (%.4f): %s',
        '     good (%.4f): %s',
        'very good (%.4f): %s',
    )
    for idx, fmt in enumerate(row_formats):
        print(fmt % (scores[idx], contributions[idx]))
Пример #8
0
 def test_gradient_sanity(self):
     """`gradient_callable` must raise RuntimeError after only a forward pass."""
     dy.renew_cg()
     lhs = dy.inputTensor(self.v1)
     rhs = dy.inputTensor(self.v2)
     product = dy.dot_product(lhs, rhs)
     # Only the forward pass is run; backward is deliberately never called.
     product.forward()
     with self.assertRaises(RuntimeError):
         gradient_callable(lhs)
Пример #9
0
    def test_update(self):
        """Check gradients after backward and values after one trainer update."""
        ones = np.ones((10, 10))
        expected_grad = 0.1 * ones
        expected_params = ones * 0.99
        expected_row = ones[0] * 0.9

        dy.renew_cg()

        lhs = self.p1 * self.lp1[1]
        rhs = self.p2 * self.lp2[1]
        loss = dy.dot_product(lhs, rhs) / 100

        self.assertEqual(loss.scalar_value(), 10, msg=str(loss.scalar_value()))

        loss.backward()

        # Gradients of the dense parameters and of the looked-up row 1.
        self.assertTrue(np.allclose(self.p1.grad_as_array(), expected_grad),
                        msg=np.array_str(self.p1.grad_as_array()))
        self.assertTrue(np.allclose(self.p2.grad_as_array(), expected_grad),
                        msg=np.array_str(self.p2.grad_as_array()))
        self.assertTrue(np.allclose(self.lp1.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp1.grad_as_array()))
        self.assertTrue(np.allclose(self.lp2.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp2.grad_as_array()))

        self.trainer.update()

        # Parameter values after the update step.
        self.assertTrue(np.allclose(self.p1.as_array(), expected_params),
                        msg=np.array_str(self.p1.as_array()))
        self.assertTrue(np.allclose(self.p2.as_array(), expected_params),
                        msg=np.array_str(self.p2.as_array()))
        self.assertTrue(np.allclose(self.lp1.as_array()[1], expected_row),
                        msg=np.array_str(self.lp1.as_array()[1]))
        self.assertTrue(np.allclose(self.lp2.as_array()[1], expected_row),
                        msg=np.array_str(self.lp2.as_array()))
Пример #10
0
def train(epoch):
    """Train the model for one epoch over `train_data` and print progress.

    :param epoch: Epoch number, used only in the printed messages.
    """
    model.training = True
    running_loss = 0
    batches = generate_batch_loader(train_data, batch_size=batch_size)
    for batch_idx, data in enumerate(batches):
        # The graph is rebuilt from scratch for every batch.
        dy.renew_cg()
        x = dy.inputTensor(data.reshape(-1, 784).T)
        recon_x, mu, logvar = model.forward(x)
        loss = loss_function(recon_x, x, mu, logvar)

        # Reading the value runs the forward pass.
        loss_value = loss.value()
        running_loss += loss_value

        # Backward pass and parameter update.
        loss.backward()
        optimizer.update()

        if batch_idx % args.log_interval != 0:
            continue
        seen = batch_idx * len(data)
        percent = 100. * batch_idx / (len(train_data) / batch_size)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, len(train_data), percent, loss_value / len(data)))

    average = running_loss / len(train_data)
    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, average))
Пример #11
0
def test(epoch):
    """Evaluate the model on `test_data` and save a reconstruction image.

    For the first batch, up to eight inputs and their reconstructions are
    written to `results/reconstruction_<epoch>.png`. The average test loss
    per example is printed at the end.

    :param epoch: Epoch number, used in the output image file name.
    """
    model.training = False
    test_loss = 0
    test_loader = generate_batch_loader(test_data, batch_size=batch_size)
    for i, data in enumerate(test_loader):
        # The graph is rebuilt from scratch for every batch.
        dy.renew_cg()
        x = dy.inputTensor(data.reshape(-1, 784).T)
        recon_x, mu, logvar = model.forward(x)
        loss = loss_function(recon_x, x, mu, logvar)

        # Reading the value runs the forward pass.
        loss_value = loss.value()
        test_loss += loss_value

        if i == 0:
            n = min(data.shape[0], 8)
            # Infer the batch dimension with -1 instead of assuming the first
            # batch is full: a batch smaller than `batch_size` would make a
            # hard-coded reshape fail.
            recon_images = recon_x.npvalue().T.reshape(-1, 1, 28, 28)
            comparison = np.concatenate([data[:n], recon_images[:n]])
            save_image(comparison,
                       'results/reconstruction_' + str(epoch) + '.png', nrow=n)

    test_loss /= len(test_data)
    print('====> Test set loss: {:.4f}'.format(test_loss))
Пример #12
0
    def test(self, loader, reporting_fns, phase, **kwargs):
        """Evaluate the model over `loader` and report the resulting metrics.

        The hidden state returned by the model is carried from batch to batch
        as plain numpy values, since each batch gets a fresh graph.

        :param loader: Iterable of batch dicts.
        :param reporting_fns: Passed through to `self.report`.
        :param phase: Phase name; `'Valid'` increments `self.valid_epochs`.
        :returns: The metrics from `self.calc_metrics`.
        """
        loss_sum = 0.0
        tok_sum = 0
        initial_state = None
        start = time.time()

        for batch_dict in loader:
            dy.renew_cg()
            inputs = self.model.make_input(batch_dict)
            gold = inputs.pop('y')
            output, initial_state = self.model.forward(inputs, initial_state, train=False)
            loss = self._loss(output, gold)
            n_toks = self._num_toks(batch_dict)
            loss_sum += loss.npvalue().item() * n_toks
            tok_sum += n_toks
            if initial_state is not None:
                # Detach from the graph that is about to be discarded.
                initial_state = [x.npvalue() for x in initial_state]

        if phase == 'Valid':
            self.valid_epochs += 1
            epochs = self.valid_epochs
        else:
            epochs = 0

        metrics = self.calc_metrics(loss_sum, tok_sum)
        self.report(epochs, metrics, start, phase, 'EPOCH', reporting_fns)
        return metrics
Пример #13
0
 def test_pick_batch_elems(self):
     """pick_batch_elems on a two-element lookup batch yields the chosen values."""
     dy.renew_cg()
     batch = dy.lookup_batch(self.p, [0, 1])
     # Select only the first batch element.
     first_only = dy.pick_batch_elems(batch, [0])
     self.assertTrue(np.allclose(first_only.npvalue(), self.pval[0]))
     # Select both batch elements.
     both = dy.pick_batch_elems(batch, [0, 1])
     self.assertTrue(np.allclose(both.npvalue(), self.pval.T))
Пример #14
0
 def test_concatenate_to_batch(self):
     """Splitting a batch into elements and re-batching them keeps the values."""
     dy.renew_cg()
     batch = dy.lookup_batch(self.p, [0, 1])
     first = dy.pick_batch_elem(batch, 0)
     second = dy.pick_batch_elem(batch, 1)
     rebuilt = dy.concatenate_to_batch([first, second])
     self.assertTrue(np.allclose(rebuilt.npvalue(), self.pval.T))
Пример #15
0
 def test_inputTensor_batched_list(self):
     """inputTensor built from a list of arrays has the expected dims and values."""
     for i in range(4):
         dy.renew_cg()
         shape = self.shapes[i]
         input_tensor = self.input_vals.reshape(shape)
         # One array per slice along the last axis of the reshaped input.
         slices = [np.asarray(x).transpose() for x in input_tensor.transpose()]
         xb = dy.inputTensor(slices)

         expected_dim = shape[:-1] if i > 0 else (1,)
         self.assertEqual(xb.dim()[0], expected_dim, msg="Dimension mismatch")
         self.assertEqual(xb.dim()[1], shape[-1], msg="Dimension mismatch")
         self.assertTrue(
             np.allclose(xb.npvalue(), input_tensor),
             msg="Expression value different from initial value"
         )
         total_sq_norm = dy.sum_batches(dy.squared_norm(xb)).scalar_value()
         self.assertEqual(total_sq_norm, self.squared_norm, msg="Value mismatch")
Пример #16
0
def calc_loss(sent):
    """Return the summed negative log-likelihood of the target given the source.

    :param sent: Pair `(src, trg)` of word-id sequences.
    :returns: DyNet expression summing the per-word losses for `trg[1:]`.
    """
    dy.renew_cg()

    src, trg = sent[0], sent[1]

    # Encode the source and keep the output of the last encoder step.
    encoder_states = LSTM_SRC_BUILDER.initial_state().add_inputs([LOOKUP_SRC[x] for x in src])
    src_output = encoder_states[-1].output()

    # The decoder state is seeded with the encoder output.
    decoder = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    step_losses = []
    # Teacher forcing: each step is fed the previous gold word.
    for prev_word, next_word in zip(trg, trg[1:]):
        decoder = decoder.add_input(LOOKUP_TRG[prev_word])
        logits = dy.affine_transform([b_sm, W_sm, decoder.output()])
        step_losses.append(dy.pickneglogsoftmax(logits, next_word))
    return dy.esum(step_losses)
Пример #17
0
def calc_scores(words):
    """Return the class score expression for `words` from a bidirectional LSTM.

    The final outputs of the forward and backward passes are concatenated and
    fed through the `W_sm` / `b_sm` output layer.
    """
    dy.renew_cg()
    embeddings = [dy.lookup(W_emb, word_id) for word_id in words]
    # Forward pass, then backward pass over the reversed sequence.
    forward_outputs = fwdLSTM.initial_state().transduce(embeddings)
    backward_outputs = bwdLSTM.initial_state().transduce(reversed(embeddings))
    sentence_repr = dy.concatenate([forward_outputs[-1], backward_outputs[-1]])
    return W_sm * sentence_repr + b_sm
Пример #18
0
 def test_gradient(self):
     """After a full backward pass, the gradient of x in dot(x, y) equals y."""
     dy.renew_cg()
     lhs = dy.inputTensor(self.v1)
     rhs = dy.inputTensor(self.v2)
     product = dy.dot_product(lhs, rhs)
     product.forward()
     product.backward(full=True)
     matches = np.allclose(lhs.gradient(), self.v2)
     # Only the first three arguments appear in the message; the remaining
     # ones are still evaluated, as before.
     failure_msg = "{}\n{}\n{}".format(
         product.value(), lhs.gradient(), self.v2, rhs.gradient(), self.v2)
     self.assertTrue(matches, msg=failure_msg)
Пример #19
0
 def test_get_parameters(self):
     """Parameter expressions of the RNN match the stored parameter arrays."""
     dy.renew_cg()
     self.rnn.initial_state()
     layer_params = self.rnn.get_parameters()
     layer_exprs = self.rnn.get_parameter_expressions()
     for params, exprs in zip(layer_params, layer_exprs):
         for param, expr in zip(params, exprs):
             self.assertTrue(np.allclose(expr.npvalue(), param.as_array()))
Пример #20
0
def calc_loss(sents):
    """Return the masked batch loss and the number of target tokens.

    Source sentences are indexed position by position without padding, so
    every source sentence in the batch must be at least as long as the
    longest one (in practice: all the same length). Target sentences may
    differ in length; shorter ones are padded with `eos_trg` and the padded
    positions are masked out of the loss.

    :param sents: List of `(src, tgt)` pairs of word-id sequences.
    :returns: `(loss_expr, num_words)`, where `loss_expr` is the loss summed
        over time steps and batch elements, and `num_words` counts the
        non-padding target tokens over all positions (including position 0).
    """
    dy.renew_cg()

    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]

    # Regroup source ids by time step: src_cws[i] holds word i of every sentence.
    max_src_len = max(len(sent) for sent in src_sents)
    src_cws = [[sent[i] for sent in src_sents] for i in range(max_src_len)]

    # Encode the batch and keep the output of the last encoder step.
    init_state_src = LSTM_SRC_BUILDER.initial_state()
    src_output = init_state_src.add_inputs(
        [dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])[-1].output()

    # Regroup target ids by time step, padding with eos_trg and recording
    # which entries are real words. The length must be taken from the target
    # sentences: measuring the (src, tgt) pairs always gives 2 and would cut
    # decoding off after a single step.
    max_tgt_len = max(len(sent) for sent in tgt_sents)
    tgt_cws = []
    masks = []
    num_words = 0
    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    all_losses = []
    # masks[1:] keeps each mask aligned with the words being predicted:
    # position t of tgt_cws is masked by position t of masks.
    for next_words, mask in zip(tgt_cws[1:], masks[1:]):
        # Feed the previous gold words and score the next ones.
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        loss = dy.pickneglogsoftmax_batch(s, next_words)
        # One scalar mask value per batch element.
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,), len(sents))
        all_losses.append(loss * mask_expr)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words
Пример #21
0
 def test_param_change_after_update(self):
     """Trainers keep working when parameters are added between updates."""
     for make_trainer in (dy.SimpleSGDTrainer, dy.AdamTrainer):
         trainer = make_trainer(self.m)
         for _ in range(100):
             # A new parameter is added to the model on every iteration.
             param = self.m.add_parameters((1,))
             dy.renew_cg()
             param.forward()
             param.backward()
             trainer.update()
Пример #22
0
def calc_loss(words, labels, heads):
    """Return the biaffine parser loss for one sentence.

    :param words: Word ids of the sentence.
    :param labels: Gold labels, passed to the parser as a one-element batch.
    :param heads: Gold heads, passed to the parser as a one-element batch.
    """
    dy.renew_cg()
    embeddings = [dy.lookup(W_emb, word_id) for word_id in words]
    forward_outputs = fwdLSTM.initial_state().transduce(embeddings)
    backward_outputs = bwdLSTM.initial_state().transduce(reversed(embeddings))

    # Pair each forward output with the backward output for the same word
    # and shape the concatenation as a (2 * HID_SIZE, 1) column.
    src_encodings = []
    for fwd, bwd in zip(forward_outputs, reversed(backward_outputs)):
        column = dy.reshape(dy.concatenate([fwd, bwd]), (HID_SIZE * 2, 1))
        src_encodings.append(column)

    gold = ([heads], [labels])
    return biaffineParser.decode_loss(src_encodings, gold)
Пример #23
0
def calc_acc(words, labels, heads):
    """Decode one sentence and return the parser's accuracy result.

    :param words: Word ids of the sentence.
    :param labels: Gold labels.
    :param heads: Gold heads.
    :returns: Whatever `biaffineParser.cal_accuracy` returns for the
        predicted versus gold heads and labels.
    """
    dy.renew_cg()
    embeddings = [dy.lookup(W_emb, word_id) for word_id in words]
    forward_outputs = fwdLSTM.initial_state().transduce(embeddings)
    backward_outputs = bwdLSTM.initial_state().transduce(reversed(embeddings))

    # Pair each forward output with the backward output for the same word
    # and shape the concatenation as a (2 * HID_SIZE, 1) column.
    src_encodings = []
    for fwd, bwd in zip(forward_outputs, reversed(backward_outputs)):
        column = dy.reshape(dy.concatenate([fwd, bwd]), (HID_SIZE * 2, 1))
        src_encodings.append(column)

    pred_heads, pred_labels = biaffineParser.decoding(src_encodings)
    return biaffineParser.cal_accuracy(pred_heads, pred_labels, heads, labels)
Пример #24
0
 def test_save_load(self):
     """A parameter saved to file and loaded into a second model keeps its value."""
     # Run one update step before saving.
     self.p.forward()
     self.p.backward()
     self.t.update()
     dy.renew_cg()
     saved_value = self.p.value()
     dy.save(self.file, [self.p])
     # Exactly one object is expected back.
     (reloaded,) = dy.load(self.file, self.m2)
     loaded_value = reloaded.value()
     self.assertTrue(np.allclose(saved_value, loaded_value))
Пример #25
0
    def renew_cg(self):
        """Start a fresh computation graph and reload the parameter expressions.

        The expressions are stored in `self.param_exprs` under the keys
        `'U'`, `'pW'`, `'pb'` and `'<bos>'`.
        """
        # A new graph is created for every single instance.
        dy.renew_cg()

        self.param_exprs = {
            'U': dy.parameter(self.params['word_score_U']),
            'pW': dy.parameter(self.params['predict_W']),
            'pb': dy.parameter(self.params['predict_b']),
            '<bos>': dy.parameter(self.params['<BoS>']),
        }
Пример #26
0
def calc_loss(sent):
    """Return the KL and reconstruction losses for one sentence pair.

    The source is encoded with an LSTM; two MLPs map its final output to the
    mean and log-variance of a Gaussian, a latent vector is sampled through
    `reparameterize`, and the decoder is seeded with that vector.

    :param sent: Pair `(src, trg)` of word-id sequences.
    :returns: `(kl_loss, softmax_loss)` as DyNet expressions.
    """
    dy.renew_cg()

    src, trg = sent[0], sent[1]

    # Encode the source and keep the output of the last encoder step.
    encoder_states = LSTM_SRC_BUILDER.initial_state().add_inputs([LOOKUP_SRC[x] for x in src])
    src_output = encoder_states[-1].output()

    # Parameters of the two MLPs producing the posterior statistics.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)

    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    # Mean vector, and the diagonal of the log covariance matrix (working
    # with the log variance keeps the later formulas simple).
    mu = mlp(src_output, W_mean, V_mean, b_mean)
    log_var = mlp(src_output, W_var, V_var, b_var)

    # KL[N(mu(x), sigma(x)) || N(0, I)]
    #   = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # The decoder state is seeded with the sampled latent vector.
    decoder = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    step_losses = []
    # Teacher forcing: each step is fed the previous gold word.
    for prev_word, next_word in zip(trg, trg[1:]):
        decoder = decoder.add_input(LOOKUP_TRG[prev_word])
        logits = dy.affine_transform([b_sm, W_sm, decoder.output()])
        step_losses.append(dy.pickneglogsoftmax(logits, next_word))

    softmax_loss = dy.esum(step_losses)

    return kl_loss, softmax_loss
Пример #27
0
def calc_scores(wids):
    """Return the class score expression for a sentence from the CNN model.

    Sentences shorter than `WIN_SIZE` are padded with word id 0. The
    convolution output is max-pooled over positions, rectified and fed
    through the `W_sm` / `b_sm` output layer.

    :param wids: Word ids of the sentence; not modified.
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        # Pad a copy: `wids += ...` would extend the caller's list in place.
        wids = list(wids) + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    # Max over dimension 1, then flatten to one value per filter.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
Пример #28
0
def calc_scores(words):
    """Return one score expression per word from the LSTM tagger.

    :param words: Word ids of the sentence.
    :returns: List of DyNet expressions, one per input word.
    """
    dy.renew_cg()

    # Run the whole sentence through the LSTM.
    embeddings = [LOOKUP[word_id] for word_id in words]
    hidden_states = LSTM.transduce(embeddings)

    # Apply the output layer to every hidden state.
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)
    return [dy.affine_transform([b, W, state]) for state in hidden_states]
Пример #29
0
def generate_sent():
  """Sample a sentence word by word from the n-gram model.

  Sampling stops when the symbol `S` is drawn or once the sentence has
  `MAX_LEN` words.
  """
  dy.renew_cg()
  history = [S] * N
  sampled = []
  while True:
    probs = dy.softmax(calc_score_of_history(history)).npvalue()
    # Renormalised before sampling (presumably to absorb float rounding).
    word = np.random.choice(nwords, p=probs/probs.sum())
    finished = word == S or len(sampled) == MAX_LEN
    if finished:
      return sampled
    sampled.append(word)
    # Slide the history window forward by one word.
    history = history[1:] + [word]
Пример #30
0
def calc_sent_loss(sent):
  """Return the summed negative log-likelihood of `sent` under the n-gram model.

  The sentence is scored word by word, followed by the symbol `S`.
  """
  dy.renew_cg()
  # The history starts out filled with the symbol S.
  history = [S] * N
  step_losses = []
  for target in sent + [S]:
    scores = calc_score_of_history(history)
    step_losses.append(dy.pickneglogsoftmax(scores, target))
    # Slide the history window forward by one word.
    history = history[1:] + [target]
  return dy.esum(step_losses)
Пример #31
0
def calc_scores(tree):
  """Return the class score expression for `tree`."""
  dy.renew_cg()
  tree_embedding = builder.expr_for_tree(tree)
  weights = dy.parameter(W_sm)
  bias = dy.parameter(b_sm)
  return weights * tree_embedding + bias
Пример #32
0
    def _train(self,
               sentences,
               transition_system,
               evaluate,
               relations,
               triggers=None):
        """Train for one pass over `sentences` with a hinge loss on transitions.

        Each sentence longer than two tokens is parsed with
        `transition_system`. At every step the scores from `evaluate` are
        turned into candidate transitions, the best-scoring correct one is
        followed, and a hinge loss against it is recorded. Losses are summed
        and back-propagated in chunks of more than 50; a failed update is
        reported and skipped rather than aborting training.

        :param sentences: Iterable of sentences (sequences of token objects).
        :param transition_system: Callable building a parser state from a
            sentence.
        :param evaluate: Callable `(stack, buffer, features)` returning DyNet
            score expressions: `(op, label)`, or `(op, label, trigger)` when
            `triggers` is given.
        :param relations: Arc labels; relation `j` uses label score index
            `1 + 2*j` for left and `2 + 2*j` for right transitions.
        :param triggers: Optional trigger labels; enables the trigger-aware
            transition set.
        """
        start_chunk = time.time()
        start_all = time.time()
        loss_chunk = 0
        loss_all = 0
        total_chunk = 0
        total_all = 0
        losses = []
        self.set_empty_vector()

        # Defined up front so the final report works for empty input.
        i = 0
        for i, sentence in enumerate(sentences):
            if i != 0 and i % 100 == 0:
                end = time.time()
                # No chunk may have been processed since the last report.
                avg_chunk = loss_chunk / total_chunk if total_chunk else float('nan')
                print(
                    f'count: {i}\tloss: {avg_chunk:.4f}\ttime: {end-start_chunk:,.2f} secs'
                )
                start_chunk = end
                loss_chunk = 0
                total_chunk = 0
            if len(sentence) > 2:
                for e in sentence:
                    e.children = []
                # assign embedding to each word
                features = self.extract_features(sentence, drop_word=True)
                # initialize sentence parse
                state = transition_system(sentence)
                # parse sentence
                while not state.is_terminal():
                    outputs = evaluate(state.stack, state.buffer, features)

                    if triggers:
                        dy_op_scores, dy_lbl_scores, dy_tg_scores = outputs
                        np_tg_scores = dy_tg_scores.npvalue()
                    else:
                        dy_op_scores, dy_lbl_scores = outputs

                    # get scores in numpy arrays
                    np_op_scores = dy_op_scores.npvalue()
                    np_lbl_scores = dy_lbl_scores.npvalue()

                    # collect all legal transitions
                    legal_transitions = []
                    if triggers:
                        for lt in state.all_legal():
                            ix = state.t2i[lt]
                            if lt == "shift":
                                for j, tg in enumerate(triggers[1:], start=2):
                                    # NOTE(review): j starts at 2, so the
                                    # `j == 1` test below can never be true
                                    # -- confirm the intended index.
                                    if (hasattr(state.buffer[0], 'is_parent')
                                            and state.buffer[0].is_parent
                                            and j == 1):
                                        continue
                                    t = new_Transition(
                                        lt, None, tg, np_op_scores[ix] +
                                        np_lbl_scores[0] + np_tg_scores[j],
                                        dy_op_scores[ix] + dy_lbl_scores[0] +
                                        dy_tg_scores[j])
                                    legal_transitions.append(t)
                            if lt == "drop":
                                t = new_Transition(
                                    lt, None, "O", np_op_scores[ix] +
                                    np_lbl_scores[0] + np_tg_scores[1],
                                    dy_op_scores[ix] + dy_lbl_scores[0] +
                                    dy_tg_scores[1])
                                legal_transitions.append(t)
                                t = new_Transition(
                                    lt, None, "Protein", np_op_scores[ix] +
                                    np_lbl_scores[0] + np_tg_scores[4],
                                    dy_op_scores[ix] + dy_lbl_scores[0] +
                                    dy_tg_scores[4])
                                legal_transitions.append(t)
                            if lt in ['left_reduce', 'left_attach']:
                                for j, r in enumerate(relations):
                                    k = 1 + 2 * j
                                    t = new_Transition(
                                        lt, r, None, np_op_scores[ix] +
                                        np_lbl_scores[k] + np_tg_scores[0],
                                        dy_op_scores[ix] + dy_lbl_scores[k] +
                                        dy_tg_scores[0])
                                    legal_transitions.append(t)
                            if lt in ['right_reduce', 'right_attach']:
                                for j, r in enumerate(relations):
                                    k = 2 + 2 * j
                                    t = new_Transition(
                                        lt, r, None, np_op_scores[ix] +
                                        np_lbl_scores[k] + np_tg_scores[0],
                                        dy_op_scores[ix] + dy_lbl_scores[k] +
                                        dy_tg_scores[0])
                                    legal_transitions.append(t)
                            if lt == "swap":
                                t = new_Transition(
                                    lt, None, None, np_op_scores[ix] +
                                    np_lbl_scores[0] + np_tg_scores[0],
                                    dy_op_scores[ix] + dy_lbl_scores[0] +
                                    dy_tg_scores[0])
                                legal_transitions.append(t)
                        # collect all correct transitions
                        correct_transitions = []
                        for t in legal_transitions:
                            if state.is_correct(t[0]):
                                relation = state.get_arc_label_for_transition(
                                    t[0])
                                label = state.get_token_label_for_transition(
                                    t[0])
                                if t[1] == relation and t[2] == label:
                                    correct_transitions.append(t)

                    else:
                        if state.is_legal('shift'):
                            ix = state.t2i['shift']
                            t = Transition('shift', None, None,
                                           np_op_scores[ix] + np_lbl_scores[0],
                                           dy_op_scores[ix] + dy_lbl_scores[0])
                            legal_transitions.append(t)
                        if state.is_legal('left_arc'):
                            ix = state.t2i['left_arc']
                            for j, r in enumerate(relations):
                                k = 1 + 2 * j
                                t = Transition(
                                    'left_arc', r, None,
                                    np_op_scores[ix] + np_lbl_scores[k],
                                    dy_op_scores[ix] + dy_lbl_scores[k])
                                legal_transitions.append(t)
                        if state.is_legal('right_arc'):
                            ix = state.t2i['right_arc']
                            for j, r in enumerate(relations):
                                k = 2 + 2 * j
                                t = Transition(
                                    'right_arc', r, None,
                                    np_op_scores[ix] + np_lbl_scores[k],
                                    dy_op_scores[ix] + dy_lbl_scores[k])
                                legal_transitions.append(t)
                        if state.is_legal('drop'):
                            ix = state.t2i['drop']
                            t = Transition('drop', None, None,
                                           np_op_scores[ix] + np_lbl_scores[0],
                                           dy_op_scores[ix] + dy_lbl_scores[0])
                            legal_transitions.append(t)
                        # collect all correct transitions
                        correct_transitions = []
                        for t in legal_transitions:
                            if state.is_correct(t):
                                if t.op in [
                                        'shift', 'drop'
                                ] or t.label in state.stack[-1].relation:
                                    correct_transitions.append(t)

                    # select transition
                    # NOTE(review): max() raises ValueError when no correct
                    # transition was found -- confirm that cannot happen.
                    best_correct = max(correct_transitions,
                                       key=attrgetter('score'))

                    i_correct = legal_transitions.index(best_correct)
                    legal_scores = dy.concatenate(
                        [t.dy_score for t in legal_transitions])
                    loss = dy.hinge(legal_scores, i_correct)
                    # loss = dy.pickneglogsoftmax(legal_scores, i_correct)
                    losses.append(loss)

                    # perform transition
                    selected = best_correct
                    state.perform_transition(selected.op, selected.label,
                                             selected.trigger)

            # process losses in chunks
            if len(losses) > 50:
                try:
                    loss = dy.esum(losses)
                    l = loss.scalar_value()
                    loss.backward()
                    self.trainer.update()
                except Exception as exc:
                    # Best effort: report and skip this chunk. Statistics are
                    # not updated, because `l` may be unset or left over from
                    # an earlier chunk.
                    print(f'warning: skipped parameter update: {exc}')
                else:
                    loss_chunk += l
                    loss_all += l
                    total_chunk += 1
                    total_all += 1
                dy.renew_cg()
                self.set_empty_vector()
                losses = []

        # consider any remaining losses
        if len(losses) > 0:
            try:
                loss = dy.esum(losses)
                loss.scalar_value()
                loss.backward()
                self.trainer.update()
            except Exception as exc:
                print(f'warning: skipped parameter update: {exc}')
            dy.renew_cg()
            self.set_empty_vector()

        end = time.time()
        print('\nend of epoch')
        # The trailing partial chunk is not included in this average.
        avg_all = loss_all / total_all if total_all else float('nan')
        print(
            f'count: {i}\tloss: {avg_all:.4f}\ttime: {end-start_all:,.2f} secs'
        )
Пример #33
0
    def translate_sentence(self, sent):
        """Greedily translate the source word-id sequence `sent`.

        The source is encoded with a bidirectional RNN. The decoder starts
        from the last encoded position and, at each step, attends over all
        source positions (conditioned on a language embedding) and emits the
        most probable target word until `</s>` is produced or the output
        reaches `self.max_len` tokens.

        :param sent: Source word ids; `sent[1]` is looked up in
            `self.rsrc_vocab` to select the language embedding.
        :returns: List of target words, without the leading `<s>`.
        """
        dy.renew_cg()
        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)
        W1_att_e = dy.parameter(self.W1_att_e)
        W1_att_f = dy.parameter(self.W1_att_f)
        w2_att = dy.parameter(self.w2_att)
        W1_att_lang = dy.parameter(self.W1_att_lang)
        M_s = self.src_lookup
        M_t = self.tgt_lookup

        src_sent = sent
        src_sent_rev = list(reversed(sent))

        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []
        for (cw_l2r, cw_r2l) in zip(src_sent, src_sent_rev):
            l2r_state = l2r_state.add_input(M_s[cw_l2r])
            r2l_state = r2l_state.add_input(M_s[cw_r2l])
            l2r_contexts.append(
                l2r_state.output())  # [<S>, x_1, x_2, ..., </S>]
            r2l_contexts.append(
                r2l_state.output())  # [</S> x_n, x_{n-1}, ... <S>]
        r2l_contexts.reverse()  # [<S>, x_1, x_2, ..., </S>]

        # Combine the left and right representations for every word
        h_fs = [
            dy.concatenate([l2r_i, r2l_i])
            for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts)
        ]
        encoded_h = h_fs[-1]
        h_fs_matrix = dy.concatenate_cols(h_fs)

        # Decoder
        trans_sentence = [u'<s>']
        cw = self.tgt_vocab[u'<s>']
        c_t = dy.vecInput(self.hidden_size * 2)
        # List repetition works on Python 2 and 3 alike; `xrange` is a
        # NameError on Python 3.
        c_t.set([0] * self.contextsize)
        dec_state = self.dec_builder.initial_state([encoded_h])

        langid = self.lang_li.index(self.rsrc_vocab[sent[1]])
        langeb = dy.lookup(self.langeb_lookup, langid)

        while len(trans_sentence) < self.max_len:
            embed = dy.lookup(M_t, cw)
            dec_state = dec_state.add_input(dy.concatenate([embed, c_t]))
            h_e = dec_state.output()
            # Context vector from the MLP attention over all source positions.
            c_t = self.__attention_mlp(h_fs_matrix, h_e, W1_att_e, W1_att_f,
                                       w2_att, W1_att_lang, langeb)

            # Output distribution over the target vocabulary.
            score = dy.softmax(W_y * dy.concatenate([h_e, c_t]) + b_y)
            probs1 = score.npvalue()
            cw = np.argmax(probs1)
            if cw == self.tgt_vocab[u'</s>']:
                break
            trans_sentence.append(self.rtgt_vocab[cw])
        return trans_sentence[1:]
Пример #34
0
def do_one_example(model,
                   encoder,
                   revcoder,
                   decoder,
                   encoder_params,
                   decoder_params,
                   sentence_de,
                   sentence_en,
                   downstream=False,
                   GRU=False):
    """Build the summed, teacher-forced decoder loss for one sentence pair.

    A fresh computation graph is started.  ``sentence_de`` is fed through
    ``encoder`` in order and through ``revcoder`` in reverse order; the
    position-wise sums of the two output sequences form the attention
    memory.  The decoder is then stepped over ``sentence_en``: before each
    step the current decoder output (concatenated with the current context
    vector when ``downstream`` is true) is projected with ``R``/``bias`` and
    scored against the gold id, after which the gold id's embedding plus the
    context vector are fed in as the next decoder input.

    Args:
        model: Unused here; kept so existing callers keep working.
        encoder: RNN builder run over the source sentence in order.
        revcoder: RNN builder run over the reversed source sentence.
        decoder: RNN builder used for decoding.
        encoder_params: Mapping holding the source ``"lookup"`` table.
        decoder_params: Mapping holding the target ``"lookup"`` table and
            the output projection ``"R"`` / ``"bias"``.
        sentence_de: Sequence of source ids (must be non-empty and
            sliceable).
        sentence_en: Sequence of target ids.
        downstream: When true, score ``[output; context]`` instead of the
            decoder output alone.
        GRU: Unused.  It previously only selected how a combined encoder
            state was assembled, and that state was never read.  Kept for
            call compatibility.

    Returns:
        ``(total_loss, total_words)``: the ``dy.esum`` of the per-step
        losses and ``len(sentence_en)``.
    """
    dy.renew_cg()
    total_words = len(sentence_en)
    encoder_lookup = encoder_params["lookup"]
    decoder_lookup = decoder_params["lookup"]
    R = dy.parameter(decoder_params["R"])
    bias = dy.parameter(decoder_params["bias"])

    # Source sentence, read in order.
    fwd_inputs = [dy.lookup(encoder_lookup, de) for de in sentence_de]
    fwd_states = encoder.initial_state().add_inputs(fwd_inputs)
    encoder_outputs = [state.output() for state in fwd_states]

    # Source sentence, read back to front.
    bwd_inputs = [dy.lookup(encoder_lookup, de) for de in sentence_de[::-1]]
    bwd_states = revcoder.initial_state().add_inputs(bwd_inputs)
    revcoder_outputs = [state.output() for state in bwd_states]

    # Re-reverse the backward outputs so both sequences are aligned by
    # source position before they are summed.
    memory = [
        bwd_out + fwd_out
        for bwd_out, fwd_out in zip(revcoder_outputs[::-1], encoder_outputs)
    ]

    def attend(decoder_output):
        # Dot-product score against every memory slot, softmax-normalised,
        # then passed to attend_vector to obtain the context vector.
        weights = dy.softmax(
            dy.concatenate(
                [dy.dot_product(decoder_output, slot) for slot in memory]))
        return attend_vector(memory, weights)

    # NOTE(review): the decoder is seeded with the reverse encoder's final
    # state only; the forward encoder's final state is not used. Confirm
    # this is intended.
    s_prev = decoder.initial_state().set_s(bwd_states[-1].s())
    o_prev = s_prev.output()
    c_prev = attend(o_prev)

    losses = []
    for en in sentence_en:
        # Score the gold id from the state reached *before* consuming it.
        if downstream:
            scores = R * dy.concatenate([o_prev, c_prev]) + bias
        else:
            scores = R * o_prev + bias
        losses.append(dy.pickneglogsoftmax(scores, en))

        # Teacher forcing: feed the gold id together with the context.
        step_input = dy.concatenate([dy.lookup(decoder_lookup, en), c_prev])
        s_prev = s_prev.add_input(step_input)
        o_prev = s_prev.output()
        c_prev = attend(o_prev)

    return dy.esum(losses), total_words
Пример #35
0
 def get_loss(self, input, output):
     """Return ``self.decode``'s result for ``input``/``output`` on a fresh graph.

     ``input`` is embedded with ``embed_seq``, encoded with ``encode_seq``
     and the encoding is passed to ``decode`` together with ``output``.
     """
     dynet.renew_cg()
     encoded = self.encode_seq(self.embed_seq(input))
     return self.decode(encoded, output)
Пример #36
0
    def beam_search_generate(self, src_seq, beam_n=5):
        """Decode ``src_seq`` with beam search and return the finished outputs.

        Each hypothesis is a dict with the decoder ``"state"``, the list of
        emitted target tokens ``"out"`` and the accumulated negative
        log-probability ``"err"``.  A hypothesis is finished once its last
        token equals ``self.tgt_vocab.END_TOK`` or it is longer than
        ``5 * len(src_seq)``.  The search stops when ``beam_n`` hypotheses
        are finished.

        Returns:
            The ``"out"`` lists of the finished hypotheses, ordered by
            ascending ``"err"``.
        """
        dynet.renew_cg()

        input_vectors = self.encode_seq(self.embed_seq(src_seq))

        out_w = dynet.parameter(self.decoder_w)
        out_b = dynet.parameter(self.decoder_b)

        # Prime the decoder with the last encoder vector; the two vecInput
        # expressions fill the slots that later hold the attention and
        # pronunciation vectors.
        state = self.dec_lstm.initial_state()
        state = state.add_input(
            dynet.concatenate([
                input_vectors[-1],
                dynet.vecInput(self.args.hidden_dim * 2),
                dynet.vecInput(self.pronouncer.args.hidden_dim * 2)
            ]))

        live = [{"state": state, "out": [], "err": 0}]
        finished = []
        while len(finished) < beam_n:
            candidates = []
            for hyp in live:
                if hyp["out"]:
                    # Advance this hypothesis by feeding its last token.
                    last_tok = hyp["out"][-1]
                    attn_vector = self.attend(input_vectors, hyp["state"])
                    embed_vector = self.tgt_lookup[last_tok.i]
                    spelling = [
                        self.pronouncer.src_vocab[letter]
                        for letter in last_tok.s.upper()
                    ]
                    pron_vector = self.pronouncer.encode_seq(
                        self.pronouncer.embed_seq(spelling))[-1]
                    frozen_pron = dynet.nobackprop(pron_vector)
                    state = hyp["state"].add_input(
                        dynet.concatenate(
                            [embed_vector, attn_vector, frozen_pron]))
                # A hypothesis with no output yet is the primed start state,
                # which `state` still refers to at this point.

                probs = dynet.softmax(out_w * state.output() +
                                      out_b).vec_value()

                # Expand with every vocabulary entry.
                for next_i, prob in enumerate(probs):
                    candidates.append({
                        "state": state,
                        "out": hyp["out"] + [self.tgt_vocab[next_i]],
                        "err": hyp["err"] - math.log(prob)
                    })

            candidates.sort(key=lambda cand: cand["err"])
            # Keep only as many hypotheses as there are open beam slots.
            live = candidates[:beam_n - len(finished)]
            finished.extend([
                hyp for hyp in live
                if hyp["out"][-1] == self.tgt_vocab.END_TOK
                or len(hyp["out"]) > 5 * len(src_seq)
            ])
            live = [
                hyp for hyp in live
                if hyp["out"][-1] != self.tgt_vocab.END_TOK
                and len(hyp["out"]) <= 5 * len(src_seq)
            ]

        finished.sort(key=lambda cand: cand["err"])
        return [hyp["out"] for hyp in finished]
Пример #37
0
 def start_batch(self):
     """Begin a new batch: renew the computation graph, clear ``batch_loss``."""
     dy.renew_cg()
     self.batch_loss = list()
Пример #38
0
 def test_value_sanity(self):
     """An expression from a renewed graph must make npvalue_callable raise."""
     dy.renew_cg()
     stale_expr = dy.inputTensor(self.v1)
     # Renewing the graph again leaves `stale_expr` tied to the old graph.
     dy.renew_cg()
     with self.assertRaises(RuntimeError):
         npvalue_callable(stale_expr)
Пример #39
0
 def test_inputTensor_except(self):
     """``dy.inputTensor`` called without an array must raise TypeError."""
     dy.renew_cg()
     with self.assertRaises(TypeError):
         dy.inputTensor(batched=True)
Пример #40
0
    def parse(self, t, oracle_actions=None):
        """Run the transition parser over the token ids ``t``.

        With ``oracle_actions`` the given actions are followed and the
        log-probability of every oracle action that had an alternative is
        collected; without it the highest-scoring action is taken at each
        step and the resulting head/modifier pairs are printed.

        Args:
            t: Iterable of token ids (indices into ``self.WORDS_LOOKUP`` and
                ``self.vocab.i2w``).
            oracle_actions: Optional sequence of gold actions, in the order
                they should be applied.

        Returns:
            ``-dy.esum(losses)`` when at least one loss term was collected,
            otherwise ``None``.

        NOTE(review): an empty (but not ``None``) ``oracle_actions`` skips
        the list conversion below yet is still ``pop()``-ed later, and an
        empty ``t`` reaches ``valid_actions[0]`` with no valid action.
        Confirm callers never pass either.
        """
        dy.renew_cg()
        if oracle_actions:
            # Reversed copy so that pop() yields the actions in order.
            oracle_actions = list(oracle_actions)
            oracle_actions.reverse()
        stack_top = self.stackRNN.initial_state()
        # Tokens are reversed so the buffer RNN reads right-to-left and
        # buffer.pop() later returns the tokens in their original order.
        toks = list(t)
        toks.reverse()
        stack = []
        cur = self.buffRNN.initial_state()
        buffer = []
        empty_buffer_emb = dy.parameter(self.pempty_buffer_emb)
        W_comp = dy.parameter(self.pW_comp)
        b_comp = dy.parameter(self.pb_comp)
        W_s2h = dy.parameter(self.pW_s2h)
        b_s2h = dy.parameter(self.pb_s2h)
        W_act = dy.parameter(self.pW_act)
        b_act = dy.parameter(self.pb_act)
        losses = []
        # Each buffer entry: (buffer-RNN output, token embedding, word string).
        for tok in toks:
            tok_embedding = self.WORDS_LOOKUP[tok]
            cur = cur.add_input(tok_embedding)
            buffer.append((cur.output(), tok_embedding, self.vocab.i2w[tok]))

        # Parse until a single item is left on the stack and the buffer is empty.
        while not (len(stack) == 1 and len(buffer) == 0):
            # based on parser state, get valid actions
            valid_actions = []
            if len(buffer) > 0:  # can only shift if elements remain in buffer
                valid_actions += [SHIFT]
            if len(stack) >= 2:  # can only reduce if 2 elements on stack
                valid_actions += [REDUCE_L, REDUCE_R]

            # compute probability of each of the actions and choose an action
            # either from the oracle or if there is no oracle, based on the model
            action = valid_actions[0]
            log_probs = None
            if len(valid_actions) > 1:
                buffer_embedding = buffer[-1][0] if buffer else empty_buffer_emb
                stack_embedding = stack[-1][0].output(
                )  # the stack has something here
                parser_state = dy.concatenate(
                    [buffer_embedding, stack_embedding])
                h = dy.tanh(W_s2h * parser_state + b_s2h)
                logits = W_act * h + b_act
                # valid_actions is passed as the restriction argument.
                log_probs = dy.log_softmax(logits, valid_actions)
                if oracle_actions is None:
                    # Greedy decoding: index of the largest log-probability.
                    action = max(enumerate(log_probs.vec_value()),
                                 key=itemgetter(1))[0]
            if oracle_actions is not None:
                action = oracle_actions.pop()
                if log_probs is not None:
                    # append the action-specific loss
                    # (log-probability here; negated once in the return).
                    losses.append(dy.pick(log_probs, action))

            # execute the action to update the parser state
            if action == SHIFT:
                _, tok_embedding, token = buffer.pop()
                # Extend the RNN state of the current stack top (or of the
                # empty-stack state) with the shifted token.
                stack_state, _ = stack[-1] if stack else (stack_top, '<TOP>')
                stack_state = stack_state.add_input(tok_embedding)
                stack.append((stack_state, token))
            else:  # one of the reduce actions
                right = stack.pop()
                left = stack.pop()
                # REDUCE_R makes the left item the head; REDUCE_L the right.
                head, modifier = (left,
                                  right) if action == REDUCE_R else (right,
                                                                     left)
                top_stack_state, _ = stack[-1] if stack else (stack_top,
                                                              '<TOP>')
                head_rep, head_tok = head[0].output(), head[1]
                mod_rep, mod_tok = modifier[0].output(), modifier[1]
                # Compose head and modifier into one vector and push it
                # back under the head's word.
                composed_rep = dy.rectify(W_comp *
                                          dy.concatenate([head_rep, mod_rep]) +
                                          b_comp)
                top_stack_state = top_stack_state.add_input(composed_rep)
                stack.append((top_stack_state, head_tok))
                if oracle_actions is None:
                    print('{0} --> {1}'.format(head_tok, mod_tok))

        # the head of the tree that remains at the top of the stack is now the root
        if oracle_actions is None:
            head = stack.pop()[1]
            print('ROOT --> {0}'.format(head))
        return -dy.esum(losses) if losses else None
 # Appends one "x y" line (two decimals each) to <save_dir>/valid_history.
 # NOTE(review): the file object returned by open() is never closed
 # explicitly.
 history = lambda x, y: open(os.path.join(config.save_dir, 'valid_history'),
                             'a').write('%.2f %.2f\n' % (x, y))
 # Training loop (Python 2 syntax: print statement, xrange).
 # NOTE(review): nothing in the lines shown here advances global_step or
 # applies the accumulated gradients; presumably that happens in code that
 # follows this fragment.
 while global_step < config.train_iters:
     print time.strftime(
         "%Y-%m-%d %H:%M:%S",
         time.localtime()), '\nStart training epoch #%d' % (epoch, )
     epoch += 1
     # Fraction of the iteration budget already used; scales the classifier
     # loss below.
     lamb = (global_step * 1.0) / config.train_iters
     for words, tags, arcs, rels, domain_flag in data:
         # Split every array into two halves along axis 1 and process the
         # halves one after the other, each on its own computation graph.
         num = int(words.shape[1] / 2)
         words_ = [words[:, :num], words[:, num:]]
         tags_ = [tags[:, :num], tags[:, num:]]
         arcs_ = [arcs[:, :num], arcs[:, num:]]
         rels_ = [rels[:, :num], rels[:, num:]]
         for step in xrange(2):
             dy.renew_cg()
             common_top_recur, private_top_recur, p_fs, p_bs = parser.run_lstm(
                 words_[step], tags_[step])
             # The parser loss is only computed for batches with
             # domain_flag == 0.
             if domain_flag == 0:
                 arc_accuracy, rel_accuracy, overall_accuracy, parser_loss = parser.run_parser(
                     words_[step],
                     common_top_recur,
                     private_top_recur,
                     arc_targets=arcs_[step],
                     rel_targets=rels_[step])
                 # Halved because each batch is processed as two half-batches.
                 parser_loss = parser_loss * 0.5
                 parser_loss.backward()
             # The classifier loss is computed for every batch.
             class_loss, class_accurate = parser.run_classifier(
                 common_top_recur, words_[step], domain_flag)
             class_loss = lamb * class_loss * 0.5
             class_loss.backward()
Пример #42
0
    def predict(self, word_indices, char_indices, train=False):
        """
        Predict tags for a sentence represented as char+word embeddings.

        A fresh computation graph is started.  Every token is represented by
        its word embedding and, when ``self.c_in_dim > 0``, additionally by
        the last ``char_rnn`` state over its characters and over its
        reversed characters.  These features pass through the
        ``self.h_layers`` inner predictors; the concatenated forward and
        backward sequences of the last layer are handed to the output
        predictor.

        Args:
            word_indices: Word ids of the sentence (indices into
                ``self.wembeds``).
            char_indices: One sequence of character ids per token (indices
                into ``self.cembeds``).
            train: When true, noise with ``self.noise_sigma`` is added to
                the input features and, if ``self.noise_sigma > 0``, to the
                concatenated last-layer output.

        Returns:
            Whatever ``self.predictors["output_layers_dict"]
            .predict_sequence`` returns for the sentence.

        Raises:
            Exception: if no layer produced an output (``self.h_layers``
                is smaller than 1).
        """
        dynet.renew_cg()  # new graph

        char_emb = []
        rev_char_emb = []

        wfeatures = [self.wembeds[w] for w in word_indices]

        if self.c_in_dim > 0:
            # get representation for words
            for chars_of_token in char_indices:
                # use last state as word representation
                last_state = self.char_rnn.predict_sequence(
                    [self.cembeds[c] for c in chars_of_token])[-1]
                rev_last_state = self.char_rnn.predict_sequence(
                    [self.cembeds[c] for c in reversed(chars_of_token)])[-1]
                char_emb.append(last_state)
                rev_char_emb.append(rev_last_state)

            # Both character lists are already in token order (only the
            # characters inside a token were reversed), so they are zipped
            # as-is.  Reversing rev_char_emb here would pair token i with
            # the embedding of token n-1-i.
            # NOTE(review): models trained with the old, misaligned pairing
            # will see different input features after this fix.
            features = [
                dynet.concatenate([w, c, rev_c])
                for w, c, rev_c in zip(wfeatures, char_emb, rev_char_emb)
            ]
        else:
            features = wfeatures

        if train:  # only do at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]

        # The output is taken from the last inner layer.
        output_expected_at_layer = self.h_layers - 1

        # go through layers
        # input is now combination of w + char emb
        prev = features
        prev_rev = features
        num_layers = self.h_layers
        for i in range(0, num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(
                prev, prev_rev)
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [
                    self.activation(s) for s in forward_sequence
                ]
                backward_sequence = [
                    self.activation(s) for s in backward_sequence
                ]

            if i == output_expected_at_layer:
                output_predictor = self.predictors["output_layers_dict"]
                # backward_sequence is reversed before zipping so that it
                # lines up with forward_sequence position by position.
                concat_layer = [
                    dynet.concatenate([f, b]) for f, b in zip(
                        forward_sequence, reversed(backward_sequence))
                ]

                if train and self.noise_sigma > 0.0:
                    concat_layer = [
                        dynet.noise(fe, self.noise_sigma)
                        for fe in concat_layer
                    ]
                return output_predictor.predict_sequence(concat_layer)

            prev = forward_sequence
            prev_rev = backward_sequence

        raise Exception("oops should not be here")
Пример #43
0
    def train(corpus, bigrams_dims, unigrams_dims, lstm_units, hidden_units,
              epochs, batch_size, train_data_file, dev_data_file,
              model_save_file, droprate, unk_params, alpha, beta):
        """Train a ``Network`` on the gold data and return it.

        For every epoch the training data is shuffled and processed in
        batches.  Each batch is first passed through
        ``Segmenter.exploration``; the returned examples get random UNKing
        applied, their label losses are summed, back-propagated and the
        trainer is updated.  Roughly four times per epoch (and after the
        last batch) the network is evaluated on the dev sentences and saved
        to ``model_save_file`` whenever the dev F-score improves.

        Args:
            corpus: Feature mapper providing vocabulary sizes, frequency
                lists and ``gold_data_from_file``.
            bigrams_dims, unigrams_dims: Embedding sizes.
            lstm_units, hidden_units: Layer sizes passed to ``Network``.
            epochs: Number of passes over the training data.
            batch_size: Number of examples per update.
            train_data_file, dev_data_file: Paths of the data files.
            model_save_file: Where the best model is saved.
            droprate: Dropout rate passed to ``Network``.
            unk_params: The ``z`` of the random UNKing probability
                ``z / (z + freq)``.
            alpha, beta: Passed through to ``Segmenter.exploration``.

        Returns:
            The trained ``Network`` instance (its state after the last
            update, not necessarily the saved best one).
        """

        start_time = time.time()

        fm = corpus
        bigrams_size = corpus.total_bigrams()
        unigrams_size = corpus.total_unigrams()

        network = Network(
            bigrams_size=bigrams_size,
            unigrams_size=unigrams_size,
            bigrams_dims=bigrams_dims,
            unigrams_dims=unigrams_dims,
            lstm_units=lstm_units,
            hidden_units=hidden_units,
            label_size=fm.total_labels(),
            span_nums=fm.total_span_nums(),
            droprate=droprate,
        )

        network.init_params()

        print('Hidden units : {} ,per LSTM units : {}'.format(
            hidden_units,
            lstm_units,
        ))

        print('Embeddings: bigrams = {}, unigrams = {}'.format(
            (bigrams_size, bigrams_dims), (unigrams_size, unigrams_dims)))

        print('Dropout rate : {}'.format(droprate))
        print('Parameters initialized in [-0.01,0.01]')
        print('Random UNKing parameter z = {}'.format(unk_params))

        training_data = corpus.gold_data_from_file(train_data_file)
        # Ceiling division: number of batches needed to cover all examples.
        num_batched = -(-len(training_data) // batch_size)
        print('Loaded {} training sentences ({} batches of size {})!'.format(
            len(training_data),
            num_batched,
            batch_size,
        ))

        # Evaluate on the dev set about four times per epoch.
        parse_every = -(-num_batched // 4)

        dev_sentences = SegSentence.load_sentence_file(dev_data_file)
        print('Loaded {} validation sentences!'.format(len(dev_sentences)))

        best_acc = FScore()
        for epoch in xrange(1, epochs + 1):
            print('............ epoch {} ............'.format(epoch))

            # Running totals for the mean cost shown in the progress line.
            total_cost = 0.0
            total_states = 0
            training_acc = FScore()

            np.random.shuffle(training_data)

            for b in xrange(num_batched):
                batch = training_data[(b * batch_size):(b + 1) * batch_size]

                # exploration() yields an (example, accuracy) pair per
                # sentence; the examples it returns are what gets trained on.
                explore = [
                    Segmenter.exploration(example,
                                          fm,
                                          network,
                                          alpha=alpha,
                                          beta=beta) for example in batch
                ]
                for (_, acc) in explore:
                    training_acc += acc

                batch = [example for (example, _) in explore]

                # One computation graph per batch.
                dynet.renew_cg()
                network.prep_params()

                errors = []
                for example in batch:
                    ## random UNKing ##
                    # Ids <= 2 are left untouched (presumably reserved
                    # special ids -- confirm); any other id is replaced by
                    # 0 with probability z / (z + frequency).  The example
                    # lists are modified in place.
                    for (i, uni) in enumerate(example['unigrams']):
                        if uni <= 2:
                            continue

                        u_freq = fm.unigrams_freq_list[uni]
                        drop_prob = unk_params / (unk_params + u_freq)
                        r = np.random.random()
                        if r < drop_prob:
                            example['unigrams'][i] = 0

                    for (i, bi) in enumerate(example['fwd_bigrams']):
                        if bi <= 2:
                            continue

                        b_freq = fm.bigrams_freq_list[bi]
                        drop_prob = unk_params / (unk_params + b_freq)
                        r = np.random.random()
                        if r < drop_prob:
                            example['fwd_bigrams'][i] = 0

                    fwd, back = network.evaluate_recurrent(
                        example['fwd_bigrams'],
                        example['unigrams'],
                    )

                    # One loss term per labelled (left, right) span.
                    for (left,
                         right), correct in example['label_data'].items():
                        # correct = example['label_data'][(left,right)]
                        scores = network.evaluate_labels(
                            fwd, back, left, right)

                        # Negative log-probability of the correct label.
                        probs = dynet.softmax(scores)
                        loss = -dynet.log(dynet.pick(probs, correct))
                        errors.append(loss)
                    total_states += len(example['label_data'])

                batch_error = dynet.esum(errors)
                total_cost += batch_error.scalar_value()
                batch_error.backward()
                network.trainer.update()

                # NOTE(review): divides by zero if no label data has been
                # seen yet in this epoch.
                mean_cost = total_cost / total_states

                # '\r' + end='' keeps the progress on a single console line.
                print(
                    '\rBatch {}  Mean Cost {:.4f}  [Train: {}]'.format(
                        b,
                        mean_cost,
                        training_acc,
                    ),
                    end='',
                )
                sys.stdout.flush()

                # Periodic dev evaluation, always including the last batch.
                if ((b + 1) % parse_every) == 0 or b == (num_batched - 1):
                    dev_acc = Segmenter.evaluate_corpus(
                        dev_sentences,
                        fm,
                        network,
                    )
                    print(' [Val: {}]'.format(dev_acc))

                    # Save only when the dev F-score strictly improves.
                    if dev_acc.fscore() > best_acc.fscore():
                        best_acc = dev_acc
                        network.save(model_save_file)
                        print('    [saved model : {}]'.format(model_save_file))

            current_time = time.time()
            runmins = (current_time - start_time) / 60
            print(' Elapsed time: {:.2f}m'.format(runmins))

        return network
Пример #44
0
def CalculateLossForDaf(daf, fValidation=False):
    """Tag the words of one daf and return its loss or its accuracy.

    The words are joined with spaces into one character sequence framed by
    '*BOS*' markers and run through ``bilstm``.  Each word is represented
    by the concatenation of the bilstm outputs at the boundary before it
    and the boundary after it.  That representation, together with the
    output of an LSTM over the previous tags, is fed to ``pos_mlp``.

    Args:
        daf: Dict with ``"file"`` and ``"words"``; ``"words"`` is a sequence
            of ``(word, gold_word_class, gold_word_pos)`` triples.
        fValidation: When false, losses are collected and the gold tag is
            fed to the previous-tag LSTM.  When true, the confusion matrix
            is updated and the predicted tag is fed instead.

    Returns:
        ``(pos_prec, tagged_daf)`` in validation mode, otherwise
        ``(total_loss, pos_prec)``.  ``pos_prec`` is ``None`` when no word
        had ``gold_word_class == 1``; ``total_loss`` is ``None`` when no
        loss was collected.
    """
    dy.renew_cg()
    tagged_daf = {"words": [], "file": daf["file"]}
    daf = daf["words"]

    # Character sequence of the space-joined words, framed by '*BOS*'.
    joined_words = ' '.join([word for word, _, _ in daf])
    seq = ['*BOS*'] + list(joined_words) + ['*BOS*']

    # Encode every character and run the whole sequence through the bilstm.
    char_bilstm_outputs = bilstm([let_enc(let) for let in seq])

    # A word ends at every space and at the closing '*BOS*'; its
    # representation joins the outputs at the previous and current boundary.
    word_bilstm_outputs = []
    boundary_start = 0
    for idx, char in enumerate(seq):
        closes_word = char == ' ' or (char == '*BOS*'
                                      and idx + 1 == len(seq))
        if not closes_word:
            continue
        word_bilstm_outputs.append(
            dy.concatenate([
                char_bilstm_outputs[boundary_start], char_bilstm_outputs[idx]
            ]))
        # The next word starts at this boundary.
        boundary_start = idx

    # Sanity check: there should be exactly one representation per word.
    if len(word_bilstm_outputs) != len(daf):
        log_message('Size mismatch!! word_bilstm_outputs: ' +
                    str(len(word_bilstm_outputs)) + ', daf: ' + str(len(daf)))

    prev_pos_lstm_state = prev_pos_lstm.initial_state().add_input(
        pos_enc('*BOS*'))

    all_losses = []
    pos_prec = 0.0
    pos_items = 0

    for (word, gold_word_class,
         gold_word_pos), bilstm_output in zip(daf, word_bilstm_outputs):
        # Only words of class 1 count towards loss and accuracy.
        should_backprop = gold_word_class == 1

        # MLP input: word representation plus previous-tag LSTM output.
        mlp_input = dy.concatenate(
            [bilstm_output, prev_pos_lstm_state.output()])
        pos_mlp_output = pos_mlp(mlp_input)

        # Restrict the scores to the tags listed for this word; if the word
        # or one of its tags is unknown, fall back to the unrestricted scores.
        raw_scores = pos_mlp_output.npvalue()
        try:
            allowed_indices = [pos_vocab[tag] for tag in pos_hashtable[word]]
        except KeyError:
            possible_pos_array = raw_scores
        else:
            possible_pos_array = np.zeros(raw_scores.shape)
            possible_pos_array[allowed_indices] = raw_scores[allowed_indices]

        predicted_word_pos = pos_vocab.getItem(np.argmax(possible_pos_array))
        confidence = np.max(possible_pos_array) / np.sum(possible_pos_array)
        if should_backprop:
            if predicted_word_pos == gold_word_pos:
                pos_prec += 1
            pos_items += 1

        if fValidation:
            # Validation: record the confusion and continue with the guess.
            if should_backprop:
                pos_conf_matrix(pos_vocab[predicted_word_pos],
                                pos_vocab[gold_word_pos])
            word_pos_ans = predicted_word_pos
        else:
            # Training: collect the loss and continue with the gold tag.
            if should_backprop:
                all_losses.append(
                    -dy.log(dy.pick(pos_mlp_output, pos_vocab[gold_word_pos])))
            word_pos_ans = gold_word_pos

        # Advance the previous-tag LSTM with the chosen tag.
        prev_pos_lstm_state = prev_pos_lstm_state.add_input(
            pos_enc(word_pos_ans))

        tagged_daf["words"].append({
            "word": word,
            "gold_pos": gold_word_pos,
            "predicted_pos": predicted_word_pos,
            "confidence": confidence
        })

    pos_prec = pos_prec / pos_items if pos_items > 0 else None

    if fValidation:
        return pos_prec, tagged_daf

    total_loss = dy.esum(all_losses) if len(all_losses) > 0 else None
    return total_loss, pos_prec
    def propogate(self, sentence):
        """Score root candidates and/or POS tags for every word of a sentence.

        Each word's surface form is encoded character by character with a
        forward and a backward RNN; the resulting word vectors are run
        through a forward and a backward context RNN.  For every word the
        two context outputs at its position are summed and squashed with
        tanh.  With ``self.stemming`` that context vector is multiplied with
        the representations of the word's (root, suffix) candidates; with
        ``self.postagging`` it is projected with ``W`` and ``b``.

        Args:
            sentence: Sequence of word objects providing ``surface_word``
                and, when stemming, ``roots`` and ``suffixes``.

        Returns:
            ``(root_scores, postag_scores)``: two lists holding one
            expression per word when the corresponding flag is set, and
            empty otherwise.
        """
        dy.renew_cg()
        surface_fwd = self.fwdRNN_surface.initial_state()
        surface_bwd = self.bwdRNN_surface.initial_state()
        if self.stemming:
            root_fwd = self.fwdRNN_root.initial_state()
            root_bwd = self.bwdRNN_root.initial_state()
            suffix_fwd = self.fwdRNN_suffix.initial_state()
            suffix_bwd = self.bwdRNN_suffix.initial_state()
        context_fwd = self.fwdRNN_context.initial_state()
        context_bwd = self.bwdRNN_context.initial_state()
        if self.postagging:
            W = dy.parameter(self.pW)
            b = dy.parameter(self.pb)

        def as_row(vec):
            # View the vector as a 1 x dim matrix for the products below.
            return dy.reshape(vec, (1, vec.dim()[0][0]))

        # Context representations: one vector per word built from the last
        # outputs of the character RNNs run in both directions.
        surface_words_rep = []
        for word in sentence:
            char_ids = self._encode(word.surface_word, self.char2id)
            char_embeddings = self._embed(char_ids, self.SURFACE_CHARS_LOOKUP)
            fwd_outputs = surface_fwd.transduce(char_embeddings)
            bwd_outputs = surface_bwd.transduce(reversed(char_embeddings))
            surface_words_rep.append(
                dy.concatenate([fwd_outputs[-1], bwd_outputs[-1]]))
        fw_exps_context = context_fwd.transduce(surface_words_rep)
        bw_exps_context = context_bwd.transduce(reversed(surface_words_rep))

        root_scores = []
        postag_scores = []
        # Stem and POS representations.
        for index, word in enumerate(sentence):
            if self.stemming:
                root_ids = [
                    self._encode(root, self.char2id) for root in word.roots
                ]
                suffix_ids = [
                    self._encode(suffix, self.char2id)
                    for suffix in word.suffixes
                ]
                root_embeddings = [
                    self._embed(ids, self.ROOT_CHARS_LOOKUP)
                    for ids in root_ids
                ]
                suffix_embeddings = [
                    self._embed(ids, self.ROOT_CHARS_LOOKUP)
                    for ids in suffix_ids
                ]

                # One representation per (root, suffix) candidate.
                candidate_reps = []
                for root_emb, suffix_emb in zip(root_embeddings,
                                                suffix_embeddings):
                    root_rep = dy.rectify(
                        dy.concatenate([
                            root_fwd.transduce(root_emb)[-1],
                            root_bwd.transduce(reversed(root_emb))[-1]
                        ]))
                    if len(suffix_emb) == 0:
                        # No suffix: the root representation stands alone.
                        candidate_reps.append(root_rep)
                        continue
                    suffix_rep = dy.rectify(
                        dy.concatenate([
                            suffix_fwd.transduce(suffix_emb)[-1],
                            suffix_bwd.transduce(reversed(suffix_emb))[-1]
                        ]))
                    candidate_reps.append(
                        dy.rectify(dy.esum([root_rep, suffix_rep])))

            # The backward context RNN read the sentence reversed, so the
            # output for this word sits at the mirrored index.
            left_context_rep = fw_exps_context[index]
            right_context_rep = bw_exps_context[len(sentence) - index - 1]
            context_rep = dy.tanh(
                dy.esum([left_context_rep, right_context_rep]))

            if self.stemming:
                root_scores.append(
                    (as_row(context_rep) *
                     dy.concatenate(candidate_reps, 1))[0])
            if self.postagging:
                postag_scores.append((as_row(context_rep) * W + b)[0])

        return root_scores, postag_scores
Пример #46
0
 def test_value(self):
     """``npvalue()`` of an input tensor must be close to the source array."""
     dy.renew_cg()
     expr = dy.inputTensor(self.v1)
     round_trip = expr.npvalue()
     self.assertTrue(np.allclose(round_trip, self.v1))
Пример #47
0
    def train(self, train_data, dev_data, num_epochs=120, batch_size=10):
        """Train on ``train_data`` with mini-batch updates and early stopping.

        Every epoch shuffles ``train_data`` in place and accumulates the
        per-example losses returned by ``self.decode``.  After every
        ``batch_size`` examples -- and once more at the end of the epoch for
        the trailing, possibly smaller batch -- the summed loss is
        back-propagated, the trainer is updated, training accuracy is
        measured on the batch's predictions and the computation graph is
        renewed.  (Previously the trailing batch was dropped without an
        update, and data sets no larger than ``batch_size`` crashed with a
        division by zero when the accuracy was printed.)

        After each epoch the model is evaluated on ``dev_data``; the model
        is saved whenever dev accuracy improves, and training stops once
        dev accuracy varied by less than 0.01 over the last 8 epochs.

        Args:
            train_data: Mutable sequence of ``(x, y)`` pairs.
            dev_data: Data passed to ``self.evaluate``.
            num_epochs: Maximum number of epochs.
            batch_size: Number of examples per parameter update.

        Returns:
            Always ``0``.
        """
        patience = 8

        def flush_batch(batch_losses, batch_preds):
            """Update on the pending losses; return (n_correct, n_wrong)."""
            loss_sum = dy.esum(batch_losses)
            loss_sum.forward()
            loss_sum.backward()
            self.trainer.update()

            # Training accuracy on the strings generated for this batch.
            n_correct = n_wrong = 0
            for word_probs, y_true in batch_preds:
                generated_string = ""
                for char_probs in word_probs:
                    generated_string += self.I2C[np.argmax(
                        char_probs.npvalue())]
                if generated_string == y_true:
                    n_correct += 1
                else:
                    n_wrong += 1

            # The batch's expressions are no longer needed.
            dy.renew_cg()
            return n_correct, n_wrong

        for I in range(num_epochs):

            print("EPOCH NUMBER {}".format(I))

            random.shuffle(train_data)
            good, bad = 0., 0.
            losses = []
            preds = []

            for i, (x, y) in enumerate(train_data):

                if i % batch_size == 0 and i > 0:
                    n_correct, n_wrong = flush_batch(losses, preds)
                    good += n_correct
                    bad += n_wrong
                    losses = []
                    preds = []

                encoded_state, encoded_x = self.encode(x)

                loss, probs = self.decode(encoded_state,
                                          y,
                                          encoded_x,
                                          train=True)
                preds.append((probs, y))
                losses.append(loss)

                # Coarse progress indicator.
                if i % 2000 == 0 and i > 0:
                    print(i)

            # Train on the examples left over after the last full batch.
            if losses:
                n_correct, n_wrong = flush_batch(losses, preds)
                good += n_correct
                bad += n_wrong

            evaluated = good + bad
            print("training accuracy: {}".format(
                good / evaluated if evaluated else 0.0))
            acc, _ = self.evaluate(dev_data)
            self.accs.append(acc)

            # Early stopping: dev accuracy has plateaued.
            if I > 8 and abs(
                    min(self.accs[-patience:]) -
                    max(self.accs[-patience:])) < 0.01:

                return 0

            if acc > self.best_acc:
                self.best_acc = acc
                self.model.save("model.m." + str(self.id))

        return 0
Пример #48
0
 def test_pick_batch_elem(self):
     """Batch element 1 of a [0, 1] batch lookup must match ``self.pval[1]``."""
     dy.renew_cg()
     batch = dy.lookup_batch(self.p, [0, 1])
     second = dy.pick_batch_elem(batch, 1)
     matches = np.allclose(second.npvalue(), self.pval[1])
     self.assertTrue(matches)
Пример #49
0
 def get_perplexity(self, input, output):
     """Return ``exp(loss / (len(output) - 1))`` for one input/output pair.

     The loss is the value of what ``self.decode`` returns for the encoded
     ``input`` and ``output``, computed on a fresh graph.
     """
     dynet.renew_cg()
     encoded = self.encode_seq(self.embed_seq(input))
     total_loss = self.decode(encoded, output).value()
     normaliser = len(output) - 1
     return math.exp(total_loss / normaliser)
Пример #50
0
 def test_lookup_batch(self):
     """A [0, 1] batch lookup must be close to ``self.pval`` transposed."""
     dy.renew_cg()
     batch = dy.lookup_batch(self.p, [0, 1])
     matches = np.allclose(batch.npvalue(), self.pval.T)
     self.assertTrue(matches)
Пример #51
0
def beamDecode(model,
               encoder,
               revcoder,
               decoder,
               encoder_params,
               decoder_params,
               sentence_de,
               downstream=False,
               k=10):
    """Beam-search a target token sequence for the source ``sentence_de``.

    The source is run through ``encoder`` in its given order and through
    ``revcoder`` in reverse; the per-position outputs of both passes are
    summed and attended over (dot-product attention) at every decoding step.

    :param model: unused here; kept so existing call sites keep working
    :param encoder: RNN builder run over the source in its given order
    :param revcoder: RNN builder run over the reversed source
    :param decoder: RNN builder for the target side; it is started from the
        final state of ``revcoder``
    :param encoder_params: mapping holding the source ``"lookup"`` table
    :param decoder_params: mapping holding ``"lookup"``, ``"R"`` and ``"bias"``
    :param sentence_de: sequence of source token ids
    :param downstream: if true, token scores are computed from the decoder
        output concatenated with the attention context; otherwise from the
        decoder output alone
    :param k: beam width; the search stops once ``2 * k`` hypotheses have
        finished or the beam has run empty
    :return: ``(loss, tokens)`` where ``tokens`` is the finished hypothesis
        with the lowest cumulative negative log-likelihood and ``loss`` is
        that cumulative value. If no hypothesis finished, returns
        ``(float("inf"), [])``.
    """
    dy.renew_cg()
    encoder_lookup = encoder_params["lookup"]
    decoder_lookup = decoder_params["lookup"]
    R = dy.parameter(decoder_params["R"])
    bias = dy.parameter(decoder_params["bias"])

    # Encode the source left-to-right ...
    forward_start = encoder.initial_state()
    forward_inputs = [dy.lookup(encoder_lookup, de) for de in sentence_de]
    states = forward_start.add_inputs(forward_inputs)
    encoder_outputs = [state.output() for state in states]

    # ... and right-to-left.
    reverse_start = revcoder.initial_state()
    reverse_inputs = [
        dy.lookup(encoder_lookup, de) for de in sentence_de[::-1]
    ]
    states_reverse = reverse_start.add_inputs(reverse_inputs)
    revcoder_outputs = [state.output() for state in states_reverse]

    final_state_reverse = states_reverse[-1].s()
    # Re-align the reversed outputs with source order before summing.
    final_combined_outputs = [
        revcoder_output + encoder_output for revcoder_output, encoder_output in
        zip(revcoder_outputs[::-1], encoder_outputs)
    ]

    def _attend(decoder_output):
        # Dot-product attention of one decoder output over the source.
        alpha = dy.softmax(
            dy.concatenate([
                dy.dot_product(decoder_output, combined)
                for combined in final_combined_outputs
            ]))
        return attend_vector(final_combined_outputs, alpha)

    s_init = decoder.initial_state().set_s(final_state_reverse)
    o_init = s_init.output()
    c_init = _attend(o_init)

    # A hypothesis is (decoder state, context, decoder output, tokens so far,
    # cumulative negative log-likelihood).
    finishedSequences = []
    currentSequences = [(s_init, c_init, o_init, [], 0.0)]
    length_limit = len(sentence_de) + 10

    # Stop as well when the beam is empty: every candidate of a step can be
    # discarded (a too-short hypothesis ending in STOP is neither kept nor
    # finished), and without this check the loop would never terminate.
    while len(finishedSequences) < 2 * k and currentSequences:
        candidates = []
        for state, context, output, tokens, total_loss in currentSequences:
            if downstream:
                scores = dy.affine_transform(
                    [bias, R, dy.concatenate([output, context])])
            else:
                scores = dy.affine_transform([bias, R, output])
            for token in topk(scores.npvalue(), k):
                token_loss = dy.pickneglogsoftmax(scores, token).value()
                step_input = dy.concatenate(
                    [dy.lookup(decoder_lookup, token), context])
                next_state = state.add_input(step_input)
                next_output = next_state.output()
                next_context = _attend(next_output)
                next_tokens = tokens + [token]
                candidate = (next_state, next_context, next_output,
                             next_tokens, total_loss + token_loss)
                if token == STOP or len(next_tokens) > length_limit:
                    # Very short outputs are only accepted when the source
                    # itself is at most that short.
                    if (len(next_tokens) > 3
                            or len(next_tokens) >= len(sentence_de)):
                        finishedSequences.append(candidate)
                else:
                    candidates.append(candidate)
        # Lower cumulative loss is better.
        candidates.sort(key=lambda hyp: hyp[4])
        currentSequences = candidates[:k]

    if not finishedSequences:
        return float("inf"), []

    best = min(finishedSequences, key=lambda hyp: hyp[4])
    # Report the score of the sentence actually returned rather than the
    # per-token loss of whichever candidate happened to be scored last.
    return best[4], best[3]
Пример #52
0
def total_loss(model, data):
    """Return the sum of ``nll(model, instance).value()`` over ``data``.

    The computation graph is renewed after each instance has been evaluated.
    """
    running_total = 0
    for example in tqdm(data, desc='Computing loss'):
        running_total = running_total + nll(model, example).value()
        dy.renew_cg()
    return running_total
Пример #53
0
    def step_batch(self, batch):
        """Build the batched loss expression for one minibatch.

        ``batch`` is unpacked into source and target sentences, each of which
        is then transposed so that iteration runs over positions (presumably
        all sentences are padded to equal length -- confirm with the caller).

        Returns ``(loss, num_words)``: the loss summed over decoding steps and
        over the batch, and the number of decoding steps taken.
        """
        dy.renew_cg()

        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)
        W1_att_e = dy.parameter(self.W1_att_e)
        W1_att_f = dy.parameter(self.W1_att_f)
        w2_att = dy.parameter(self.w2_att)
        W1_att_lang = dy.parameter(self.W1_att_lang)

        src_table = self.src_lookup
        tgt_table = self.tgt_lookup

        src_batch, tgt_batch = zip(*batch)
        src_steps = zip(*src_batch)
        tgt_steps = zip(*tgt_batch)
        src_steps_rev = list(reversed(src_steps))

        # Run the two encoders in lock step, one from each end of the source.
        fwd_state = self.l2r_builder.initial_state()
        bwd_state = self.r2l_builder.initial_state()
        fwd_outputs, bwd_outputs = [], []
        for fwd_words, bwd_words in zip(src_steps, src_steps_rev):
            fwd_state = fwd_state.add_input(
                dy.lookup_batch(src_table, fwd_words))
            bwd_state = bwd_state.add_input(
                dy.lookup_batch(src_table, bwd_words))
            fwd_outputs.append(fwd_state.output())
            bwd_outputs.append(bwd_state.output())

        # Bring the right-to-left outputs back into source order.
        bwd_outputs.reverse()

        # One concatenated [forward; backward] vector per source position.
        h_fs = [
            dy.concatenate([fwd_out, bwd_out])
            for fwd_out, bwd_out in zip(fwd_outputs, bwd_outputs)
        ]
        last_h = h_fs[-1]
        h_fs_matrix = dy.concatenate_cols(h_fs)

        # Decoder: start from a zero context vector and the last encoding.
        c_t = dy.vecInput(self.hidden_size * 2)
        c_t.set([0] * self.contextsize)
        encoded_h = dy.concatenate([last_h])
        dec_state = self.dec_builder.initial_state([encoded_h])

        # Language embedding, looked up from the tokens at source position 1.
        langeb = dy.lookup_batch(self.langeb_lookup, [
            self.lang_li.index(self.rsrc_vocab[item]) for item in src_steps[1]
        ])

        step_losses = []
        for cur_words, next_words in zip(tgt_steps[:-1], tgt_steps[1:]):
            dec_input = dy.concatenate(
                [dy.lookup_batch(tgt_table, cur_words), c_t])
            dec_state = dec_state.add_input(dec_input)
            h_e = dec_state.output()
            # MLP attention over the source, conditioned on the language.
            c_t = self.__attention_mlp_batch(h_fs_matrix, h_e, W1_att_e,
                                             W1_att_f, w2_att, W1_att_lang,
                                             langeb)
            logits = W_y * dy.concatenate([h_e, c_t]) + b_y
            step_losses.append(
                dy.pickneglogsoftmax_batch(logits, next_words))

        num_words = len(step_losses)
        return dy.sum_batches(dy.esum(step_losses)), num_words
Пример #54
0
def train(model,
          train,
          dev,
          trainer,
          epochs,
          vec_drop,
          batch_size,
          logger,
          truth_ind=2,
          print_every=20000):
    """
    Train the attacker model, saving it whenever the dev accuracy improves.

    Every epoch makes three ``epoch_pass`` calls: one over ``train`` with the
    fourth argument set to True (presumably the "update parameters" flag),
    then one over ``train`` and one over ``dev`` with it set to False, whose
    returned (accuracy, loss) pairs are recorded.

    :param model: attacker model
    :param train: training set
    :param dev: development/test set
    :param trainer: optimizer
    :param epochs: number of epochs
    :param vec_drop: representation vector (of the sentence) dropout
    :param batch_size: size of batch
    :param logger: logger receiving per-epoch debug lines and a final summary
    :param truth_ind: index of the truth in the train/dev set
    :param print_every: print every x examples in each epoch
    :return: None
    """
    train_acc_arr = []
    train_loss_arr = []
    dev_acc_arr = []
    dev_loss_arr = []
    best_score = 0.0
    best_model_epoch = 1

    # Trailing arguments shared by every epoch_pass call.
    pass_args = (batch_size, vec_drop, truth_ind, logger, print_every)

    logger.debug('training started')
    for epoch in xrange(1, epochs + 1):
        dy.renew_cg()

        # Pass over the training data with the flag on, then score it.
        epoch_pass(train, model, trainer, True, *pass_args)
        train_task_acc, train_loss = epoch_pass(train, model, trainer, False,
                                                *pass_args)
        train_acc_arr.append(train_task_acc)
        train_loss_arr.append(train_loss)
        logger.debug('train, {0}, adv acc: {1}'.format(epoch, train_task_acc))

        # Score the dev data.
        dev_task_acc, dev_loss = epoch_pass(dev, model, trainer, False,
                                            *pass_args)
        dev_acc_arr.append(dev_task_acc)
        dev_loss_arr.append(dev_loss)
        logger.debug('dev, {0}, adv acc: {1}'.format(epoch, dev_task_acc))
        log_value('attacker-acc', dev_task_acc, epoch)

        if dev_task_acc > best_score:
            best_score, best_model_epoch = dev_task_acc, epoch
            model.save(models_dir + task + '/best_attacker')

    summary = (
        ('best_score:', best_score),
        ('best_epoch:', best_model_epoch),
        ('train_task_acc:', train_acc_arr),
        ('train_loss:', train_loss_arr),
        ('dev_task_acc:', dev_acc_arr),
        ('dev_loss:', dev_loss_arr),
    )
    for label, value in summary:
        logger.info(label + str(value))
Пример #55
0
    def generate(self, s_sentence, orig_sent, max_len=150):
        """Translate one source sentence with beam search and return a string.

        The source is encoded by the left-to-right and right-to-left builders,
        then hypotheses are expanded for at most ``2 * len(s_sentence)`` steps
        using the module-level ``beam_size``. Whenever the ``unk`` token is
        predicted, the source word with the highest attention weight is copied
        from ``orig_sent`` instead (tagged with ``unk`` until the final
        clean-up).

        :param s_sentence: source sentence as vocabulary ids
        :param orig_sent: the surface words of the same sentence; must have
            as many entries as the attention vector returned by ``attend``
        :param max_len: currently unused; kept for interface compatibility
        :return: the best hypothesis with ``<EOS>`` and ``unk`` markers
            removed and whitespace normalised
        :raises ValueError: if the attention vector and ``orig_sent`` differ
            in length
        """
        dy.renew_cg()

        W_y = dy.parameter(self.params["W_y"])
        b_y = dy.parameter(self.params["b_y"])
        s_lookup = self.params["s_lookup"]
        t_lookup = self.params["t_lookup"]

        # Encode the source in both directions.
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []

        for cw_l2r in s_sentence:
            l2r_state = l2r_state.add_input(s_lookup[cw_l2r])
            l2r_contexts.append(l2r_state.output())

        for cw_r2l in reversed(s_sentence):
            r2l_state = r2l_state.add_input(s_lookup[cw_r2l])
            r2l_contexts.append(r2l_state.output())

        # Bring the right-to-left outputs back into source order.
        r2l_contexts.reverse()

        H_f = [dy.concatenate(list(p)) for p in zip(l2r_contexts, r2l_contexts)]
        H_f_mat = dy.concatenate_cols(H_f)
        W1_att = dy.parameter(self.params["W1_att"])
        # Source side of the attention score, computed once for all steps.
        w1dt = W1_att * H_f_mat

        c_t_init = dy.vecInput(2*self.HIDDEN_DIM)
        dec_state_init = self.dec_builder.initial_state()

        # Maps (text so far, decoder state, context) -> log-probability.
        possible_list = {("<EOS>", dec_state_init, c_t_init): 0.0}

        for i in range(len(s_sentence)*2):
            t_list = {}
            count_eos = 0

            for (poss, dec_state, c_t), prob in possible_list.items():
                spl_poss = poss.split(' ')

                # Finished hypotheses are carried over unchanged. The first
                # two steps are exempt because every hypothesis starts with
                # "<EOS>".
                if i > 1 and spl_poss[-1] == "<EOS>":
                    count_eos += 1
                    t_list[(poss, dec_state, c_t)] = prob
                    continue

                # Copied words carry the unk marker and are fed back as unk.
                if unk in spl_poss[-1]:
                    word_to_lookup = unk
                else:
                    word_to_lookup = spl_poss[-1]

                embedding = t_lookup[self.t_vocab[word_to_lookup]]

                x_t = dy.concatenate([c_t, embedding])
                dec_state = dec_state.add_input(x_t)
                c_t, a_t = self.attend(H_f_mat, dec_state, w1dt, len(s_sentence), 1)

                if len(a_t) != len(orig_sent):
                    # Previously this printed and called exit(), which ends
                    # the whole process with status 0; fail loudly instead.
                    raise ValueError(
                        "attention has %d weights but orig_sent has %d words: %r"
                        % (len(a_t), len(orig_sent), orig_sent))

                probs = dy.softmax(W_y*dy.concatenate([c_t, dec_state.output()]) + b_y).vec_value()
                inds = self.list_nlargest(probs, beam_size)

                for ind in inds:
                    word_to_add = self.t_id_lookup[ind]

                    if word_to_add == unk:
                        max_att_ind = a_t.index(max(a_t))
                        att_word = orig_sent[max_att_ind]

                        # NOTE(review): word_to_lookup is either unk or a word
                        # without unk, so this compares against "" or the
                        # previous word -- confirm this is the intended check.
                        # The bounds test keeps the current word when the
                        # attention peak is on the last source position.
                        if (att_word == word_to_lookup.replace(unk, "")
                                and max_att_ind + 1 < len(orig_sent)):
                            att_word = orig_sent[max_att_ind + 1]

                        word_to_add = att_word + unk

                    sent = poss + " " + word_to_add
                    sent_prob = prob + math.log(probs[ind])

                    # lp = (5 + len(sent.split()))/(5+1)
                    # sent_prob = sent_prob/pow(lp, alpha)

                    t_list[(sent, dec_state, c_t)] = sent_prob

            if count_eos == beam_size:
                break

            # Keep twice the beam width for the next step.
            possible_list = {}
            for tup in self.dict_nlargest(t_list, beam_size*2):
                possible_list[tup] = t_list[tup]

        final_sent = self.dict_nlargest(possible_list, 1)[0][0]
        return " ".join(final_sent.replace("<EOS>", " ").replace(unk, " ").strip().split())
Пример #56
0
    def predict_beamsearch(self, encoder, input_seq):
        """Decode ``input_seq`` with beam search over the attention decoder.

        Returns ``[]`` for an empty input. Otherwise returns a pair
        ``(nbest_seqs, alphas_mtx)``: the selected ``(sequence, probability)``
        tuples and the attention weights collected while ``self.plot`` is set.
        """
        if len(input_seq) == 0:
            return []

        dn.renew_cg()

        # Bind the decoder parameters to expressions in the fresh graph.
        for name in ('readout', 'bias', 'w_c', 'u_a', 'v_a', 'w_a'):
            setattr(self, name, dn.parameter(self.params[name]))

        alphas_mtx = []

        # Encode the single input sequence as a batch of one.
        blstm_outputs, _ = encoder.encode_batch([input_seq])

        # Finished sequences together with their probabilities.
        final_states = []

        # Hypotheses are (sequence, probability, decoder state, attention
        # vector) tuples; `frontier` holds those produced by the previous step.
        start_state = self.decoder_rnn.initial_state()
        frontier = [([common.BEGIN_SEQ], 1.0, start_state, self.init_lookup[0])]
        step = 0

        # Keep expanding until the length limit is hit or nothing is left.
        while step < self.max_prediction_len and len(frontier) > 0:
            is_last_step = step == self.max_prediction_len - 1
            expansions = []

            for prefix_seq, prefix_prob, prefix_decoder, prefix_attn in frontier:
                last_symbol = prefix_seq[-1]

                # A finished sequence cannot be extended.
                if last_symbol == common.END_SEQ:
                    continue

                try:
                    prev_output_vec = self.output_lookup[self.y2int[last_symbol]]
                except KeyError:
                    # Unknown symbol: this hypothesis is dropped.
                    print('impossible to expand, key error: ' + str(last_symbol))
                    continue

                rnn_state = prefix_decoder.add_input(
                    dn.concatenate([prev_output_vec, prefix_attn]))
                attn_vec, alphas = self.attend(blstm_outputs, rnn_state.output())

                # TODO: add attention weights properly to allow building the
                # attention matrix for the best path
                if self.plot:
                    alphas_mtx.append(alphas.vec_value())

                # Output distribution: softmax(readout * attn_vec + bias).
                logits = dn.affine_transform([self.bias, self.readout, attn_vec])
                probs_val = dn.softmax(logits).npvalue()

                # TODO: maybe should choose nbest from all expansions and not
                # only from nbest of each hypothesis?
                for index in common.argmax(probs_val, self.beam_size):
                    symbol_prob = probs_val[index]
                    new_seq = prefix_seq + [self.int2y[index]]
                    new_prob = prefix_prob * symbol_prob
                    if new_seq[-1] == common.END_SEQ or is_last_step:
                        # TODO: add to final states only if fits in k best?
                        # Complete (or length-capped): store without the
                        # first and last symbols.
                        final_states.append((new_seq[1:-1], new_prob))
                    else:
                        expansions.append((new_seq, new_prob, rnn_state, attn_vec))

            # Prune the expansions of all hypotheses down to the beam.
            expansion_probs = np.array([hypo[1] for hypo in expansions])
            kept = common.argmax(expansion_probs, self.beam_size)
            frontier = [expansions[pos] for pos in kept]
            step += 1

        # Select the n-best among everything that finished.
        final_probs = np.array([prob for _, prob in final_states])
        best = common.argmax(final_probs, self.beam_size)
        nbest_seqs = [final_states[pos] for pos in best]

        return nbest_seqs, alphas_mtx
Пример #57
0
 def start_batch(self):
     """Begin a new batch: empty ``self.losses`` and renew the computation graph."""
     self.losses = list()
     dy.renew_cg()
Пример #58
0
    def train(self, train_file, epochs):
        """Train on ``train_file`` for ``epochs`` epochs with a live loss plot.

        Each line of the file is whitespace-separated: all fields but the last
        are features and the last is the gold action label. Lines are
        reshuffled every epoch and grouped into minibatches of
        ``self.properties.minibatch_size``; a trailing partial minibatch is
        discarded at the end of each epoch.

        :param train_file: path of the training file
        :param epochs: number of passes over the file
        """
        # matplotlib config
        loss_values = []
        plt.ion()
        ax = plt.gca()
        ax.set_xlim([0, 10])
        ax.set_ylim([0, 3])
        plt.title("Loss over time")
        plt.xlabel("Minibatch")
        plt.ylabel("Loss")

        for i in range(epochs):
            print('started epoch', (i + 1))
            losses = []
            # Use a context manager so the handle is closed every epoch
            # instead of being left to the garbage collector.
            with open(train_file, 'r') as train_handle:
                train_data = train_handle.read().strip().split('\n')

            # shuffle the training data.
            random.shuffle(train_data)

            step = 0
            for line in train_data:

                fields = line.strip().split()
                features, label = fields[:-1], fields[-1]
                gold_label = self.vocab.action2id(label)
                result = self.build_graph(features)

                # getting loss with respect to negative log softmax function and the gold label.
                loss = dynet.pickneglogsoftmax(result, gold_label)

                # appending to the minibatch losses
                losses.append(loss)
                step += 1

                if len(losses) >= self.properties.minibatch_size:
                    # now we have enough loss values to get loss for minibatch
                    minibatch_loss = dynet.esum(losses) / len(losses)

                    # calling dynet to run forward computation for all minibatch items
                    minibatch_loss.forward()

                    # getting float value of the loss for current minibatch
                    minibatch_loss_value = minibatch_loss.value()

                    # printing info and plotting
                    loss_values.append(minibatch_loss_value)
                    if len(loss_values) % 10 == 0:
                        ax.set_xlim([0, len(loss_values) + 10])
                        ax.plot(loss_values)
                        plt.draw()
                        plt.pause(0.0001)
                        progress = round(100 * float(step) / len(train_data),
                                         2)
                        print('current minibatch loss', minibatch_loss_value,
                              'progress:', progress, '%')

                    # calling dynet to run backpropagation
                    minibatch_loss.backward()

                    # calling dynet to change parameter values with respect to current backpropagation
                    self.updater.update()

                    # empty the loss vector
                    losses = []

                    # refresh the memory of dynet
                    dynet.renew_cg()

            # there are still some minibatch items in the memory but they are smaller than the minibatch size
            # so we ask dynet to forget them
            dynet.renew_cg()
Пример #59
0
# Main language-model loop: one training minibatch per iteration, with
# periodic progress reports and periodic evaluation on the validation set.
# NOTE(review): only the forward loss computation is visible in this
# fragment; no backward pass or parameter update appears here.
epoch = all_sents = dev_time = all_words = this_words = this_loss = 0
random_training_instance = shuffled_infinite_list(train)
# `updates` is both the upper bound and the loop variable: the bound is read
# once when xrange is created, after which the name is rebound each iteration.
for updates in xrange(1, updates):
    # Progress report roughly every 500 training sentences.
    if updates % int(500 / FLAGS_batch_size) == 0:
        trainer.status()
        # NOTE(review): dev_time is never changed from 0 in this fragment, so
        # train_time also includes time spent on validation -- confirm.
        train_time = time.time() - start - dev_time
        all_words += this_words
        print("loss=%.4f, words per second=%.4f" %
              (this_loss / this_words, all_words / train_time))
        # Restart the running totals for the next reporting window.
        this_loss = this_words = 0
    # Validation pass roughly every 10000 training sentences.
    if updates % int(10000 / FLAGS_batch_size) == 0:
        dev_start = time.time()
        dev_loss = dev_words = 0
        for i in xrange(0, len(valid), FLAGS_batch_size):
            valid_minibatch = valid[i:i + FLAGS_batch_size]
            dy.renew_cg()  # Clear existing computation graph.
            loss_exp, mb_words = lm.minibatch_lm_loss(valid_minibatch)
            dev_loss += loss_exp.scalar_value()
            dev_words += mb_words
        # train_time here is the value set by the most recent progress report.
        print("nll=%.4f, ppl=%.4f, words=%r, time=%.4f, word_per_sec=%.4f" %
              (dev_loss / dev_words, math.exp(dev_loss / dev_words), dev_words,
               train_time, all_words / train_time))

    # Compute the loss for one training minibatch (runs on every iteration).
    minibatch = [
        next(random_training_instance) for _ in xrange(FLAGS_batch_size)
    ]
    dy.renew_cg()  # Clear existing computation graph.
    loss_exp, mb_words = lm.minibatch_lm_loss(minibatch)
    this_loss += loss_exp.scalar_value()
    this_words += mb_words
Пример #60
0
 def parse_event(self, sentence):
     """Greedily parse ``sentence`` with the event transition system.

     The ``children`` list of every entry is reset, features are extracted
     once, and then the highest-scoring legal transition is applied until
     the transition system reports a terminal state. The computation graph
     is renewed before returning.

     Returns the same ``sentence`` object that was passed in.
     """
     for entry in sentence:
         entry.children = []
     self.set_empty_vector()
     # One feature representation per word, shared by all steps.
     features = self.extract_features(sentence)
     state = CustomTransitionSystem(sentence)

     while not state.is_terminal():
         op_expr, lbl_expr, tg_expr = self.evaluate_events(
             state.stack, state.buffer, features)
         op_scores = op_expr.npvalue()
         lbl_scores = lbl_expr.npvalue()
         tg_scores = tg_expr.npvalue()

         # Best relation label per direction: odd positions (from 1) score
         # left arcs, even positions (from 2) score right arcs.
         left_lbl_score, left_lbl = max(
             zip(lbl_scores[1::2], self.ev_relations))
         right_lbl_score, right_lbl = max(
             zip(lbl_scores[2::2], self.ev_relations))
         # Best trigger among scores 2.. paired with self.i2tg[1:].
         trigger_score, trigger = max(zip(tg_scores[2:], self.i2tg[1:]))

         # Score every legal transition as operation + label + trigger.
         # lbl_scores[0] / tg_scores[0] are used where no label / trigger
         # applies.
         transitions = []
         for lt in state.all_legal():
             ix = state.t2i[lt]
             if lt == "shift":
                 transitions.append(
                     (lt, None, trigger,
                      op_scores[ix] + lbl_scores[0] + trigger_score))
             elif lt == "drop":
                 # A drop is scored once per fixed trigger type.
                 for tg_name, tg_ix in (("O", 1), ("Protein", 4)):
                     transitions.append(
                         (lt, None, tg_name,
                          op_scores[ix] + lbl_scores[0] + tg_scores[tg_ix]))
             elif lt in ['left_reduce', 'left_attach']:
                 transitions.append(
                     (lt, left_lbl, None,
                      op_scores[ix] + left_lbl_score + tg_scores[0]))
             elif lt in ['right_reduce', 'right_attach']:
                 transitions.append(
                     (lt, right_lbl, None,
                      op_scores[ix] + right_lbl_score + tg_scores[0]))
             elif lt == "swap":
                 transitions.append(
                     (lt, None, None,
                      op_scores[ix] + lbl_scores[0] + tg_scores[0]))

         # Apply the best-scoring legal transition.
         best_act, best_lbl, best_tg, _ = max(transitions,
                                              key=itemgetter(3))
         state.perform_transition(best_act, best_lbl, best_tg)

     dy.renew_cg()
     return sentence