Example #1
 def embed(self, x):
   if self.word_dropout > 0.0 and self.word_id_mask is None:
     batch_size = len(x) if xnmt.batcher.is_batched(x) else 1
     self.word_id_mask = [set(np.random.choice(self.vocab_size, int(self.vocab_size * self.word_dropout), replace=False)) for _ in range(batch_size)]
   emb_e = dy.parameter(self.embeddings)
   # single mode
   if not xnmt.batcher.is_batched(x):
     if self.train and self.word_id_mask and x in self.word_id_mask[0]:
       ret = dy.zeros((self.emb_dim,))
     else:
       ret = dy.pick(emb_e, index=x)
       if self.fix_norm != None:
         ret = dy.cdiv(ret, dy.l2_norm(ret))
         if self.fix_norm != 1:
           ret *= self.fix_norm
   # minibatch mode
   else:
     ret = dy.concatenate_to_batch([dy.pick(emb_e, index=xi) for xi in x])
     if self.fix_norm != None:
       ret = dy.cdiv(ret, dy.l2_norm(ret))
       if self.fix_norm != 1:
         ret *= self.fix_norm
     if self.train and self.word_id_mask and any(x[i] in self.word_id_mask[i] for i in range(len(x))):
       dropout_mask = dy.inputTensor(np.transpose([[0.0]*self.emb_dim if x[i] in self.word_id_mask[i] else [1.0]*self.emb_dim for i in range(len(x))]), batched=True)
       ret = dy.cmult(ret, dropout_mask)
   if self.train and self.weight_noise > 0.0:
     ret = dy.noise(ret, self.weight_noise)
   return ret
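The embedder above zeroes out the whole vector of a dropped word id and otherwise picks one row of the embedding matrix. A minimal self-contained sketch of the same idea (all names and sizes here are invented for illustration):

import numpy as np
import dynet as dy

VOCAB, DIM, DROP = 10, 4, 0.3
pc = dy.ParameterCollection()
E = pc.add_parameters((VOCAB, DIM))  # one row per word

dy.renew_cg()
dropped = set(np.random.choice(VOCAB, int(VOCAB * DROP), replace=False))

def embed_word(word_id, train=True):
    # dropped words get an all-zero vector; the rest pick their row of E
    if train and word_id in dropped:
        return dy.zeros((DIM,))
    return dy.pick(dy.parameter(E), index=word_id)

print(embed_word(3).npvalue().shape)  # (4,)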
Example #2
    def forward_unlabeled(self, features, correct_output, pred_output):
        init_alphas = [-1e10] * self.num_labels
        init_alphas[self.label2idx[START]] = 0

        for_expr = dy.inputVector(init_alphas)
        for pos, obs in enumerate(features):
            alphas_t = []
            if correct_output[pos] != self.o_id:
                for next_tag in range(self.num_labels):
                    obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                                   self.num_labels)
                    next_tag_expr = for_expr + self.transition[next_tag] + obs_broadcast + dy.inputVector([self.lambda_l] * self.num_labels) \
                        if next_tag != pred_output[pos] else for_expr + self.transition[next_tag] + obs_broadcast
                    alphas_t.append(max_score(next_tag_expr))
            else:
                for next_tag in range(self.num_labels):
                    obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                                   self.num_labels)
                    next_tag_expr = for_expr + self.transition[next_tag] + obs_broadcast + dy.inputVector([self.lambda_u] * self.num_labels) \
                        if next_tag != pred_output[pos] else for_expr + self.transition[next_tag] + obs_broadcast
                    alphas_t.append(max_score(next_tag_expr))
            for_expr = dy.concatenate(alphas_t)
            # for_expr = dy.max_dim(alphas_t)
            # dy.emax()
        terminal_expr = for_expr + self.transition[self.label2idx[STOP]]
        alpha = max_score(terminal_expr)
        return alpha
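forward_unlabeled above calls a max_score helper that is not shown in the snippet. A plausible stand-in, following the argmax-then-pick idiom used elsewhere on this page (an assumption, not the project's actual code):

import numpy as np
import dynet as dy

def max_score(scores):
    # return the largest entry of a DyNet vector expression, as an expression
    best = int(np.argmax(scores.npvalue()))
    return dy.pick(scores, best)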
Example #3
 def viterbi(self, observations):
     backpointers = []
     init_pis = [0, 0]
     forward_mess = dy.inputVector(init_pis)
     transitions = [self.transitions[idx] for idx in range(2)]
     for i in range(len(observations) - 1):
         bp_t = []
         pi_t = []
         for next_tag in range(2):
             next_tag_expr = forward_mess + transitions[next_tag]
             next_tag_arr = next_tag_expr.npvalue()
             best_tag_id = np.argmax(next_tag_arr)
             bp_t.append(best_tag_id)
             pi_t.append(dy.pick(next_tag_expr, best_tag_id))
         forward_mess = dy.concatenate(pi_t) + observations[i]
         backpointers.append(bp_t)
     # find the highest scoring final state and the corresponding score
     best_tag_id = np.argmax(forward_mess.npvalue())
     path_score = dy.pick(forward_mess, best_tag_id)
     # backtracking
     best_path = [best_tag_id]
     for bp_t in reversed(backpointers):
         best_tag_id = bp_t[best_tag_id]
         best_path.append(best_tag_id)
     best_path.pop()
     best_path.reverse()
     return best_path, path_score
Example #4
    def learn(self, seq):

        for entry in seq:
            if entry.upos != 'NUM' and entry.upos != 'PROPN':
                losses = []
                unilemma = unicode(entry.lemma, 'utf-8')
                n_chars = len(unilemma)
                softmax_output_list = self._predict(entry.word,
                                                    entry.upos,
                                                    entry.xpos,
                                                    entry.attrs,
                                                    num_chars=n_chars + 1,
                                                    gs_chars=unilemma)
                # print unilemma.encode('utf-8')#, softmax_output_list
                for softmax, char in zip(softmax_output_list[:-1], unilemma):

                    char_index = -1
                    if char.lower() == char:
                        casing = 0
                    else:
                        casing = 1
                    char = char.lower()
                    if char in self.encodings.char2int:
                        char_index = self.encodings.char2int[char]
                    if char_index != -1:
                        losses.append(-dy.log(dy.pick(softmax[0], char_index)))
                    losses.append(-dy.log(dy.pick(softmax[1], casing)))
                    # print np.argmax(softmax[0].npvalue()), char_index, softmax

                losses.append(-dy.log(
                    dy.pick(softmax_output_list[-1][0],
                            len(self.encodings.char2int))))
                loss = dy.esum(losses)
                self.losses.append(loss)
Example #5
 def viterbi_decoding(self, observations):
     backpointers = []
     init_vvars = [-1e10] * self.dim_output
     init_vvars[self.sp_s] = 0
     for_expr = dy.inputVector(init_vvars)
     trans_exprs = [self.trans[idx] for idx in range(self.dim_output)]
     for obs in observations:
         bptrs_t = []
         vvars_t = []
         for next_tag in range(self.dim_output):
             next_tag_expr = for_expr + trans_exprs[next_tag]
             next_tag_arr = next_tag_expr.npvalue()
             best_tag_id = np.argmax(next_tag_arr)
             bptrs_t.append(best_tag_id)
             vvars_t.append(dy.pick(next_tag_expr, best_tag_id))
         for_expr = dy.concatenate(vvars_t) + obs
         backpointers.append(bptrs_t)
     terminal_expr = for_expr + trans_exprs[self.sp_e]
     terminal_arr = terminal_expr.npvalue()
     best_tag_id = np.argmax(terminal_arr)
     path_score = dy.pick(terminal_expr, best_tag_id)
     best_path = [best_tag_id]
     for bptrs_t in reversed(backpointers):
         best_tag_id = bptrs_t[best_tag_id]
         best_path.append(best_tag_id)
     start = best_path.pop()
     best_path.reverse()
     assert start == self.sp_s
     return best_path, path_score
Example #6
    def forward_backward(self, observations):
        init_alphas = [0, 0]
        forward_mess = dy.inputVector(init_alphas)
        alpha = []
        for i in range(len(observations) - 1):
            alphas_t = []
            for next_tag in range(2):
                obs_broadcast = dy.concatenate(
                    [dy.pick(observations[i], next_tag)] * 2)
                next_tag_expr = forward_mess + self.transitions[
                    next_tag] + obs_broadcast
                alphas_t.append(self.log_sum_exp(next_tag_expr))
            forward_mess = dy.concatenate(alphas_t)
            alpha.append(forward_mess)

        init_betas = [0, 0]
        backward_mess = dy.inputVector(init_betas)
        beta = []
        for i in range(len(observations) - 1):
            beta_t = []
            for next_tag in range(2):
                obs = observations[len(observations) - i - 1]
                next_tag_expr = backward_mess + self.transitions[next_tag] + obs
                beta_t.append(self.log_sum_exp(next_tag_expr))
            backward_mess = dy.concatenate(beta_t)
            beta.append(backward_mess)

        mu = [x + y for x, y in zip(alpha, beta[::-1])]
        # compute marginal probabilities
        prob = [dy.pick(dy.softmax(w), 1) for w in mu]
        return prob
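forward_backward relies on a self.log_sum_exp method that is not included in the snippet. A stand-alone version of a numerically stable log-sum-exp over a DyNet vector, in the style of Example #32 further down (assumed, not the original implementation):

import numpy as np
import dynet as dy

def log_sum_exp(scores, n_tags=2):
    # subtract the max for stability, then add back the log of the summed exponentials
    best = int(np.argmax(scores.npvalue()))
    max_expr = dy.pick(scores, best)
    broadcast = dy.concatenate([max_expr] * n_tags)
    return max_expr + dy.log(dy.sum_elems(dy.exp(scores - broadcast)))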
Example #7
def viterbi(emissions, transition, start_idx, end_idx, norm=False):
    n_tags = emissions[0].dim()[0][0]
    backpointers = []

    inits = [-1e4] * n_tags
    inits[start_idx] = 0
    alphas = dy.inputVector(inits)
    alphas = dy.log_softmax(alphas) if norm else alphas

    for emission in emissions:
        next_vars = dy.colwise_add(dy.transpose(transition), alphas)
        best_tags = np.argmax(next_vars.npvalue(), 0)
        v_t = dy.max_dim(next_vars, 0)
        alphas = v_t + emission
        backpointers.append(best_tags)

    terminal_expr = alphas + dy.pick(transition, end_idx)
    best_tag = np.argmax(terminal_expr.npvalue())
    path_score = dy.pick(terminal_expr, best_tag)

    best_path = [best_tag]
    for bp_t in reversed(backpointers):
        best_tag = bp_t[best_tag]
        best_path.append(best_tag)
    _ = best_path.pop()
    best_path.reverse()
    return best_path, path_score
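A toy call of the viterbi function above, only to make the expected shapes concrete; the tag count, start/end ids, and random parameters are invented:

import dynet as dy

N_TAGS, START, END, SEQ_LEN = 5, 3, 4, 6
pc = dy.ParameterCollection()
trans_p = pc.add_parameters((N_TAGS, N_TAGS))                  # transition[to, from]
emit_p = [pc.add_parameters((N_TAGS,)) for _ in range(SEQ_LEN)]

dy.renew_cg()
transition = dy.parameter(trans_p)
emissions = [dy.parameter(e) for e in emit_p]
best_path, path_score = viterbi(emissions, transition, START, END)
print(best_path, path_score.value())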
Example #8
    def sample_token(self, sentence):
        """
        Samples a token from the conditional distribution
        @param sentence: a list of token indexes
        @return the index of the sampled token
        """
        ctxt = sentence[-3:]
        if self.tied:
            dy.renew_cg()
            W = dy.parameter(self.hidden_weights)
            E = dy.parameter(self.embedding_matrix)
            embeddings = [dy.pick(E, x) for x in ctxt]
            xdense = dy.concatenate(embeddings)
            ypred = dy.softmax(E * dy.tanh(W * xdense))

            Ypred = np.array(ypred.value())
            Ypred /= Ypred.sum()  #fixes numerical instabilities
            return choice(self.lexicon_size, p=Ypred)
        else:
            dy.renew_cg()
            O = dy.parameter(self.output_weights)
            W = dy.parameter(self.hidden_weights)
            E = dy.parameter(self.embedding_matrix)
            embeddings = [dy.pick(E, x) for x in ctxt]
            xdense = dy.concatenate(embeddings)
            ypred = dy.softmax(O * dy.tanh(W * xdense))

            Ypred = np.array(ypred.value())
            Ypred /= Ypred.sum()  #fixes numerical instabilities
            return choice(self.lexicon_size, p=Ypred)
Example #9
def viterbi(emissions, transition, start_idx, end_idx, norm=False):
    n_tags = emissions[0].dim()[0][0]
    backpointers = []

    inits = [-1e4] * n_tags
    inits[start_idx] = 0
    alphas = dy.inputVector(inits)
    alphas = dy.log_softmax(alphas) if norm else alphas

    for emission in emissions:
        next_vars = dy.colwise_add(dy.transpose(transition), alphas)
        best_tags = np.argmax(next_vars.npvalue(), 0)
        v_t = dy.max_dim(next_vars, 0)
        alphas = v_t + emission
        backpointers.append(best_tags)

    terminal_expr = alphas + dy.pick(transition, end_idx)
    best_tag = np.argmax(terminal_expr.npvalue())
    path_score = dy.pick(terminal_expr, best_tag)

    best_path = [best_tag]
    for bp_t in reversed(backpointers):
        best_tag = bp_t[best_tag]
        best_path.append(best_tag)
    _ = best_path.pop()
    best_path.reverse()
    return best_path, path_score
Example #10
    def decode(self, emissions):
        """Viterbi decode to find the best sequence.

        :param emissions: List[dy.Expression]

        Returns:
            List[int], dy.Expression ((1,), B)
        """
        if self.add_ends:
            emissions = CRF._prep_input(emissions)
        backpointers = []
        transitions = self.transitions

        inits = [-1e4] * self.n_tags
        inits[self.start_idx] = 0
        alphas = dy.inputVector(inits)

        for emission in emissions:
            next_vars = dy.colwise_add(dy.transpose(transitions), alphas)
            best_tags = np.argmax(next_vars.npvalue(), 0)
            v_t = dy.max_dim(next_vars, 0)
            alphas = v_t + emission
            backpointers.append(best_tags)

        terminal_expr = alphas + dy.pick(transitions, self.end_idx)
        best_tag = np.argmax(terminal_expr.npvalue())
        path_score = dy.pick(terminal_expr, best_tag)

        best_path = [best_tag]
        for bp_t in reversed(backpointers):
            best_tag = bp_t[best_tag]
            best_path.append(best_tag)
        _ = best_path.pop()
        best_path.reverse()
        return best_path, path_score
Example #11
 def viterbi_decoding(self, observations):
     backpointers = []
     init_vvars = [-1e10] * (self.n_tags + 2)
     init_vvars[self.b_id] = 0  # <Start> has all the probability
     for_expr = dynet.inputVector(init_vvars)
     trans_exprs = [self.transitions[idx] for idx in range(self.n_tags + 2)]
     for obs in observations:
         bptrs_t = []
         vvars_t = []
         for next_tag in range(self.n_tags + 2):
             next_tag_expr = for_expr + trans_exprs[next_tag]
             next_tag_arr = next_tag_expr.npvalue()
             best_tag_id = np.argmax(next_tag_arr)
             bptrs_t.append(best_tag_id)
             vvars_t.append(dynet.pick(next_tag_expr, best_tag_id))
         for_expr = dynet.concatenate(vvars_t) + obs
         backpointers.append(bptrs_t)
     # Perform final transition to terminal
     terminal_expr = for_expr + trans_exprs[self.e_id]
     terminal_arr = terminal_expr.npvalue()
     best_tag_id = np.argmax(terminal_arr)
     path_score = dynet.pick(terminal_expr, best_tag_id)
     # Reverse over the backpointers to get the best path
     best_path = [best_tag_id]  # Start with the tag that was best for terminal
     for bptrs_t in reversed(backpointers):
         best_tag_id = bptrs_t[best_tag_id]
         best_path.append(best_tag_id)
     start = best_path.pop()  # Remove the start symbol
     best_path.reverse()
     assert start == self.b_id
     # Return best path and best path's score
     return best_path, path_score
Example #12
 def pick_gold_score(self, preds, golds):
     score = 0
     prev_tag = len(self.pos)
     for pred, gold in zip(preds, golds):
         score += dynet.pick(pred, gold) + dynet.pick(
             self.nertrans_lookup[gold], prev_tag)
         prev_tag = gold
     score += dynet.pick(self.nertrans_lookup[len(self.pos) + 1], prev_tag)
     return score
Example #13
    def learn(self, labels, f_cont, f_disc):
        # truncate the pauses:
        f_cont = f_cont[labels[0].stop * self.dataset.sample_rate / 1000 -
                        80:labels[-1].start * self.dataset.sample_rate / 1000 +
                        80]
        f_disc = f_disc[labels[0].stop * self.dataset.sample_rate / 1000 -
                        80:labels[-1].start * self.dataset.sample_rate / 1000 +
                        80]

        clipped = False
        if self.clip != 0 and len(f_cont) > self.clip:
            clipped = True
            f_cont = f_cont[:self.clip]
            f_disc = f_disc[:self.clip]

        num_batches = (len(f_cont) + 1) / self.config.batch_size
        if (len(f_cont) + 1) % self.config.batch_size != 0:
            num_batches += 1

        att = None
        total_loss = 0

        last_proc = 0
        last_decoder_state = None
        for iBatch in range(num_batches):
            proc = iBatch * 100 / num_batches
            while last_proc + 10 < proc:
                last_proc += 10
                sys.stdout.write(" " + str(last_proc))
                sys.stdout.flush()
            dy.renew_cg()
            start_sample = iBatch * self.config.batch_size
            stop_sample = start_sample + self.config.batch_size
            if stop_sample > len(f_disc) + 1:
                stop_sample = len(f_disc) + 1
            pred_samples, att, samples_cont, rnn = self._predict(
                labels, start_sample, stop_sample, f_cont, att,
                last_decoder_state, False)
            last_decoder_state = [s.value() for s in rnn.s()]
            losses = []
            for iSample in range(stop_sample - start_sample):
                if iSample + start_sample != len(f_cont):
                    losses.append(-dy.log(
                        dy.pick(pred_samples[iSample], f_disc[iSample +
                                                              start_sample])))
                elif not clipped:
                    losses.append(-dy.log(dy.pick(
                        pred_samples[-1], 256)))  #special end of sequence
            loss = dy.esum(losses)
            total_loss += loss.value()
            loss.backward()
            self.trainer.update()
            p_one = 1
            if clipped:
                p_one = 0

        return total_loss, len(f_disc) + p_one
Example #14
    def train(self, trainning_set):
        losses = []
        for datapoint in trainning_set:
            sentence = datapoint[0]
            chars = datapoint[6]
            pos = datapoint[5]
            entity = datapoint[2]
            triggers = datapoint[3]
            rules = datapoint[-1]
            features = self.encode_sentence(sentence, pos, chars)
            labels = datapoint[4]

            entity_vec = features[entity]
            contexts = self.entity_attend(features, entity_vec)

            for i, c in enumerate(contexts):
                if i != entity:
                    h_t = dy.concatenate([c, entity_vec])
                    hidden = dy.tanh(self.lb.expr() * h_t +
                                     self.lb_bias.expr())
                    out_vector = dy.softmax(self.lb2.expr() * hidden +
                                            self.lb2_bias.expr())
                    if i in triggers:
                        label = labels[triggers.index(i)]
                    else:
                        label = 0
                    losses.append(-dy.log(dy.pick(out_vector, label)))
                    if i in triggers and len(rules[triggers.index(i)]) > 1:
                        # Get decoding losses
                        last_output_embeddings = self.pattern_embeddings[0]
                        context = c
                        s = self.decoder_lstm.initial_state().add_input(
                            dy.concatenate([context, last_output_embeddings]))
                        for pattern in rules[triggers.index(i)]:
                            h_t = s.output()
                            context, A = self.attend(contexts, h_t)
                            # p_gen = dy.logistic(self.gen_c * context + self.gen_h * h_t + self.gen_i *
                            #     dy.concatenate([context, last_output_embeddings]) + self.gen_bias)
                            out_vector = self.pt.expr() * dy.concatenate(
                                [context, h_t]) + self.pt_bias.expr()
                            probs = dy.softmax(out_vector)
                            losses.append(-dy.log(dy.pick(probs, pattern)))
                            last_output_embeddings = self.pattern_embeddings[
                                pattern]
                            s = s.add_input(
                                dy.concatenate(
                                    [context, last_output_embeddings]))

            try:
                loss = dy.esum(losses)
                loss.backward()
                self.trainer.update()
                dy.renew_cg()
                losses = []
            except:
                pass
Example #15
 def score_sentence(self, observations, tags):
     assert len(observations) == len(tags)
     score_seq = [0]
     score = dy.scalarInput(0)
     tags = [t2i["<START>"]] + tags
     for i, obs in enumerate(observations):
         score = score + dy.pick(self.transitions[tags[i+1]], tags[i]) + dy.pick(obs, tags[i+1])
         score_seq.append(score.value())
     score = score + dy.pick(self.transitions[t2i["<STOP>"]], tags[-1])
     return score
Example #16
    def learn(self, seq):
        # remove compound words
        tmp = []
        for ss in seq:
            if not ss.is_compound_entry:
                tmp.append(ss)
        seq = tmp
        arc_matrix, aux_arc_matrix, proj_labels, softmax_morphology = self._predict_arc(
            seq, runtime=False)
        gold_heads = [entry.head for entry in seq]
        gold_labels = [entry.label for entry in seq]

        softmax_labels = self._predict_label(gold_heads,
                                             proj_labels,
                                             runtime=False)

        losses = []

        for gold_head, gold_label, arc_probs, softmax_label, entry in zip(
                gold_heads, gold_labels, arc_matrix[1:], softmax_labels, seq):
            label_index = self.encodings.label2int[gold_label]
            losses.append(-dy.log(arc_probs[gold_head]))
            losses.append(-dy.log(dy.pick(softmax_label, label_index)))

        if not self.config.predict_morphology:
            for gold_head, aux_probs, entry in zip(gold_heads,
                                                   aux_arc_matrix[1:], seq):
                losses.append(-dy.log(aux_probs[gold_head]) *
                              self.aux_softmax_weight)

        else:
            for softmax_morph, entry in zip(softmax_morphology, seq):
                loss_upos = -dy.log(
                    dy.pick(softmax_morph[0],
                            self.encodings.upos2int[entry.upos]))
                losses.append(loss_upos * (self.aux_softmax_weight / 3))

                if len(
                        self.encodings.xpos2int
                ) > 1:  # stability check (some languages are missing attributes or XPOS, resulting in numerical overflow during backpropagation)
                    loss_xpos = -dy.log(
                        dy.pick(softmax_morph[1],
                                self.encodings.xpos2int[entry.xpos]))
                    losses.append(loss_xpos * (self.aux_softmax_weight / 3))

                if len(
                        self.encodings.attrs2int
                ) > 1:  # stability check (some languages are missing attributes or XPOS, resulting in numerical overflow during backpropagation)
                    loss_attrs = -dy.log(
                        dy.pick(softmax_morph[2],
                                self.encodings.attrs2int[entry.attrs]))
                    losses.append(loss_attrs * (self.aux_softmax_weight / 3))

        loss = dy.esum(losses)
        self.batch_loss.append(loss)
Example #17
 def score_sentence(self, score_vecs, tags):
     assert (len(score_vecs) == len(tags))
     tags.insert(0, START_TAG)  # add start
     total = dy.scalarInput(.0)
     for i, obs in enumerate(score_vecs):
         # transition to next from i and emission
         next_tag = tags[i + 1]
         total += dy.pick(self.trans_mat[next_tag], tags[i]) + dy.pick(
             obs, next_tag)
     total += dy.pick(self.trans_mat[END_TAG], tags[-1])
     return total
Example #18
    def score_sentence(self, features, t_features, tags):
        score = dy.scalarInput(0)
        tags = [self.begin_tag] + tags

        for i, feat in enumerate(features):
            score = (score + dy.pick(t_features[i][tags[i + 1]], tags[i]) +
                     dy.pick(feat, tags[i + 1]))

        # Last transition to end tag from last tag
        score = score + dy.pick(t_features[-1][self.end_tag], tags[-1])
        return score
Example #19
    def forward_labeled(self, features, tags):
        score = dy.scalarInput(0)
        tags = [self.label2idx[w] for w in tags]
        tags = [self.label2idx[START]] + tags
        for i, obs in enumerate(features):
            score = score + dy.pick(self.transition[tags[i + 1]],
                                    tags[i]) + dy.pick(obs, tags[i + 1])
        labeled_score = score + dy.pick(self.transition[self.label2idx[STOP]],
                                        tags[-1])

        return labeled_score
Example #20
 def score_sentence(self, observations, tags):
     assert len(observations) == len(tags)
     #score_seq = [0]
     score = dy.scalarInput(0)
     tags = [self.t2i["<SOS>"]] + [self.t2i[t] for t in tags]
     for i, obs in enumerate(observations):
         #+ dy.pick(dy.lookup(self.transitions, tags[i+1]),tags[i])
         score = score + dy.pick(self.transitions[tags[i + 1]],
                                 tags[i]) + dy.pick(obs, tags[i + 1])
         #score_seq.append(score.value())
     score = score + dy.pick(self.transitions[self.t2i["<EOS>"]], tags[-1])
     return score
Example #21
 def score_sentence(self, observations, tags):
     if len(tags) == 0:
         tags = [-1] * len(observations)
     assert len(observations) == len(tags)
     score_seq = [0]
     score = dy.scalarInput(0)
     tags = [self.vocab.START_ID] + tags
     for i, obs in enumerate(observations):
         score = score + dy.pick(self.transitions[tags[i + 1]], tags[i]) + dy.pick(obs, tags[i + 1])
         score_seq.append(score.value())
     score = score + dy.pick(self.transitions[self.vocab.END_ID], tags[-1])
     return score
Example #22
def forced_decoding(vecs, tags):
    # Initialize
    for_expr = dy.scalarInput(0)
    for_tag = S_T
    # Perform the forward pass through the sentence
    for i, vec in enumerate(vecs):
        my_tag = vt.w2i[tags[i]]
        for_expr = for_expr + dy.pick(TRANS_LOOKUP[my_tag],
                                      for_tag) + vec[my_tag]
        for_tag = my_tag
    for_expr = for_expr + dy.pick(TRANS_LOOKUP[S_T], for_tag)
    return for_expr
Example #23
def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

    dy.renew_cg()

    encoder_hidden = encoder.initHidden()

    input_length = len(inputs)
    target_length = len(targets)

    encoder_outputs = [dy.zeros(hidden_dim) for _ in range(max_length)]

    losses = []

    for i in range(input_length):
        encoder_output, encoder_hidden = encoder(inputs[i], encoder_hidden)
        encoder_outputs[i] = encoder_output

    encoder_outputs = dy.concatenate(encoder_outputs, 1)

    decoder_input = SOS_token
    decoder_hidden = encoder_hidden

    if r.random() < teacher_forcing_ratio:
        use_teacher_forcing = True
    else:
        use_teacher_forcing = False

    if use_teacher_forcing:
        for i in range(target_length):
            decoder_output, decoder_hidden, _ = decoder(decoder_input,
                                                        decoder_hidden,
                                                        encoder_outputs,
                                                        dropout=True)
            losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
            decoder_input = targets[i]
    else:
        for i in range(target_length):
            decoder_output, decoder_hidden, _ = decoder(decoder_input,
                                                        decoder_hidden,
                                                        encoder_outputs,
                                                        dropout=True)
            losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
            probs = decoder_output.vec_value()
            decoder_input = probs.index(max(probs))
            if decoder_input == EOS_token:
                break

    loss = dy.esum(losses) / len(losses)
    loss.backward()
    trainer.update()

    return loss.value()
Example #24
  def __getitem__(self, key):
    """Get a single item.

    Returns:
      sequence item (expression); does not result in explicit conversion to list
    """
    if self.expr_list: return self.expr_list[key]
    else:
      if key < 0: key += len(self)
      if self.expr_tensor:
        return dy.pick(self.expr_tensor, key, dim=len(self.expr_tensor.dim()[0])-1)
      else:
        return dy.pick(self.expr_transposed_tensor, key, dim=0)
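The getter above uses the dim argument of dy.pick to index either the last axis of the stored tensor or the first axis of its transpose. A tiny illustration of dy.pick with an explicit dim (toy values):

import numpy as np
import dynet as dy

dy.renew_cg()
M = dy.inputTensor(np.array([[1.0, 2.0, 3.0],
                             [4.0, 5.0, 6.0]]))  # shape (2, 3)
row = dy.pick(M, 0, dim=0)                       # first row     -> shape (3,)
col = dy.pick(M, 1, dim=1)                       # second column -> shape (2,)
print(row.npvalue(), col.npvalue())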
Example #25
File: model.py (Project: yyht/nagisa)
    def score_sentence(self, observations, tags):
        if not len(observations) == len(tags):
            raise AssertionError("len(observations) != len(tags)")

        score_seq = [0]
        score = dy.scalarInput(0)
        tags = [self.sp_s] + tags
        for i, obs in enumerate(observations):
            score = score + dy.pick(self.trans[tags[i + 1]],
                                    tags[i]) + dy.pick(obs, tags[i + 1])
            score_seq.append(score.value())
        score = score + dy.pick(self.trans[self.sp_e], tags[-1])
        return score
Example #26
    def train(self, rnnlm, train_quatrains, dev_quatrains):
        min_dev_loss = sys.maxsize
        for i in tqdm(range(self.epochs), desc='Training'):

            losses = []
            tqdm.write('Epoch {}'.format(i))
            total_loss = 0
            state = rnnlm.initialize()

            for count, quatrain in enumerate(train_quatrains):
                for token, (next_word, _, _, _) in zip(quatrain, quatrain[1:]):
                    state, probs = rnnlm.add_input(state, token)
                    loss = -dy.log(dy.pick(probs, next_word))
                    losses.append(loss)

                if count % self.BATCH_SIZE == 0:
                    loss = dy.esum(losses)
                    total_loss += loss.value()
                    loss.backward()
                    self.trainer.update()
                    losses = []
                    dy.renew_cg()
                    state = rnnlm.initialize()

                #if (count + 1)% 4 == 0:
                #  dy.renew_cg()
                #  state = rnnlm.initialize()

            dev_loss = 0
            state = rnnlm.initialize()
            for count, quatrain in enumerate(dev_quatrains):
                for token, (next_word, _, _, _) in zip(quatrain, quatrain[1:]):
                    state, probs = rnnlm.add_input(state, token)
                    loss = -dy.log(dy.pick(probs, next_word))
                    dev_loss += loss.value()
                if (count + 1) % 4 == 0:
                    dy.renew_cg()
                    state = rnnlm.initialize()
            tqdm.write('Dev loss: {}'.format(dev_loss))
            if dev_loss < min_dev_loss:
                tqdm.write('Best dev loss. Saving parameters...')
                self.pc.save('model.pt')
                min_dev_loss = dev_loss
            else:
                tqdm.write('Not best dev loss. Restarting with smaller...')
                self.lr = self.lr * .5
                self.trainer.restart(self.lr)

            tqdm.write('Training Loss: {}'.format(total_loss))
            rnnlm.generate(rnnlm.initialize())
Example #27
 def predict_next_best_action(self,config,prev_action,sentence):
     """
     Predicts the next best pair (configuration, action)
     @param config: the current configuration
     @param sentence: the sentence to parse
     @return a pair (next_config, action_taken)
     """
     S,F,B,A,prefix_score = config
     if F is None and len(B) > 0 : #lexical action
         unk_token = self.word_codes[ArcEagerGenerativeParser.UNKNOWN_TOKEN]
         next_word = self.word_codes.get(sentence[B[0]],unk_token)
         X = self.make_representation(config,None,sentence,structural=False)
         if self.tied:
             dy.renew_cg()
             W = dy.parameter(self.hidden_weights)
             E = dy.parameter(self.input_embeddings)
             embeddings = [dy.pick(E, xidx) for xidx in X]
             xdense     = dy.concatenate(embeddings)
             pred       = dy.pickneglogsoftmax(E * dy.tanh( W * xdense ),next_word)
             C = self.generate(config,local_score= -pred.value())
             action =  (ArcEagerGenerativeParser.GENERATE,sentence[B[0]])
             return (C,action)
         else:    
             dy.renew_cg()
             W = dy.parameter(self.hidden_weights)
             E = dy.parameter(self.input_embeddings)
             O = dy.parameter(self.output_embeddings)
             embeddings = [dy.pick(E, xidx) for xidx in X]
             xdense     = dy.concatenate(embeddings)
             pred       = dy.pickneglogsoftmax(O * dy.tanh( W * xdense ),next_word)
             C = self.generate(config,local_score= -pred.value())
             action = (ArcEagerGenerativeParser.GENERATE,sentence[B[0]])
             return (C,action)
     else:  #structural action
         X = self.make_representation(config,None,sentence,structural=True) 
         dy.renew_cg()
         W = dy.parameter(self.hidden_weights)
         E = dy.parameter(self.input_embeddings)
         A = dy.parameter(self.action_weights)
         embeddings = [dy.pick(E, xidx) for xidx in X]
         xdense     = dy.concatenate(embeddings)
         preds      = dy.softmax(A * dy.tanh( W * xdense )).npvalue()
         action_mask = self.mask_actions(config,prev_action,len(sentence))
         max_idx = np.argmax(preds*action_mask)
         score = log(preds[max_idx])
         C = self.actions[max_idx](config,local_score=score) #this just execs the predicted action..
         action = self.rev_action_codes[max_idx]
         return (C,action)
Example #28
def attend(blstm_outputs, h_t, W_c, v_a, W__a, U__a):
    # iterate through input states to compute alphas
    # print 'computing scores...'
    # scores = [W_a * pc.concatenate([h_t, h_input]) for h_input in blstm_outputs]
    scores = [v_a * pc.tanh(W__a * h_t + U__a * h_input) for h_input in blstm_outputs]
    # print 'computed scores'
    # normalize to alphas using softmax
    # print 'computing alphas...'
    alphas = pc.softmax(pc.concatenate(scores))
    # print 'computed alphas...'
    # compute c using alphas
    # print 'computing c...'

    # import time
    # s = time.time()
    # dim = len(blstm_outputs[0].vec_value())
    # stacked_alphas = pc.concatenate_cols([alphas for j in xrange(dim)])
    # stacked_vecs = pc.concatenate_cols([h_input for h_input in blstm_outputs])
    # c = pc.esum(pc.cwise_multiply(stacked_vecs, stacked_alphas))
    # print "stack time:", time.time() - s

    # s = time.time()
    c = pc.esum([h_input * pc.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)])
    # print "pick time:", time.time() - s
    # print 'computed c'
    # print 'c len is {}'.format(len(c.vec_value()))
    # compute output state h~ using c and the decoder's h (global attention variation from Luong and Manning 2015)
    # print 'computing h~...'
    h_output = pc.tanh(W_c * pc.concatenate([h_t, c]))
    # print 'len of h_output is {}'.format(len(h_output.vec_value()))
    # print 'computed h~'

    return h_output, alphas, W__a.value()
Example #29
    def train(self, words, lemmas, gold, bad):
        dy.renew_cg()
        W = dy.parameter(self.pW)
        b = dy.parameter(self.pb)

        losses = []
        gold_scores = []
        bad_scores = []

        for item in gold:
            lf, denotation = item[0], item[1]
            feature = self.extract_feature(words, lemmas, lf, denotation)
            feature_vec = dy.vecInput(self.nfeatures)
            feature_vec.set(feature)
            gold_scores.append(W * feature_vec + b)

        for item in bad:
            lf, denotation = item[0], item[1]
            feature = self.extract_feature(words, lemmas, lf, denotation)
            feature_vec = dy.vecInput(self.nfeatures)
            feature_vec.set(feature)
            bad_scores.append(W * feature_vec + b)

        log_prob = dy.log_softmax(dy.concatenate(gold_scores + bad_scores))
        for i in range(len(gold_scores)):
            losses.append(dy.pick(log_prob, i))

        return -dy.esum(losses)
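The loss above is a softmax ranking loss: one log-softmax over the scores of all candidates, then the negated log-probabilities of the gold candidates are summed. A toy version with hard-coded scores, two gold followed by two bad candidates:

import dynet as dy

dy.renew_cg()
scores = dy.concatenate([dy.scalarInput(s) for s in [2.0, 1.5, 0.3, -1.0]])
log_prob = dy.log_softmax(scores)
loss = -dy.esum([dy.pick(log_prob, i) for i in range(2)])  # gold candidates are indices 0 and 1
print(loss.value())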
Example #30
    def get_loss(self, input_sentence, label):

        dy.renew_cg()

        w = dy.parameter(self.w)

        b1 = dy.parameter(self.b1)

        u = dy.parameter(self.u)

        b2 = dy.parameter(self.b2)

        embedded = self.embed_sentence(input_sentence)

        encoded = self.encoded_sentence(embedded)

        acc_lstm = self.run_lstm(self.accecptor_lstm.initial_state(), encoded)

        mlp_input = acc_lstm[-1]

        h = dy.tanh((w * mlp_input) + b1)

        y_pred = dy.softmax((u * h) + b2)

        loss = -dy.log(dy.pick(y_pred, label))

        return loss
Example #31
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
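decode above depends on an attend helper that is not part of the snippet. It appears to follow the DyNet attention tutorial, where attend looks roughly like the sketch below; attention_w2 and attention_v are assumed global parameters, so treat this as a reconstruction rather than the exact source:

def attend(input_mat, state, w1dt):
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    # input_mat: (enc_dim x seq_len), w1dt: (att_dim x seq_len), w2dt: (att_dim,)
    w2dt = w2 * dy.concatenate(list(state.s()))
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(unnormalized)
    # weighted sum of the encoder columns
    return input_mat * att_weights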
Example #32
    def forward(self, observations):
        def log_sum_exp(scores):
            npval = scores.npvalue()
            argmax_score = np.argmax(npval)
            max_score_expr = dy.pick(scores, argmax_score)
            max_score_expr_broadcast = dy.concatenate([max_score_expr] *
                                                      self.tagset_size)
            return max_score_expr + dy.log(
                dy.sum_cols(
                    dy.transpose(dy.exp(scores - max_score_expr_broadcast))))

        init_alphas = [-1e10] * self.tagset_size
        init_alphas[t2i[START_TAG]] = 0
        for_expr = dy.inputVector(init_alphas)
        for obs in observations:
            alphas_t = []
            for next_tag in range(self.tagset_size):
                obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                               self.tagset_size)
                next_tag_expr = for_expr + self.transitions[
                    next_tag] + obs_broadcast
                alphas_t.append(log_sum_exp(next_tag_expr))
            for_expr = dy.concatenate(alphas_t)
        terminal_expr = for_expr + self.transitions[t2i["<STOP>"]]
        alpha = log_sum_exp(terminal_expr)
        return alpha
Example #33
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate(
            [attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Example #34
    def _forward(self, emissions):

        """Viterbi forward to calculate all path scores.

        :param emissions: List[dy.Expression]

        Returns:
            dy.Expression ((1,), B)
        """
        init_alphas = [-1e4] * self.n_tags
        init_alphas[self.start_idx] = 0

        alphas = dy.inputVector(init_alphas)
        transitions = self.transitions
        # len(emissions) == T
        for emission in emissions:
            add_emission = dy.colwise_add(transitions, emission)
            scores = dy.colwise_add(dy.transpose(add_emission), alphas)
            # dy.logsumexp takes a list of dy.Expression and computes logsumexp
            # elementwise across the lists so for example the logsumexp is calculated
            # for [0] in each list. This means we want the scores for a given
            # transition scores for a tag to be in the columns
            alphas = dy.logsumexp([x for x in scores])
        last_alpha = alphas + dy.pick(transitions, self.end_idx)
        alpha = dy.logsumexp([x for x in last_alpha])
        return alpha
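The comment in _forward notes that dy.logsumexp works elementwise across a list of expressions. A two-line check of that behaviour (toy values):

import dynet as dy

dy.renew_cg()
a = dy.inputVector([0.0, 1.0])
b = dy.inputVector([1.0, 0.0])
print(dy.logsumexp([a, b]).npvalue())  # both entries equal log(e**0 + e**1), about 1.3133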
Example #35
    def score_sentence(self, emissions, tags):
        """Get the score of a given sentence.

        :param emissions: List[dy.Expression ((H,), B)]
        :param tags: List[int]

        Returns:
            dy.Expression ((1,), B)
        """
        tags = np.concatenate((np.array([self.start_idx], dtype=int), tags))
        score = dy.scalarInput(0)
        transitions = self.transitions
        for i, e in enumerate(emissions):
            # Due to Dynet being column based it is best to use the transition
            # matrix so that x -> y is T[y, x].
            score += dy.pick(dy.pick(transitions, tags[i + 1]), tags[i]) + dy.pick(e, tags[i + 1])

        score += dy.pick(dy.pick(transitions, self.end_idx), tags[-1])
        return score
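The docstring above says the transition matrix is stored so that a move x -> y is read as T[y, x], which the nested dy.pick calls implement. A toy check of that indexing convention (values invented):

import numpy as np
import dynet as dy

dy.renew_cg()
T = dy.inputTensor(np.arange(9.0).reshape(3, 3))
x, y = 2, 1
print(dy.pick(dy.pick(T, y), x).value())  # reads T[1, 2], i.e. 5.0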
Example #36
def attend2(blstm_outputs, s_prev, y_feedback, v_a, W_a, U_a, U_o, V_o, C_o):

    # attention mechanism - Bahdanau style
    # iterate through input states to compute alphas
    # print 'computing scores...'

    # W_a: hidden x hidden, U_a: hidden x 2 hidden, v_a: hidden, each score: scalar
    scores = [v_a * pc.tanh(W_a * s_prev + U_a * h_j) for h_j in blstm_outputs]
    alphas = pc.softmax(pc.concatenate(scores))

    # c_i: 2 hidden
    c_i = pc.esum([h_input * pc.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)])

    # U_o = 2l x hidden, V_o = 2l x input, C_o = 2l x 2 hidden
    attention_output_vector = U_o * s_prev + V_o * y_feedback + C_o * c_i

    return attention_output_vector, alphas
Example #37
 def transduce(self, inputs, train):
     xs = inputs[:self.max_length]
     if not xs:
         return []
     for i in range(self.lstm_layers):
         for n, d in ("f", 1), ("b", -1):
             Wr, br, Wh = [self.params["%s%d%s" % (p, i, n)] for p in ("Wr", "br", "Wh")]
             hs_ = self.params["rnn%d%s" % (i, n)].initial_state().transduce(xs[::d])
             hs = [hs_[0]]
             for t in range(1, len(hs_)):
                 r = dy.logistic(Wr * dy.concatenate([hs[t - 1], xs[t]]) + br)
                 hs.append(dy.cmult(r, hs_[t]) + dy.cmult(1 - r, Wh * xs[t]))
             xs = hs
             if train:
                 x = dy.dropout_dim(dy.concatenate(xs, 1), 1, self.dropout)
                 xs = [dy.pick(x, i, 1) for i in range(len(xs))]
     return xs
Example #38
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []
    for char in output:
        vector = dy.concatenate([attend(vectors, s), last_output_embeddings])

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Example #39
def create_network_return_loss(inputs, expected_output):
    '''
    inputs is a list of numbers
    '''
    dy.renew_cg()
    W = dy.parameter(pW) # from parameters to expressions
    b = dy.parameter(pB)
    
    if(len(inputs) > documentLength):
       inputs = inputs[0:documentLength]
    
    emb_vectors = [lookup[i] for i in inputs]
    
    while(len(emb_vectors) < documentLength):
        pad = dy.vecInput(embDimension)
        pad.set(np.zeros(embDimension))
        emb_vectors.append(pad)
    
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    loss = -dy.log(dy.pick(net_output, expected_output))
    return loss
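A hypothetical training step around create_network_return_loss; the trainer, the collection name m, and the input ids below are assumptions for illustration only:

trainer = dy.SimpleSGDTrainer(m)  # assumes the parameters above live in a collection named m
loss = create_network_return_loss([4, 7, 2], expected_output=1)
print(loss.value())               # forward pass
loss.backward()                   # backward pass
trainer.update()                  # parameter update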
Example #40
def CalculateLossForWord(word_obj, fValidation=False, fRunning=False):
    dy.renew_cg()

    if not fRunning: gold_lang = word_obj['tag']
    # add a bos before and after
    seq = ['*BOS*'] + list(word_obj['word']) + ['*BOS*']

    # get all the char encodings for the daf
    char_embeds = [let_enc(let) for let in seq]

    # run it through the bilstm
    char_bilstm_outputs = bilstm(char_embeds)
    bilistm_output = dy.concatenate([char_bilstm_outputs[0],char_bilstm_outputs[-1]])

    mlp_input = bilistm_output
    mlp_out = lang_mlp(mlp_input)
    predicted_lang = lang_tags[np.argmax(mlp_out.npvalue())]
    confidence = (mlp_out.npvalue()[:2] / np.sum(mlp_out.npvalue()[:2])).tolist() #skip ambiguous
    # if we aren't doing validation, calculate the loss
    if not fValidation and not fRunning:
        loss = -dy.log(dy.pick(mlp_out, gold_lang))
    # otherwise, set the answer to be the argmax
    elif not fRunning and fValidation:
        loss = None
        lang_conf_matrix(np.argmax(mlp_out.npvalue()), gold_lang)
    else:
        return predicted_lang,confidence

    pos_prec = 1 if predicted_lang == lang_tags[gold_lang] else 0

    tagged_word = { 'word': word_obj['word'], 'tag': predicted_lang, 'confidence':confidence, 'gold_tag':lang_tags[gold_lang]}

    if fValidation:
        return pos_prec, tagged_word

    return loss, pos_prec
Example #41
import numpy as np
import dynet as dy

# minimal setup so the snippet runs: a parameter collection and a lookup table
# (sizes are assumed for illustration)
m = dy.ParameterCollection()
lp = m.add_lookup_parameters((10, 10))

# regular lookup
a = lp[1].npvalue()
b = lp[2].npvalue()
c = lp[3].npvalue()

# batch lookup instead of single elements.
# two ways of doing this.
abc1 = dy.lookup_batch(lp, [1,2,3])
print(abc1.npvalue())

abc2 = lp.batch([1,2,3])
print(abc2.npvalue())

print(np.hstack([a,b,c]))


# use pick and pickneglogsoftmax in batch mode
# (must be used in conjunction with lookup_batch):
print("\nPick")
W = dy.parameter( m.add_parameters((5, 10)) )
h = W * lp.batch([1,2,3])
print(h.npvalue())
print(dy.pick_batch(h,[1,2,3]).npvalue())
print(dy.pick(W*lp[1],1).value(), dy.pick(W*lp[2],2).value(), dy.pick(W*lp[3],3).value())

# using pickneglogsoftmax_batch
print("\nPick neg log softmax")
print((-dy.log(dy.softmax(h))).npvalue())
print(dy.pickneglogsoftmax_batch(h,[1,2,3]).npvalue())
Example #42
  def parse(self, t, oracle_actions=None):
    dy.renew_cg()
    self.NULL_REP = self.WORDS_LOOKUP[self.nwords-1]
    if oracle_actions:
      oracle_actions = list(oracle_actions)
      oracle_actions.reverse()
    toks = list(t)
    toks.reverse()
    stack = []
    buffer = []
    W1 = dy.parameter(self.pW1)
    b1 = dy.parameter(self.pb1)
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)
    losses = []
    for tok in toks:
      tok_embedding = self.WORDS_LOOKUP[tok]
      buffer.append(Head(self.vocab.i2w[tok], tok_embedding))

    while not (len(stack) == 1 and len(buffer) == 0):
      # based on parser state, get valid actions
      valid_actions = []
      if len(buffer) > 0:  # can only reduce if elements in buffer
        valid_actions += [SHIFT]
      if len(stack) >= 2:  # can only shift if 2 elements on stack
        valid_actions += [REDUCE_L, REDUCE_R]

      # compute probability of each of the actions and choose an action
      # either from the oracle or if there is no oracle, based on the model
      action = valid_actions[0]
      log_probs = None
      if len(valid_actions) > 1:
        representations = self.extract_features(stack, buffer)
        h = dy.cube(W1*dy.concatenate(representations) + b1)
        logits = W_act * h + b_act
        log_probs = dy.log_softmax(logits, valid_actions)
        if oracle_actions is None:
          action = max(enumerate(log_probs.vec_value()), key=itemgetter(1))[0]
      if oracle_actions is not None:
        action = oracle_actions.pop()
        if log_probs is not None:
          # append the action-specific loss
          losses.append(dy.pick(log_probs, action))

      # execute the action to update the parser state
      if action == SHIFT:
        token = buffer.pop()
        stack.append(token)
      else: # one of the reduce actions
        right = stack.pop()
        left = stack.pop()
        head, modifier = (left, right) if action == REDUCE_R else (right, left)
        #add the tokens and their embeddings into the children list
        if action == REDUCE_R:
          head.add_child(modifier, 'right')
        else:
          head.add_child(modifier, 'left')
        stack.append(head)
        if oracle_actions is None:
          print('{0} --> {1}'.format(head.word, modifier.word))

    # the head of the tree that remains at the top of the stack is now the root
    if oracle_actions is None:
      head = stack.pop().word
      print('ROOT --> {0}'.format(head))
    return -dy.esum(losses) if losses else None
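The parser above calls dy.log_softmax with a second argument that restricts the distribution to the currently valid actions. A minimal illustration of that restriction (toy logits):

import dynet as dy

dy.renew_cg()
logits = dy.inputVector([1.0, 2.0, 3.0, 4.0])
restricted = dy.log_softmax(logits, [0, 2])  # probability mass only over actions 0 and 2
print(restricted.npvalue())                  # entries 1 and 3 come out as -inf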
Example #43
def CalculateLossForDaf(daf, fValidation=False, fRunning=False):
    dy.renew_cg()
    tagged_daf = {"words":[],"file":daf["file"]}
    daf = daf["words"]

    # add a bos before and after
    seq = ['*BOS*'] + list(' '.join([word for word, _, _, _ in daf])) + ['*BOS*']

    # get all the char encodings for the daf
    char_embeds = [let_enc(let) for let in seq]

    # run it through the bilstm
    char_bilstm_outputs = bilstm(char_embeds)

    # now iterate and get all the separate word representations by concatenating the bilstm output
    # before and after the word
    word_bilstm_outputs = []
    iLet_start = 0
    for iLet, char in enumerate(seq):
        # if it is a bos, check if it's at the end of the sequence
        if char == '*BOS*':
            if iLet + 1 == len(seq):
                char = ' '
            else:
                continue
        # if we are at a space, take this bilstm output and the one at the letter start
        if char == ' ':
            cur_word_bilstm_output = dy.concatenate([char_bilstm_outputs[iLet_start], char_bilstm_outputs[iLet]])
            # add it in
            word_bilstm_outputs.append(cur_word_bilstm_output)

            # set the iLet_start counter to here
            iLet_start = iLet

    # safe-check, make sure word bilstm outputs length is the same as the daf
    if len(word_bilstm_outputs) != len(daf):
        log_message('Size mismatch!! word_bilstm_outputs: ' + str(len(word_bilstm_outputs)) + ', daf: ' + str(len(daf)))

    prev_pos_lstm_state = prev_pos_lstm.initial_state().add_input(pos_enc('*BOS*'))

    all_losses = []
    pos_prec = 0.0
    rough_pos_prec = 0.0
    pos_items = 0
    class_prec = 0.0
    class_items = 0.0
    # now iterate through the bilstm outputs, and each word in the daf
    for (word, gold_word_class, gold_word_pos, gold_word_lang), bilstm_output in zip(daf, word_bilstm_outputs):
        should_backprop = gold_word_class == 1

        # create the mlp input, a concatenate of the bilstm output and of the prev pos output
        mlp_input = dy.concatenate([bilstm_output, prev_pos_lstm_state.output()])

        # run through the class mlp
        class_mlp_output = class_mlp(mlp_input)

        predicted_word_class = np.argmax(class_mlp_output.npvalue())
        confidence = np.max(class_mlp_output.npvalue()) / np.sum(class_mlp_output.npvalue())


        # prec
        if should_backprop:
            class_prec += 1 if predicted_word_class == gold_word_class else 0
            class_items += 1

        # if we aren't doing validation, calculate the loss
        if not fValidation and not fRunning:
            if should_backprop: all_losses.append(-dy.log(dy.pick(class_mlp_output, gold_word_class)))
            word_class_ans = gold_word_class
        # otherwise, set the answer to be the argmax
        else:
            word_class_ans = predicted_word_class

        # if the word_class answer is 1, do the pos!
        # alternatively, if validating and it's Aramaic, do the pos!
        if word_class_ans or (fValidation and gold_word_lang) or (fRunning and gold_word_lang):
            # run the pos mlp output
            pos_mlp_output = pos_mlp(mlp_input)
            try:
                temp_pos_array = pos_mlp_output.npvalue()
                possible_pos_array = np.zeros(temp_pos_array.shape)
                pos_list = pos_hashtable[word]
                # pos_list.add('') #concat 'unknown' as possible pos
                possible_pos_indices = [pos_vocab[temp_pos] for temp_pos in pos_list]
                possible_pos_array[possible_pos_indices] = temp_pos_array[possible_pos_indices]
            except KeyError:
                possible_pos_array = pos_mlp_output.npvalue()
                # if fValidation:
                #    possible_pos_array[pos_vocab['']] = 0.0 # don't allow validation to guess UNK b/c it never trained against that TODO this makes sense, right?

            predicted_word_pos = pos_vocab.getItem(np.argmax(possible_pos_array))
            confidence = np.max(possible_pos_array) / np.sum(possible_pos_array)
            # prec
            if should_backprop:
                pos_prec += 1 if predicted_word_pos == gold_word_pos else 0
                rough_pos_prec += 1 if predicted_word_pos[0] == gold_word_pos[0] else 0 # you got at least the rough pos right
                pos_items += 1

            # if we aren't doing validation, calculate the loss
            if not fValidation and not fRunning:
                if should_backprop: all_losses.append(-dy.log(dy.pick(pos_mlp_output, pos_vocab[gold_word_pos])))
                word_pos_ans = gold_word_pos
            # otherwise, set the answer to be the argmax
            elif not fRunning and fValidation:
                if should_backprop: pos_conf_matrix(pos_vocab[predicted_word_pos], pos_vocab[gold_word_pos])
                word_pos_ans = predicted_word_pos
            else:
                word_pos_ans = predicted_word_pos

            # run through the prev-pos-mlp
            predicted = predicted_word_pos
            prev_pos_lstm_state = prev_pos_lstm_state.add_input(pos_enc(word_pos_ans))
        # if the answer is 0, put a '' through the prev-pos lstm
        else:
            predicted = 'UNK'
            prev_pos_lstm_state = prev_pos_lstm_state.add_input(pos_enc(''))

        tagged_daf["words"].append({"word":word,"gold_pos":gold_word_pos,"gold_class":gold_word_class,"predicted":predicted,"confidence":confidence, "lang": gold_word_lang})

    if fRunning:
        return tagged_daf

    pos_prec = pos_prec / pos_items if pos_items > 0 else None
    rough_pos_prec = rough_pos_prec / pos_items if pos_items > 0 else None
    class_prec = class_prec / class_items if class_items > 0 else None

    if fValidation:
        return class_prec, pos_prec,tagged_daf, rough_pos_prec

    total_loss = dy.esum(all_losses) if len(all_losses) > 0 else None
    return total_loss, class_prec, pos_prec, rough_pos_prec
Example #44
def CalculateLossForDaf(daf, fValidation=False, fRunning=False):
    dy.renew_cg()
    tagged_daf = {"words": []}

    # add a bos before and after
    seq = ['*BOS*'] + list(' '.join([word for word, _ in daf])) + ['*BOS*']

    # get all the char encodings for the daf
    char_embeds = [let_enc(let) for let in seq]

    # run it through the bilstm
    char_bilstm_outputs = bilstm(char_embeds)

    # now iterate and get all the separate word representations by concatenating the bilstm output
    # before and after the word
    word_bilstm_outputs = []
    iLet_start = 0
    for iLet, char in enumerate(seq):
        # if it is a bos, check if it's at the end of the sequence
        if char == '*BOS*':
            if iLet + 1 == len(seq):
                char = ' '
            else:
                continue
        # if we are at a space, take this bilstm output and the one at the letter start
        if char == ' ':
            cur_word_bilstm_output = dy.concatenate([char_bilstm_outputs[iLet_start], char_bilstm_outputs[iLet]])
            # add it in
            word_bilstm_outputs.append(cur_word_bilstm_output)

            # set the iLet_start counter to here
            iLet_start = iLet

    # safe-check, make sure word bilstm outputs length is the same as the daf
    if len(word_bilstm_outputs) != len(daf):
        log_message('Size mismatch!! word_bilstm_outputs: ' + str(len(word_bilstm_outputs)) + ', daf: ' + str(len(daf)))

    prev_lang_lstm_state = prev_lang_lstm.initial_state().add_input(lang_enc('*BOS*'))

    all_losses = []
    lang_prec = 0.0
    lang_items = 0

    # now iterate through the bilstm outputs, and each word in the daf
    for (word, gold_word_lang), bilstm_output in zip(daf, word_bilstm_outputs):

        # create the mlp input, a concatenate of the bilstm output and of the prev pos output
        mlp_input = dy.concatenate([bilstm_output, prev_lang_lstm_state.output()])

        # run through the class mlp
        lang_mlp_output = lang_mlp(mlp_input)
        predicted_word_lang = lang_vocab.getItem(np.argmax(lang_mlp_output.npvalue()))
        confidence = np.max(lang_mlp_output.npvalue()) / np.sum(lang_mlp_output.npvalue())
        lang_prec += 1 if predicted_word_lang == gold_word_lang else 0
        lang_items += 1


        tagged_daf["words"].append(
            {"word": word, "predicted_lang": predicted_word_lang, "confidence": confidence})
        # if we aren't doing validation, calculate the loss
        if not fValidation and not fRunning:
            all_losses.append(-dy.log(dy.pick(lang_mlp_output, lang_vocab[gold_word_lang])))
            word_pos_ans = gold_word_lang
        # otherwise, set the answer to be the argmax
        elif not fRunning and fValidation:
            lang_conf_matrix(lang_vocab[predicted_word_lang], lang_vocab[gold_word_lang])
            word_pos_ans = predicted_word_lang
        else:
            continue

        # run through the prev-pos-mlp
        prev_lang_lstm_state = prev_lang_lstm_state.add_input(lang_enc(word_pos_ans))

        # prev_pos_lstm_state = prev_pos_lstm_state.add_input(pos_enc(''))



    lang_prec = lang_prec / lang_items if lang_items > 0 else None
    # class_prec = class_prec / class_items if class_items > 0 else None

    if fValidation:
        return lang_prec, tagged_daf

    if fRunning:
        return tagged_daf

    total_loss = dy.esum(all_losses) if len(all_losses) > 0 else None
    return total_loss, lang_prec