Example #1
    def evaluate_network_from_embs(self, wembs, renew=True):
        params = self.params
        if renew:
            dy.renew_cg()
        builders = params["builders"]
        W = params["W"]
        v = params["v"]

        lstms = [b.initial_state() for b in builders]

        # wembs = [dy.noise(we, 0.1) for we in wembs]

        # run the first BiLSTM layer

        fw_lstm1 = lstms[0].transduce(wembs)
        bw_lstm1 = reversed(lstms[1].transduce(reversed(wembs)))

        inputs_to_2nd_layer = [
            dy.concatenate([f, b]) for f, b in zip(fw_lstm1, bw_lstm1)
        ]

        fw_lstm2 = lstms[2].transduce(inputs_to_2nd_layer)
        bw_lstm2 = reversed(lstms[3].transduce(reversed(inputs_to_2nd_layer)))

        y = [dy.concatenate([f, b]) for f, b in zip(fw_lstm2, bw_lstm2)]
        tags_hat = [W * t + v for t in y]
        return tags_hat
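A quick note on the two concatenation primitives that recur throughout these examples: dy.concatenate joins expressions along dimension 0 into one longer vector, while dy.concatenate_cols stacks them as columns of a matrix. A minimal sketch (illustrative values, assuming a working DyNet install):

import dynet as dy

dy.renew_cg()
vs = [dy.inputVector([1.0, 2.0]), dy.inputVector([3.0, 4.0])]
stacked = dy.concatenate(vs)    # dim ((4,), 1): one long vector
cols = dy.concatenate_cols(vs)  # dim ((2, 2), 1): each input becomes a column
print(stacked.dim(), cols.dim())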
Example #2
    def build_representations_bi(self,
                                 sentence,
                                 training,
                                 prefix=[],
                                 do_not_renew=False):
        if not do_not_renew:
            dy.renew_cg(immediate_compute=True, check_validity=True)
        coded_sentence = self.vocabulary.code_sentence_cw(sentence, training)
        coded_prefix = self.vocabulary.code_sentence_cw(prefix, training)

        w_init_f = self.wrnn[F].initial_state()
        w_init_b = self.wrnn[B].initial_state()

        f_lstm_input = self.get_static_representations(coded_prefix +
                                                       coded_sentence)
        b_lstm_input = self.get_static_representations(
            coded_prefix + list(reversed(coded_sentence)))

        contextual_embeddings = [
            w_init_f.transduce(f_lstm_input),
            list(reversed(w_init_b.transduce(b_lstm_input)))
        ]

        return (dy.concatenate([
            contextual_embeddings[F][-1], contextual_embeddings[B][0]
        ]), [dy.concatenate(list(fb)) for fb in zip(*contextual_embeddings)])
Example #3
    def forward(self, observations):
        # calculate forward pass
        def log_sum_exp(scores):
            npval = scores.npvalue()
            argmax_score = np.argmax(npval)
            max_score_expr = dynet.pick(scores, argmax_score)
            max_score_expr_broadcast = dynet.concatenate([max_score_expr] *
                                                         self.num_tags)
            return max_score_expr + dynet.logsumexp_dim(
                (scores - max_score_expr_broadcast), 0)

        init_alphas = [-1e10] * self.num_tags
        init_alphas[START_TAG] = 0
        for_expr = dynet.inputVector(init_alphas)
        for obs in observations:
            alphas_t = []
            for next_tag in range(self.num_tags):
                obs_broadcast = dynet.concatenate([dynet.pick(obs, next_tag)] *
                                                  self.num_tags)
                next_tag_expr = for_expr + self.trans_mat[
                    next_tag] + obs_broadcast
                alphas_t.append(log_sum_exp(next_tag_expr))
            for_expr = dynet.concatenate(alphas_t)
        terminal_expr = for_expr + self.trans_mat[END_TAG]
        alpha = log_sum_exp(terminal_expr)
        return alpha
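The inner log_sum_exp above applies the standard max-shift trick: subtracting the maximum score before exponentiating avoids overflow without changing the result. A plain NumPy sketch of the same identity (names here are illustrative, not part of the example):

import numpy as np

def log_sum_exp_np(scores):
    m = scores.max()
    return m + np.log(np.exp(scores - m).sum())

scores = np.array([1000.0, 999.0])  # naive log(sum(exp(...))) overflows here
assert np.isclose(log_sum_exp_np(scores), np.logaddexp.reduce(scores))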
Example #4
    def __call__(self, h, s):
        # hT -> ((L, h_dim), B), s -> ((s_dim, L), B)
        if len(h.dim()[0]) == 2:
            L = h.dim()[0][1]
            if self.h_bias:
                s = dy.concatenate(
                    [s, dy.inputTensor(np.ones((1, L), dtype=np.float32))])
            if self.s_bias:
                h = dy.concatenate(
                    [h, dy.inputTensor(np.ones((1, L), dtype=np.float32))])
        else:
            if self.h_bias:
                s = dy.concatenate(
                    [s, dy.inputTensor(np.ones((1, ), dtype=np.float32))])
            if self.s_bias:
                h = dy.concatenate(
                    [h, dy.inputTensor(np.ones((1, ), dtype=np.float32))])
        hT = dy.transpose(h)
        lin = self.U * s  # ((h_dim*n_label, L), B)
        if self.n_label > 1:
            lin = dy.reshape(lin, (self.h_dim, self.n_label))

        blin = hT * lin
        if self.n_label == 1:
            return blin
        else:
            return dy.transpose(blin)
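For orientation, the module above computes the biaffine form h^T U s; appending an all-ones row to h and/or s lets U absorb the linear and bias terms. A NumPy shape sketch under assumed sizes (single-label, unbatched; not the class's actual API):

import numpy as np

L, h_dim, s_dim = 5, 4, 4
h = np.random.randn(h_dim + 1, L)          # h with a bias row appended
s = np.random.randn(s_dim + 1, L)          # s with a bias row appended
U = np.random.randn(h_dim + 1, s_dim + 1)  # single-label case (n_label == 1)
scores = h.T @ (U @ s)                     # (L, L) pairwise biaffine scores
assert scores.shape == (L, L)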
Example #5
    def get_features(self, words, train=False, update=True):
        """
        get feature representations
        """
        # word embeddings
        wfeatures = np.array([
            self.get_w_repr(word, train=train, update=update) for word in words
        ])

        lex_features = []
        if self.dictionary and not self.type_constraint:
            ## add lexicon features
            lex_features = np.array(
                [self.get_lex_repr(word) for word in words])
        # char embeddings
        if self.c_in_dim > 0:
            cfeatures = [self.get_c_repr(word, train=train) for word in words]
            if len(lex_features) > 0:
                lex_features = dynet.inputTensor(lex_features)
                features = [
                    dynet.concatenate([w, c, l])
                    for w, c, l in zip(wfeatures, cfeatures, lex_features)
                ]
            else:
                features = [
                    dynet.concatenate([w, c])
                    for w, c in zip(wfeatures, cfeatures)
                ]
        else:
            features = wfeatures
        if train:  # only do at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]
        return features
Example #6
 def get_pointergen_probs(self, c_t, state, x_t, a_t, probs, src1):
     if not self.pointer_gen:
         return probs, 1.0
     unk_idx = self.tgt_vocab.str2int(UNK)
     p_gen = dy.logistic(
         self.ptr_w_c * c_t
         + self.ptr_w_s * dy.concatenate(list(state.s()))
         + self.ptr_w_x * x_t
     )
     gen_probs = probs * p_gen
     copy_probs = a_t * (1 - p_gen)
     copy_probs_update = []
     for i in gen_probs:
         copy_probs_update.append([i])
     for char, prob in zip(src1, copy_probs):
         cur_idx = self.tgt_vocab.str2int(self.src1_vocab.int2str(char))
         if cur_idx == unk_idx:
             continue
         if isinstance(cur_idx, int):
             copy_probs_update[cur_idx].append(prob)
         else:
             for idx in cur_idx:
                 copy_probs_update[idx].append(prob / len(cur_idx))
     sum_probs = dy.concatenate([dy.esum(exps) for exps in copy_probs_update])
     return sum_probs, p_gen.scalar_value()
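The mixing above follows the usual pointer-generator recipe: scale the vocabulary distribution by p_gen, scale the attention weights by 1 - p_gen, and scatter-add the copy mass onto matching target-vocabulary indices. A NumPy sketch of that step (toy numbers and a hypothetical source-to-target index mapping):

import numpy as np

p_gen = 0.7
gen_probs = p_gen * np.array([0.5, 0.3, 0.2])    # generator distribution over target vocab
copy_probs = (1 - p_gen) * np.array([0.6, 0.4])  # attention over two source characters
src_to_tgt = [0, 2]                              # toy source-char -> target-vocab mapping
final = gen_probs.copy()
for idx, p in zip(src_to_tgt, copy_probs):
    final[idx] += p                              # scatter-add the copy mass
assert np.isclose(final.sum(), 1.0)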
Example #7
 def __call__(self, h, s):
     if self.h_bias:
         if len(h.dim()[0]) == 2:
             h = dy.concatenate([
                 h,
                 dy.inputTensor(
                     np.ones((1, h.dim()[0][1]), dtype=np.float32))
             ])
         else:
             h = dy.concatenate(
                 [h, dy.inputTensor(np.ones((1, ), dtype=np.float32))])
     if self.s_bias:
         if len(s.dim()[0]) == 2:
             s = dy.concatenate([
                 s,
                 dy.inputTensor(
                     np.ones((1, s.dim()[0][1]), dtype=np.float32))
             ])
         else:
             s = dy.concatenate(
                 [s, dy.inputTensor(np.ones((1, ), dtype=np.float32))])
     lin = self.U * s
     if self.n_label > 1:
         lin = dy.reshape(lin, (self.h_dim, self.n_label))
     blin = dy.transpose(h) * lin
     return blin
Example #8
    def __call__(self,
                 inputs,
                 init_vecs=None,
                 dropout_x=0.,
                 dropout_h=0.,
                 train=False):
        batch_size = inputs[0].dim()[1]
        if not self.fb_fusion:
            if self.param_init:
                f = self.f.initial_state(self.f_init)
                b = self.b.initial_state(self.b_init)
            elif init_vecs:
                f = self.f.initial_state(init_vecs["fwd"])
                b = self.b.initial_state(init_vecs["bwd"])
            else:
                f, b = self.f.initial_state(), self.b.initial_state()
            if train:
                self.f.set_dropouts(dropout_x, dropout_h)
                self.f.set_dropout_masks(batch_size)
                self.b.set_dropouts(dropout_x, dropout_h)
                self.b.set_dropout_masks(batch_size)
            else:
                self.f.set_dropouts(0., 0.)
                self.f.set_dropout_masks(batch_size)
                self.b.set_dropouts(0., 0.)
                self.b.set_dropout_masks(batch_size)
            f_in, b_in = inputs, reversed(inputs)
            f_out, b_out = f.add_inputs(f_in), b.add_inputs(b_in)
            f_last, b_last = f_out[-1].s(), b_out[-1].s()
            f_out = [state.h()[-1] for state in f_out]
            b_out = [state.h()[-1] for state in b_out]
            out = [
                dy.concatenate([f, b]) for f, b in zip(f_out, reversed(b_out))
            ]
            last = [dy.concatenate([f, b]) for f, b in zip(f_last, b_last)]
            return (last, out)

        else:
            for f_lstm, b_lstm in self.DeepBiLSTM:
                f = f_lstm.initial_state(update=True)
                b = b_lstm.initial_state(update=True)
                if train:
                    f_lstm.set_dropouts(dropout_x, dropout_h)
                    f_lstm.set_dropout_masks(batch_size)
                    b_lstm.set_dropouts(dropout_x, dropout_h)
                    b_lstm.set_dropout_masks(batch_size)
                else:
                    f_lstm.set_dropouts(0., 0.)
                    f_lstm.set_dropout_masks(batch_size)
                    b_lstm.set_dropouts(0., 0.)
                    b_lstm.set_dropout_masks(batch_size)
                fs, bs = f.transduce(inputs), b.transduce(reversed(inputs))
                inputs = [
                    dy.concatenate([f, b]) for f, b in zip(fs, reversed(bs))
                ]
            return inputs
Example #9
        def process_one_instance(instance,
                                 update=True,
                                 x_y_vectors=None,
                                 features=None,
                                 mode='train'):
            lemma_lookup = self.model_parameters['lemma_lookup']
            if self.opt['use_path']:
                pos_lookup = self.model_parameters['pos_lookup']
                dep_lookup = self.model_parameters['dep_lookup']
                dir_lookup = self.model_parameters['dir_lookup']
                # Add the empty path
                paths = instance
                if len(paths) == 0:
                    paths[EMPTY_PATH] = 1

                # Compute the averaged path
                num_paths = sum(instance.values())
                path_embeddings = [
                    self.get_path_embedding_from_cache(
                        lemma_lookup, pos_lookup, dep_lookup, dir_lookup, path,
                        update, mode) * count
                    for path, count in instance.items()
                ]
                input_vec = dy.esum(path_embeddings) * (1.0 / num_paths)

            # Concatenate x and y embeddings
            if self.opt['use_xy_embeddings']:
                x_vector = dy.lookup(lemma_lookup, x_y_vectors[0])
                y_vector = dy.lookup(lemma_lookup, x_y_vectors[1])
                if self.opt['use_path']:
                    input_vec = dy.concatenate([x_vector, input_vec, y_vector])
                else:
                    input_vec = dy.concatenate([x_vector, y_vector])
            if self.opt['use_features']:
                for k in feat_dims:
                    if 'diff' in k and not self.opt['use_freq_features']:
                        continue
                    feat = dy.lookup(self.model_parameters[k], features[k])
                    input_vec = dy.concatenate([input_vec, feat])

            if self.opt['use_height_ebd']:
                if j in tree.term_height:
                    h = tree.get_height(j) - 1
                else:
                    h = 0
                height_vector = dy.lookup(
                    self.model_parameters['height_lookup'], h)
                input_vec = dy.concatenate([input_vec, height_vector])
            return input_vec
Example #10
    def set_initial_states(self, x):
        self.xt_embs = [dy.lookup(self.F, x_t) for x_t in x]

        if self.encoder_type == 'bow':
            self.W_enc = self.W * dy.average(self.xt_embs)

        elif self.encoder_type == 'attention':
            self.xb = dy.concatenate([
                dy.esum(self.xt_embs[max(i - self.q, 0):min(len(x), i + self.q + 1)]) / self.q
                for i in range(len(x))
            ], d=1)
            self.xt = dy.transpose(dy.concatenate(self.xt_embs, d=1))
Example #11
 def attend(self, input_mat, state, w1dt, w2, v, coverage):
     w2dt = w2 * dy.concatenate(list(state.s()))
     if coverage:
         w1dt = w1dt + self.w_cov * dy.transpose(coverage)
     a_t = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
     a_t = dy.softmax(a_t)
     return a_t, (input_mat * a_t)
Example #12
 def encode(self, embeds, fwd_lstm, bwd_lstm):
     embeds_rev = list(reversed(embeds))
     fwd_vectors = self.run_lstm(fwd_lstm.initial_state(), embeds)
     bwd_vectors = self.run_lstm(bwd_lstm.initial_state(), embeds_rev)
     bwd_vectors = list(reversed(bwd_vectors))
     vectors = [dy.concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)]
     return vectors
Example #13
    def calc_compare(self, a_vecs, b_vecs, alphas, betas, dropout):
        ### not batched at the moment
        l_a = a_vecs.dim()[1]
        l_b = b_vecs.dim()[1]
        v1_i = [
            self.compare.evaluate_network(
                dy.concatenate([dy.pick_batch_elem(a_vecs, i), betas[i]]),
                True, dropout) for i in range(l_a)
        ]
        v2_j = [
            self.compare.evaluate_network(
                dy.concatenate([dy.pick_batch_elem(b_vecs, j), alphas[j]]),
                True, dropout) for j in range(l_b)
        ]

        return v1_i, v2_j
Example #14
    def __call__(self, sentence, c2i, maxn_char, act, train=False):
        words_batch = []
        for token in sentence:
            chars_emb = [self.clookup[int(c2i.get(c, 0))] for c in token.chars]
            c2w = dy.concatenate_cols(chars_emb)
            c2w = dy.reshape(c2w, tuple(list(c2w.dim()[0]) + [1]))
            words_batch.append(c2w)

        words_batch = dy.concatenate_to_batch(words_batch)
        convds = [
            dy.conv2d(words_batch, W, stride=(1, 1), is_valid=True)
            for W in self.Ws
        ]
        actds = [act(convd) for convd in convds]
        poolds = [
            dy.maxpooling2d(actd,
                            ksize=(1, maxn_char - win_size + 1),
                            stride=(1, 1))
            for win_size, actd in zip(self.win_sizes, actds)
        ]
        words_batch = [
            dy.reshape(poold, (poold.dim()[0][2], )) for poold in poolds
        ]
        words_batch = dy.concatenate(words_batch)

        c2w_emb = []
        for idx, token in enumerate(sentence):
            c2w_emb.append(dy.pick_batch_elem(words_batch, idx))
        return c2w_emb
Example #15
 def __call__(self, embeds, masks):
     # embeds: list(step) of {(n_emb, ), batch_size}, using padding for batches
     b_size = bs(embeds[0])
     outputs = [embeds]
     # # todo(warn), disable masks for speeding up (although might not be critical)
     # masks = [None for _ in masks]
     for i, nn in zip(range(self.n_layers), self.nodes):
         init_hidden = dy.zeroes((self.n_hidden, ), batch_size=b_size)
         tmp_f = []  # forward
         tmp_f_prev = {"H": init_hidden, "C": init_hidden}
         for e, m in zip(outputs[-1], masks):
             one_output = nn[0](e, tmp_f_prev, m)
             tmp_f.append(one_output["H"])
             tmp_f_prev = one_output
          tmp_b = []  # backward
         tmp_b_prev = {"H": init_hidden, "C": init_hidden}
         for e, m in zip(reversed(outputs[-1]), reversed(masks)):
             one_output = nn[1](e, tmp_b_prev, m)
             tmp_b.append(one_output["H"])
             tmp_b_prev = one_output
         # concat
         ctx = [
             dy.concatenate([f, b]) for f, b in zip(tmp_f, reversed(tmp_b))
         ]
         outputs.append(ctx)
     return outputs[-1]
Example #16
def get_path_embedding(builder,
                       lemma_lookup,
                       pos_lookup,
                       dep_lookup,
                       dir_lookup,
                       path,
                       update=True,
                       drop=0.0):
    """
    Get a vector representing a path
    :param builder: the LSTM builder
    :param lemma_lookup: the lemma embeddings lookup table
    :param pos_lookup: the part-of-speech embeddings lookup table
    :param dep_lookup: the dependency label embeddings lookup table
    :param dir_lookup: the direction embeddings lookup table
    :param path: sequence of edges
    :param update: whether to update the lemma embeddings
    :param drop: the word dropout rate
    :return: a vector representing a path
    """

    # Concatenate the edge components to one vector
    inputs = [
        dy.concatenate([
            word_dropout(lemma_lookup, edge[0], drop, update),
            word_dropout(pos_lookup, edge[1], drop),
            word_dropout(dep_lookup, edge[2], drop),
            word_dropout(dir_lookup, edge[3], drop)
        ]) for edge in path
    ]

    return builder.initial_state().transduce(inputs)[-1]
Example #17
    def get_label_scores(self, lstm_outputs, left, right):
        '''
            Get label scores and fix the score of empty label to zero.
        '''

        non_empty_label_scores = self.f_label(
            self.get_span_encoding(lstm_outputs, left, right))
        return dy.concatenate([dy.zeros(1), non_empty_label_scores])
Example #18
 def log_sum_exp(scores):
     # excerpted from a CRF forward pass; `self.num_tags` is defined in the enclosing method
     npval = scores.npvalue()
     argmax_score = np.argmax(npval)
     max_score_expr = dynet.pick(scores, argmax_score)
     max_score_expr_broadcast = dynet.concatenate([max_score_expr] *
                                                  self.num_tags)
     return max_score_expr + dynet.logsumexp_dim(
         (scores - max_score_expr_broadcast), 0)
Example #19
    def get_span_encoding(self, lstm_outputs, left, right):
        '''
            Get the span representation using the difference of lstm_outputs of left and right.
        '''

        forward = (lstm_outputs[right + 1][:self.lstm_dim] -
                   lstm_outputs[left][:self.lstm_dim])
        backward = (lstm_outputs[left + 1][self.lstm_dim:] -
                    lstm_outputs[right + 2][self.lstm_dim:])
        return dy.concatenate([forward, backward])
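This is the usual BiLSTM span-difference trick: the forward half of the boundary states is differenced left-to-right, the backward half right-to-left (the +1/+2 offsets account for padding tokens around the sentence). A NumPy check with made-up sizes:

import numpy as np

lstm_dim = 3
# each lstm_outputs[i] is [forward_i ; backward_i], so length 2 * lstm_dim
outs = [np.random.randn(2 * lstm_dim) for _ in range(8)]
left, right = 2, 4
forward = outs[right + 1][:lstm_dim] - outs[left][:lstm_dim]
backward = outs[left + 1][lstm_dim:] - outs[right + 2][lstm_dim:]
span = np.concatenate([forward, backward])  # same layout dy.concatenate produces
assert span.shape == (2 * lstm_dim,)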
Example #20
    def get_top_k_paths(self, all_paths, relation_index, threshold):
        """
        Get the top k scoring paths
        """
        builder = self.builder
        model = self.model
        model_parameters = self.model_parameters
        lemma_lookup = model_parameters['lemma_lookup']
        pos_lookup = model_parameters['pos_lookup']
        dep_lookup = model_parameters['dep_lookup']
        dir_lookup = model_parameters['dir_lookup']

        path_scores = []

        for i, path in enumerate(all_paths):

            if i % 1000 == 0:
                cg = dy.renew_cg()
                W1 = dy.parameter(model_parameters['W1'])
                b1 = dy.parameter(model_parameters['b1'])
                W2 = None
                b2 = None

                if self.num_hidden_layers == 1:
                    W2 = dy.parameter(model_parameters['W2'])
                    b2 = dy.parameter(model_parameters['b2'])

            path_embedding = get_path_embedding(builder, lemma_lookup,
                                                pos_lookup, dep_lookup,
                                                dir_lookup, path)

            if self.use_xy_embeddings:
                zero_word = dy.inputVector([0.0] * self.lemma_embeddings_dim)
                path_embedding = dy.concatenate(
                    [zero_word, path_embedding, zero_word])

            h = W1 * path_embedding + b1

            if self.num_hidden_layers == 1:
                h = W2 * dy.tanh(h) + b2

            path_score = dy.softmax(h).npvalue().T
            path_scores.append(path_score)

        path_scores = np.vstack(path_scores)

        top_paths = []
        for i in range(len(relation_index)):
            indices = np.argsort(-path_scores[:, i])
            top_paths.append([
                (all_paths[index], path_scores[index, i]) for index in indices
                if threshold is None or path_scores[index, i] >= threshold
            ])

        return top_paths
Example #21
    def attend(self, encoded_inputs, h_t, input_masks=None):
        # encoded_inputs dimension is: seq len x 2*h x batch size, h_t dimension is h x batch size (for bilstm encoder)
        if len(encoded_inputs) == 1:
            # no need to attend if only one input state, compute output directly
            h_output = dn.tanh(self.w_c *
                               dn.concatenate([h_t, encoded_inputs[0]]))
            # return trivial alphas (all 1's since one input gets all attention)
            if input_masks:
                # if batching
                alphas = dn.inputTensor([1] * len(input_masks[0]),
                                        batched=True)
            else:
                alphas = dn.inputTensor([1], batched=True)
            return h_output, alphas

        # iterate through input states to compute attention scores
        # scores = [v_a * dn.tanh(w_a * h_t + u_a * h_input) for h_input in blstm_outputs]
        w_a_h_t = self.w_a * h_t
        scores = [
            self.v_a *
            dn.tanh(dn.affine_transform([w_a_h_t, self.u_a, h_input]))
            for h_input in encoded_inputs
        ]

        concatenated = dn.concatenate(scores)
        if input_masks:
            # if batching, multiply attention scores with input masks to zero-out scores for padded inputs
            dn_masks = dn.inputTensor(input_masks, batched=True)
            concatenated = dn.cmult(concatenated, dn_masks)

        # normalize scores
        alphas = dn.softmax(concatenated)

        # compute context vector with weighted sum for each seq in batch
        bo = dn.concatenate_cols(encoded_inputs)
        c = bo * alphas
        # c = dn.esum([h_input * dn.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)])

        # compute output vector using current decoder state and context vector
        h_output = dn.tanh(self.w_c * dn.concatenate([h_t, c]))

        return h_output, alphas
Example #22
def build_network(params, x_data):
    _, E, b, U, W, bp = params
    if isinstance(x_data, dict):
        prefix_ordinals = x_data['prefix']
        suffix_ordinals = x_data['suffix']
        x_ordinals = x_data['fullwords']
    else:
        prefix_ordinals = None
        suffix_ordinals = None
        x_ordinals = x_data
    x = dy.concatenate([E[i] for i in x_ordinals])
    if prefix_ordinals:
        x_pre = dy.concatenate([E[i] for i in prefix_ordinals])
        x = x + x_pre
    if suffix_ordinals:
        x_suf = dy.concatenate([E[i] for i in suffix_ordinals])
        x = x + x_suf
    output = dy.softmax(U * (dy.tanh(W * x + b)) + bp)
    return output
Example #23
    def __call__(self, x_embs):
        x_len = len(x_embs)

        # BiGRU
        hf = dy.concatenate_cols(
            self.fGRUBuilder.initial_state().transduce(x_embs))
        hb = dy.concatenate_cols(self.bGRUBuilder.initial_state().transduce(
            x_embs[::-1])[::-1])
        h = dy.concatenate([hf, hb])

        # Selective Gate
        hb_1 = dy.pick(hb, index=0, dim=1)
        hf_n = dy.pick(hf, index=x_len - 1, dim=1)
        s = dy.concatenate([hb_1, hf_n])

        # Selection
        sGate = dy.logistic(dy.colwise_add(self.Ws * h, self.Us * s + self.bs))
        hp = dy.cmult(h, sGate)

        return hp, hb_1
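In plain terms, the selective gate is an element-wise sigmoid gate over the BiGRU state matrix, with dy.colwise_add broadcasting the sentence-level term across token columns. A rough NumPy equivalent (illustrative sizes only):

import numpy as np

d, T = 4, 6
h = np.random.randn(2 * d, T)   # BiGRU states, one column per token
s = np.random.randn(2 * d)      # sentence vector [hb_1 ; hf_n]
Ws, Us = np.random.randn(2 * d, 2 * d), np.random.randn(2 * d, 2 * d)
bs = np.random.randn(2 * d)
gate = 1.0 / (1.0 + np.exp(-(Ws @ h + (Us @ s + bs)[:, None])))  # colwise_add analogue
hp = h * gate                   # element-wise selection, as dy.cmult does
assert hp.shape == (2 * d, T)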
Example #24
 def __call__(self, h, s):
     # hT -> ((L, h_dim), B), s -> ((s_dim, L), B)
     hT = dy.transpose(h)
     lin = self.U * s  # ((h_dim*n_label, L), B)
     if self.n_label > 1:
         lin = dy.reshape(lin, (self.h_dim, self.n_label))
     blin = hT * lin
     if self.n_label == 1:
         return blin + (hT * self.B if self.bias else 0)
     else:
         return dy.transpose(blin) + (self.V * dy.concatenate([h, s]) +
                                      self.B if self.bias else 0)
Example #25
 def get_coverage(self, a_t, prev_coverage, training=True):
     if not self.coverage:
         if not training:
             return None
         return dy.scalarInput(0), None
     coverage = a_t + prev_coverage
     if training:
         # Element-wise min against the coverage accumulated *before* this step;
         # min against the updated coverage would always just return a_t.
         return (
             dy.sum_elems(dy.min_dim(dy.concatenate([a_t, prev_coverage], d=1), d=1)),
             coverage,
         )
     return coverage
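The training branch returns the coverage loss of See et al.'s pointer-generator: the sum of element-wise minima between the current attention and the coverage accumulated so far, which penalizes re-attending to already-covered positions. A NumPy sketch with toy values:

import numpy as np

a_t = np.array([0.1, 0.7, 0.2])                  # current attention weights
prev_coverage = np.array([0.3, 0.1, 0.6])        # attention accumulated so far
cov_loss = np.minimum(a_t, prev_coverage).sum()  # penalty for re-attending
coverage = a_t + prev_coverage                   # updated coverage for the next step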
Example #26
    def evaluate_network_from_sentence(self, sentence):
        char_coded_sentence = self.p3b.encode_sentence(sentence)
        char_lstm_vectors = self.p3b.construct_vector(char_coded_sentence)

        word_coded_sentence = self.encoder.encode_sentence_words(sentence)
        E = self.params["E"]
        word_embed_vectors = [E[w] for w, _ in word_coded_sentence]

        concat_vec = [
            dy.concatenate([e, c])
            for e, c in zip(word_embed_vectors, char_lstm_vectors)
        ]
        return self.common.evaluate_network_from_embs(concat_vec, False)
Example #27
 def _feed_one(self, s, inputs, caches, prev_embeds):
     # first layer with attention
     next_caches = self.anode(s, caches["hid"][0]["H"], caches)
     g_input = dy.concatenate([inputs, next_caches["ctx"]])
     hidd = self.gnodes[0](g_input, caches["hid"][0])
     this_hiddens = [hidd]
     # later layers
     for i in range(1, self.n_layers):
         ihidd = self.gnodes[i](this_hiddens[i - 1]["H"], caches["hid"][i])
         this_hiddens.append(ihidd)
     # append and return
     next_caches["hid"] = this_hiddens
     return next_caches
Example #29
    def get_embeddings(self,
                       word_inds,
                       tag_inds,
                       is_train=False,
                       train_bert_embedding=None):
        if is_train:
            self.char_lstm.set_dropout(self.dropout)
        else:
            self.char_lstm.disable_dropout()

        embeddings = []
        for idx, (w, t) in enumerate(zip(word_inds, tag_inds)):
            if w > 2:
                count = self.vocab.word_freq_list[w]
                if not count or (is_train
                                 and np.random.rand() < self.unk_param /
                                 (self.unk_param + count)):
                    w = 0

            tag_embedding = self.tag_embeddings[t]
            chars = list(self.vocab.i2w[w]) if w > 2 else [self.vocab.i2w[w]]
            char_lstm_outputs = self.char_lstm.transduce([
                self.char_embeddings[self.vocab.c2i[char]]
                for char in [Vocabulary.START] + chars + [Vocabulary.STOP]
            ])
            char_embedding = dy.concatenate([
                char_lstm_outputs[-1][:self.char_lstm_dim],
                char_lstm_outputs[0][self.char_lstm_dim:]
            ])
            word_embedding = self.word_embeddings[w]
            embs = [tag_embedding, char_embedding, word_embedding]
            if train_bert_embedding is not None:
                if w != 0:
                    embs.append(dy.inputTensor(train_bert_embedding[idx]))
                else:
                    embs.append(dy.zeros(768))
            embeddings.append(dy.concatenate(embs))

        return embeddings
Example #30
    def decode_loss(self, src1, src2, tgt):
        src1_mat, src2_mat, src1_w1dt, src2_w1dt, decoder_state = self.encoder_forward(
            src1, src2
        )
        _, prev_coverage = self.get_coverage(
            a_t=dy.vecInput(len(src1)), prev_coverage=dy.vecInput(len(src1))
        )

        loss = []
        cov_loss = []
        diag_loss = []

        embedded_tgt = self.embed_idx(tgt, self.tgt_lookup)
        last_output_embeddings = self.tgt_lookup[self.tgt_vocab.str2int(EOS)]

        for t, (char, embedded_char) in enumerate(zip(tgt, embedded_tgt)):
            a_t, c1_t = self.attend(
                src1_mat,
                decoder_state,
                src1_w1dt,
                self.att1_w2,
                self.att1_v,
                prev_coverage,
            )
            if not self.single_source:
                _, c2_t = self.attend(
                    src2_mat, decoder_state, src2_w1dt, self.att2_w2, self.att2_v, None
                )
            else:
                c2_t = dy.vecInput(2 * HIDDEN_DIM)

            x_t = dy.concatenate([c1_t, c2_t, last_output_embeddings])
            decoder_state = decoder_state.add_input(x_t)

            out_vector = self.dec_w * decoder_state.output() + self.dec_b
            probs = dy.softmax(out_vector)
            probs, _ = self.get_pointergen_probs(
                c1_t, decoder_state, x_t, a_t, probs, src1
            )

            loss.append(-dy.log(dy.pick(probs, char)))
            cov_loss_cur, prev_coverage = self.get_coverage(a_t, prev_coverage)
            cov_loss.append(cov_loss_cur)
            diag_loss.append(self.get_diag_loss(a_t, t))

            last_output_embeddings = embedded_char

        loss = dy.esum(loss)
        cov_loss = dy.esum(cov_loss)
        diag_loss = dy.esum(diag_loss)
        return loss + COV_LOSS_WEIGHT * cov_loss + DIAG_LOSS_WEIGHT * diag_loss
Example #31
    def encoder_forward(self, src1, src2):
        embedded_src1 = self.embed_idx(src1, self.src1_lookup)
        if self.single_source:
            embedded_src2 = [dy.vecInput(EMBEDDING_DIM) for idx in src2]
        else:
            embedded_src2 = self.embed_idx(src2, self.src2_lookup)

        encoded_src1 = self.encode(
            embedded_src1, self.enc1_fwd_lstm, self.enc1_bwd_lstm
        )
        encoded_src2 = self.encode(
            embedded_src2, self.enc2_fwd_lstm, self.enc2_bwd_lstm
        )

        src1_mat = dy.concatenate_cols(encoded_src1)
        src1_w1dt = self.att1_w1 * src1_mat
        src2_mat = dy.concatenate_cols(encoded_src2)
        src2_w1dt = self.att2_w1 * src2_mat

        if not self.single_source:
            start = (
                self.W_s * dy.concatenate([encoded_src1[-1], encoded_src2[-1]])
                + self.b_s
            )
        else:
            start = (
                self.W_s
                * dy.concatenate([encoded_src1[-1], dy.vecInput(2 * HIDDEN_DIM)])
                + self.b_s
            )

        last_output_embeddings = self.tgt_lookup[self.tgt_vocab.str2int(EOS)]
        c1_t = dy.vecInput(2 * HIDDEN_DIM)
        c2_t = dy.vecInput(2 * HIDDEN_DIM)
        decoder_state = self.dec_lstm.initial_state([start, dy.tanh(start)]).add_input(
            dy.concatenate([c1_t, c2_t, last_output_embeddings])
        )
        return src1_mat, src2_mat, src1_w1dt, src2_w1dt, decoder_state
Example #33
    def get_c_repr(self, word, train=False):
        """
        Get representation of word via characters sub-LSTMs
        """
        # get representation for words
        if word in self.w2c_cache:
            chars_of_token = self.w2c_cache[word]
            if train:
                chars_of_token = [drop(c, self.ccount, self.c_dropout_rate) for c in chars_of_token]
        else:
            chars_of_token = (array.array('I', [self.c2i[WORD_START]]) +
                              array.array('I', [self.get_c_idx(c, train=train) for c in word]) +
                              array.array('I', [self.c2i[WORD_END]]))

        char_feats = [self.cembeds[c_id] for c_id in chars_of_token]
        # use last state as word representation
        f_char, b_char = self.char_rnn.predict_sequence(char_feats, char_feats)
        return dynet.concatenate([f_char[-1], b_char[-1]])
Example #34
    def predict(self, seq, train=False, output_confidences=False, unk_tag=None, update_embeds=True):
        """
        predict tags for a sentence represented as char+word embeddings and compute losses for this instance
        """
        if not train:
            dynet.renew_cg()
        features = self.get_features(seq.words, train=train, update=update_embeds)

        output_expected_at_layer = self.predictors["task_expected_at"][seq.task_id]
        output_expected_at_layer -= 1

        # go through layers
        # input is now combination of w + char emb
        prev = features
        prev_rev = features
        num_layers = self.h_layers

        for i in range(num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(prev, prev_rev)
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [self.activation(s) for s in forward_sequence]
                backward_sequence = [self.activation(s) for s in backward_sequence]

            if i == output_expected_at_layer:
                output_predictor = self.predictors["output_layers_dict"][seq.task_id]
                concat_layer = [dynet.concatenate([f, b])
                                for f, b in zip(forward_sequence, reversed(backward_sequence))]

                if train and self.noise_sigma > 0.0:
                    concat_layer = [dynet.noise(fe, self.noise_sigma) for fe in concat_layer]
                # fill-in predictions and get loss per tag
                losses = output_predictor.predict_sequence(seq, concat_layer,
                                                           train=train, output_confidences=output_confidences,
                                                           unk_tag=unk_tag, dictionary=self.dictionary,
                                                           type_constraint=self.type_constraint)

            prev = forward_sequence
            prev_rev = backward_sequence 

        if train:
            # return losses
            return losses
        else:
            return seq.pred_tags, seq.tag_confidences
Example #35
    def viterbi(self, observations, unk_tag=None, dictionary=None):
        #if dictionary:
        #    raise NotImplementedError("type constraints not yet implemented for CRF")
        backpointers = []
        init_vvars   = [-1e10] * self.num_tags
        init_vvars[START_TAG] = 0 # <Start> has all the probability
        for_expr     = dynet.inputVector(init_vvars)
        trans_exprs  = [self.trans_mat[idx] for idx in range(self.num_tags)]
        for obs in observations:
            bptrs_t = []
            vvars_t = []
            for next_tag in range(self.num_tags):
                next_tag_expr = for_expr + trans_exprs[next_tag]
                next_tag_arr = next_tag_expr.npvalue()
                best_tag_id  = np.argmax(next_tag_arr)
                if unk_tag:
                    best_tag = self.index2tag[best_tag_id]
                    if best_tag == unk_tag:
                        next_tag_arr[np.argmax(next_tag_arr)] = 0 # zero out the best score
                        best_tag_id = np.argmax(next_tag_arr) # get second best

                bptrs_t.append(best_tag_id)
                vvars_t.append(dynet.pick(next_tag_expr, best_tag_id))
            for_expr = dynet.concatenate(vvars_t) + obs
            backpointers.append(bptrs_t)
        # Perform final transition to terminal
        terminal_expr = for_expr + trans_exprs[END_TAG]
        terminal_arr  = terminal_expr.npvalue()
        best_tag_id   = np.argmax(terminal_arr)
        path_score    = dynet.pick(terminal_expr, best_tag_id)
        # Reverse over the backpointers to get the best path
        best_path = [best_tag_id] # Start with the tag that was best for terminal
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        start = best_path.pop() # Remove the start symbol
        best_path.reverse()
        assert start == START_TAG
        # Return best path and best path's score
        return best_path, path_score