示例#1
0
 def __call__(self, h, s):
     """Compute the bilinear score ``transpose(h) * (U * s)``.

     When ``self.h_bias`` (resp. ``self.s_bias``) is set, a row of ones is
     appended to ``h`` (resp. ``s``) before scoring. When ``self.n_label``
     is greater than one, ``U * s`` is reshaped to ``(h_dim, n_label)``
     before the final product.
     """

     def append_ones(expr):
         # 2-D inputs get a (1, n_cols) row of ones; anything else gets a
         # single 1.
         shape = expr.dim()[0]
         if len(shape) == 2:
             ones = np.ones((1, shape[1]), dtype=np.float32)
         else:
             ones = np.ones((1, ), dtype=np.float32)
         return dy.concatenate([expr, dy.inputTensor(ones)])

     if self.h_bias:
         h = append_ones(h)
     if self.s_bias:
         s = append_ones(s)

     projected = self.U * s
     if self.n_label > 1:
         projected = dy.reshape(projected, (self.h_dim, self.n_label))
     return dy.transpose(h) * projected
示例#2
0
    def __call__(self, h, s):
        """Score ``h`` against ``s`` through the bilinear map ``self.U``.

        Note the cross-over in the bias handling: ``self.h_bias`` appends
        ones to ``s`` and ``self.s_bias`` appends ones to ``h``. The shape
        of the ones block is always derived from ``h``. For a single label
        the raw product is returned; otherwise it is transposed first.
        """
        # Shape notes from the original author:
        # hT -> ((L, h_dim), B), s -> ((s_dim, L), B)
        h_shape = h.dim()[0]
        # One column of ones per position for 2-D input, a single 1 otherwise.
        ones_shape = (1, h_shape[1]) if len(h_shape) == 2 else (1, )

        if self.h_bias:
            s = dy.concatenate(
                [s, dy.inputTensor(np.ones(ones_shape, dtype=np.float32))])
        if self.s_bias:
            h = dy.concatenate(
                [h, dy.inputTensor(np.ones(ones_shape, dtype=np.float32))])

        h_transposed = dy.transpose(h)
        projected = self.U * s  # ((h_dim*n_label, L), B)
        if self.n_label > 1:
            projected = dy.reshape(projected, (self.h_dim, self.n_label))

        scores = h_transposed * projected
        return scores if self.n_label == 1 else dy.transpose(scores)
示例#3
0
 def __call__(self, input_exp, hidden_exp, mask=None):
     """Run one GRU step and return the new state as ``{"H": hidden}``.

     ``hidden_exp`` is a dict whose ``"H"`` entry is the previous hidden
     state. ``mask`` is optional; where it is 0 the previous hidden state
     is passed through instead of the freshly computed one.
     """
     # Dropout kind 1: plain dropout on the input.
     if self.idrop > 0.:
         input_exp = dy.dropout(input_exp, self.idrop)
     prev_h = hidden_exp["H"]

     # Dropout kind 2: fixed masks, separate ones for the gate path and
     # the candidate path.
     if self.gdrop > 0.:
         gate_x = dy.cmult(input_exp, self.masks[0])
         gate_h = dy.cmult(prev_h, self.masks[1])
         cand_x = dy.cmult(input_exp, self.masks[2])
         cand_h = dy.cmult(prev_h, self.masks[3])
     else:
         gate_x = cand_x = input_exp
         gate_h = cand_h = prev_h

     # Reset and update gates are computed jointly, then split in half.
     gates = dy.logistic(
         dy.affine_transform([
             self.iparams["brz"], self.iparams["x2rz"], gate_x,
             self.iparams["h2rz"], gate_h
         ]))
     reset_gate = dy.pick_range(gates, 0, self.n_hidden)
     # NOTE(review): the second half is taken through `BK`, while every
     # other call here uses `dy` - confirm `BK` is defined in this module.
     update_gate = BK.pick_range(gates, self.n_hidden, 2 * self.n_hidden)

     reset_h = dy.cmult(reset_gate, cand_h)
     candidate = dy.tanh(
         dy.affine_transform([
             self.iparams["bh"], self.iparams["x2h"], cand_x,
             self.iparams["h2h"], reset_h
         ]))

     # The interpolation uses the original (un-masked) previous state.
     hidden = dy.cmult(update_gate, prev_h) + dy.cmult(
         (1. - update_gate), candidate)

     # mask: if 0 then pass through
     if mask is not None:
         mask_row = np.asarray(mask).reshape((1, -1))
         keep_new = dy.inputTensor(mask_row, True)  # 1.0 for real words
         keep_old = dy.inputTensor(1.0 - mask_row,
                                   True)  # 1.0 for padding words (mask=0)
         hidden = hidden * keep_new + prev_h * keep_old
     return {"H": hidden}
示例#4
0
    def get_features(self, words, train=False, update=True):
        """
        Build one feature representation per word.

        Word representations always come from ``get_w_repr``. When
        ``c_in_dim`` is positive they are concatenated with character
        representations and, if lexicon features were built, with those as
        well. With ``train`` set, noise of scale ``noise_sigma`` is added
        to every feature.
        """
        # word embeddings
        word_reprs = np.array(
            [self.get_w_repr(tok, train=train, update=update) for tok in words])

        # lexicon features are built only when a dictionary is present and
        # type constraints are off
        if self.dictionary and not self.type_constraint:
            lexicon_reprs = np.array([self.get_lex_repr(tok) for tok in words])
        else:
            lexicon_reprs = []

        if self.c_in_dim > 0:
            # char embeddings
            char_reprs = [self.get_c_repr(tok, train=train) for tok in words]
            if len(lexicon_reprs) > 0:
                groups = zip(word_reprs, char_reprs,
                             dynet.inputTensor(lexicon_reprs))
            else:
                groups = zip(word_reprs, char_reprs)
            features = [dynet.concatenate(list(group)) for group in groups]
        else:
            # without char embeddings the word representations are used
            # on their own
            features = word_reprs

        if train:  # only do at training time
            features = [
                dynet.noise(feat, self.noise_sigma) for feat in features
            ]
        return features
示例#5
0
    def evaluate_network(self, x_np, apply_final_activation=True, dropout=False):
        """
        Feed an input through every layer of the network and return the result.

        x_np may be a numpy array (loaded into a dynet vector input), a list
        (loaded as a batched input tensor), or anything else, which is used
        as-is (e.g. an already-built dynet expression).

        Every layer computes activation(W*x + b). All layers but the last use
        self.hidden_activation; the last one uses self.output_activation, or
        the identity when apply_final_activation is False.

        When dropout is True, dy.dropout with DROPOUT_RATE is applied to the
        output of every layer, including the last one.
        """
        #self.check_input_size(x_np)
        n_stages = self.n_layers - 1

        # will be skipped for x_np that are already
        # _dynet.__tensorInputExpression or _dynet._vecInputExpression
        if isinstance(x_np, np.ndarray):
            print("ndarray")
            # vecInput needs the vector size up front; the values are filled
            # in afterwards with set().
            x = dy.vecInput(len(x_np))
            x.set(x_np)
        elif isinstance(x_np, list):
            print("list")
            x = dy.inputTensor(x_np, batched=True)
        else:
            x = x_np

        if apply_final_activation:
            final_activation = self.output_activation
        else:
            final_activation = lambda value: value

        last_stage = n_stages - 1
        # zip() against range(n_stages) caps the number of layers applied.
        for i, W, b in zip(range(n_stages), self.params["W"], self.params["b"]):
            activation = (final_activation
                          if i == last_stage else self.hidden_activation)
            x = activation(W * x + b)
            if dropout:
                x = dy.dropout(x, DROPOUT_RATE)
        return x
示例#6
0
    def GCN(self, A, H):
        '''
            One graph-convolution layer:
            H_2 = RELU(D^{-0.5} * A * D^{-0.5} * H * W_GCN)

            D is the diagonal matrix of the column sums of A. The
            normalised adjacency is computed in numpy and fed to dynet as a
            constant input tensor.
        '''

        inv_sqrt_degree = np.power(np.sum(A, axis=0), -0.5)
        D = np.diag(inv_sqrt_degree)
        normalised = D.dot(A).dot(D)
        return dy.rectify(dy.inputTensor(normalised) * H * self.W_GCN)
示例#7
0
    def attend(self, encoded_inputs, h_t, input_masks=None):
        """Attend over ``encoded_inputs`` given the decoder state ``h_t``.

        Returns ``(h_output, alphas)``: the tanh-projected concatenation of
        ``h_t`` with the context vector, and the attention weights. With a
        single input state no scoring is done and the weights are all ones.
        """
        # Shape note from the original author: encoded_inputs dimension is
        # seq len x 2*h x batch size, h_t dimension is h x batch size (for
        # bilstm encoder)
        if len(encoded_inputs) == 1:
            # Only one state to look at: it receives all the attention.
            single_output = dn.tanh(
                self.w_c * dn.concatenate([h_t, encoded_inputs[0]]))
            # When batching, emit one trivial weight per batch element.
            n_ones = len(input_masks[0]) if input_masks else 1
            trivial_alphas = dn.inputTensor([1] * n_ones, batched=True)
            return single_output, trivial_alphas

        # score_j = v_a * tanh(w_a * h_t + u_a * h_input_j); the h_t term is
        # shared by every input state, so it is computed once.
        projected_state = self.w_a * h_t
        raw_scores = []
        for enc in encoded_inputs:
            raw_scores.append(
                self.v_a *
                dn.tanh(dn.affine_transform([projected_state, self.u_a, enc])))

        score_vector = dn.concatenate(raw_scores)
        if input_masks:
            # When batching, scores of padded inputs are multiplied by the
            # mask.
            # NOTE(review): a score of 0 still receives softmax weight, so
            # padding is not fully excluded - confirm this is intended.
            score_vector = dn.cmult(
                score_vector, dn.inputTensor(input_masks, batched=True))

        # normalize scores
        alphas = dn.softmax(score_vector)

        # Context vector: attention-weighted sum of the input states.
        context = dn.concatenate_cols(encoded_inputs) * alphas

        # Output vector from the current decoder state and the context.
        return dn.tanh(self.w_c * dn.concatenate([h_t, context])), alphas
示例#8
0
    def tree2graph(tree):
        '''
            Return the upper triangular adjacency matrix of the tree.

            Entry (i, j) with i <= j is 1 when i == j or when
            tree.span_labels(i, j) reports at least one label; every other
            entry is 0. The matrix is returned as a dynet input tensor.
        '''

        size = len(tree.sentence)
        # Start from the identity: every position is adjacent to itself.
        adjacency = np.eye(size)
        for start in range(size):
            for end in range(start, size):
                labels, _ = tree.span_labels(start, end)
                if len(labels) > 0:
                    adjacency[start, end] = 1

        return dy.inputTensor(adjacency)
示例#9
0
    def get_embeddings(self,
                       word_inds,
                       tag_inds,
                       is_train=False,
                       train_bert_embedding=None):
        """Return one concatenated embedding expression per (word, tag) pair.

        Each embedding joins the tag embedding, a character-LSTM embedding
        and the word embedding. When ``train_bert_embedding`` is given, the
        row for that position is appended as well (a 768-dim zero vector is
        used instead for words mapped to index 0).
        """
        if is_train:
            self.char_lstm.set_dropout(self.dropout)
        else:
            self.char_lstm.disable_dropout()

        use_bert = train_bert_embedding is not None
        result = []
        for position, (word_id, tag_id) in enumerate(zip(word_inds, tag_inds)):
            # Indices above 2 are replaced by index 0 when their count is
            # falsy, or - at training time only - at random with probability
            # unk_param / (unk_param + count).
            if word_id > 2:
                freq = self.vocab.word_freq_list[word_id]
                if not freq:
                    word_id = 0
                elif is_train and np.random.rand() < self.unk_param / (
                        self.unk_param + freq):
                    word_id = 0

            tag_vec = self.tag_embeddings[tag_id]

            # Indices up to 2 are kept as a single symbol instead of being
            # split into characters.
            if word_id > 2:
                symbols = list(self.vocab.i2w[word_id])
            else:
                symbols = [self.vocab.i2w[word_id]]
            padded = [Vocabulary.START] + symbols + [Vocabulary.STOP]
            char_states = self.char_lstm.transduce(
                [self.char_embeddings[self.vocab.c2i[ch]] for ch in padded])

            # First half of the last state plus second half of the first
            # state - presumably the forward/backward ends of a BiLSTM;
            # TODO confirm.
            last_head = char_states[-1][:self.char_lstm_dim]
            first_tail = char_states[0][self.char_lstm_dim:]
            char_vec = dy.concatenate([last_head, first_tail])

            word_vec = self.word_embeddings[word_id]
            pieces = [tag_vec, char_vec, word_vec]
            if use_bert:
                if word_id != 0:
                    pieces.append(
                        dy.inputTensor(train_bert_embedding[position]))
                else:
                    pieces.append(dy.zeros(768))
            result.append(dy.concatenate(pieces))

        return result
    def calculate_loss(self, sents):
        """Return the summed training loss over ``sents`` on a fresh graph.

        Each sentence contributes its CRF negative log loss plus the summed
        autoencoder reconstruction loss divided by ``self.featsize``. With
        the autoencoder disabled the reconstruction term is a constant 0.
        """
        dy.renew_cg()
        per_sentence = []
        for sentence in sents:
            emissions, transitions, rebuilt_feats = (
                self.get_features_for_tagging(sentence, True))
            gold = [tag for _chars, _word, _feats, tag in sentence]
            crf_loss = self.crf_module.negative_log_loss(
                emissions, transitions, gold)

            if self.autoencoder:
                recon_terms = []
                for rebuilt, (_chars, _word, feats, _tag) in zip(
                        rebuilt_feats, sentence):
                    recon_terms.append(
                        dy.binary_log_loss(rebuilt, dy.inputTensor(feats)))
            else:  # remove autoencoder loss
                recon_terms = [dy.scalarInput(0)]

            per_sentence.append(
                crf_loss + (dy.esum(recon_terms) / self.featsize))

        return dy.esum(per_sentence)
示例#11
0
    def get_features(self, words, train=False, update=True):
        """
        Return the feature representation of every word in ``words``.

        Word representations are concatenated with character
        representations (and lexicon features, when built) if ``c_in_dim``
        is positive; otherwise the word representations are returned alone.
        Noise of scale ``noise_sigma`` is added when ``train`` is set.
        """
        # word embeddings
        word_vecs = np.array(
            [self.get_w_repr(w, train=train, update=update) for w in words])

        # lexicon features (only with a dictionary and no type constraint)
        lexicon_vecs = []
        if self.dictionary and not self.type_constraint:
            lexicon_vecs = np.array([self.get_lex_repr(w) for w in words])

        if not self.c_in_dim > 0:
            features = word_vecs
        else:
            # char embeddings
            char_vecs = [self.get_c_repr(w, train=train) for w in words]
            features = []
            if len(lexicon_vecs) > 0:
                lexicon_expr = dynet.inputTensor(lexicon_vecs)
                for w_vec, c_vec, l_vec in zip(word_vecs, char_vecs,
                                               lexicon_expr):
                    features.append(dynet.concatenate([w_vec, c_vec, l_vec]))
            else:
                for w_vec, c_vec in zip(word_vecs, char_vecs):
                    features.append(dynet.concatenate([w_vec, c_vec]))

        if train:  # only do at training time
            features = [dynet.noise(f, self.noise_sigma) for f in features]
        return features
示例#12
0
def main():
    """Train a CNN sentence classifier with DyNet and save it every epoch.

    Parses the command-line options, loads pretrained word2vec vectors,
    builds the vocabulary and datasets, dumps the vocabulary mappings into
    RESULTS_DIR, then runs ``--n_epochs`` epochs of mini-batch training,
    each followed by a validation pass and a model checkpoint.
    """
    parser = argparse.ArgumentParser(
        description=
        'Convolutional Neural Networks for Sentence Classification in DyNet')

    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument(
        '--train_x_path',
        type=str,
        default='./data/train_x.txt',
        help='File path of train x data [default: `./data/train_x.txt`]')
    parser.add_argument(
        '--train_y_path',
        type=str,
        default='./data/train_y.txt',
        help='File path of train y data [default: `./data/train_y.txt`]')
    parser.add_argument(
        '--valid_x_path',
        type=str,
        default='./data/valid_x.txt',
        help='File path of valid x data [default: `./data/valid_x.txt`]')
    parser.add_argument(
        '--valid_y_path',
        type=str,
        default='./data/valid_y.txt',
        help='File path of valid y data [default: `./data/valid_y.txt`]')
    parser.add_argument('--n_epochs',
                        type=int,
                        default=10,
                        help='Number of epochs [default: 10]')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Mini batch size [default: 64]')
    parser.add_argument('--win_sizes',
                        type=int,
                        nargs='*',
                        default=[3, 4, 5],
                        help='Window sizes of filters [default: [3, 4, 5]]')
    parser.add_argument(
        '--num_fil',
        type=int,
        default=100,
        help='Number of filters in each window size [default: 100]')
    parser.add_argument('--s',
                        type=float,
                        default=3.0,
                        help='L2 norm constraint on w [default: 3.0]')
    parser.add_argument('--dropout_prob',
                        type=float,
                        default=0.5,
                        help='Dropout probability [default: 0.5]')
    parser.add_argument(
        '--v_strategy',
        type=str,
        default='static',
        help=
        'Embedding strategy. rand: Random  initialization. static: Load pretrained embeddings and do not update during the training. non-static: Load pretrained embeddings and update during the training. [default: static]'
    )
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=4096,
        help='Amount of memory to allocate [mb] [default: 4096]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    N_EPOCHS = args.n_epochs
    WIN_SIZES = args.win_sizes
    BATCH_SIZE = args.batch_size
    EMB_DIM = 300
    OUT_DIM = 1
    L2_NORM_LIM = args.s
    NUM_FIL = args.num_fil
    DROPOUT_PROB = args.dropout_prob
    V_STRATEGY = args.v_strategy
    ALLOC_MEM = args.alloc_mem

    # Any strategy other than the three single-channel ones is treated as
    # multichannel and uses two embedding channels.
    if V_STRATEGY in ['rand', 'static', 'non-static']:
        NUM_CHA = 1
    else:
        NUM_CHA = 2

    # FILE paths
    W2V_PATH = './GoogleNews-vectors-negative300.bin'
    TRAIN_X_PATH = args.train_x_path
    TRAIN_Y_PATH = args.train_y_path
    VALID_X_PATH = args.valid_x_path
    VALID_Y_PATH = args.valid_y_path

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load pretrained embeddings
    pretrained_model = gensim.models.KeyedVectors.load_word2vec_format(
        W2V_PATH, binary=True)
    vocab = pretrained_model.wv.vocab.keys()
    w2v = pretrained_model.wv

    # Build dataset =======================================================================================================
    w2c = build_w2c(TRAIN_X_PATH, vocab=vocab)
    w2i, i2w = build_w2i(TRAIN_X_PATH, w2c, unk='unk')
    train_x, train_y = build_dataset(TRAIN_X_PATH,
                                     TRAIN_Y_PATH,
                                     w2i,
                                     unk='unk')
    valid_x, valid_y = build_dataset(VALID_X_PATH,
                                     VALID_Y_PATH,
                                     w2i,
                                     unk='unk')

    train_x, train_y = sort_data_by_length(train_x, train_y)
    valid_x, valid_y = sort_data_by_length(valid_x, valid_y)

    VOCAB_SIZE = len(w2i)
    print('VOCAB_SIZE:', VOCAB_SIZE)

    V_init = init_V(w2v, w2i)

    # Persist both vocabulary mappings next to the model checkpoints.
    with open(os.path.join(RESULTS_DIR, './w2i.dump'),
              'wb') as f_w2i, open(os.path.join(RESULTS_DIR, './i2w.dump'),
                                   'wb') as f_i2w:
        pickle.dump(w2i, f_w2i)
        pickle.dump(i2w, f_i2w)

    # Build model =================================================================================
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    # V1: initialised from the pretrained vectors unless the strategy is
    # 'rand'; updated during training only for 'rand' and 'non-static'.
    V1 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
    if V_STRATEGY in ['static', 'non-static', 'multichannel']:
        V1.init_from_array(V_init)
    if V_STRATEGY in ['static', 'multichannel']:
        V1_UPDATE = False
    else:  # 'rand', 'non-static'
        V1_UPDATE = True
    make_emb_zero(V1, [w2i['<s>'], w2i['</s>']], EMB_DIM)

    # V2: second, trainable channel; only exists for 'multichannel'.
    if V_STRATEGY == 'multichannel':
        V2 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
        V2.init_from_array(V_init)
        V2_UPDATE = True
        make_emb_zero(V2, [w2i['<s>'], w2i['</s>']], EMB_DIM)

    # NOTE(review): the dense input size is hard-coded to 3 * NUM_FIL, which
    # matches the default of three window sizes - confirm whether it should
    # follow len(WIN_SIZES) when --win_sizes is overridden.
    layers = [
        CNNText(model, EMB_DIM, WIN_SIZES, NUM_CHA, NUM_FIL, dy.tanh,
                DROPOUT_PROB),
        Dense(model, 3 * NUM_FIL, OUT_DIM, dy.logistic)
    ]

    # Train model ================================================================================
    n_batches_train = math.ceil(len(train_x) / BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_x) / BATCH_SIZE)

    start_time = time.time()
    for epoch in range(N_EPOCHS):
        # Train
        loss_all_train = []
        pred_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(train_x[start:end], w2i, max(WIN_SIZES)).T
            t = np.array(train_y[start:end])

            sen_len = x.shape[0]

            if V_STRATEGY in ['rand', 'static', 'non-static']:
                x_embs = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs = dy.transpose(x_embs)
                x_embs = dy.reshape(x_embs, (sen_len, EMB_DIM, 1))
            else:  # multichannel
                x_embs1 = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs2 = dy.concatenate_cols(
                    [dy.lookup_batch(V2, x_t, update=V2_UPDATE) for x_t in x])
                x_embs1 = dy.transpose(x_embs1)
                x_embs2 = dy.transpose(x_embs2)
                x_embs = dy.concatenate([x_embs1, x_embs2], d=2)

            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=False)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_train.append(mb_loss.value())
            pred_all_train.extend(list(binary_pred(y.npvalue().flatten())))

            # Backward prop
            mb_loss.backward()
            trainer.update()

            # L2 norm constraint
            layers[1].scale_W(L2_NORM_LIM)

            # Make padding embs zero (only for the table that was updated)
            if V_STRATEGY in ['rand', 'non-static']:
                make_emb_zero(V1, [w2i['<s>'], w2i['</s>']], EMB_DIM)
            elif V_STRATEGY in ['multichannel']:
                make_emb_zero(V2, [w2i['<s>'], w2i['</s>']], EMB_DIM)

        # Valid
        loss_all_valid = []
        pred_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(valid_x[start:end], w2i, max(WIN_SIZES)).T
            t = np.array(valid_y[start:end])

            sen_len = x.shape[0]

            if V_STRATEGY in ['rand', 'static', 'non-static']:
                x_embs = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs = dy.transpose(x_embs)
                x_embs = dy.reshape(x_embs, (sen_len, EMB_DIM, 1))
            else:  # multichannel
                x_embs1 = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs2 = dy.concatenate_cols(
                    [dy.lookup_batch(V2, x_t, update=V2_UPDATE) for x_t in x])
                x_embs1 = dy.transpose(x_embs1)
                x_embs2 = dy.transpose(x_embs2)
                x_embs = dy.concatenate([x_embs1, x_embs2], d=2)

            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=True)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_valid.append(mb_loss.value())
            pred_all_valid.extend(list(binary_pred(y.npvalue().flatten())))

        print(
            'EPOCH: %d, Train Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Valid Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Time:: %.3f[s]'
            % (
                epoch + 1,
                np.mean(loss_all_train),
                f1_score(train_y, pred_all_train),
                accuracy_score(train_y, pred_all_train),
                np.mean(loss_all_valid),
                f1_score(valid_y, pred_all_valid),
                accuracy_score(valid_y, pred_all_valid),
                time.time() - start_time,
            ))

        # Save model =========================================================================================================================
        if V_STRATEGY in ['rand', 'static', 'non-static']:
            dy.save(os.path.join(RESULTS_DIR, './model_e' + str(epoch + 1)),
                    [V1] + layers)
        else:
            dy.save(os.path.join(RESULTS_DIR, './model_e' + str(epoch + 1)),
                    [V1, V2] + layers)
    def get_features_for_tagging(self, sentence, training):
        """Build the CRF inputs for one sentence.

        ``sentence`` is a sequence of ``(chars, word, feats, tag)`` tuples.
        Returns ``(features, t_features, feat_reconstruct)``: per-token
        emission scores, per-token ``(num_tags, num_tags)`` transition
        scores, and per-token reconstructions of the gazetteer features
        (all zeros when ``self.lstm_feats`` is off).
        """
        # Project the raw gazetteer features of every token.
        gazetteer = []
        for _chars, _word, feats, _tag in sentence:
            raw = dy.inputTensor(feats.reshape(self.featsize, 1))
            gazetteer.append(
                dy.affine_transform([self.feat_b, self.feat_w, raw]))
        # All-zero stand-ins, used wherever the features are switched off.
        blanks = [
            dy.inputTensor(np.zeros(shape=(FEAT_OUT_SIZE, 1)))
            for _ in sentence
        ]

        # Non-linear transform for soft gazetteer features
        if self.feat_func == "tanh":
            gazetteer = [dy.tanh(g) for g in gazetteer]
        elif self.feat_func == "relu":
            gazetteer = [dy.rectify(g) for g in gazetteer]

        # Soft gazetteer features at the LSTM level
        lstm_side = gazetteer if self.lstm_feats else blanks
        token_inputs = []
        for side, (chars, word, _feats, _tag) in zip(lstm_side, sentence):
            token_inputs.append(
                dy.concatenate([
                    self.cnn.encode(chars, training), self.word_embeds[word],
                    side
                ]))

        contexts = self.word_lstm.transduce(token_inputs)

        # Soft gazetteer features at the CRF level
        crf_side = gazetteer if self.crf_feats else blanks

        features = []
        for context, side in zip(contexts, crf_side):
            features.append(
                dy.affine_transform([
                    self.context_to_emit_b,
                    self.context_to_emit_w,
                    dy.concatenate([context, side]),
                ]))

        t_features = []
        for context, side in zip(contexts, crf_side):
            flat_transitions = dy.affine_transform([
                self.context_to_trans_b,
                self.context_to_trans_w,
                dy.concatenate([context, side]),
            ])
            t_features.append(
                dy.reshape(flat_transitions, (self.num_tags, self.num_tags)))

        # Autoencoder feature reconstruction
        if self.lstm_feats:
            feat_reconstruct = []
            for context in contexts:
                projected = dy.affine_transform(
                    [self.feat_reconstruct_b, self.feat_reconstruct_w, context])
                feat_reconstruct.append(dy.logistic(projected))
        else:
            feat_reconstruct = [
                dy.inputTensor(np.zeros(shape=(self.featsize, )))
                for _ in contexts
            ]

        return features, t_features, feat_reconstruct
示例#14
0
def generator(encoder, decoder, params_encoder, params_decoder, sentence, env,
              first, previous):
    """Decode a sequence of output words for `sentence` in environment `env`.

    The environment string is encoded element by element with the
    module-level `char_encoder`, the sentence word by word with `encoder`,
    and the decoder then emits words one at a time while attending over
    both encodings. Each emitted word is executed against `env` through
    `execute` to obtain the next environment.

    `first` is not used. The incoming value of `previous` is ignored; it is
    overwritten with the encoder's final output before returning.

    Returns `(generate, previous)`: the list of emitted words (with any
    '<start>' removed) and the encoder's final output expression.
    """
    pos_lookup = params_encoder["pos_lookup"]
    char_lookup = params_encoder["char_lookup"]
    char_v = params_decoder["attention_v"]
    char_w1 = params_decoder["attention_wc"]
    char_w2 = params_decoder["attention_bc"]
    # Encode every element of the environment state character by character
    # and tag it with the embedding of its position.
    sc_vector = []
    for i, world in enumerate(_state(env)):
        world = world
        sc0 = char_encoder.initial_state()
        sc = sc0
        for char in world:
            sc = sc.add_input(char_lookup[char2int[char]])
        sc_vector.append(dy.concatenate([sc.output(), pos_lookup[i]]))
    dy_sc_vector = dy.concatenate(sc_vector, d=1)
    s0 = encoder.initial_state()
    s = s0
    lookup = params_encoder["lookup"]
    attention_w = params_decoder["attention_w"]
    attention_b = params_decoder["attention_b"]
    # Append the end marker and map words to vocabulary indices; words not
    # in `vocab` map to '<unknown>'.
    sentence = sentence + ' <end>'
    sentence = [
        vocab.index(c) if c in vocab else vocab.index('<unknown>')
        for c in sentence.split()
    ]
    s_vector = []
    generate = []
    # Run the sentence encoder, keeping one softmax-ed projection per word
    # for the attention below.
    for word in (sentence):
        s = s.add_input(lookup[word])
        s_vector.append(dy.softmax(attention_w * s.output() + attention_b))
    encode_output = s.output()
    dy_s_vector = dy.concatenate(s_vector, d=1)
    # The decoder starts from the encoder's final state.
    _s0 = decoder.initial_state(s.s())
    _s = _s0
    R = params_decoder["R"]
    bias = params_decoder["bias"]
    input_word = "<start>"
    _lookup = params_decoder["lookup"]
    repeat = 0
    while True:
        # NOTE(review): dy_env is built on every step but never read below.
        dy_env = dy.inputTensor(get_state_embed3(env))
        repeat += 1
        word = vocab_out.index(input_word)
        # Attention over the sentence: dot product with the decoder output.
        weight = dy.softmax(
            dy.concatenate([dy.dot_product(x, _s.output()) for x in s_vector]))
        # Attention over the environment elements.
        weight_char = dy.softmax(
            dy.concatenate([
                char_v * dy.tanh(char_w1 * x + char_w2 * _s.output())
                for x in sc_vector
            ]))
        encode_state = dy_sc_vector * weight_char
        encode_output = dy_s_vector * weight
        _s = _s.add_input(
            dy.concatenate([_lookup[word], encode_output, encode_state]))
        probs = dy.softmax((R) * _s.output() + bias)
        # Walk down the candidates from most to least probable until one is
        # acceptable: '<end>' is always accepted, '<start>' is skipped, and
        # any other word must execute to something other than 'None'.
        top = 0
        while True:
            top += 1
            if top == 50:
                # Ranks 1..49 were all rejected: fall back to the best one.
                top = 1
                break
            prediction = np.argsort(probs.vec_value())[-top]
            if (vocab_out[prediction] == '<end>'): break
            if (vocab_out[prediction] == '<start>'): continue
            new_env = str(execute(env, [vocab_out[prediction]]))
            if new_env == 'None': continue
            break
        # Re-read the chosen rank (needed after the fallback above).
        prediction = np.argsort(probs.vec_value())[-top]
        input_word = vocab_out[prediction]
        if input_word == '<end>':
            break
        # Hard cap on decoding steps: the 10th step stops before appending,
        # so at most 9 words are emitted.
        if repeat >= 10:
            break
        generate.append(input_word)
        env = str(execute(env, [input_word]))
        # A failed execution resets the environment to this literal state.
        if env == 'None':
            env = '1:_ 2:_ 3:_ 4:_ 5:_ 6:_ 7:_'
    while '<start>' in generate:
        generate.remove('<start>')
    previous = s.output()
    return generate, previous
示例#15
0
def do_one_sentence(encoder, decoder, params_encoder, params_decoder, sentence,
                    output, env, first, previous):
    """Compute the training loss of one (sentence, output) pair.

    The environment string and the sentence are encoded as in decoding,
    then the decoder is stepped once per gold token in `output`, starting
    at index 1 and stopping at the step whose gold token is '<end>'. The
    loss is the sum over steps of the negative log probability of the gold
    token. The decoder is fed its own previous prediction, not the gold
    token, and that prediction is executed against `env` through `execute`.

    `first` is not used. The incoming value of `previous` is ignored; it is
    overwritten with the encoder's final output before returning.

    Returns `(loss, generate, previous)`: the summed loss expression, the
    list of predicted words (with any '<start>' removed) and the encoder's
    final output expression.
    """
    pos_lookup = params_encoder["pos_lookup"]
    char_lookup = params_encoder["char_lookup"]
    char_v = params_decoder["attention_v"]
    char_w1 = params_decoder["attention_wc"]
    char_w2 = params_decoder["attention_bc"]
    # Encode every element of the environment state character by character
    # and tag it with the embedding of its position.
    sc_vector = []
    for i, world in enumerate(_state(env)):
        world = world
        sc0 = char_encoder.initial_state()
        sc = sc0
        for char in world:
            sc = sc.add_input(char_lookup[char2int[char]])
        sc_vector.append(dy.concatenate([sc.output(), pos_lookup[i]]))
    dy_sc_vector = dy.concatenate(sc_vector, d=1)
    s0 = encoder.initial_state()
    s = s0
    lookup = params_encoder["lookup"]
    attention_w = params_decoder["attention_w"]
    attention_b = params_decoder["attention_b"]
    # Append the end marker and map words to vocabulary indices; words not
    # in `vocab` map to '<unknown>'.
    sentence = sentence + ' <end>'
    sentence = [
        vocab.index(c) if c in vocab else vocab.index('<unknown>')
        for c in sentence.split(' ')
    ]
    loss = []
    generate = []
    s_vector = []
    # Run the sentence encoder, keeping one softmax-ed projection per word
    # for the attention below.
    for word in (sentence):
        s = s.add_input(lookup[word])
        s_vector.append(dy.softmax(attention_w * s.output() + attention_b))
    encode_output = s.output()
    dy_s_vector = dy.concatenate(s_vector, d=1)
    # The decoder starts from the encoder's final state.
    _s0 = decoder.initial_state(s.s())
    _s = _s0
    R = params_decoder["R"]
    bias = params_decoder["bias"]
    # Gold tokens are read from index 1 onwards - presumably output[0] is
    # the '<start>' marker; TODO confirm against the caller.
    index = 1
    input_word = "<start>"
    _lookup = params_decoder["lookup"]
    while True:
        # NOTE(review): dy_env is built on every step but never read below.
        dy_env = dy.inputTensor(get_state_embed3(env))
        word = vocab_out.index(input_word)
        gt_y = vocab_out.index(output[index])

        # Attention over the sentence: dot product with the decoder output.
        weight = dy.softmax(
            dy.concatenate([dy.dot_product(x, _s.output()) for x in s_vector]))
        # Attention over the environment elements.
        weight_char = dy.softmax(
            dy.concatenate([
                char_v * dy.tanh(char_w1 * x + char_w2 * _s.output())
                for x in sc_vector
            ]))

        encode_output = dy_s_vector * weight
        encode_state = dy_sc_vector * weight_char
        _s = _s.add_input(
            dy.concatenate([_lookup[word], encode_output, encode_state]))
        probs = dy.softmax((R) * _s.output() + bias)
        # Take the most probable word, or the runner-up if that is '<start>'.
        prediction = np.argsort(probs.npvalue())[-1]
        if (vocab_out[prediction]) == '<start>':
            prediction = np.argsort(probs.npvalue())[-2]
        generate.append(vocab_out[prediction])
        # Negative log-likelihood of the gold token at this step.
        loss.append(-dy.log(dy.pick(probs, gt_y)))
        if output[index] == '<end>':
            break
        index += 1
        # The next decoder input is the model's own prediction.
        input_word = vocab_out[prediction]
        # A predicted '<end>' is not executed against the environment.
        if input_word == '<end>':
            continue
        env = str(execute(env, [input_word]))
        # A failed execution resets the environment to this literal state.
        if env == 'None':
            env = '1:_ 2:_ 3:_ 4:_ 5:_ 6:_ 7:_'
    loss = dy.esum(loss)
    while '<start>' in generate:
        generate.remove('<start>')
    previous = s.output()
    return loss, generate, previous
示例#16
0
 def get_graph(self, embedding):
     """Start a fresh computation graph and return ``U * tanh(W * x)``.

     ``x`` is ``embedding`` loaded as an input tensor; ``W`` and ``U`` are
     the parameters stored in ``self.pW`` and ``self.pU``.
     """
     dy.renew_cg()
     hidden_weights = dy.parameter(self.pW)
     output_weights = dy.parameter(self.pU)
     hidden = dy.tanh(hidden_weights * dy.inputTensor(embedding))
     return output_weights * hidden