Пример #1
0
  def calc_loss(self, src, db_idx, src_mask=None, trg_mask=None):
    """Compute a hinge retrieval loss between encoded sources and database targets.

    Args:
      src: source batch; embedded with ``self.src_embedder`` and encoded with
        ``self.src_encoder``.
      db_idx: indices into ``self.database``; position ``i`` is used as the
        gold target for source ``i``.
      src_mask: optional mask forwarded to the source embedder.
      trg_mask: ignored -- it is overwritten by the mask read from the database.

    Returns:
      The summed hinge loss as a DyNet expression.

    Raises:
      RuntimeError: if ``self.loss_direction`` is neither ``"forward"`` nor
        ``"bidirectional"``.
    """
    embedded_src = self.src_embedder.embed_sent(src, mask=src_mask)
    self.src_encoder.set_input(src)
    encoded_src = self.src_encoder.transduce(embedded_src)
    src_encodings = self.exprseq_pooling(encoded_src)

    trg_batch, trg_mask = self.database[db_idx]
    trg_encodings = self.encode_trg_example(trg_batch, mask=trg_mask)
    trg_dim = trg_encodings.dim()
    # Turn the batch of target encodings into one matrix so that a single
    # product scores every source against every target.
    trg_matrix = dy.reshape(trg_encodings, (trg_dim[0][0], trg_dim[1]))
    scores = dy.transpose(src_encodings) * trg_matrix

    n_examples = len(db_idx)
    gold = list(range(n_examples))
    if self.loss_direction == "forward":
      return dy.sum_batches(dy.hinge_batch(dy.transpose(scores), gold))
    if self.loss_direction == "bidirectional":
      square = dy.reshape(scores, (n_examples, n_examples))
      hinge_d0 = dy.hinge_dim(square, gold, d=0)
      hinge_d1 = dy.hinge_dim(square, gold, d=1)
      return dy.sum_elems(hinge_d0 + hinge_d1)
    raise RuntimeError("Illegal loss direction {}".format(self.loss_direction))
Пример #2
0
    def hier_attend(self, context_pre, context_pos, state):
        """Combine two context matrices with attention weights driven by ``state``.

        Each context is scored against the concatenated decoder state
        (``state.s()``) with an MLP, the two energies are softmax-normalised
        together, and the result weights the linearly projected contexts.

        :param context_pre: first context expression
        :param context_pos: second context expression
        :param state: RNN state exposing ``s()``
        :return: the attention-weighted combination of the projected contexts
        """
        w2 = dy.parameter(self.hier_w2)
        v = dy.parameter(self.hier_v)
        state_proj = w2 * dy.concatenate(list(state.s()))

        def score_and_project(w1_param, w_param, ctx):
            # Energy first, projection second -- same order as for each branch.
            w1 = dy.parameter(w1_param)
            energy = dy.transpose(
                v * dy.tanh(dy.colwise_add(w1 * ctx, state_proj)))
            projected = dy.parameter(w_param) * ctx
            return energy, projected

        energy_pre, proj_pre = score_and_project(
            self.hier_w1_pre, self.hier_w_pre, context_pre)
        energy_pos, proj_pos = score_and_project(
            self.hier_w1_pos, self.hier_w_pos, context_pos)

        beta = dy.softmax(dy.concatenate([energy_pre, energy_pos]))
        return dy.concatenate_cols([proj_pre, proj_pos]) * beta
Пример #3
0
    def encode_batch_seq(self, src_seq, src_seq_rev, sentLengths):
        """Encode a batch with forward and backward LSTMs and join the outputs.

        :param src_seq: iterable of input sequences for the forward LSTM
        :param src_seq_rev: iterable of input sequences for the backward LSTM
        :param sentLengths: true length of each sentence; only the first
            ``sentLen`` backward outputs of a sentence are reversed
        :return: list with one concatenated (forward, backward) expression per
            zipped pair of outputs
        """
        fwd_vectors = [
            self.enc_fwd_lstm.initial_state().transduce(src) for src in src_seq
        ]
        bwd_vectors = [
            self.enc_bwd_lstm.initial_state().transduce(src_rev)
            for src_rev in src_seq_rev
        ]

        # NOTE(review): dynet.transpose is applied to a Python list of
        # transduce() outputs here and below -- confirm this is the intended
        # helper for swapping the two list axes.
        bwd_vectors_T = dynet.transpose(bwd_vectors)

        # Pair every row with its actual sentence length. The previous code
        # zipped against range(len(sentLengths)), so the "length" was really
        # the row index and the wrong prefix was reversed.
        for i, (vec, sentLen) in enumerate(izip(bwd_vectors_T, sentLengths)):
            vec[:sentLen] = vec[:sentLen][::-1]
            bwd_vectors_T[i] = vec

        bwd_vectors = dynet.transpose(bwd_vectors_T)

        return [
            dynet.concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)
        ]
Пример #4
0
    def cal_scores(self, src_encodings, predict=False):
        """Compute POS and XPOS label scores for every source position.

        :param src_encodings: list of encoder outputs, one expression per position
        :param predict: when True no dropout is applied; otherwise dropout with
            rate ``self.dropout`` is applied to the encodings and to both
            hidden layers
        :return: tuple ``(pos_label, xpos_label)`` of score expressions
        """
        src_encodings = dy.concatenate_cols(src_encodings)  # src_ctx_dim, src_len, batch_size

        W_pos = dy.parameter(self.W_pos)
        b_pos = dy.parameter(self.b_pos)
        W_xpos = dy.parameter(self.W_xpos)
        b_xpos = dy.parameter(self.b_xpos)

        W_affine_pos = dy.parameter(self.W_affine_pos)
        b_affine_pos = dy.parameter(self.b_affine_pos)
        W_affine_xpos = dy.parameter(self.W_affine_xpos)
        b_affine_xpos = dy.parameter(self.b_affine_xpos)

        if not predict:
            src_encodings = dy.dropout_dim(src_encodings, 1, self.dropout)

        def hidden(bias, weight):
            # Shared MLP layer; dropout is only added outside prediction mode.
            layer = self.leaky_ReLu(
                dy.affine_transform([bias, weight, src_encodings]))
            if predict:
                return layer
            return dy.dropout_dim(layer, 1, self.dropout)

        pos = hidden(b_pos, W_pos)  # n_pos_mlp_units, src_len, bs
        xpos = hidden(b_xpos, W_xpos)

        pos_label = dy.affine_transform([b_affine_pos, dy.transpose(W_affine_pos), pos])
        xpos_label = dy.affine_transform([b_affine_xpos, dy.transpose(W_affine_xpos), xpos])

        return pos_label, xpos_label
Пример #5
0
 def calc_attention(self, state):
   """Return bilinear attention weights for ``state`` (no masking is applied).

   The normalised weights are also appended to ``self.attention_vecs``; the
   returned expression is their transpose. A warning is logged on every call.
   """
   logger.warning("BilinearAttender does currently not do masking, which may harm training results.")
   weights = dy.parameter(self.pWa)
   projected_state = dy.transpose(state) * weights
   attention = dy.softmax(projected_state * self.I)
   self.attention_vecs.append(attention)
   return dy.transpose(attention)
 def attend(self, H_e, h_t):
      """Return the attention-weighted combination of the encoder states.

      :param H_e: list of encoder state expressions (joined as columns)
      :param h_t: query state expression
      :return: context vector ``H * softmax((h_t^T * W * H)^T)``
      """
      encoder_matrix = dy.concatenate_cols(H_e)
      raw_scores = dy.transpose(h_t) * self.attention_weight * encoder_matrix
      weights = dy.softmax(dy.transpose(raw_scores))
      return encoder_matrix * weights
Пример #7
0
    def calc_attention(self, state):
        """Compute attention weights, using the previous weights as location features.

        When earlier attention vectors exist, the most recent one is convolved
        with ``self.pL`` and added to ``self.WI`` before scoring. If the current
        sentence has a mask, it is added to the scores with multiplicator
        -100.0 before the softmax. The result is appended to
        ``self.attention_vecs`` and returned.
        """
        V = dy.parameter(self.pV)
        U = dy.parameter(self.pU)

        encoded = self.WI
        sent_mask = self.curr_sent.mask
        if not self.attention_vecs:
            hidden = dy.tanh(dy.colwise_add(encoded, V * state))
        else:
            location = dy.conv2d(self.attention_vecs[-1],
                                 self.pL,
                                 stride=[1, 1],
                                 is_valid=False)
            loc_dim = location.dim()
            location = dy.transpose(
                dy.reshape(location,
                           (loc_dim[0][0], self.hidden_dim),
                           batch_size=loc_dim[1]))
            hidden = dy.tanh(dy.colwise_add(encoded + location, V * state))

        scores = dy.transpose(U * hidden)
        if sent_mask is not None:
            scores = sent_mask.add_to_tensor_expr(scores,
                                                  multiplicator=-100.0)
        attention = dy.softmax(scores)
        self.attention_vecs.append(attention)
        return attention
Пример #8
0
 def recurrence(self, xt, hmtm1, cmtm1, h_tilde_tm1, dropout_flag):
     """
     One step of an LSTM with truncated self-attention over its own memory.

     :param xt: current input, shape: (n_in)
     :param hmtm1: hidden memory [htm1, ..., h1], shape: (n_steps, n_out)
     :param cmtm1: cell memory, shape: (n_steps, n_out)
     :param h_tilde_tm1: previous hidden summary, shape: (n_out, )
     :param dropout_flag: whether to apply dropout to the input projection
     :return: tuple (hmt, cmt, h_tilde_t): the shifted hidden memory, the
         shifted cell memory and the new hidden summary
     """
     n_out = self.n_out

     # one attention energy per remembered step
     energies = []
     for step in range(self.n_steps):
         pre_activation = (self.W_h * hmtm1[step] + self.W_x * xt
                           + self.W_htilde * h_tilde_tm1)
         energies.append(dy.dot_product(self.u, dy.tanh(pre_activation)))
     attention = dy.softmax(dy.concatenate(energies))

     # attention-weighted summaries of the hidden and cell memories
     h_tilde_t = dy.reshape(dy.transpose(attention) * hmtm1, d=(n_out,))
     c_tilde_t = dy.transpose(attention) * cmtm1

     input_proj = self.W * xt
     if dropout_flag:
         # partial dropout: only the input projection is dropped
         input_proj = dy.dropout(input_proj, self.dropout_rate)
     hidden_proj = self.U * h_tilde_t

     # shape: (4*n_out), sliced into input/forget/output gates and candidate
     gates = input_proj + hidden_proj + self.b
     it = dy.logistic(gates[:n_out])
     ft = dy.logistic(gates[n_out:2*n_out])
     ot = dy.logistic(gates[2*n_out:3*n_out])
     c_hat = dy.tanh(gates[3*n_out:])

     ct = dy.cmult(ft, dy.reshape(c_tilde_t, d=(n_out,))) + dy.cmult(it, c_hat)
     ht = dy.cmult(ot, dy.tanh(ct))

     # drop the first memory row and append the new state as the last row
     hmt = dy.concatenate([hmtm1[1:], dy.reshape(ht, (1, n_out))])
     cmt = dy.concatenate([cmtm1[1:], dy.reshape(ct, (1, n_out))])
     return hmt, cmt, h_tilde_t
Пример #9
0
    def dycosine(query_vec, question_vec):
        """Return the cosine similarity of two vector expressions.

        No guard is applied against a zero norm in the denominator.
        """
        def norm(vec):
            return dy.sqrt(dy.transpose(vec) * vec)

        dot = dy.transpose(query_vec) * question_vec
        denominator = norm(query_vec) * norm(question_vec)
        return dy.cdiv(dot, denominator)
Пример #10
0
    def word_assoc_score(self, source_idx, target_idx, relation):
        """
        Score the association between a source and a target atom for a relation.

        NOTE THAT DROPOUT IS BEING APPLIED HERE (to the relation weights, with
        rate ``self.dropout``).
        :param source_idx: embedding index of source atom
        :param target_idx: embedding index of target atom
        :param relation: relation type
        :return: score expression; None if ``self.mode`` matches no known mode
        """
        # prepare
        s = self.embeddings[source_idx]
        if self.no_assoc:
            A = dy.const_parameter(self.word_assoc_weights[relation])
        else:
            A = dy.parameter(self.word_assoc_weights[relation])
        # dy.dropout returns a new expression instead of modifying A, so the
        # result has to be rebound; previously it was discarded and dropout
        # was never applied.
        A = dy.dropout(A, self.dropout)
        t = self.embeddings[target_idx]

        # compute
        if self.mode == BILINEAR_MODE:
            return dy.transpose(s) * A * t
        elif self.mode == DIAG_RANK1_MODE:
            diag_A = dyagonalize(A[0])
            rank1_BC = A[1] * dy.transpose(A[2])
            ABC = diag_A + rank1_BC
            return dy.transpose(s) * ABC * t
        elif self.mode == TRANSLATIONAL_EMBED_MODE:
            return -dy.l2_norm(s - t + A)
        elif self.mode == DISTMULT:
            return dy.sum_elems(dy.cmult(dy.cmult(s, A), t))
Пример #11
0
 def word_assoc_score(self, source_idx, target_idx, relation):
     """
     Score the association between a source and a target atom for a relation.

     NOTE THAT DROPOUT IS BEING APPLIED HERE (to the relation weights, with
     rate ``self.dropout``).
     :param source_idx: embedding index of source atom
     :param target_idx: embedding index of target atom
     :param relation: relation type
     :return: score expression; None if ``self.mode`` matches no known mode
     """
     # prepare
     s = self.embeddings[source_idx]
     if self.no_assoc:
         A = dy.const_parameter(self.word_assoc_weights[relation])
     else:
         A = dy.parameter(self.word_assoc_weights[relation])
     # dy.dropout returns a new expression instead of modifying A, so the
     # result has to be rebound; previously it was discarded and dropout
     # was never applied.
     A = dy.dropout(A, self.dropout)
     t = self.embeddings[target_idx]

     # compute
     if self.mode == BILINEAR_MODE:
         return dy.transpose(s) * A * t
     elif self.mode == DIAG_RANK1_MODE:
         diag_A = dyagonalize(A[0])
         rank1_BC = A[1] * dy.transpose(A[2])
         ABC = diag_A + rank1_BC
         return dy.transpose(s) * ABC * t
     elif self.mode == TRANSLATIONAL_EMBED_MODE:
         return -dy.l2_norm(s - t + A)
     elif self.mode == DISTMULT:
         return dy.sum_elems(dy.cmult(dy.cmult(s, A), t))
Пример #12
0
 def attend(self, H_e, h_t):
     """Attend over the transposed ``H_e`` using ``h_t`` as the query.

     :param H_e: encoder state matrix expression (transposed before use)
     :param h_t: query state expression
     :return: tuple ``(context_vector, attention_weights)``
     """
     encoder_t = dy.transpose(H_e)
     raw_scores = dy.transpose(h_t) * self.attention_weight.expr() * encoder_t
     weights = dy.softmax(dy.transpose(raw_scores))
     return encoder_t * weights, weights
Пример #13
0
    def apply(self, sent1, sent2):
        """Run the attend / compare / aggregate pipeline on a sentence pair.

        Both sentences are looked up through ``self.embedding`` and projected,
        soft-aligned to each other, passed through ``feed_G``, summed over
        dimension 0 and fed through two rectified layers and a final linear
        layer. Dropout with rate 0.2 is applied unconditionally before each
        hidden layer.

        :param sent1: indices accepted by ``self.embedding.all_embeds_from_ix``
        :param sent2: same, for the second sentence
        :return: transposed output of the final linear layer
        """
        eL = dy.parameter(self.linear)

        def embed(indices):
            return dy.inputTensor(self.embedding.all_embeds_from_ix(indices)) * eL

        proj1 = embed(sent1)
        proj2 = embed(sent2)

        f_out1, f_out2 = self.feed_F(proj1, proj2)
        align_scores = f_out1 * dy.transpose(f_out2)
        attn_1_to_2 = dy.softmax(align_scores)
        attn_2_to_1 = dy.softmax(dy.transpose(align_scores))

        # each sentence next to the soft-aligned view of the other one
        paired1 = dy.concatenate_cols([proj1, attn_1_to_2 * proj2])
        paired2 = dy.concatenate_cols([proj2, attn_2_to_1 * proj1])

        g_out1, g_out2 = self.feed_G(paired1, paired2)
        summed1 = dy.sum_dim(g_out1, [0])
        summed2 = dy.sum_dim(g_out2, [0])
        joint = dy.transpose(dy.concatenate([summed1, summed2]))

        h_step_1 = dy.parameter(self.h_step_1)
        hidden = dy.rectify(dy.dropout(joint, 0.2) * h_step_1)
        h_step_2 = dy.parameter(self.h_step_2)
        hidden = dy.rectify(dy.dropout(hidden, 0.2) * h_step_2)

        output_weights = dy.parameter(self.linear2)
        return dy.transpose(hidden * output_weights)
Пример #14
0
    def __call__(self, sent1, sent2):
        """
        Score a sentence pair with soft alignment between the two sentences.

        :param sent1: np matrix.
        :param sent2: np matrix.
        :return: DyNet expression holding the log-softmax of the final layer
            output (the original note described it as 3 elements -- confirm
            against ``self.linear_final``).
        """
        lin1, lin2 = self.apply_linear_embed(sent1, sent2)
        f1, f2 = self.apply_f(lin1, lin2)

        align_scores = f1 * dy.transpose(f2)
        attn_1_to_2 = dy.softmax(align_scores)
        attn_2_to_1 = dy.softmax(dy.transpose(align_scores))

        paired1 = dy.concatenate_cols([lin1, attn_1_to_2 * lin2])
        paired2 = dy.concatenate_cols([lin2, attn_2_to_1 * lin1])

        # aggregate each sentence by summing over dimension 0
        g1, g2 = self.apply_g(paired1, paired2)
        summed1 = dy.sum_dim(g1, [0])
        summed2 = dy.sum_dim(g2, [0])

        joint = dy.transpose(dy.concatenate([summed1, summed2]))
        hidden = self.apply_h(joint)

        linear_final = dy.parameter(self.linear_final)
        logits = hidden * linear_final
        return dy.log_softmax(dy.transpose(logits))
Пример #15
0
    def scorer(self, q_d_hists, q_idf, bm25_score, overlap_features, p):
        """
        Compute the relevance score of a document for a query.

        Every query-term histogram is passed through the MLP, the per-term
        outputs are combined with IDF-based gating weights, and the gated score
        is concatenated with the overlap features for the final linear layer.

        :param q_d_hists: one histogram (sequence of numbers) per query term
        :param q_idf: IDF value per query term
        :param bm25_score: BM25 score of the document (scalar)
        :param overlap_features: extra features for the final layer
        :param p: probability of keeping the BM25 feature active
        :return: document score expression
        """
        idf_vec = dy.inputVector(q_idf)
        bm25_score = dy.scalarInput(bm25_score)
        overlap_features = dy.inputVector(overlap_features)

        # Pass each query term representation through the MLP
        term_scores = []
        for histogram in q_d_hists:
            layer = dy.reshape(dy.inputVector(histogram), (1, len(histogram)))
            layer = dy.rectify(layer * self.W_1 + self.b_1)
            for depth in range(self.mlp_layers):
                layer = dy.rectify(layer * self.W_n[depth] + self.b_n[depth])
            term_scores.append(layer * self.W_last + self.b_last)

        # Term Gating
        gating_weights = idf_vec * self.w_g

        # NOTE(review): bm25_feature is built and masked here but never enters
        # doc_score below -- confirm whether it should be part of the final
        # concatenation.
        bm25_feature = bm25_score * self.W_bm25 + self.b_bm25
        keep_mask = dy.scalarInput(1)
        keep_mask.set((np.random.rand(1) < p) / p)  # p = probability of keeping a unit active
        bm25_feature *= keep_mask

        # basic MLPs output
        stacked_terms = dy.transpose(dy.concatenate(term_scores))
        drmm_score = stacked_terms * dy.reshape(gating_weights, (len(q_idf), 1))
        # extra features layer
        features = dy.transpose(dy.concatenate([drmm_score, overlap_features]))
        return features * self.W_scores + self.b_scores
Пример #16
0
    def predict_sequence_batched(self,
                                 inputs,
                                 mask_array,
                                 wlen,
                                 predictFlag=False):
        """Build projected character-level embeddings for a batch of words.

        The character LSTM is run over ``inputs``, its masked hidden states are
        pooled with attention, the pooled vector is concatenated with the cell
        state at each word's last character, and the result is projected by an
        affine layer. Dropout is applied only when ``predictFlag`` is False.

        :param inputs: list of batched input expressions, one per time step
        :param mask_array: per-time-step mask values, one entry per batch element
        :param wlen: length of each word in the batch
        :param predictFlag: True at prediction time (disables dropout)
        :return: projected character embedding expression
        """
        batch_size = inputs[0].dim()[1]
        src_len = len(inputs)

        if not predictFlag:
            self.charlstm.set_dropouts(self.dropout, self.dropout)
            self.charlstm.set_dropout_masks(batch_size)

        initial = self.charlstm.initial_state(batch_size)
        recur_states, cells = initial.add_inputs(inputs, mask_array,
                                                 predictFlag)

        # Multiply every hidden state by its per-example mask value.
        masked_states = [
            recur_states[t] * dy.reshape(
                dy.inputVector(mask_array[t]), (1, ), batch_size)
            for t in range(src_len)
        ]
        H = dy.concatenate_cols(masked_states)

        attention_weights = dy.transpose(self.W_atten.expr())
        if predictFlag:
            attention_input = H
        else:
            # dropout attention connections (keep the same dim across the sequence)
            attention_input = dy.dropout_dim(H, 1, self.dropout)
        a = dy.softmax(attention_weights * attention_input)

        # Cell state at the last character of each word; zeros for empty words.
        cell_states = [
            dy.pick_batch_elem(cells[wlen[b] - 1], b)
            if wlen[b] > 0 else dy.zeros(self.ldims)
            for b in range(batch_size)
        ]
        C = dy.concatenate_to_batch(cell_states)

        H_atten = H * dy.transpose(a)
        char_emb = dy.concatenate([H_atten, C])

        bias = self.b_linear.expr()
        weight = self.W_linear.expr()
        features = char_emb if predictFlag else dy.dropout(char_emb, self.dropout)
        return dy.affine_transform([bias, weight, features])
def get_alpha_beta(E_matrix, F_sen1, F_sen2):
    """Return the soft-aligned representations ``(alpha, beta)``.

    ``beta`` is ``F_sen2`` weighted by the transposed softmax of ``E_matrix``;
    ``alpha`` is ``F_sen1`` weighted by the transposed softmax of the
    transposed ``E_matrix``.
    """
    def align(features, scores):
        return features * dy.transpose(dy.softmax(scores))

    beta = align(F_sen2, E_matrix)
    alpha = align(F_sen1, dy.transpose(E_matrix))
    return alpha, beta
Пример #18
0
def do_one_batch(X_batch, Z_batch):
    """Compute the rectified similarity between speech and VGG embeddings.

    The speech input goes through three conv / ReLU / k-max-pooling stages and
    a final k-max pooling with k=1, then is weight-normalised; the VGG input
    is embedded with an affine map. The result is the rectified dot product
    of the two embeddings.

    Relies on module-level state: DO_BATCH, PRINT_EMBED, PRINT_SIM, the
    network parameters (w_i, b_i, w_h1, b_h1, w_h2, b_h2, w_embed, b_embed),
    the size tables D and J, and nmf / nframes / nvgg / scnn / vgg.

    :param X_batch: numpy array with the speech features
    :param Z_batch: numpy array with the VGG features
    :return: DyNet expression with the rectified similarity
    """
    # Flatten the batch into 1-D vector for workaround
    batch_size = X_batch.shape[0]
    if DO_BATCH:
        X_batch_f = X_batch.flatten('F')
        Z_batch_f = Z_batch.flatten('F')
        x = dy.reshape(dy.inputVector(X_batch_f), (nmf, nframes),
                       batch_size=batch_size)
        # NOTE(review): (nvgg) is a parenthesised int, not a 1-tuple --
        # confirm dy.reshape accepts it or whether (nvgg,) was intended.
        z = dy.reshape(dy.inputVector(Z_batch_f), (nvgg),
                       batch_size=batch_size)
        scnn.add_input([X_batch[i] for i in range(X_batch.shape[0])])
        vgg.add_input([Z_batch[i] for i in range(X_batch.shape[0])])

    else:
        # Single-example path: inputs are set from column-major flattened data.
        x = dy.matInput(X_batch.shape[0], X_batch.shape[1])
        x.set(X_batch.flatten('F'))
        z = dy.vecInput(Z_batch.shape[0])
        z.set(Z_batch.flatten('F'))
        x = dy.reshape(dy.transpose(x, [1, 0]),
                       (1, X_batch.shape[1], X_batch.shape[0]))
    # Debug output; npvalue() forces evaluation of x.
    print(x.npvalue().shape)
    # Three conv -> ReLU -> k-max pooling stages (pooling along dimension 1).
    a_h1 = dy.conv2d_bias(x, w_i, b_i, [1, 1], is_valid=False)
    h1 = dy.rectify(a_h1)
    h1_pool = dy.kmax_pooling(h1, D[1], d=1)

    a_h2 = dy.conv2d_bias(h1_pool, w_h1, b_h1, [1, 1], is_valid=False)
    h2 = dy.rectify(a_h2)
    h2_pool = dy.kmax_pooling(h2, D[2], d=1)

    a_h3 = dy.conv2d_bias(h2_pool, w_h2, b_h2, [1, 1], is_valid=False)
    h3 = dy.rectify(a_h3)
    h3_pool = dy.kmax_pooling(h3, D[3], d=1)

    # Keep only the top value along dimension 1, then flatten to a vector.
    h4 = dy.kmax_pooling(h3_pool, 1, d=1)
    h4_re = dy.reshape(h4, (J[3], ))
    #print(h4_re.npvalue().shape)
    # Weight-normalise the speech embedding with a fixed scale of 1.
    g = dy.scalarInput(1.)
    zem_sp = dy.weight_norm(h4_re, g)
    #print(zem_sp.npvalue().shape)
    zem_vgg = w_embed * z + b_embed
    #print(zem_vgg.npvalue().shape)

    # Dot-product similarity between the two embeddings, clipped at zero.
    sa = dy.transpose(zem_sp) * zem_vgg
    s = dy.rectify(sa)

    if PRINT_EMBED:
        print('Vgg embedding vector:', zem_vgg.npvalue().shape)
        print(zem_vgg.value())

        print('Speech embedding vector:', zem_sp.npvalue().shape)
        print(zem_sp.value())
    if PRINT_SIM:
        print('Raw Similarity:', sa.npvalue())
        print(sa.value())
        print('Similarity:', s.npvalue())
        print(s.value())

    return s
Пример #19
0
 def _biaffine(self, x, W, y):
     """Biaffine transform of ``x`` and ``y`` through ``W``.

     A constant 1 is appended to both inputs, ``W * x`` is reshaped to
     ``(input_dim + 1, hidden_dim)`` and multiplied by the transposed ``y``.

     :param x: first input vector expression
     :param W: weight expression applied to the extended ``x``
     :param y: second input vector expression
     :return: the transposed biaffine product
     """
     x = dy.concatenate(
         [x, dy.inputTensor(np.ones((1, ), dtype=np.float32))])
     y = dy.concatenate(
         [y, dy.inputTensor(np.ones((1, ), dtype=np.float32))])
     # Only the y-side size is needed for the reshape; the unused x-side
     # local was dropped.
     ny = self.input_dim + 1
     lin = dy.reshape(W * x, (ny, self.hidden_dim))
     return dy.transpose(dy.transpose(y) * lin)
Пример #20
0
 def __call__(self, embed_in, src_len, train=False, **kwargs):
     """Encode embeddings with the transformer and split the result per step.

     Input shape: ((T, H), B) Output Shape: [((H,), B)] * T

     :param embed_in: embedded input expression
     :param src_len: source lengths used to build the sequence mask
     :param train: forwarded to the transformer
     :return: ``TransformerEncoderOutput`` with the per-step outputs and mask
     """
     n_steps = embed_in.dim()[0][0]
     transposed_in = dy.transpose(embed_in)
     masks = [unsqueeze(m, 2) for m in sequence_mask(src_len, n_steps)]
     projected = self.proj(transposed_in)
     encoded = self.transformer(projected, masks, train=train)
     steps = list(dy.transpose(encoded))
     return TransformerEncoderOutput(output=steps, src_mask=masks)
Пример #21
0
 def __call__(self, embed_in, src_len, train=False, **kwargs):
     """Encode embeddings with the transformer and split the result per step.

     Input shape: ((T, H), B) Output Shape: [((H,), B)] * T

     :param embed_in: embedded input expression
     :param src_len: source lengths used to build the sequence mask
     :param train: forwarded to the transformer
     :return: ``TransformerEncoderOutput`` with the per-step outputs and mask
     """
     n_steps = embed_in.dim()[0][0]
     transposed_in = dy.transpose(embed_in)
     masks = [unsqueeze(m, 2) for m in sequence_mask(src_len, n_steps)]
     projected = self.proj(transposed_in)
     encoded = self.transformer(projected, masks, train=train)
     steps = list(dy.transpose(encoded))
     return TransformerEncoderOutput(output=steps, src_mask=masks)
Пример #22
0
 def entity_attend(self, H_e, h_e):
     """Weight the value projections of ``H_e`` by attention from ``h_e``.

     :param H_e: list of state expressions, joined as columns
     :param h_e: query state expression
     :return: transpose of the element-wise product of values and weights
     """
     H = dy.concatenate_cols(H_e)
     keys = self.key_weight.expr() * H
     query = self.query_weight.expr() * h_e
     values = self.value_weight.expr() * H
     S = dy.transpose(query) * keys
     # NOTE(review): S looks like a single row here; dy.softmax normalises
     # over dimension 0 by default -- confirm the intended axis.
     A = dy.softmax(S)
     # The dead `context_vectors = []` initialisation was removed; the name
     # was always rebound before use.
     context_vectors = dy.cmult(values, A)
     return dy.transpose(context_vectors)
Пример #23
0
    def forward(self, s1, s2, label=None):
        """Run the decomposable-attention model on a sentence pair.

        :param s1: embedding matrix of the first sentence (passed to
            ``dy.inputTensor``)
        :param s2: embedding matrix of the second sentence
        :param label: gold class index, or None for prediction
        :return: the negative log-softmax loss expression when ``label`` is
            given, otherwise the index of the most probable class
        """
        def layer(inputs, weights):
            # Dropout (rate 0.2) followed by a rectified linear map.
            return dy.rectify(dy.dropout(inputs, 0.2) * weights)

        eL = dy.parameter(self.embeddingLinear)
        s1 = dy.inputTensor(s1) * eL
        s2 = dy.inputTensor(s2) * eL

        # F step
        Lf1 = dy.parameter(self.mlpF1)
        Fs1 = layer(s1, Lf1)
        Fs2 = layer(s2, Lf1)
        Lf2 = dy.parameter(self.mlpF2)
        Fs1 = layer(Fs1, Lf2)
        Fs2 = layer(Fs2, Lf2)

        # Attention scoring
        score1 = Fs1 * dy.transpose(Fs2)
        prob1 = dy.softmax(score1)

        score2 = dy.transpose(score1)
        prob2 = dy.softmax(score2)

        # Align pairs using attention
        s1Pairs = dy.concatenate_cols([s1, prob1 * s2])
        s2Pairs = dy.concatenate_cols([s2, prob2 * s1])

        # G step
        Lg1 = dy.parameter(self.mlpG1)
        Gs1 = layer(s1Pairs, Lg1)
        Gs2 = layer(s2Pairs, Lg1)
        Lg2 = dy.parameter(self.mlpG2)
        Gs1 = layer(Gs1, Lg2)
        Gs2 = layer(Gs2, Lg2)

        # Sum
        Ss1 = dy.sum_dim(Gs1, [0])
        Ss2 = dy.sum_dim(Gs2, [0])

        concatS12 = dy.transpose(dy.concatenate([Ss1, Ss2]))

        # H step
        Lh1 = dy.parameter(self.mlpH1)
        Hs = layer(concatS12, Lh1)
        Lh2 = dy.parameter(self.mlpH2)
        Hs = layer(Hs, Lh2)

        # Final layer
        final_layer = dy.parameter(self.final_layer)
        final = dy.transpose(Hs * final_layer)

        # Label can be 0, so test identity with None rather than truthiness
        # (and avoid `!=`, which invokes overloaded comparison operators).
        if label is not None:
            return dy.pickneglogsoftmax(final, label)
        out = dy.softmax(final)
        return np.argmax(out.npvalue())
Пример #24
0
    def _multilayer_perceptron(self, x):
        """Apply two hidden layers and a softmax output layer to ``x``.

        Weights come from ``self.weights`` ('h1', 'h2', 'out'), biases from
        ``self.biases`` ('b1', 'b2', 'out'); ``self.non_lin`` is the hidden
        activation.
        """
        activation = self.non_lin
        weights, biases = self.weights, self.biases

        def hidden(inputs, weight_key, bias_key):
            # The bias is added to the transposed product, then transposed back.
            pre_activation = dy.transpose(inputs * weights[weight_key]) + biases[bias_key]
            return activation(dy.transpose(pre_activation))

        hidden_1 = hidden(x, 'h1', 'b1')
        hidden_2 = hidden(hidden_1, 'h2', 'b2')
        return dy.softmax(dy.transpose(hidden_2 * weights['out']) + biases['out'])
Пример #25
0
 def __call__(self, h, s):
     """Bilinear score between ``h`` and ``s`` with an optional bias term.

     With a single label the result is ``h^T * (U * s)``; with several labels
     ``U * s`` is reshaped to ``(h_dim, n_label)`` first and the product is
     transposed. The bias term is added only when ``self.bias`` is truthy.
     """
     # hT -> ((L, h_dim), B), s -> ((s_dim, L), B)
     h_transposed = dy.transpose(h)
     projected = self.U * s        # ((h_dim*n_label, L), B)
     if self.n_label > 1:
         projected = dy.reshape(projected, (self.h_dim, self.n_label))
     bilinear = h_transposed * projected

     if self.n_label == 1:
         bias_term = h_transposed * self.B if self.bias else 0
         return bilinear + bias_term

     scores = dy.transpose(bilinear)
     bias_term = self.V * dy.concatenate([h, s]) + self.B if self.bias else 0
     return scores + bias_term
Пример #26
0
    def calc_loss(self, src, db_idx):
        """Compute the hinge retrieval loss of ``src`` against database entries.

        :param src: source batch to embed and encode
        :param db_idx: indices into ``self.database``; position ``i`` is the
            gold target for source ``i``
        :return: summed hinge loss expression (its value is also printed)
        """
        embedded = self.src_embedder.embed_sent(src)
        encoded = self.src_encoder.transduce(embedded)
        src_encodings = self.exprseq_pooling(encoded)
        trg_encodings = self.encode_trg_example(self.database[db_idx])

        scores = dy.transpose(dy.transpose(src_encodings) * trg_encodings)
        gold = list(six.moves.range(len(db_idx)))
        loss = dy.sum_batches(dy.hinge_batch(scores, gold))
        print(loss.npvalue())
        return loss
Пример #27
0
    def cal_scores(self, src_encodings):
        """Score arcs and labels between all pairs of source positions.

        :param src_encodings: list of encoder outputs, one expression per position
        :return: tuple ``(s_arc, s_label)``; ``s_label`` holds one score
            expression per entry of ``self.U_label_1``
        """
        src_len = len(src_encodings)
        encoded = dy.concatenate_cols(
            src_encodings)  # src_ctx_dim, src_len, batch_size

        W_arc_head = dy.parameter(self.W_arc_hidden_to_head)
        b_arc_head = dy.parameter(self.b_arc_hidden_to_head)
        W_arc_dep = dy.parameter(self.W_arc_hidden_to_dep)
        b_arc_dep = dy.parameter(self.b_arc_hidden_to_dep)

        W_label_head = dy.parameter(self.W_label_hidden_to_head)
        b_label_head = dy.parameter(self.b_label_hidden_to_head)
        W_label_dep = dy.parameter(self.W_label_hidden_to_dep)
        b_label_dep = dy.parameter(self.b_label_hidden_to_dep)

        U_arc_1 = dy.parameter(self.U_arc_1)
        u_arc_2 = dy.parameter(self.u_arc_2)

        U_label_1 = [dy.parameter(p) for p in self.U_label_1]
        u_label_2_1 = [dy.parameter(p) for p in self.u_label_2_1]
        u_label_2_2 = [dy.parameter(p) for p in self.u_label_2_2]
        b_label = [dy.parameter(p) for p in self.b_label]

        def mlp(bias, weight):
            # Rectified affine projection of the encodings.
            return dy.rectify(dy.affine_transform([bias, weight, encoded]))

        h_arc_head = mlp(b_arc_head, W_arc_head)  # n_arc_ml_units, src_len, bs
        h_arc_dep = mlp(b_arc_dep, W_arc_dep)
        h_label_head = mlp(b_label_head, W_label_head)
        h_label_dep = mlp(b_label_dep, W_label_dep)

        h_arc_head_t = dy.transpose(h_arc_head)
        h_label_head_t = dy.transpose(h_label_head)

        s_arc = h_arc_head_t * dy.colwise_add(U_arc_1 * h_arc_dep, u_arc_2)

        s_label = []
        label_params = zip(U_label_1, u_label_2_1, u_label_2_2, b_label)
        for U_1, u_2_1, u_2_2, bias in label_params:
            # bilinear term plus head-only and dependent-only terms
            bilinear = h_label_head_t * U_1 * h_label_dep
            head_term = h_label_head_t * u_2_1 * dy.ones((1, src_len))
            dep_term = dy.ones((src_len, 1)) * u_2_2 * h_label_dep
            s_label.append(bilinear + head_term + dep_term + bias)
        return s_arc, s_label
Пример #28
0
 def __call__(self, X):
     """Apply summed attention heads and a feed-forward layer with layer norm.

     Both sub-layers are residual: the input is added back before each
     ``dy.layer_norm`` call, which uses an all-ones gain and all-zeros bias.
     """
     shape = X.dim()[0]
     d_x, d_y = shape[0], shape[1]
     gain = dy.ones((d_x, d_y))
     bias = dy.zeros((d_x, d_y))

     attended = dy.esum([head(X) for head in self.attention])
     normed = dy.layer_norm(X + attended, gain, bias)

     # the feed-forward layer works on the transposed layout
     fed_forward = dy.transpose(self.feedforward(dy.transpose(normed)))
     return dy.layer_norm(normed + fed_forward, gain, bias)
Пример #29
0
def attend(sentence_a, sentence_b):
    """Soft-align two sentence matrices against each other.

    :param sentence_a: expression for the first sentence
    :param sentence_b: expression for the second sentence
    :return: tuple ``(sentence_a_attended, sentence_b_attended)``
    """
    similarity = dy.transpose(sentence_a) * sentence_b
    logging.debug("Similarity Matrix size: " + str(similarity.dim()))

    softmax_a = dy.softmax(similarity)
    logging.debug("Sentence a softmax size: " + str(softmax_a.dim()))
    softmax_b = dy.softmax(dy.transpose(similarity))
    logging.debug("Sentence b softmax size: " + str(softmax_b.dim()))

    attended_b = sentence_b * dy.transpose(softmax_a)
    attended_a = sentence_a * dy.transpose(softmax_b)
    return attended_a, attended_b
Пример #30
0
 def self_encode_tags(self, tags):
     """Encode tags with an LSTM and add a self-attention summary to each.

     :param tags: input tag expressions fed to ``self.enc_tag_lstm``
     :return: list with one expression per LSTM output: the output plus the
         attention-weighted combination of all outputs
     """
     vectors = run_lstm(self.enc_tag_lstm.initial_state(), tags)
     tag_input_mat = dy.concatenate_cols(vectors)
     out_vectors = []
     # Self attention for every tag:
     for v1 in vectors:
         # tag input mat: [tag_emb x seqlen]
         # v1: [tag_emb]
         unnormalized = dy.transpose(dy.transpose(v1) * tag_input_mat)
         self_att_weights = dy.softmax(unnormalized)
         # Build the weighted sum once and reuse it (it used to be computed
         # twice, with the first result discarded).
         to_add = tag_input_mat * self_att_weights
         out_vectors.append(v1 + to_add)
     return out_vectors
Пример #31
0
 def self_attend(self, H_e):
     """Self-attention with key/query/value projections over ``H_e``.

     :param H_e: list of state expressions, joined as columns
     :return: list of context vectors, one per row of the transposed queries
     """
     stacked = dy.concatenate_cols(H_e)
     keys = self.key_weight.expr() * stacked
     queries = self.query_weight.expr() * stacked
     values = self.value_weight.expr() * stacked

     def attend_one(query):
         # softmax over the query/key scores, then weight the values
         weights = dy.softmax(dy.transpose(dy.transpose(query) * keys))
         return values * weights

     return [attend_one(query) for query in dy.transpose(queries)]
Пример #32
0
  def __call__(self, x, z=None, mask=None):
    """Multi-head attention over ``x`` (self-attention) or from ``x`` to ``z``.

    Args:
      x: input expression; always used for the queries.
      z: optional memory expression for keys and values; when None, ``x``
        is used instead.
      mask: numpy array with 1 for positions to keep and 0 for positions to
        suppress. NOTE(review): the default None is not handled and fails in
        ``np.concatenate`` -- callers appear to always pass a mask; confirm.

    Returns:
      The attended expression after the final linear layer.
    """
    h = self.h
    # Identity test: `==` could dispatch to an overloaded comparison.
    memory = x if z is None else z
    Q = self.W_Q(x)
    K = self.W_K(memory)
    V = self.W_V(memory)

    (n_units, n_querys), batch = Q.dim()
    (_, n_keys), _ = K.dim()

    # Fold the heads into the batch dimension.
    batch_Q = dy.concatenate_to_batch(self.split_rows(Q, h))
    batch_K = dy.concatenate_to_batch(self.split_rows(K, h))
    batch_V = dy.concatenate_to_batch(self.split_rows(V, h))

    # These used to be written as `assert(cond, msg)`, which asserts a
    # non-empty tuple and therefore never fails; compare against the full
    # (shape, batch) pair instead.
    assert batch_Q.dim() == ((n_units // h, n_querys), batch * h)
    assert batch_K.dim() == ((n_units // h, n_keys), batch * h)
    assert batch_V.dim() == ((n_units // h, n_keys), batch * h)

    mask = np.concatenate([mask] * h, axis=0)
    mask = np.moveaxis(mask, [1, 0, 2], [0, 2, 1])
    mask = dy.inputTensor(mask, batched=True)
    batch_A = (dy.transpose(batch_Q) * batch_K) * self.scale_score
    # Masked-out positions get MIN_VALUE so they vanish in the softmax.
    batch_A = dy.cmult(batch_A, mask) + (1 - mask)*MIN_VALUE

    sent_len = batch_A.dim()[0][0]
    if sent_len == 1:
      batch_A = dy.softmax(batch_A)
    else:
      batch_A = dy.softmax(batch_A, d=1)

    batch_A = dy.cmult(batch_A, mask)
    assert (batch_A.dim() == ((n_querys, n_keys), batch * h))

    if self.attn_dropout and self.dropout != 0.0:
      batch_A = dy.dropout(batch_A, self.dropout)

    batch_C = dy.transpose(batch_A * dy.transpose(batch_V))
    assert (batch_C.dim() == ((n_units // h, n_querys), batch * h))

    # Unfold the heads from the batch dimension back into rows.
    C = dy.concatenate(self.split_batch(batch_C, h), d=0)
    assert (C.dim() == ((n_units, n_querys), batch))
    C = self.finishing_linear_layer(C)
    return C
Пример #33
0
    def word_repr(self, char_seq):
        """Build a word representation from its character expressions.

        Length-specific gate parameters are loaded into ``self.param_exprs``
        the first time a word length is seen. A reset gate filters the
        concatenated characters, a candidate is combined with them, and a
        per-dimension softmax update gate mixes the ``wlen + 1`` candidates.

        :param char_seq: list of character expressions
        :return: the word expression
        """
        wlen = len(char_seq)
        exprs = self.param_exprs
        if 'rgW%d'%wlen not in exprs:
            params = self.params
            exprs['rgW%d'%wlen] = dy.parameter(params['reset_gate_W'][wlen-1])
            exprs['rgb%d'%wlen] = dy.parameter(params['reset_gate_b'][wlen-1])
            exprs['cW%d'%wlen] = dy.parameter(params['com_W'][wlen-1])
            exprs['cb%d'%wlen] = dy.parameter(params['com_b'][wlen-1])
            exprs['ugW%d'%wlen] = dy.parameter(params['update_gate_W'][wlen-1])
            exprs['ugb%d'%wlen] = dy.parameter(params['update_gate_b'][wlen-1])

        ndims = self.options['ndims']
        width = wlen + 1

        chars = dy.concatenate(char_seq)
        reset_gate = dy.logistic(exprs['rgW%d'%wlen] * chars + exprs['rgb%d'%wlen])
        candidate = dy.tanh(exprs['cW%d'%wlen] * dy.cmult(reset_gate, chars) + exprs['cb%d'%wlen])
        comb = dy.concatenate([candidate, chars])
        update_logits = exprs['ugW%d'%wlen] * comb + exprs['ugb%d'%wlen]

        # One softmax per output dimension, each over a slice of `width` logits.
        # (A single exp / sum over the reshaped logits would be faster but is
        # not numerically safe.)
        gate_columns = [
            dy.softmax(dy.pickrange(update_logits, i*width, (i+1)*width))
            for i in xrange(ndims)
        ]
        update_gate = dy.transpose(dy.concatenate_cols(gate_columns))

        return dy.sum_cols(dy.cmult(update_gate, dy.reshape(comb, (ndims, width))))
Пример #34
0
    def _forward(self, emissions):

        """Forward pass accumulating the log-sum-exp score over all tag paths.

        :param emissions: List[dy.Expression]

        Returns:
            dy.Expression ((1,), B)
        """
        start_scores = [-1e4] * self.n_tags
        start_scores[self.start_idx] = 0
        alphas = dy.inputVector(start_scores)

        transitions = self.transitions
        # one update per time step; len(emissions) == T
        for emission in emissions:
            with_emission = dy.colwise_add(transitions, emission)
            scores = dy.colwise_add(dy.transpose(with_emission), alphas)
            # dy.logsumexp takes a list of expressions and reduces them
            # element-wise across the list, so the scores are iterated to
            # obtain that list.
            alphas = dy.logsumexp(list(scores))

        final_scores = alphas + dy.pick(transitions, self.end_idx)
        return dy.logsumexp(list(final_scores))
Пример #35
0
def viterbi(emissions, transition, start_idx, end_idx, norm=False):
    """Find the highest-scoring tag path through ``emissions``.

    :param emissions: non-empty sequence of dy.Expression; the first
        dimension of the first element gives the number of tags
    :param transition: transition score expression
    :param start_idx: index that receives score 0 in the initial vector
        (every other index starts at -1e4)
    :param end_idx: index picked from ``transition`` for the final step
    :param norm: when true, the initial vector goes through ``dy.log_softmax``
    :return: ``(best_path, path_score)`` -- the list of chosen tag indices and
        the expression picked from the final scores at the best last tag
    """
    tag_count = emissions[0].dim()[0][0]

    start_scores = [-1e4] * tag_count
    start_scores[start_idx] = 0
    alphas = dy.inputVector(start_scores)
    if norm:
        alphas = dy.log_softmax(alphas)

    history = []
    for emission in emissions:
        candidates = dy.colwise_add(dy.transpose(transition), alphas)
        # Record the arg-max along axis 0 before reducing the same expression.
        step_pointers = np.argmax(candidates.npvalue(), 0)
        alphas = dy.max_dim(candidates, 0) + emission
        history.append(step_pointers)

    final_scores = alphas + dy.pick(transition, end_idx)
    current = np.argmax(final_scores.npvalue())
    path_score = dy.pick(final_scores, current)

    # Follow the back-pointers from the last step to the first.
    backwards = [current]
    for step_pointers in history[::-1]:
        current = step_pointers[current]
        backwards.append(current)

    # The last entry appended (pointer out of the first step) is discarded,
    # and the remainder is put back into forward order.
    best_path = backwards[:-1][::-1]
    return best_path, path_score
Пример #36
0
 def _attend(self, query, mask=None):
     """Return softmax attention weights of ``query`` against ``self.context``.

     :param query: query expression; it is passed through ``unsqueeze(query, 0)``
     :param mask: optional pair; ``mask[0]`` multiplies the scores elementwise
         and ``mask[1]`` is scaled by -1e9 and added to them
     :return: ``dy.softmax`` of the (optionally masked) scores
     """
     # Shape notes from the original: ((1, H), B) * ((H, T), B) -> ((1, T), B),
     # transposed to ((T, 1), B).
     row_query = unsqueeze(query, 0)
     scores = dy.transpose(row_query * self.context)
     if mask is None:
         return dy.softmax(scores)
     masked_scores = dy.cmult(scores, mask[0]) + (mask[1] * dy.scalarInput(-1e9))
     return dy.softmax(masked_scores)
Пример #37
0
def calc_attention(src_output_matrix, tgt_output_embedding, fixed_attentional_component):
    """Compute an attention-weighted combination of ``src_output_matrix``.

    Reads the module-level parameter handles ``w1_att_src_p``,
    ``w1_att_tgt_p`` and ``w2_att_p``.

    :param src_output_matrix: expression multiplied by the alignment
    :param tgt_output_embedding: expression projected by ``w1_att_tgt``
    :param fixed_attentional_component: expression the projected target is
        added to column-wise before the tanh
    :return: ``(att_output, alignment)``
    """
    # NOTE(review): this parameter is loaded but never used below.
    dy.parameter(w1_att_src_p)
    tgt_projection = dy.parameter(w1_att_tgt_p)
    scorer = dy.parameter(w2_att_p)

    projected_tgt = tgt_projection * tgt_output_embedding
    hidden = dy.tanh(dy.colwise_add(fixed_attentional_component, projected_tgt))
    alignment = dy.softmax(dy.transpose(hidden) * scorer)
    return src_output_matrix * alignment, alignment
Пример #38
0
def transpose(x, dim1, dim2):
    """Return ``x`` with dimensions ``dim1`` and ``dim2`` exchanged.

    An identity permutation is built over the dimensions reported by
    ``x.dim()``, the two requested positions are swapped, and the result is
    handed to ``dy.transpose``.
    """
    shape, _batch = x.dim()
    order = list(range(len(shape)))
    order[dim1], order[dim2] = order[dim2], order[dim1]
    return dy.transpose(x, dims=order)
Пример #39
0
 def ergm_score(self):
     """
     Multiply the transposed feature-value vector by the ERGM weights.

     Feature values are read from ``self.feature_vals`` in the iteration
     order of ``self.feature_set``. Does not populate any field.

     :return: ERGM score (dynet Expression)
     """
     weights = dy.parameter(self.ergm_weights)
     values = [self.feature_vals[name] for name in self.feature_set]
     feature_row = dy.transpose(dy.inputVector(values))
     return feature_row * weights
Пример #40
0
 def _attend(self, query, mask=None):
     """Return softmax attention weights from a tanh-scored combination.

     ``self.decoder * query`` is added column-wise to ``self.context_proj``,
     passed through tanh, scored with ``self.v`` and transposed.

     :param query: query expression (original note: ((H), B))
     :param mask: optional pair (original note: ((T, 1), B)); ``mask[0]``
         multiplies the scores elementwise and ``mask[1]`` is scaled by -1e9
         and added to them
     :return: ``dy.softmax`` of the scores (original note: ((T, 1), B))
     """
     decoder_proj = self.decoder * query  # ((H,), B) per the original note
     activated = dy.tanh(dy.colwise_add(self.context_proj, decoder_proj))  # ((H, T), B)
     # ((1, H), B) * ((H, T), B) -> ((1, T), B) -> ((T, 1), B) per the original note
     scores = dy.transpose(self.v * activated)
     if mask is None:
         return dy.softmax(scores)
     masked_scores = dy.cmult(scores, mask[0]) + (mask[1] * dy.scalarInput(-1e9))
     return dy.softmax(masked_scores)
Пример #41
0
 def __call__(self, encoder_output, dst, train):
     """Run the decoder on ``dst`` against ``encoder_output``.

     :param encoder_output: object providing ``output`` (joined column-wise
         into the context) and ``src_mask``
     :param dst: input handed to ``self.tgt_embedding.encode``
     :param train: flag forwarded to ``self.transformer_decoder``
     :return: result of ``self.output`` applied to the projected decoder output
     """
     tgt_embedded = dy.transpose(self.tgt_embedding.encode(dst))
     tgt_hidden = self.proj_to_hsz(tgt_embedded)
     context = dy.concatenate_cols(encoder_output.output)
     # The second entry of the shape tuple sizes the mask produced by
     # subsequent_mask (defined elsewhere).
     steps = tgt_hidden.dim()[0][1]
     dst_mask = subsequent_mask(steps)
     src_mask = encoder_output.src_mask
     decoded = self.transformer_decoder(tgt_hidden, context, src_mask, dst_mask, train)
     return self.output(self.proj_to_dsz(decoded))
Пример #42
0
    def cal_scores(self, src_encodings):
        """Compute arc and label score expressions from ``src_encodings``.

        The encodings are joined column-wise and passed through four rectified
        affine projections (arc-head, arc-dep, label-head, label-dep). The arc
        score is ``head^T * (U_arc_1 * dep + u_arc_2)``. For every zipped set
        of label parameters the score is the sum of a bilinear term, a term
        that depends only on the head projection, a term that depends only on
        the dep projection, and a bias.

        :param src_encodings: sequence of expressions; its length is used as
            ``src_len``
        :return: ``(s_arc, s_label)`` where ``s_label`` is a list with one
            expression per zipped set of label parameters
        """
        src_len = len(src_encodings)

        # src_ctx_dim, src_len, batch_size (shape as noted in the original)
        encoded = dy.concatenate_cols(src_encodings)

        arc_head_W = dy.parameter(self.W_arc_hidden_to_head)
        arc_head_b = dy.parameter(self.b_arc_hidden_to_head)
        arc_dep_W = dy.parameter(self.W_arc_hidden_to_dep)
        arc_dep_b = dy.parameter(self.b_arc_hidden_to_dep)

        label_head_W = dy.parameter(self.W_label_hidden_to_head)
        label_head_b = dy.parameter(self.b_label_hidden_to_head)
        label_dep_W = dy.parameter(self.W_label_hidden_to_dep)
        label_dep_b = dy.parameter(self.b_label_hidden_to_dep)

        arc_bilinear = dy.parameter(self.U_arc_1)
        arc_bias = dy.parameter(self.u_arc_2)

        label_bilinears = [dy.parameter(p) for p in self.U_label_1]
        label_head_vecs = [dy.parameter(p) for p in self.u_label_2_1]
        label_dep_vecs = [dy.parameter(p) for p in self.u_label_2_2]
        label_biases = [dy.parameter(p) for p in self.b_label]

        def hidden(bias, weight):
            # Rectified affine projection of the joined encodings.
            return dy.rectify(dy.affine_transform([bias, weight, encoded]))

        arc_head = hidden(arc_head_b, arc_head_W)  # n_arc_ml_units, src_len, bs
        arc_dep = hidden(arc_dep_b, arc_dep_W)
        label_head = hidden(label_head_b, label_head_W)
        label_dep = hidden(label_dep_b, label_dep_W)

        arc_head_t = dy.transpose(arc_head)
        label_head_t = dy.transpose(label_head)

        s_arc = arc_head_t * dy.colwise_add(arc_bilinear * arc_dep, arc_bias)

        s_label = []
        label_params = zip(label_bilinears, label_head_vecs, label_dep_vecs, label_biases)
        for bilinear, head_vec, dep_vec, bias in label_params:
            pair_term = label_head_t * bilinear * label_dep
            head_term = label_head_t * head_vec * dy.ones((1, src_len))
            dep_term = dy.ones((src_len, 1)) * dep_vec * label_dep
            s_label.append(pair_term + head_term + dep_term + bias)
        return s_arc, s_label
Пример #43
0
def attend(input_mat, state, w1dt):
    """Return the attention-weighted context computed from ``input_mat``.

    Reads the module-level parameters ``attention_w2`` and ``attention_v``.

    :param input_mat: input vectors concatenated as columns
        (original note: encoder_state x seqlen)
    :param state: object whose ``s()`` components are concatenated and
        projected by ``attention_w2``
    :param w1dt: precomputed projection (original note: attdim x seqlen)
    :return: ``input_mat`` multiplied by the softmax attention weights
    """
    state_proj_w = dy.parameter(attention_w2)
    scorer = dy.parameter(attention_v)

    # Project the concatenated state components.
    state_vector = dy.concatenate(list(state.s()))
    state_proj = state_proj_w * state_vector

    # Score each column of w1dt against the projected state, then normalise.
    activated = dy.tanh(dy.colwise_add(w1dt, state_proj))
    scores = dy.transpose(scorer * activated)
    weights = dy.softmax(scores)

    # Weighted combination of the input columns.
    return input_mat * weights
Пример #44
0
def calc_loss(sents):
    """Return the summed hinge loss over a batch of (source, target) pairs.

    Renews the computation graph and encodes both sides with the module-level
    lookups and LSTM builders. It then builds the matrix of dot products
    between all source and target representations and applies
    ``dy.hinge_dim`` along ``d=1`` with index ``i`` as the gold entry for
    pair ``i``.

    :param sents: sequence of ``(src, trg)`` pairs
    :return: scalar expression, ``dy.sum_elems`` of the hinge losses
    """
    dy.renew_cg()

    src_fwd = LSTM_SRC_FWD.initial_state()
    src_bwd = LSTM_SRC_BWD.initial_state()
    trg_fwd = LSTM_TRG_FWD.initial_state()
    trg_bwd = LSTM_TRG_BWD.initial_state()

    # Encode each side of the pairs separately.
    src_side = [src for src, trg in sents]
    src_reps = encode_sents(LOOKUP_SRC, src_fwd, src_bwd, src_side)
    trg_side = [trg for src, trg in sents]
    trg_reps = encode_sents(LOOKUP_TRG, trg_fwd, trg_bwd, trg_side)

    # One column per sentence representation.
    src_matrix = dy.concatenate_cols(src_reps)
    trg_matrix = dy.concatenate_cols(trg_reps)

    # Dot-product similarity between every source and every target.
    similarities = dy.transpose(src_matrix) * trg_matrix

    gold = list(range(len(sents)))
    hinge = dy.hinge_dim(similarities, gold, d=1)
    return dy.sum_elems(hinge)
Пример #45
0
def squeeze_and_transpose(x):
    """Apply ``squeeze`` to ``x`` and return the ``dy.transpose`` of the result."""
    squeezed = squeeze(x)
    return dy.transpose(squeezed)
Пример #46
0
 def encode(self, embed_list):
     """Encode the embeddings and return ``self.output`` of each encoder result.

     The embeddings are joined column-wise and transposed, then passed to
     ``self.encoder`` together with ``self.train``.
     """
     stacked = dy.transpose(dy.concatenate_cols(embed_list))
     results = []
     for encoded in self.encoder(stacked, self.train):
         results.append(self.output(encoded))
     return results
Пример #47
0
 def output(self, x):
     """Apply ``self.preds`` to each item obtained by iterating the transpose of ``x``."""
     predictions = []
     for item in dy.transpose(x):
         predictions.append(self.preds(item))
     return predictions