Example #1
def calc_sent_loss(sent):
    # Create a computation graph
    dy.renew_cg()

    # Get embeddings for the sentence
    emb = [W_w_p[x] for x in sent]

    # Sample K negative words for each predicted word at each position
    all_neg_words = np.random.choice(nwords,
                                     size=2 * N * K * len(emb),
                                     replace=True,
                                     p=word_probabilities)

    # W_w = dy.parameter(W_w_p)
    # Step through the sentence and calculate the negative and positive losses
    all_losses = []
    for i, my_emb in enumerate(emb):
        neg_words = all_neg_words[i * K * 2 * N:(i + 1) * K * 2 * N]
        pos_words = (
            [sent[x] if x >= 0 else S for x in range(i - N, i)] +
            [sent[x] if x < len(sent) else S for x in range(i + 1, i + N + 1)])
        neg_loss = -dy.log(
            dy.logistic(
                -dy.dot_product(my_emb, dy.lookup_batch(W_c_p, neg_words))))
        pos_loss = -dy.log(
            dy.logistic(
                dy.dot_product(my_emb, dy.lookup_batch(W_c_p, pos_words))))
        all_losses.append(dy.sum_batches(neg_loss) + dy.sum_batches(pos_loss))
    return dy.esum(all_losses)
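
A minimal driver sketch for the loss above (hypothetical: `model`, `train_data`, and the lookup parameters `W_w_p`/`W_c_p` are assumed to be defined by the surrounding script):

trainer = dy.SimpleSGDTrainer(model)
for sent in train_data:
    loss = calc_sent_loss(sent)  # builds a fresh graph per sentence
    loss.value()                 # forward pass
    loss.backward()              # backward pass
    trainer.update()             # gradient step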
Example #2
    def GetQDScore(self, qwords, qreps, dwords, dreps, extra):
        nq = len(qreps)
        nd = len(dreps)
        qgl = [
            self.W_gate.expr() *
            dy.concatenate([qv, dy.constant(1, self.idf_val(qw))])
            for qv, qw in zip(qreps, qwords)
        ]
        qgates = dy.softmax(dy.concatenate(qgl))

        qscores = []
        for qtok in range(len(qreps)):
            qrep = qreps[qtok]
            att_scores = [dy.dot_product(qrep, drep) for drep in dreps]
            att_probs = dy.softmax(dy.concatenate(att_scores))
            doc_rep = dy.esum([v * p for p, v in zip(att_probs, dreps)])
            input_vec = dy.cmult(qrep, doc_rep)
            #input_dot = dy.sum_elems(input_vec)
            #input_len = dy.l2_norm(qrep - doc_rep)
            #input_vec = dy.concatenate([input_vec, input_dot, input_len])

            layer = utils.leaky_relu(self.b_term.expr() +
                                     self.W_term.expr() * input_vec)
            score = (self.b_term2.expr() + self.W_term2.expr() * layer)
            qscores.append(score)

        # Combine the per-term scores with the gates, then apply the final classifier.
        qterm_score = dy.dot_product(dy.concatenate(qscores), qgates)

        fin_score = (
            self.b_final.expr() +
            self.W_final.expr() * dy.concatenate([qterm_score, extra]))
        return fin_score
Example #3
File: dy_model.py Project: jcyk/CWS
    def beam_search(self, char_seq, truth = None, mu =0.): 
        start_agenda = Agenda(self.options['beam_size'])
        init_state = self.params['lstm'].initial_state().add_input(self.param_exprs['<bos>'])
        init_y = dy.tanh(self.param_exprs['pW'] * init_state.output() + self.param_exprs['pb'])
        init_score = dy.scalarInput(0.)
        start_agenda.push(Sentence(score=init_score.scalar_value(),score_expr=init_score,LSTMState =init_state, y= init_y , prevState = None, wlen=None))
        agenda = [start_agenda]

        for idx, _ in enumerate(char_seq,1): # from left to right, character by character
            now = Agenda(self.options['beam_size'])
            for wlen in xrange(1,min(idx,self.options['max_word_len'])+1): # generate candidate word vectors
                word = self.word_repr(char_seq[idx-wlen:idx])
                word_score = dy.dot_product(word,self.param_exprs['U'])
                for sent in agenda[idx-wlen]: # join segmentation
                    if truth is not None:
                        margin = dy.scalarInput(mu*wlen if truth[idx-1]!=wlen else 0.)
                        score = margin + sent.score_expr + dy.dot_product(sent.y, word) + word_score 
                    else:
                        score = sent.score_expr + dy.dot_product(sent.y, word) + word_score 
                    
                    if now.happy_with(score.scalar_value()):
                        new_state = sent.LSTMState.add_input(word)
                        new_y = dy.tanh(self.param_exprs['pW'] * new_state.output() + self.param_exprs['pb'])
                        now.push(Sentence(score=score.scalar_value(),score_expr=score,LSTMState=new_state,y=new_y, prevState=sent, wlen=wlen))
            agenda.append(now)

        if truth is not None:
            return agenda[-1].max().score_expr
        return agenda
Example #4
    def __call__(self, x, y):
        #  x_bias = parameter(self.x_bias)
        #  y_bias = parameter(self.y_bias)
        #  bias = parameter(self.bias)

        #  return bias + dot_product(x_bias, x) + dot_product(y_bias, y) + self.U(x, y)
        return self.bias + dot_product(self.x_bias, x) + dot_product(
            self.y_bias, y) + self.U(x, y)
Example #5
    def decomp_attend(self, vecsA, vecsB):
        # E = Fq^T Fc; could be sped up with native matrix/tensor multiplication (see the sketch after this example)
        Fq = vecsA  # the raw word vectors, not yet passed through the feed-forward F of Eq. 1
        Fc = vecsB  # likewise, F is not applied here

        expE = []
        for fq in Fq:
            row = []
            for fc in Fc:
                row.append(dt.exp(dt.dot_product(fq, fc)))
            expE.append(row)
        #print ("debug: expE", expE[0][0].value())

        invSumExpEi = []
        for i in xrange(len(Fq)):
            invSumExpEi.append(dt.pow(dt.esum(expE[i]), dt.scalarInput(-1)))

        invSumExpEj = []
        for j in xrange(len(Fc)):
            invSumExpEj.append(
                dt.pow(dt.esum([expE[i][j] for i in xrange(len(Fq))]),
                       dt.scalarInput(-1)))

        beta = []
        for i in xrange(len(Fq)):
            s = dt.esum([Fc[j] * expE[i][j] for j in xrange(len(Fc))])
            beta.append(s * invSumExpEi[i])
        #print("debug: beta", beta[0].value())

        alpha = []
        for j in xrange(len(Fc)):
            # Weighted sum of the other sentence's vectors (Fq), mirroring beta above;
            # the original Fc[j] * expE[i][j] collapses to Fc[j] itself after normalization.
            s = dt.esum([Fq[i] * expE[i][j] for i in xrange(len(Fq))])
            alpha.append(s * invSumExpEj[j])
        #print("debug: alpha", alpha[0].value())

        # Compare
        v1i = [
            dt.logistic(dt.concatenate([Fq[i], beta[i]]))
            for i in xrange(len(Fq))
        ]  # need a function G
        v2j = [
            dt.logistic(dt.concatenate([Fc[j], alpha[j]]))
            for j in xrange(len(Fc))
        ]  # need a function G

        #print ("debug: v1i", v1i[0].value())
        #print ("debug: v2j", v2j[0].value())

        # Aggregate

        v1 = dt.esum(v1i)
        v2 = dt.esum(v2j)

        #print ("debug: v1.value()", v1.value())
        #print ("debug: v2.value()", v2.value())

        #colScore = dt.logistic(dt.dot_product(self.SelHW, dt.concatenate([v1,v2])))
        return dt.dot_product(v1, v2)
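
The nested loops above can be collapsed into native matrix operations, as the first comment suggests. A vectorized sketch of the attention step (a sketch under assumptions, not the project's code: `dt` is the DyNet module, the inputs are lists of same-dimension vector expressions, and `dt.softmax` normalizes each column of a matrix):

    def attend_vectorized(self, vecsA, vecsB):
        # Sketch: matrix form of the expE/beta/alpha computation above.
        A = dt.concatenate_cols(vecsA)          # d x len(vecsA)
        B = dt.concatenate_cols(vecsB)          # d x len(vecsB)
        E = dt.transpose(A) * B                 # E[i][j] = dot_product(a_i, b_j)
        beta = B * dt.softmax(dt.transpose(E))  # column i: soft alignment for a_i over B
        alpha = A * dt.softmax(E)               # column j: soft alignment for b_j over A
        return beta, alpha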
Example #6
File: dy_model.py Project: jcyk/CWS
    def truth_score(self, word_seq):

        wembs = [self.param_exprs['<bos>']]+[self.word_repr(word) for word in word_seq]
        init_state = self.params['lstm'].initial_state()
        hidden_states = init_state.transduce(wembs)
        score = dy.scalarInput(0.)
        for h, w in zip(hidden_states[:-1],wembs[1:]):
            y = dy.tanh(self.param_exprs['pW'] * h + self.param_exprs['pb'])
            score = score + dy.dot_product(y,w) +dy.dot_product(w,self.param_exprs['U']) 
        return score
Example #7
    def greedy_search(self, char_seq, truth = None, mu =0.):
        init_state = self.params['lstm'].initial_state().add_input(self.param_exprs['<bos>'])
        init_y = dy.tanh(self.param_exprs['pW'] * init_state.output() + self.param_exprs['pb'])
        init_score = dy.scalarInput(0.)
        init_sentence = Sentence(score=init_score.scalar_value(),score_expr=init_score,LSTMState =init_state, y= init_y , prevState = None, wlen=None, golden=True)
        
        if truth is not None:
            cembs = [ dy.dropout(dy.lookup(self.params['embed'],char),self.options['dropout_rate']) for char in char_seq ]
        else:
            cembs = [dy.lookup(self.params['embed'],char) for char in char_seq ]
            #cembs = [ dy.dropout(dy.lookup(self.params['embed'],char),self.options['dropout_rate']) for char in char_seq ]

        start_agenda = init_sentence
        agenda = [start_agenda]

        for idx, _ in enumerate(char_seq,1): # from left to right, character by character
            now = None
            for wlen in range(1,min(idx,self.options['max_word_len'])+1): # generate word candidate vectors
                # join segmentation sent + word
                word = self.word_repr(char_seq[idx-wlen:idx], cembs[idx-wlen:idx])
                sent = agenda[idx-wlen]

                if truth is not None:
                    word = dy.dropout(word,self.options['dropout_rate'])
                
                word_score = dy.dot_product(word,self.param_exprs['U'])

                if truth is not None:
                    golden =  sent.golden and truth[idx-1]==wlen
                    margin = dy.scalarInput(mu*wlen if truth[idx-1]!=wlen else 0.)
                    score = margin + sent.score_expr + dy.dot_product(sent.y, word) + word_score
                else:
                    golden = False
                    score = sent.score_expr + dy.dot_product(sent.y, word) + word_score


                good = (now is None or now.score < score.scalar_value())
                if golden or good:
                    new_state = sent.LSTMState.add_input(word)
                    new_y = dy.tanh(self.param_exprs['pW'] * new_state.output() + self.param_exprs['pb'])
                    new_sent = Sentence(score=score.scalar_value(),score_expr=score,LSTMState=new_state,y=new_y, prevState=sent, wlen=wlen, golden=golden)
                    if good:
                        now = new_sent
                    if golden:
                        golden_sent = new_sent

            agenda.append(now)
            if truth is not None and truth[idx-1]>0 and (not now.golden):
                return (now.score_expr - golden_sent.score_expr)

        if truth is not None:
            return (now.score_expr - golden_sent.score_expr)

        return agenda
Example #8
    def truth_score(self, word_seq):

        wembs = [self.param_exprs['<bos>']
                 ] + [self.word_repr(word) for word in word_seq]
        init_state = self.params['lstm'].initial_state()
        hidden_states = init_state.transduce(wembs)
        score = dy.scalarInput(0.)
        for h, w in zip(hidden_states[:-1], wembs[1:]):
            y = dy.tanh(self.param_exprs['pW'] * h + self.param_exprs['pb'])
            score = score + dy.dot_product(y, w) + dy.dot_product(
                w, self.param_exprs['U'])
        return score
Example #9
    def __call__(self, x, y):
        W = dy.parameter(self.W)
        w_x = dy.parameter(self.w_x)
        w_y = dy.parameter(self.w_y)
        b = dy.parameter(self.b)

        out = dy.transpose(x) * W * y
        out += dy.dot_product(w_x, x)
        out += dy.dot_product(w_y, y)
        # Place the scalar score in the last slot of an n_out vector (zeros elsewhere)
        out = dy.concatenate([dy.scalarInput(0)] * (self.n_out - 1) + [out])
        out += b

        return out
Example #10
    def beam_search(self, char_seq, truth=None, mu=0.):
        start_agenda = Agenda(self.options['beam_size'])
        init_state = self.params['lstm'].initial_state().add_input(
            self.param_exprs['<bos>'])
        init_y = dy.tanh(self.param_exprs['pW'] * init_state.output() +
                         self.param_exprs['pb'])
        init_score = dy.scalarInput(0.)
        start_agenda.push(
            Sentence(score=init_score.scalar_value(),
                     score_expr=init_score,
                     LSTMState=init_state,
                     y=init_y,
                     prevState=None,
                     wlen=None))
        agenda = [start_agenda]

        for idx, _ in enumerate(
                char_seq, 1):  # from left to right, character by character
            now = Agenda(self.options['beam_size'])
            for wlen in xrange(1,
                               min(idx, self.options['max_word_len']) +
                               1):  # generate candidate word vectors
                word = self.word_repr(char_seq[idx - wlen:idx])
                word_score = dy.dot_product(word, self.param_exprs['U'])
                for sent in agenda[idx - wlen]:  # join segmentation
                    if truth is not None:
                        margin = dy.scalarInput(
                            mu * wlen if truth[idx - 1] != wlen else 0.)
                        score = margin + sent.score_expr + dy.dot_product(
                            sent.y, word) + word_score
                    else:
                        score = sent.score_expr + dy.dot_product(
                            sent.y, word) + word_score

                    if now.happy_with(score.scalar_value()):
                        new_state = sent.LSTMState.add_input(word)
                        new_y = dy.tanh(self.param_exprs['pW'] *
                                        new_state.output() +
                                        self.param_exprs['pb'])
                        now.push(
                            Sentence(score=score.scalar_value(),
                                     score_expr=score,
                                     LSTMState=new_state,
                                     y=new_y,
                                     prevState=sent,
                                     wlen=wlen))
            agenda.append(now)

        if truth is not None:
            return agenda[-1].max().score_expr
        return agenda
Example #11
File: test.py Project: danielhers/cnn
    def test_update(self):
        ones = np.ones((10, 10))

        dy.renew_cg()

        a = self.p1 * self.lp1[1]
        b = self.p2 * self.lp2[1]
        loss = dy.dot_product(a, b) / 100

        self.assertEqual(loss.scalar_value(), 10, msg=str(loss.scalar_value()))

        loss.backward()

        # Check the gradients
        self.assertTrue(np.allclose(self.p1.grad_as_array(), 0.1 * ones),
                        msg=np.array_str(self.p1.grad_as_array()))
        self.assertTrue(np.allclose(self.p2.grad_as_array(), 0.1 * ones),
                        msg=np.array_str(self.p2.grad_as_array()))
        self.assertTrue(np.allclose(self.lp1.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp1.grad_as_array()))
        self.assertTrue(np.allclose(self.lp2.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp2.grad_as_array()))

        self.trainer.update()

        # Check the updated parameters
        self.assertTrue(np.allclose(self.p1.as_array(), ones * 0.99),
                        msg=np.array_str(self.p1.as_array()))
        self.assertTrue(np.allclose(self.p2.as_array(), ones * 0.99),
                        msg=np.array_str(self.p2.as_array()))
        self.assertTrue(np.allclose(self.lp1.as_array()[1], ones[0] * 0.9),
                        msg=np.array_str(self.lp1.as_array()[1]))
        self.assertTrue(np.allclose(self.lp2.as_array()[1], ones[0] * 0.9),
                        msg=np.array_str(self.lp2.as_array()))
Example #12
    def intra_sent_attend(self, vecs):
        numVecs = len(vecs)
        fVecs = [dt.tanh(self.SelIntraFW * v) for v in vecs]
        expE = []
        for i, fq in enumerate(fVecs):
            row = []
            for j, fc in enumerate(fVecs):
                row.append(
                    dt.exp(
                        dt.dot_product(fq, fc) +
                        self.SelIntraBias[i - j +
                                          int(config.d["DIST_BIAS_DIM"] / 2)]))
            expE.append(row)

        invSumExpE = []
        for i in xrange(numVecs):
            invSumExpE.append(dt.pow(dt.esum(expE[i]), dt.scalarInput(-1)))

        alpha = []
        for i in xrange(numVecs):
            s = dt.esum([vecs[j] * expE[i][j] for j in xrange(numVecs)])
            alpha.append(s * invSumExpE[i])

        return [
            dt.tanh(self.SelIntraHW * dt.concatenate([v, a]))
            for v, a in zip(vecs, alpha)
        ]
Example #13
File: test.py Project: jayantk/cnn
 def test_gradient_sanity(self):
     dy.renew_cg()
     x=dy.inputTensor(self.v1)
     y=dy.inputTensor(self.v2)
     l = dy.dot_product(x,y)
     l.forward()
     self.assertRaises(RuntimeError, gradient_callable, x)
Example #14
 def test_gradient_sanity(self):
     dy.renew_cg()
     x = dy.inputTensor(self.v1)
     y = dy.inputTensor(self.v2)
     l = dy.dot_product(x, y)
     l.forward()
     self.assertRaises(RuntimeError, gradient_callable, x)
Example #15
    def __call__(self, htA, HO, transform_flag=True):
        """

        :param htA:
        :param HO:
        :param transform_flag: determine if the model needs selective transformation,
        :return:
        """
        seq_len = len(HO)
        HO_hat = []
        Weights = []
        for i in range(seq_len):
            hiO = HO[i]
            if transform_flag:
                hiO_hat = hiO + dy.rectify(self.W_A * htA + self.W_O * hiO + self.b)
            else:
                hiO_hat = hiO
            wi = dy.tanh(dy.dot_product(self.W_concat, dy.concatenate([htA, hiO_hat])))
            HO_hat.append(hiO_hat)
            Weights.append(wi)
        HO_hat = dy.concatenate([dy.reshape(ele, d=(1, 2 * self.dim_opi)) for ele in HO_hat])
        Weights = dy.concatenate(Weights)
        # length: seq_len
        Weights = dy.softmax(Weights)
        Weights_np = Weights.npvalue()
        ho_summary_t = dy.reshape(Weights, (1, seq_len)) * HO_hat
        return dy.reshape(ho_summary_t, (2 * self.dim_opi,)), Weights_np
Example #16
 def recurrence(self, xt, hmtm1, cmtm1, h_tilde_tm1, dropout_flag):
     """
     recurrence function of LSTM with truncated self-attention
     :param xt: current input, shape: (n_in)
     :param hmtm1: hidden memory [htm1, ..., h1], shape: (n_steps, n_out)
     :param cmtm1: cell memory: (n_steps, n_out)
     :param h_tilde_tm1: previous hidden summary, shape: (n_out, )
     :param h_tilde_tm1: previous cell summary
     :param dropout_flag: where perform partial dropout
     :return:
     """
     score = dy.concatenate([dy.dot_product(self.u, dy.tanh(\
         self.W_h * hmtm1[i] + self.W_x * xt + self.W_htilde * h_tilde_tm1)) for i in range(self.n_steps)])
     # normalize the attention score
     score = dy.softmax(score)
     # shape: (1, n_out)
     h_tilde_t = dy.reshape(dy.transpose(score) * hmtm1, d=(self.n_out,))
     c_tilde_t = dy.transpose(score) * cmtm1
     Wx = self.W * xt
     if dropout_flag:
         # perform partial dropout over the lstm
         Wx = dy.dropout(Wx, self.dropout_rate)
     Uh = self.U * h_tilde_t
     # shape: (4*n_out)
     sum_item = Wx + Uh + self.b
     it = dy.logistic(sum_item[:self.n_out])
     ft = dy.logistic(sum_item[self.n_out:2*self.n_out])
     ot = dy.logistic(sum_item[2*self.n_out:3*self.n_out])
     c_hat = dy.tanh(sum_item[3*self.n_out:])
     ct = dy.cmult(ft, dy.reshape(c_tilde_t, d=(self.n_out,))) + dy.cmult(it, c_hat)
     ht = dy.cmult(ot, dy.tanh(ct))
     hmt = dy.concatenate([hmtm1[1:], dy.reshape(ht, (1, self.n_out))])
     cmt = dy.concatenate([cmtm1[1:], dy.reshape(ct, (1, self.n_out))])
     return hmt, cmt, h_tilde_t
Example #17
    def recurrence(self, xt, hmtm1, h_history_tm1, dropout_flag):
        """

        :param xt: input vector at the time step t
        :param hmtm1: hidden memories in previous n_steps steps
        :param h_tilde_tm1: previous hidden summary
        :param dropout_flag: make a decision for conducting partial dropout
        :return:
        """
        score = dy.concatenate([dy.dot_product(self.u, dy.tanh( \
            self.W_h * hmtm1[i] + self.W_x * xt + self.W_htilde * h_history_tm1)) for i in range(self.n_steps)])
        # normalize the attention score
        score = dy.softmax(score)
        # shape: (1, n_out), history of [h[t-n_steps-1], ..., h[t-2]]
        h_history_t = dy.reshape(dy.transpose(score) * hmtm1[:-1], d=(self.n_out,))
        htm1 = hmtm1[-1]
        #h_tilde_t = dy.concatenate([h_history_t, htm1])
        h_tilde_t = htm1 + dy.rectify(h_history_t)
        if dropout_flag:
            # perform partial dropout, i.e., add dropout over the matrices W_x*
            rt = dy.logistic(dy.dropout(self.W_xr, self.dropout_rate) * xt + self.W_hr * h_tilde_t + self.br)
            zt = dy.logistic(dy.dropout(self.W_xz, self.dropout_rate) * xt + self.W_hz * h_tilde_t + self.bz)
            ht_hat = dy.tanh(dy.dropout(self.W_xh, self.dropout_rate) * xt + self.W_hh * dy.cmult(rt, h_tilde_t) \
                             + self.bh)
            ht = dy.cmult(zt, h_tilde_t) + dy.cmult((1.0 - zt), ht_hat)
        else:
            rt = dy.logistic(self.W_xr * xt + self.W_hr * h_tilde_t + self.br)
            zt = dy.logistic(self.W_xz * xt + self.W_hz * h_tilde_t + self.bz)
            ht_hat = dy.tanh(self.W_xh * xt + self.W_hh * dy.cmult(rt, h_tilde_t) + self.bh)
            ht = dy.cmult(zt, h_tilde_t) + dy.cmult((1.0 - zt), ht_hat)
        hmt = dy.concatenate([hmtm1[1:], dy.reshape(ht, (1, self.n_out))])
        return hmt, h_history_t
Example #18
    def test_update(self):
        ones = np.ones((10, 10))
        updated = np.ones((10, 10)) * 0.99
        gradient = np.ones((10, 10)) * 0.01

        dy.renew_cg()
        pp1 = dy.parameter(self.p1)
        pp2 = dy.parameter(self.p2)

        a = pp1 * self.lp1[1]
        b = pp2 * self.lp2[1]
        l = dy.dot_product(a, b) / 100
        self.assertEqual(l.scalar_value(), 10, msg=str(l.scalar_value()))
        l.backward()

        self.assertTrue(np.allclose(self.p1.grad_as_array(), 0.1 * ones),
                        msg=np.array_str(self.p1.grad_as_array()))
        self.assertTrue(np.allclose(self.p2.grad_as_array(), 0.1 * ones),
                        msg=np.array_str(self.p2.grad_as_array()))
        self.assertTrue(np.allclose(self.lp1.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp1.grad_as_array()))
        self.assertTrue(np.allclose(self.lp2.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp2.grad_as_array()))

        self.trainer.update()

        self.assertTrue(np.allclose(self.p1.as_array(), ones * 0.99),
                        msg=np.array_str(self.p1.as_array()))
        self.assertTrue(np.allclose(self.p2.as_array(), ones * 0.99),
                        msg=np.array_str(self.p2.as_array()))
        self.assertTrue(np.allclose(self.lp1.as_array()[1], ones[0] * 0.9),
                        msg=np.array_str(self.lp1.as_array()[1]))
        self.assertTrue(np.allclose(self.lp2.as_array()[1], ones[0] * 0.9),
                        msg=np.array_str(self.lp2.as_array()))
Example #19
    def _get_prob_of_each_word_at_every_pos(self, x, w2i, model_params):
        context = x["context"]
        question = x["question"]

        # encode the context
        c_f_init = model_params["c_fwdRnn"].initial_state()
        c_b_init = model_params["c_bwdRnn"].initial_state()
        c_wemb = [self._word_rep(w, w2i, model_params) for w in context]
        c_f_exps = c_f_init.transduce(c_wemb)
        c_b_exps = c_b_init.transduce(reversed(c_wemb))
        # biGru state for context
        c_bi = [
            dy.concatenate([f, b])
            for f, b in zip(c_f_exps, reversed(c_b_exps))
        ]
        # encode the question
        q_f_init = model_params["q_fwdRnn"].initial_state()
        q_b_init = model_params["q_bwdRnn"].initial_state()
        q_wemb = [self._word_rep(w, w2i, model_params) for w in question]
        q_f_exps_last = q_f_init.transduce(q_wemb)[-1]
        q_b_exps_last = q_b_init.transduce(reversed(q_wemb))[-1]
        # biGru state for question
        q_bi = dy.concatenate([q_f_exps_last, q_b_exps_last])

        # for each word in the context, calculate its probability to be the answer
        score_of_each_word_at_every_pos = [
            dy.dot_product(c_bi[i], q_bi) for i in range(len(context))
        ]
        prob_of_each_word_at_every_pos = dy.softmax(
            dy.concatenate(score_of_each_word_at_every_pos))

        return prob_of_each_word_at_every_pos
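
A hypothetical caller can evaluate the returned expression and read off the most probable answer position with numpy (assuming `numpy` is imported as `np`):

        probs = self._get_prob_of_each_word_at_every_pos(x, w2i, model_params)
        best_pos = int(np.argmax(probs.npvalue()))  # most probable context position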
Example #20
    def build_graph(self, x):
        conv_W_1 = dy.parameter(self.params['conv_W_1'])
        conv_b_1 = dy.parameter(self.params['conv_b_1'])
        conv_W_2 = dy.parameter(self.params['conv_W_2'])
        conv_b_2 = dy.parameter(self.params['conv_b_2'])
        conv_W_3 = dy.parameter(self.params['conv_W_3'])
        conv_b_3 = dy.parameter(self.params['conv_b_3'])
        W = dy.parameter(self.params['W'])
        b = dy.parameter(self.params['b'])

        (n, d), _ = x.dim()
        x = dy.reshape(x, (1, n, d))

        # 1D convolutional layers
        conv_1 = dy.tanh(
            dy.conv2d_bias(x, conv_W_1, conv_b_1, (1, 1), is_valid=False))
        conv_2 = dy.tanh(
            dy.conv2d_bias(x, conv_W_2, conv_b_2, (1, 1), is_valid=False))
        conv_3 = dy.tanh(
            dy.conv2d_bias(x, conv_W_3, conv_b_3, (1, 1), is_valid=False))

        pool_1 = dy.max_dim(dy.reshape(conv_1, (n, self.options['channel_1'])))
        pool_2 = dy.max_dim(dy.reshape(conv_2, (n, self.options['channel_2'])))
        pool_3 = dy.max_dim(dy.reshape(conv_3, (n, self.options['channel_3'])))

        # fully-connected classification
        pool = dy.concatenate([pool_1, pool_2, pool_3], 0)
        logit = dy.dot_product(pool, W) + b
        return logit
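
A hypothetical call, where `clf` is an instance of the class above and the input sizes are purely illustrative:

dy.renew_cg()
x = dy.inputTensor(np.random.rand(20, 300))  # e.g. 20 word vectors of dimension 300
logit = clf.build_graph(x)                   # scalar score for the whole sequence
print(logit.value())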
Example #21
    def get_morph_analysis_scores(self, morph_analysis_representations,
                                  context_representations):

        # (10) and (11) in Shen et al. "The Role of Context ..."
        def transform_context(context):
            return dynet.tanh(
                dynet.affine_transform([
                    self.transform_context_layer_b.expr(),
                    self.transform_context_layer_W.expr(), context
                ]))
            #return dynet.tanh(dynet.sum_cols(dynet.reshape(context, (int(self.sentence_level_bilstm_contexts_length/2), 2))))

        if self.parameters['debug'] == 1:
            print(("morph_analysis_representations",
                   morph_analysis_representations))
            print(("context_representations", context_representations))

        morph_analysis_scores = \
            [dynet.softmax(
                dynet.concatenate([dynet.dot_product(morph_analysis_representation,
                                                     transform_context(context)) # sum + tanh for context[:half] and context[half:]
                                   for morph_analysis_representation in
                                   morph_analysis_representations[word_pos]]))
                for word_pos, context in enumerate(context_representations)]
        return morph_analysis_scores
Example #22
File: test.py Project: jayantk/cnn
    def test_update(self):
        ones=np.ones((10, 10))
        updated = np.ones((10, 10)) * 0.99
        gradient = np.ones((10, 10)) * 0.01

        dy.renew_cg()
        pp1 = dy.parameter(self.p1)
        pp2 = dy.parameter(self.p2)

        a = pp1 * self.lp1[1]
        b = pp2 * self.lp2[1]
        l = dy.dot_product(a, b) / 100
        self.assertEqual(l.scalar_value(),10,msg=str(l.scalar_value()))
        l.backward()

        self.assertTrue(np.allclose(self.p1.grad_as_array(), 0.1 * ones),msg=np.array_str(self.p1.grad_as_array()))
        self.assertTrue(np.allclose(self.p2.grad_as_array(), 0.1 * ones),msg=np.array_str(self.p2.grad_as_array()))
        self.assertTrue(np.allclose(self.lp1.grad_as_array()[1], ones[0]),msg=np.array_str(self.lp1.grad_as_array()))
        self.assertTrue(np.allclose(self.lp2.grad_as_array()[1], ones[0]),msg=np.array_str(self.lp2.grad_as_array()))

        self.trainer.update()



        self.assertTrue(np.allclose(self.p1.as_array(), ones * 0.99),msg=np.array_str(self.p1.as_array()))
        self.assertTrue(np.allclose(self.p2.as_array(), ones * 0.99),msg=np.array_str(self.p2.as_array()))
        self.assertTrue(np.allclose(self.lp1.as_array()[1], ones[0] * 0.9),msg=np.array_str(self.lp1.as_array()[1]))
        self.assertTrue(np.allclose(self.lp2.as_array()[1], ones[0] * 0.9),msg=np.array_str(self.lp2.as_array()))
Example #23
File: test.py Project: Stanforxc/dynet
    def test_update(self):
        ones = np.ones((10, 10))

        dy.renew_cg()

        a = self.p1 * self.lp1[1]
        b = self.p2 * self.lp2[1]
        loss = dy.dot_product(a, b) / 100

        self.assertEqual(loss.scalar_value(), 10, msg=str(loss.scalar_value()))

        loss.backward()

        # Check the gradients
        self.assertTrue(np.allclose(self.p1.grad_as_array(), 0.1 * ones),
                        msg=np.array_str(self.p1.grad_as_array()))
        self.assertTrue(np.allclose(self.p2.grad_as_array(), 0.1 * ones),
                        msg=np.array_str(self.p2.grad_as_array()))
        self.assertTrue(np.allclose(self.lp1.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp1.grad_as_array()))
        self.assertTrue(np.allclose(self.lp2.grad_as_array()[1], ones[0]),
                        msg=np.array_str(self.lp2.grad_as_array()))

        self.trainer.update()

        # Check the updated parameters
        self.assertTrue(np.allclose(self.p1.as_array(), ones * 0.99),
                        msg=np.array_str(self.p1.as_array()))
        self.assertTrue(np.allclose(self.p2.as_array(), ones * 0.99),
                        msg=np.array_str(self.p2.as_array()))
        self.assertTrue(np.allclose(self.lp1.as_array()[1], ones[0] * 0.9),
                        msg=np.array_str(self.lp1.as_array()[1]))
        self.assertTrue(np.allclose(self.lp2.as_array()[1], ones[0] * 0.9),
                        msg=np.array_str(self.lp2.as_array()))
Example #24
 def score_expression(self, qwVecs, qwAvgVec, qLSTMVec, colnameVec,
                      colWdVecs):
     colPriorScore = dt.dot_product(self.ColW, colnameVec)
     colMaxScore = AvgMaxScore(qwVecs, colWdVecs)
     colAvgScore = AvgScore(qwAvgVec, colnameVec)
     colQLSTMScore = AvgScore(qLSTMVec, colnameVec)
     ret = [colPriorScore, colMaxScore, colAvgScore, colQLSTMScore]
     return ret
Example #25
File: test.py Project: yalechang/dynet
 def test_gradient(self):
     dy.renew_cg()
     x=dy.inputTensor(self.v1)
     y=dy.inputTensor(self.v2)
     l = dy.dot_product(x,y)
     l.forward()
     l.backward(full=True)
     self.assertTrue(np.allclose(x.gradient(), self.v2),
                     msg="{}\n{}\n{}".format(l.value(), x.gradient(), self.v2))
Example #26
 def determine_coverage_by_name(self, qwVecs, avgVec):
     # Disabled: the early return below skips the rough implementation that follows.
     return None
     # Compute question coverage -- hard/rough implementation to test the idea first
     qWdMatchScore = [
         dt.dot_product(qwVec, avgVec).value() for qwVec in qwVecs
     ]
     ret = dt.softmax(dt.inputVector(np.array(qWdMatchScore)))
     return ret
Example #27
    def get_all_next(total_p, _, state, interp, last_op_name, last_arg_ref,
                     last_arg_num, num_candidates, expr_nums_pos, expr_vals,
                     trace):
        if last_op_name == 'exit':
            return
        if last_op_name is not None:
            trace = trace + [(last_op_name, last_arg_num)]
            interp = Interpreter(interp)
            end_expr, expr_val = interp.next_op(last_op_name, last_arg_num)
            if end_expr:
                try:
                    expr_val = float(expr_val)
                except (TypeError, ValueError):  # non-numeric expression result
                    return
                expr_nums_pos = defaultdict(set, expr_nums_pos)
                expr_nums_pos[expr_val].add(state.step)
                num_candidates = num_candidates | {expr_val, -expr_val}
                expr_vals = expr_vals + [expr_val]
                interp = Interpreter()
            state = state.next_state(expr_val, last_op_name, last_arg_ref)
        p_op = dy.log(state.op_probs()).npvalue()
        for op_id, op_name in decoder.opid2name.items():
            if op_name not in interp.valid_ops:
                continue
            op_p = p_op[op_id]
            if op_name == 'load':
                copy_p = state.copy_probs()
                for arg_num in num_candidates:
                    from_pos_prior_p, pos_prior_ref = state.from_prior_prob(
                        arg_num)
                    from_neg_prior_p, neg_prior_ref = state.from_prior_prob(
                        -arg_num, True)
                    from_pos_input_p, pos_input_ref = state.from_input_prob(
                        input_nums_pos[arg_num])
                    from_neg_input_p, neg_input_ref = state.from_input_prob(
                        input_nums_pos[-arg_num], True)
                    from_pos_exprs_p, pos_exprs_ref = state.from_exprs_prob(
                        expr_nums_pos[arg_num])
                    from_neg_exprs_p, neg_exprs_ref = state.from_exprs_prob(
                        expr_nums_pos[-arg_num], True)

                    from_p = dy.concatenate([
                        from_pos_prior_p, from_neg_prior_p, from_pos_input_p,
                        from_neg_input_p, from_pos_exprs_p, from_neg_exprs_p
                    ])
                    arg_ref = (dy.concatenate_cols([
                        pos_prior_ref, neg_prior_ref, pos_input_ref,
                        neg_input_ref, pos_exprs_ref, neg_exprs_ref
                    ]) * copy_p)
                    instruct_p = (
                        op_p + dy.log(dy.dot_product(copy_p, from_p))).value()
                    if not math.isinf(instruct_p):
                        yield total_p + instruct_p, np.random.uniform(
                        ), state, interp, op_name, arg_ref, arg_num, num_candidates, expr_nums_pos, expr_vals, trace
            else:
                instruct_p = op_p
                yield total_p + instruct_p, np.random.uniform(
                ), state, interp, op_name, None, None, num_candidates, expr_nums_pos, expr_vals, trace
Example #28
    def __call__(self, dec_state, enc_states):
        w = dy.parameter(self.W)

        attention_weights = []
        for enc_state in enc_states:
            attention_weight = dy.dot_product(w * enc_state, dec_state)
            attention_weights.append(attention_weight)
        attention_weights = dy.softmax(dy.concatenate(attention_weights))
        return attention_weights
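
The returned weights are typically folded back into a single context vector; a minimal sketch (with `attend` a hypothetical instance of the class above):

attention_weights = attend(dec_state, enc_states)
context = dy.concatenate_cols(enc_states) * attention_weights  # weighted sum of encoder states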
Example #29
File: nnunits.py Project: we1l1n/nsp
    def __call__(self, state_dec, states_enc):
        w = dy.parameter(self.W)
        a_weights = []
        for state_enc in states_enc:
            a_w = dy.dot_product(w * state_enc, state_dec)
            a_weights.append(a_w)

        a_weights = dy.softmax(dy.concatenate(a_weights))
        return a_weights
Example #30
 def __cosine_loss(self, pred, gold):
     sn1 = dy.l2_norm(pred)
     sn2 = dy.l2_norm(gold)
     mult = dy.cmult(sn1, sn2)
     dot = dy.dot_product(pred, gold)
     div = dy.cdiv(dot, mult)
     vec_y = dy.scalarInput(2)
     res = dy.cdiv(1 - div, vec_y)
     return res
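
A standalone cross-check of the formula: the loss is (1 - cosine)/2, so orthogonal vectors give 0.5 (a sketch, assuming `import dynet as dy` and `import numpy as np`):

dy.renew_cg()
pred = dy.inputTensor(np.array([1.0, 0.0]))
gold = dy.inputTensor(np.array([0.0, 1.0]))
cos = dy.cdiv(dy.dot_product(pred, gold),
              dy.cmult(dy.l2_norm(pred), dy.l2_norm(gold)))
loss = dy.cdiv(1 - cos, dy.scalarInput(2))
print(loss.value())  # 0.5: the two vectors are orthogonal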
Example #31
    def score_expression(self, qwVecs, numWdPos):
        if numWdPos == 0:
            kwVec = qwVecs[numWdPos + 1]
        elif numWdPos == 1:
            kwVec = qwVecs[0]
        else:
            kwVec = dt.average(qwVecs[numWdPos - 2:numWdPos])

        ret = dt.dot_product(kwVec, self.OpW)
        return ret
Example #32
def calc_sent_loss(sent):
  # Create a computation graph
  dy.renew_cg()
  
  # Get embeddings for the sentence
  emb = [W_w_p[x] for x in sent]

  # Sample K negative words for each predicted word at each position
  all_neg_words = np.random.choice(nwords, size=2*N*K*len(emb), replace=True, p=word_probabilities)

  # W_w = dy.parameter(W_w_p)
  # Step through the sentence and calculate the negative and positive losses
  all_losses = [] 
  for i, my_emb in enumerate(emb):
    neg_words = all_neg_words[i*K*2*N:(i+1)*K*2*N]
    pos_words = ([sent[x] if x >= 0 else S for x in range(i-N,i)] +
                 [sent[x] if x < len(sent) else S for x in range(i+1,i+N+1)])
    neg_loss = -dy.log(dy.logistic(-dy.dot_product(my_emb, dy.lookup_batch(W_c_p, neg_words))))
    pos_loss = -dy.log(dy.logistic(dy.dot_product(my_emb, dy.lookup_batch(W_c_p, pos_words))))
    all_losses.append(dy.sum_batches(neg_loss) + dy.sum_batches(pos_loss))
  return dy.esum(all_losses)
Example #33
	def cross_entropy_loss(self, score, next_word, cur_word):
		if self.__ls:
			log_prob = dy.log_softmax(score)
			if self.__lm is None:
				loss = - dy.pick_batch(log_prob, next_word) * (1 - self.__ls_eps) - \
					dy.mean_elems(log_prob) * self.__ls_eps
			else:
				loss = - dy.pick_batch(log_prob, next_word) * (1 - self.__ls_eps) - \
					dy.dot_product(self.__lm.next_expr(cur_word), log_prob) * self.__ls_eps
		else:
			loss = dy.pickneglogsoftmax(score, next_word)
		return loss
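
In the second branch, `dy.dot_product` distributes the smoothing mass according to a language-model prior instead of the uniform `dy.mean_elems` term: loss = -(1 - eps) * log p(next) - eps * sum_w q(w) * log p(w). A standalone sketch with a uniform stand-in prior (illustrative sizes; assumes dynet and numpy are imported):

dy.renew_cg()
score = dy.inputTensor(np.random.rand(5))    # unnormalized scores over a 5-word vocab
log_prob = dy.log_softmax(score)
eps = 0.1
prior = dy.inputTensor(np.full(5, 1.0 / 5))  # stand-in for self.__lm.next_expr(cur_word)
loss = -dy.pick(log_prob, 2) * (1 - eps) - dy.dot_product(prior, log_prob) * eps
print(loss.value())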
Example #34
 def cross_entropy_loss(self, s, nw, cw):
     """Calculates the cross-entropy
     """
     if self.ls:
         log_prob = dy.log_softmax(s)
         if self.lm is None:
             loss = - dy.pick_batch(log_prob, nw) * (1 - self.ls_eps) - \
                 dy.mean_elems(log_prob) * self.ls_eps
         else:
             loss = - dy.pick_batch(log_prob, nw) * (1 - self.ls_eps) - \
                 dy.dot_product(self.lm_e, log_prob) * self.ls_eps
     else:
         loss = dy.pickneglogsoftmax_batch(s, nw)
     return loss
Example #35
def bidirect_pass(x, p):
    """
    This function will wrap all the steps needed to feed one sentence through the biLSTM
    :param x: a <list> of indices
    """
    # convert sequence of ints to sequence of embeddings
    #input_seq = [embedding_parameters[i] for i in x]   # embedding_parameters can be used like <dict>
    input_seq = [dy.lookup(embedding_parameters, i, update=False)
                 for i in x]  # embedding_parameters can be used like <dict>

    # convert Parameters to Expressions
    v1 = dy.parameter(pv1)
    b1 = dy.parameter(pb1)
    v2 = dy.parameter(pv2)
    b2 = dy.parameter(pb2)

    # initialize the RNN unit
    fw_rnn_seq = fw_RNN_unit.initial_state()
    bw_rnn_seq = bw_RNN_unit.initial_state()

    # run each timestep(word) through the RNN
    fw_rnn_hidden_outs = fw_rnn_seq.transduce(input_seq)
    bw_rnn_hidden_outs = bw_rnn_seq.transduce(reversed(input_seq))

    second_input_seq = [
        dy.concatenate([f, b])
        for f, b in zip(fw_rnn_hidden_outs, reversed(bw_rnn_hidden_outs))
    ]

    second_fw_rnn_seq = second_fw_RNN_unit.initial_state()
    second_bw_rnn_seq = second_bw_RNN_unit.initial_state()

    fw_rnn_second_hidden_outs = second_fw_rnn_seq.transduce(second_input_seq)
    bw_rnn_second_hidden_outs = second_bw_rnn_seq.transduce(
        reversed(second_input_seq))

    # biLSTM states
    bi = [
        dy.concatenate([f, b]) for f, b in zip(
            fw_rnn_second_hidden_outs, reversed(bw_rnn_second_hidden_outs))
    ]
    # hidden_state at the position of predicate
    bi_pred = bi[p]

    # a two-layer regression model
    outputs = dy.dot_product(v2, dy.tanh(v1 * bi_pred + b1)) + b2

    return outputs
Example #36
def action_in_state_context_bonuses(corpus, state, inputs, W_context_action, W_action, predict_invalid, past_states, past_actions):
    all_inputs = dy.concatenate(inputs)
    bonuses = []
    # The actions being scored may be all actions (for an unconstrained model), so compute
    # the valid actions for this corpus; any action that cannot be applied in the current
    # state simply gets a bonus of 0.
    valid_actions = set(corpus.valid_actions(state))
    for action in corpus.ACTIONS:
        if action in valid_actions:
            next_state = corpus.take_action(state, action)
            embedded_action_sc = dy.inputVector(corpus.embed_action_in_state_context(action, state, next_state, past_states, past_actions))
            bonus = dy.dot_product(W_context_action * embedded_action_sc, all_inputs) + W_action * embedded_action_sc
        else:
            bonus = dy.scalarInput(0)
        bonuses.append(bonus)
    if predict_invalid:
        bonuses.append(dy.scalarInput(0))

    return dy.concatenate(bonuses)
Example #37
File: test.py Project: danielhers/cnn
    def test_set_updated(self):
        self.p2.set_updated(False)
        self.lp1.set_updated(False)

        self.assertTrue(self.p1.is_updated())
        self.assertFalse(self.p2.is_updated())
        self.assertFalse(self.lp1.is_updated())
        self.assertTrue(self.lp2.is_updated())

        self.p1.set_updated(True)
        self.p2.set_updated(False)
        self.lp1.set_updated(False)
        self.lp2.set_updated(True)

        self.assertTrue(self.p1.is_updated())
        self.assertFalse(self.p2.is_updated())
        self.assertFalse(self.lp1.is_updated())
        self.assertTrue(self.lp2.is_updated())

        self.p1.set_updated(False)
        self.p2.set_updated(True)
        self.lp1.set_updated(True)
        self.lp2.set_updated(False)

        self.assertFalse(self.p1.is_updated())
        self.assertTrue(self.p2.is_updated())
        self.assertTrue(self.lp1.is_updated())
        self.assertFalse(self.lp2.is_updated())

        dy.renew_cg()

        a = self.p1 * self.lp1[1]
        b = self.p2 * self.lp2[1]
        loss = dy.dot_product(a, b) / 100
        loss.backward()

        self.trainer.update()

        ones = np.ones((10, 10))
        self.assertTrue(np.allclose(self.p1.as_array(), ones),
                        msg=np.array_str(self.p1.as_array()))
        self.assertTrue(np.allclose(self.lp2.as_array()[1], ones[0]),
                        msg=np.array_str(self.lp2.as_array()))
Example #38
# The code below can be improved by following the speed tricks covered in class:
# 1) Don't repeat operations.
# 2) Minimize the number of operations.
# 3) Minimize the number of CPU-GPU memory copies, make them earlier.

# Create the model
model = dy.ParameterCollection()
trainer = dy.SimpleSGDTrainer(model)
W = model.add_parameters((100,100))

# Create the "training data"
x_vecs = []
y_vecs = []
for i in range(10):
  x_vecs.append(np.random.rand(100))
  y_vecs.append(np.random.rand(100))

# Do the processing
for my_iter in range(1000):
  dy.renew_cg()
  total = 0
  for x in x_vecs:
    for y in y_vecs:
      x_exp = dy.inputTensor(x)
      y_exp = dy.inputTensor(y)
      total = total + dy.dot_product(W * x_exp, y_exp)
  total.forward()
  total.backward()
  trainer.update()
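
Because the matrix product and the dot product are linear in each argument, sum_i sum_j (W x_i) . y_j = (W sum_i x_i) . (sum_j y_j). One possible rewrite applying all three tricks, shrinking each iteration's graph to two input copies and a single dot product (a sketch, not the only valid speed-up):

# Precompute the vector sums once, outside the training loop (tricks 1 and 2)
x_total = np.sum(x_vecs, axis=0)
y_total = np.sum(y_vecs, axis=0)

for my_iter in range(1000):
  dy.renew_cg()
  x_exp = dy.inputTensor(x_total)  # one CPU-GPU copy per operand instead of 100 (trick 3)
  y_exp = dy.inputTensor(y_total)
  total = dy.dot_product(W * x_exp, y_exp)
  total.forward()
  total.backward()
  trainer.update()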