예제 #1
0
    def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
        m = self.__model
        tanh = functions.tanh
        lstm = functions.lstm
        batch_size = len(src_batch)
        src_len = len(src_batch[0])
        src_stoi = self.__src_vocab.stoi
        trg_stoi = self.__trg_vocab.stoi
        trg_itos = self.__trg_vocab.itos
        s_c = wrapper.zeros((batch_size, self.__n_hidden))
        
        # encoding
        s_x = wrapper.make_var([src_stoi('</s>') for _ in range(batch_size)], dtype=np.int32)
        s_i = tanh(m.w_xi(s_x))
        s_c, s_p = lstm(s_c, m.w_ip(s_i))

        for l in reversed(range(src_len)):
            s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
            s_i = tanh(m.w_xi(s_x))
            s_c, s_p = lstm(s_c, m.w_ip(s_i) + m.w_pp(s_p))

        s_c, s_q = lstm(s_c, m.w_pq(s_p))
        hyp_batch = [[] for _ in range(batch_size)]
        
        # decoding
        if is_training:
            accum_loss = wrapper.zeros(())
            trg_len = len(trg_batch[0])
            
            for l in range(trg_len):
                s_j = tanh(m.w_qj(s_q))
                r_y = m.w_jy(s_j)
                s_t = wrapper.make_var([trg_stoi(trg_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
                accum_loss += functions.softmax_cross_entropy(r_y, s_t)
                output = wrapper.get_data(r_y).argmax(1)

                for k in range(batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))

                s_c, s_q = lstm(s_c, m.w_yq(s_t) + m.w_qq(s_q))

            return hyp_batch, accum_loss
        else:
            while len(hyp_batch[0]) < generation_limit:
                s_j = tanh(m.w_qj(s_q))
                r_y = m.w_jy(s_j)
                output = wrapper.get_data(r_y).argmax(1)

                for k in range(batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))

                if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)): break

                s_y = wrapper.make_var(output, dtype=np.int32)
                s_c, s_q = lstm(s_c, m.w_yq(s_y) + m.w_qq(s_q))
            
            return hyp_batch
예제 #2
0
    def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
        m = self.__model
        tanh = functions.tanh
        lstm = functions.lstm
        batch_size = len(src_batch)
        src_len = len(src_batch[0])
        src_stoi = self.__src_vocab.stoi
        trg_stoi = self.__trg_vocab.stoi
        trg_itos = self.__trg_vocab.itos
        s_c = wrapper.zeros((batch_size, self.__n_hidden))
        
        # encoding
        s_x = wrapper.make_var([src_stoi('</s>') for _ in range(batch_size)], dtype=np.int32)
        s_i = tanh(m.w_xi(s_x))
        s_c, s_p = lstm(s_c, m.w_ip(s_i))

        for l in reversed(range(src_len)):
            s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
            s_i = tanh(m.w_xi(s_x))
            s_c, s_p = lstm(s_c, m.w_ip(s_i) + m.w_pp(s_p))

        s_c, s_q = lstm(s_c, m.w_pq(s_p))
        hyp_batch = [[] for _ in range(batch_size)]
        
        # decoding
        if is_training:
            accum_loss = wrapper.zeros(())
            trg_len = len(trg_batch[0])
            
            for l in range(trg_len):
                s_j = tanh(m.w_qj(s_q))
                r_y = m.w_jy(s_j)
                s_t = wrapper.make_var([trg_stoi(trg_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
                accum_loss += functions.softmax_cross_entropy(r_y, s_t)
                output = wrapper.get_data(r_y).argmax(1)

                for k in range(batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))

                s_c, s_q = lstm(s_c, m.w_yq(s_t) + m.w_qq(s_q))

            return hyp_batch, accum_loss
        else:
            while len(hyp_batch[0]) < generation_limit:
                s_j = tanh(m.w_qj(s_q))
                r_y = m.w_jy(s_j)
                output = wrapper.get_data(r_y).argmax(1)

                for k in range(batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))

                if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)): break

                s_y = wrapper.make_var(output, dtype=np.int32)
                s_c, s_q = lstm(s_c, m.w_yq(s_y) + m.w_qq(s_q))
            
            return hyp_batch
예제 #3
0
    def __forward(self,
                  is_training,
                  src_batch,
                  trg_batch=None,
                  generation_limit=None):
        m = self.__model
        tanh = functions.tanh
        lstm = functions.lstm
        batch_size = len(src_batch)
        hidden_size = self.__n_hidden
        src_len = len(src_batch[0])
        trg_len = len(trg_batch[0]) - 1 if is_training else generation_limit
        src_stoi = self.__src_vocab.stoi
        trg_stoi = self.__trg_vocab.stoi
        trg_itos = self.__trg_vocab.itos

        hidden_zeros = wrapper.zeros((batch_size, hidden_size))
        sum_e_zeros = wrapper.zeros((batch_size, 1))

        # make embedding
        list_x = []
        for l in range(src_len):
            s_x = wrapper.make_var(
                [src_stoi(src_batch[k][l]) for k in range(batch_size)],
                dtype=np.int32)
            list_x.append(s_x)

        # forward encoding
        c = hidden_zeros
        s_a = hidden_zeros
        list_a = []
        for l in range(src_len):
            s_x = list_x[l]
            s_i = tanh(m.w_xi(s_x))
            c, s_a = lstm(c, m.w_ia(s_i) + m.w_aa(s_a))
            list_a.append(s_a)

        # backward encoding
        c = hidden_zeros
        s_b = hidden_zeros
        list_b = []
        for l in reversed(range(src_len)):
            s_x = list_x[l]
            s_i = tanh(m.w_xi(s_x))
            c, s_b = lstm(c, m.w_ib(s_i) + m.w_bb(s_b))
            list_b.insert(0, s_b)

        # decoding
        c = hidden_zeros
        s_p = tanh(m.w_ap(list_a[-1]) + m.w_bp(list_b[0]))
        s_y = wrapper.make_var([trg_stoi('<s>') for k in range(batch_size)],
                               dtype=np.int32)

        hyp_batch = [[] for _ in range(batch_size)]
        accum_loss = wrapper.zeros(()) if is_training else None

        #for n in range(src_len):
        #    print(src_batch[0][n], end=' ')
        #print()

        for l in range(trg_len):
            # calculate attention weights
            list_e = []
            sum_e = sum_e_zeros
            for n in range(src_len):
                s_w = tanh(m.w_aw(list_a[n]) + m.w_bw(list_b[n]) + m.w_pw(s_p))
                r_e = functions.exp(m.w_we(s_w))
                #list_e.append(functions.concat(r_e for _ in range(self.__n_hidden)))
                list_e.append(r_e)
                sum_e += r_e
            #sum_e = functions.concat(sum_e for _ in range(self.__n_hidden))

            # make attention vector
            s_c = hidden_zeros
            s_d = hidden_zeros
            for n in range(src_len):
                s_e = list_e[n] / sum_e
                #s_c += s_e * list_a[n]
                #s_d += s_e * list_b[n]
                s_c += functions.reshape(
                    functions.batch_matmul(list_a[n], s_e),
                    (batch_size, hidden_size))
                s_d += functions.reshape(
                    functions.batch_matmul(list_b[n], s_e),
                    (batch_size, hidden_size))

                #zxcv = wrapper.get_data(s_e)[0][0]
                #if zxcv > 0.9: asdf='#'
                #elif zxcv > 0.7: asdf='*'
                #elif zxcv > 0.3: asdf='+'
                #elif zxcv > 0.1: asdf='.'
                #else: asdf=' '
                #print(asdf * len(src_batch[0][n]), end=' ')

            # generate next word
            c, s_p = lstm(
                c,
                m.w_yp(s_y) + m.w_pp(s_p) + m.w_cp(s_c) + m.w_dp(s_d))
            r_y = m.w_py(s_p)
            output = wrapper.get_data(r_y).argmax(1)
            for k in range(batch_size):
                hyp_batch[k].append(trg_itos(output[k]))

            #print(hyp_batch[0][-1])

            if is_training:
                s_t = wrapper.make_var(
                    [trg_stoi(trg_batch[k][l + 1]) for k in range(batch_size)],
                    dtype=np.int32)
                accum_loss += functions.softmax_cross_entropy(r_y, s_t)
                s_y = s_t
            else:
                if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)):
                    break
                s_y = wrapper.make_var(output, dtype=np.int32)

        return hyp_batch, accum_loss
예제 #4
0
    def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
        m = self.__model
        tanh = functions.tanh
        lstm = functions.lstm
        batch_size = len(src_batch)
        hidden_size = self.__n_hidden
        src_len = len(src_batch[0])
        trg_len = len(trg_batch[0]) - 1 if is_training else generation_limit
        src_stoi = self.__src_vocab.stoi
        trg_stoi = self.__trg_vocab.stoi
        trg_itos = self.__trg_vocab.itos

        hidden_zeros = wrapper.zeros((batch_size, hidden_size))
        sum_e_zeros = wrapper.zeros((batch_size, 1))

        # make embedding
        list_x = []
        for l in range(src_len):
            s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
            list_x.append(s_x)

        # forward encoding
        c = hidden_zeros
        s_a = hidden_zeros
        list_a = []
        for l in range(src_len):
            s_x = list_x[l]
            s_i = tanh(m.w_xi(s_x))
            c, s_a = lstm(c, m.w_ia(s_i) + m.w_aa(s_a))
            list_a.append(s_a)
        
        # backward encoding
        c = hidden_zeros
        s_b = hidden_zeros
        list_b = []
        for l in reversed(range(src_len)):
            s_x = list_x[l]
            s_i = tanh(m.w_xi(s_x))
            c, s_b = lstm(c, m.w_ib(s_i) + m.w_bb(s_b))
            list_b.insert(0, s_b)

        # decoding
        c = hidden_zeros
        s_p = tanh(m.w_ap(list_a[-1]) + m.w_bp(list_b[0]))
        s_y = wrapper.make_var([trg_stoi('<s>') for k in range(batch_size)], dtype=np.int32)

        hyp_batch = [[] for _ in range(batch_size)]
        accum_loss = wrapper.zeros(()) if is_training else None
        
        #for n in range(src_len):
        #    print(src_batch[0][n], end=' ')
        #print()

        for l in range(trg_len):
            # calculate attention weights
            list_e = []
            sum_e = sum_e_zeros
            for n in range(src_len):
                s_w = tanh(m.w_aw(list_a[n]) + m.w_bw(list_b[n]) + m.w_pw(s_p))
                r_e = functions.exp(m.w_we(s_w))
                #list_e.append(functions.concat(r_e for _ in range(self.__n_hidden)))
                list_e.append(r_e)
                sum_e += r_e
            #sum_e = functions.concat(sum_e for _ in range(self.__n_hidden))

            # make attention vector
            s_c = hidden_zeros
            s_d = hidden_zeros
            for n in range(src_len):
                s_e = list_e[n] / sum_e
                #s_c += s_e * list_a[n]
                #s_d += s_e * list_b[n]
                s_c += functions.reshape(functions.batch_matmul(list_a[n], s_e), (batch_size, hidden_size))
                s_d += functions.reshape(functions.batch_matmul(list_b[n], s_e), (batch_size, hidden_size))

                #zxcv = wrapper.get_data(s_e)[0][0]
                #if zxcv > 0.9: asdf='#'
                #elif zxcv > 0.7: asdf='*'
                #elif zxcv > 0.3: asdf='+'
                #elif zxcv > 0.1: asdf='.'
                #else: asdf=' '
                #print(asdf * len(src_batch[0][n]), end=' ')

            # generate next word
            c, s_p = lstm(c, m.w_yp(s_y) + m.w_pp(s_p) + m.w_cp(s_c) + m.w_dp(s_d))
            r_y = m.w_py(s_p)
            output = wrapper.get_data(r_y).argmax(1)
            for k in range(batch_size):
                hyp_batch[k].append(trg_itos(output[k]))

            #print(hyp_batch[0][-1])
            
            if is_training:
                s_t = wrapper.make_var([trg_stoi(trg_batch[k][l + 1]) for k in range(batch_size)], dtype=np.int32)
                accum_loss += functions.softmax_cross_entropy(r_y, s_t)
                s_y = s_t
            else:
                if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)): break
                s_y = wrapper.make_var(output, dtype=np.int32)

        return hyp_batch, accum_loss