Exemplo n.º 1
0
def decode_main(use_cuda, is_sparse):
    """Build the encoder/decoder graph and run a single decoding batch.

    Returns immediately when ``use_cuda`` is requested but Paddle was not
    compiled with CUDA. Otherwise the startup program is executed, the
    first shuffled batch of ``paddle.dataset.wmt14.train`` is fed to the
    default main program, and the LoD of the fetched translation ids is
    printed.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
        is_sparse: forwarded unchanged to ``encoder`` and ``decode``.
    """
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    context = encoder(is_sparse)
    translation_ids, translation_scores = decode(context, is_sparse)

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    # Initial ids and scores: one row per sample, every entry set to 1.
    init_ids_data = np.ones((batch_size, 1), dtype='int64')
    init_scores_data = np.ones((batch_size, 1), dtype='float32')
    # Two LoD levels, each describing `batch_size` sequences of length 1.
    seq_lens = [1] * batch_size
    init_lod = [seq_lens, seq_lens]

    init_ids = fluid.create_lod_tensor(init_ids_data, init_lod, place)
    init_scores = fluid.create_lod_tensor(init_scores_data, init_lod, place)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    feed_order = ['src_word_id']
    main_block = framework.default_main_program().global_block()
    feed_list = [main_block.var(var_name) for var_name in feed_order]
    feeder = fluid.DataFeeder(feed_list, place)

    for data in train_data():
        # Only the first field of each sample is fed. A concrete list is
        # used instead of map() so the feeder never receives a lazy
        # iterator on Python 3.
        feed_dict = feeder.feed([[sample[0]] for sample in data])
        feed_dict['init_ids'] = init_ids
        feed_dict['init_scores'] = init_scores

        result_ids, _ = exe.run(
            framework.default_main_program(),
            feed=feed_dict,
            fetch_list=[translation_ids, translation_scores],
            return_numpy=False)
        # print() with a single argument behaves the same on Python 2 and 3.
        print(result_ids.lod())
        # One batch is enough for this smoke run.
        break
def infer(use_cuda, inference_program, params_dirname):
    """Run one hand-written recommender sample through ``fluid.Inferencer``.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
        inference_program: passed as the first argument of
            ``fluid.Inferencer``.
        params_dirname: passed to ``fluid.Inferencer`` as ``param_path``.

    The first inference result is converted to a numpy array and printed.
    """
    if use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    inferencer = fluid.Inferencer(
        inference_program, param_path=params_dirname, place=place)

    def one_sequence(indexes):
        # create_lod_tensor(data, lod, place): `data` is a list of index
        # sequences and `lod` gives the length of each of them. Every
        # feature here is a single sequence, so lod is [[len(indexes)]].
        return fluid.create_lod_tensor([indexes], [[len(indexes)]], place)

    # Sample values as given in the original example (described there as
    # the first record of paddle.dataset.movielens.test()).
    sample = {
        'user_id': one_sequence([1]),
        'gender_id': one_sequence([1]),
        'age_id': one_sequence([0]),
        'job_id': one_sequence([10]),
        'movie_id': one_sequence([783]),
        'category_id': one_sequence([10, 8, 9]),
        'movie_title': one_sequence([1069, 4140, 2923, 710, 988]),
    }

    results = inferencer.infer(sample, return_numpy=False)

    print("infer results: ", np.array(results[0]))
Exemplo n.º 3
0
def infer(use_cuda, params_dirname):
    """Run the saved recommender model for user 1 on movie 783.

    The inference model stored under ``params_dirname`` is loaded into a
    fresh scope, one hand-written sample is fed, and the predicted rating
    is printed.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
        params_dirname: directory given to ``fluid.io.load_inference_model``.

    Returns:
        A tuple ``(usr_features[0], mov_features[0], ids)``: the first rows
        of the second and third fetched results, and the list ``[783]``.
    """
    if use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    infer_movie_id = 783
    # The title is looked up but not used further down; the call is kept
    # as it was in the original flow.
    movie_table = paddle.dataset.movielens.movie_info()
    infer_movie_name = movie_table[infer_movie_id].title

    exe = fluid.Executor(place)
    inference_scope = fluid.core.Scope()
    ids = [infer_movie_id]

    # (expected feed name, data, recursive sequence lengths), listed in the
    # order the loaded model is expected to declare its feed targets.
    sample_spec = [
        ("user_id", [[np.int64(1)]], [[1]]),
        ("gender_id", [[np.int64(1)]], [[1]]),
        ("age_id", [[np.int64(0)]], [[1]]),
        ("job_id", [[np.int64(10)]], [[1]]),
        ("movie_id", [[np.int64(783)]], [[1]]),
        ("category_id", [np.array([10, 8, 9], dtype='int64')], [[3]]),
        ("movie_title",
         [np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]]),
    ]

    with fluid.scope_guard(inference_scope):
        loaded = fluid.io.load_inference_model(params_dirname, exe)
        [inferencer, feed_target_names, fetch_targets] = loaded

        feed = {}
        for slot, (expected_name, values, seq_lens) in enumerate(sample_spec):
            # Check the feed order before building each tensor.
            assert feed_target_names[slot] == expected_name
            feed[feed_target_names[slot]] = fluid.create_lod_tensor(
                values, seq_lens, place)

        results = exe.run(inferencer,
                          feed=feed,
                          fetch_list=fetch_targets,
                          return_numpy=False)
        predict_rating = np.array(results[0])
        usr_features = np.array(results[1])
        mov_features = np.array(results[2])
        print("Predict Rating of user id 1 on movie id 783 is " +
              str(predict_rating[0][0]))
        print("Actual Rating of user id 1 on movie id 783 is 4.")
    return usr_features[0], mov_features[0], ids
 def test_Variable():
     # Hand a CPU LoD tensor (4 x 784 zeros, four sequences of length 1)
     # straight to uniform_random.
     # NOTE(review): presumably wrapped in an assertRaises by the enclosing
     # test - confirm against the caller.
     zeros = np.zeros((4, 784))
     seq_lens = [[1, 1, 1, 1]]
     lod_input = fluid.create_lod_tensor(zeros, seq_lens, fluid.CPUPlace())
     fluid.layers.uniform_random(lod_input)
Exemplo n.º 5
0
import paddle.fluid as fluid
import numpy as np


def LodTensor_to_Tensor(lod_tensor):
    """Split a LoD tensor into one array per first-level sequence.

    The first entry of ``lod_tensor.lod()`` is read as a list of row
    offsets; each pair of neighbouring offsets delimits one slice of the
    tensor's numpy view. The LoD and the full array are printed first.

    Args:
        lod_tensor: object exposing ``lod()`` and convertible by
            ``np.array``.

    Returns:
        A list of numpy slices, one per pair of neighbouring offsets.
    """
    lod = lod_tensor.lod()
    print("lod :", lod)
    array = np.array(lod_tensor)
    print("array :", array)
    offsets = lod[0]
    # Pair every offset with its successor: (o0, o1), (o1, o2), ...
    bounds = zip(offsets[:-1], offsets[1:])
    return [array[begin:end] for begin, end in bounds]


# Demo: four float32 rows grouped into two sequences of lengths 1 and 3,
# split back into per-sequence arrays and printed.
a = fluid.create_lod_tensor(
    np.asarray([[1.1], [2.2], [3.3], [4.4]], dtype='float32'),
    [[1, 3]],
    fluid.CPUPlace())
new_array = LodTensor_to_Tensor(a)
print(new_array)
Exemplo n.º 6
0
def infer(use_cuda, params_dirname=None):
    """Predict a word from four context words with a saved model.

    Loads the inference model from ``params_dirname`` into a fresh scope,
    feeds four single-word LoD tensors and prints the raw output, the
    index of its maximum, the matching ``word_dict`` key, the output's
    recursive sequence lengths and its shape.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
        params_dirname: directory given to ``fluid.io.load_inference_model``.
    """
    if use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)

    with fluid.scope_guard(fluid.core.Scope()):
        # load_inference_model yields the inference program, the names of
        # the variables that must be fed, and the variables to fetch.
        loaded = fluid.io.load_inference_model(params_dirname, exe)
        [inferencer, feed_target_names, fetch_targets] = loaded

        # Each input is one word index, so its data has shape [1, 1] and
        # its length-based LoD is [[1]]: one level, one sequence, one word.
        context_ids = [211, 6, 96, 4]  # 'among', 'a', 'group', 'of'
        lod = numpy.asarray([[1]], dtype=numpy.int64)
        word_tensors = [
            fluid.create_lod_tensor(
                numpy.asarray([[word_id]], dtype=numpy.int64), lod, place)
            for word_id in context_ids
        ]

        expected_names = ['firstw', 'secondw', 'thirdw', 'fourthw']
        for slot, expected in enumerate(expected_names):
            assert feed_target_names[slot] == expected

        # feed maps feed target name to data; results lines up with
        # fetch_targets.
        feed = {
            feed_target_names[slot]: tensor
            for slot, tensor in enumerate(word_tensors)
        }
        results = exe.run(
            inferencer,
            feed=feed,
            fetch_list=fetch_targets,
            return_numpy=False)
        prediction = results[0]

        print(numpy.array(prediction))
        most_possible_word_index = numpy.argmax(prediction)
        print(most_possible_word_index)
        matching_words = [
            key for key, value in six.iteritems(word_dict)
            if value == most_possible_word_index
        ]
        print(matching_words[0])

        print(prediction.recursive_sequence_lengths())
        np_data = numpy.array(prediction)
        print("Inference Shape: ", np_data.shape)
Exemplo n.º 7
0
def nlp_example2():
    """Run integrated-gradients interpretation on three short reviews.

    Builds a two-branch sequence-convolution sentiment model, wraps it in
    ``IntGradInterpreter`` with parameters from
    ``assets/sent_persistables``, interprets three tokenised reviews on
    ``fluid.CUDAPlace(0)`` and prints the per-token gradient sums grouped
    by review.
    """

    # Modified from https://www.paddlepaddle.org.cn/documentation/docs/en/user_guides/nlp_case/understand_sentiment/README.html
    def convolution_net(emb, input_dim, class_dim, emb_dim, hid_dim):
        # Two conv+pool branches (window sizes 3, then 4) over the same
        # embeddings, joined by one softmax layer.
        branches = [
            fluid.nets.sequence_conv_pool(
                input=emb,
                num_filters=hid_dim,
                filter_size=window,
                act="tanh",
                pool_type="sqrt") for window in (3, 4)
        ]
        return fluid.layers.fc(input=branches, size=class_dim, act="softmax")

    CLASS_DIM = 2
    EMB_DIM = 128
    HID_DIM = 512
    BATCH_SIZE = 128
    word_dict = paddle.dataset.imdb.word_dict()

    def paddle_model(data, alpha, baseline):
        vocab_size = len(word_dict)
        looked_up = fluid.embedding(
            input=data, size=[vocab_size, EMB_DIM], is_sparse=True)
        # Scale the embeddings by alpha before they enter the network.
        scaled = looked_up * alpha
        probs = convolution_net(scaled, vocab_size, CLASS_DIM, EMB_DIM,
                                HID_DIM)
        return scaled, probs

    ig = IntGradInterpreter(
        paddle_model,
        "assets/sent_persistables",  # Training based on https://www.paddlepaddle.org.cn/documentation/docs/en/user_guides/nlp_case/understand_sentiment/README.html
        True,
        model_input_shape=None)

    reviews_str = [
        b'read the book forget the movie', b'this is a great movie',
        b'this is very bad'
    ]
    UNK = word_dict['<unk>']
    # Map every whitespace-separated token to its id, falling back to UNK.
    encoded = [[word_dict.get(token, UNK) for token in sentence.split()]
               for sentence in reviews_str]
    base_shape = [[len(token_ids) for token_ids in encoded]]
    flat_ids = np.array(
        [token_id for token_ids in encoded for token_id in token_ids],
        dtype=np.int64)
    data = fluid.create_lod_tensor(flat_ids, base_shape, fluid.CUDAPlace(0))

    avg_gradients = ig.interpret(
        data,
        label=None,
        baseline='random',
        steps=50,
        num_random_trials=1,
        visual=True,
        save_path='ig_test.jpg')

    sum_gradients = np.sum(avg_gradients, axis=1).tolist()
    # data.lod()[0] is used as row offsets; neighbouring pairs delimit the
    # tokens of one review.
    offsets = data.lod()[0]
    new_array = [
        sum_gradients[begin:end]
        for begin, end in zip(offsets[:-1], offsets[1:])
    ]

    print(new_array)
Exemplo n.º 8
0
 def _get_lod(k):
     # Build a LoD tensor for field `k` of the enclosing `batch_data`:
     # element 0 is reshaped into a single column and element 1 is used
     # as the only level of sequence lengths.
     column = np.array(batch_data[k][0]).reshape([-1, 1])
     seq_lens = [batch_data[k][1]]
     return fluid.create_lod_tensor(column, seq_lens, self.place)
Exemplo n.º 9
0
def infer(use_cuda, params_dirname):
    """Predict the rating of user 1 for movie 783 with a saved model.

    Loads the inference model from ``params_dirname`` into a fresh scope,
    feeds one hand-written sample and prints the predicted rating next to
    the expected one, both labelled with the movie title.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
        params_dirname: directory given to ``fluid.io.load_inference_model``.
    """
    if use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    infer_movie_id = 783
    movie_table = paddle.dataset.movielens.movie_info()
    infer_movie_name = movie_table[infer_movie_id].title

    exe = fluid.Executor(place)

    # One row per model input: (expected feed name, data, length-based LoD).
    # create_lod_tensor(data, recursive_sequence_lengths, place) takes
    # `data` as a list of index sequences and the lengths of those
    # sequences; e.g. data = [[10, 2, 3], [2, 3]] goes with [[3, 2]].
    # The values are those of the original example, described there as the
    # first record of paddle.dataset.movielens.test().
    inputs = [
        ("user_id", [[np.int64(1)]], [[1]]),
        ("gender_id", [[np.int64(1)]], [[1]]),
        ("age_id", [[np.int64(0)]], [[1]]),
        ("job_id", [[np.int64(10)]], [[1]]),
        ("movie_id", [[np.int64(783)]], [[1]]),
        ("category_id", [np.array([10, 8, 9], dtype='int64')], [[3]]),
        ("movie_title",
         [np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]]),
    ]

    with fluid.scope_guard(fluid.core.Scope()):
        # load_inference_model yields the inference program, the names of
        # the variables that must be fed, and the variables to fetch.
        loaded = fluid.io.load_inference_model(params_dirname, exe)
        [inferencer, feed_target_names, fetch_targets] = loaded

        feed = {}
        for index, (name, values, seq_lens) in enumerate(inputs):
            # The model must declare its feed targets in this exact order.
            assert feed_target_names[index] == name
            feed[feed_target_names[index]] = fluid.create_lod_tensor(
                values, seq_lens, place)

        # feed maps feed target name to data; results lines up with
        # fetch_targets.
        results = exe.run(inferencer,
                          feed=feed,
                          fetch_list=fetch_targets,
                          return_numpy=False)
        predict_rating = np.array(results[0])
        predicted_line = ("Predict Rating of user id 1 on movie \"" +
                          infer_movie_name + "\" is " +
                          str(predict_rating[0][0]))
        print(predicted_line)
        actual_line = ("Actual Rating of user id 1 on movie \"" +
                       infer_movie_name + "\" is 4.")
        print(actual_line)
Exemplo n.º 10
0
    def test_errors(self):
        """Check that ``interpolate`` rejects invalid arguments.

        Each nested helper builds one invalid call; the assertions at the
        end state which exception type every helper is expected to raise.
        Helper names are unique so that each case is exercised exactly once
        (a duplicated name would silently replace the earlier helper).
        """
        with program_guard(Program(), Program()):
            # The input of interpolate must be a Variable, not a LoDTensor.
            x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]),
                                         [[1, 1, 1, 1]], fluid.CPUPlace())
            self.assertRaises(TypeError, interpolate, x1)

            def test_mode_type():
                # mode must be "BILINEAR" "TRILINEAR" "NEAREST" "BICUBIC"
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=[12, 12],
                            mode='UNKONWN',
                            align_corners=False)

            def test_input_shape():
                # 1-D input combined with bicubic mode.
                x = fluid.data(name="x", shape=[2], dtype="float32")
                interpolate(x,
                            size=[12, 12],
                            mode='BICUBIC',
                            align_corners=False)

            def test_align_corners():
                # align_corners is given an int instead of a bool.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x, size=[12, 12], mode='BICUBIC', align_corners=3)

            def test_out_shape():
                # size has a single element for a 4-D input.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=[12],
                            mode='bicubic',
                            align_corners=False)

            def test_attr_data_format():
                # for 5-D input, data_format only can be NCDHW or NDHWC
                x = fluid.data(name="input",
                               shape=[2, 3, 6, 9, 4],
                               dtype="float32")
                interpolate(x,
                            size=[4, 8, 4, 5],
                            mode='trilinear',
                            data_format='NHWC')

            def test_actual_shape():
                # The input is a LoDTensor rather than a Variable.
                x = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]),
                                            [[1, 1, 1, 1]], fluid.CPUPlace())
                interpolate(x,
                            size=[12, 12],
                            mode='BICUBIC',
                            align_corners=False)

            def test_scale_value():
                # the scale must be greater than zero.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=None,
                            mode='BICUBIC',
                            align_corners=False,
                            scale_factor=-2.0)

            def test_attr_5D_input():
                # data_format is valid for 5-D input here; presumably the
                # 4-element size is what gets rejected - TODO confirm.
                x = fluid.data(name="input",
                               shape=[2, 3, 6, 9, 4],
                               dtype="float32")
                interpolate(x,
                            size=[4, 8, 4, 5],
                            mode='trilinear',
                            data_format='NDHWC')

            def test_scale_type():
                # scale_factor is a LoDTensor rather than a number, list or
                # Variable.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                scale = fluid.create_lod_tensor(np.array([-1, 3, 5,
                                                          5]), [[1, 1, 1, 1]],
                                                fluid.CPUPlace())
                interpolate(x,
                            size=None,
                            mode='bicubic',
                            align_corners=False,
                            scale_factor=scale)

            def test_align_mode():
                # align_mode is set to 2.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=None,
                            mode='nearest',
                            align_corners=False,
                            align_mode=2,
                            scale_factor=1.0)

            def test_outshape_and_scale():
                # Neither size nor scale_factor is given.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=None,
                            mode='bicubic',
                            align_corners=False,
                            scale_factor=None)

            def test_align_corners_and_nearest():
                # align_corners=True combined with nearest mode.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=None,
                            mode='nearest',
                            align_corners=True,
                            scale_factor=None)

            def test_scale_shape():
                # scale_factor has three entries for a 4-D input (nearest).
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=None,
                            mode='nearest',
                            align_corners=False,
                            scale_factor=[1, 2, 2])

            def test_scale_shape_bicubic():
                # Same three-entry scale_factor, bicubic mode. This helper
                # used to reuse the name test_scale_value, which hid the
                # negative-scale case above.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=None,
                            mode='bicubic',
                            align_corners=False,
                            scale_factor=[1, 2, 2])

            def test_size_and_scale():
                # NOTE(review): identical call to test_outshape_and_scale;
                # kept so the set of assertions is unchanged.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size=None,
                            mode='bicubic',
                            align_corners=False,
                            scale_factor=None)

            def test_size_and_scale2():
                # Both size and scale_factor are given.
                x = fluid.data(name="input",
                               shape=[2, 3, 6, 9, 4],
                               dtype="float32")
                interpolate(x,
                            size=[2, 2, 2],
                            mode='trilinear',
                            align_corners=False,
                            scale_factor=2.0)

            def test_size_type():
                # size is a set instead of a list/tuple/Variable.
                x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
                interpolate(x,
                            size={2, 2},
                            mode='bicubic',
                            align_corners=False)

            self.assertRaises(ValueError, test_mode_type)
            self.assertRaises(ValueError, test_input_shape)
            self.assertRaises(TypeError, test_align_corners)
            self.assertRaises(ValueError, test_attr_data_format)
            self.assertRaises(TypeError, test_actual_shape)
            self.assertRaises(ValueError, test_scale_value)
            self.assertRaises(ValueError, test_out_shape)
            self.assertRaises(ValueError, test_attr_5D_input)
            self.assertRaises(TypeError, test_scale_type)
            self.assertRaises(ValueError, test_align_mode)
            self.assertRaises(ValueError, test_outshape_and_scale)
            self.assertRaises(ValueError, test_align_corners_and_nearest)
            self.assertRaises(ValueError, test_scale_shape)
            self.assertRaises(ValueError, test_scale_shape_bicubic)
            self.assertRaises(ValueError, test_size_and_scale)
            self.assertRaises(ValueError, test_size_and_scale2)
            self.assertRaises(TypeError, test_size_type)
Exemplo n.º 11
0
# Encode the remaining sample headlines; data1..data5 and `texts` are
# expected to be defined earlier in the script.
data6 = get_data("同一户型,为什么高层比低层要贵那么多?")
data7 = get_data("揭秘A股周涨5%资金动向:追捧2类股,抛售600亿香饽饽")
data8 = get_data("宋慧乔陷入感染危机,前夫宋仲基不戴口罩露面,身处国外神态轻松")
data9 = get_data("此盆栽花很好养,花美似牡丹,三季开花,南北都能养,很值得栽培")  # does not belong to any category

for sample in (data1, data2, data3, data4, data5, data6, data7, data8,
               data9):
    texts.append(sample)

# Number of words in each sentence
base_shape = [[len(c) for c in texts]]
# Build the LoDTensor
tensor_words = fluid.create_lod_tensor(texts, base_shape, place)
# Run the prediction
result = exe.run(program=infer_program,
                 feed={feeded_var_names[0]: tensor_words},  # data to predict
                 fetch_list=target_var)

names = ["文化", "娱乐", "体育", "财经", "房产", "汽车", "教育", "科技", "国际", "证券"]

# Get the index of the maximum value for every sentence. argsort sorts in
# ascending order by default, so the last index of each row is the largest
# score. The sort does not depend on the loop variable, so do it once.
ranked = np.argsort(result)
for i in range(len(texts)):
    lab = ranked[0][i][-1]
    print("预测结果:%d, 名称:%s, 概率:%f" % (lab, names[lab], result[0][i][lab]))
Exemplo n.º 12
0
 def _get_lod(k):
     # Turn field `k` of the enclosing `batch_data` into a LoD tensor:
     # element 0 becomes a single column of values, element 1 is the only
     # level of sequence lengths.
     values = np.array(batch_data[k][0]).reshape([-1, 1])
     lengths = [batch_data[k][1]]
     return fluid.create_lod_tensor(values, lengths, self.place)
Exemplo n.º 13
0
def infer():
    # Load arguments
    args = parse_args()
    # args.batch_size = 1

    word2id = data.read_dictionary("data/pre_trained_word2id.pkl")
    embeddings = np.load("data/pre_trained_embeddings.npy")
    # word2id = data.read_dictionary("data/pre_trained_copy_mini_word2id.pkl")
    # embeddings = np.load("data/pre_trained_copy_mini_embeddings.npy")

    # word2id_output_mini = {}
    # for i, k in enumerate(word2id):
    #     word2id_output_mini[k] = i
    #     if i > 9100:
    #         break
    # word2id_output_mini["<S>"] = 1
    # word2id_output_mini["<E>"] = 2
    # word2id = word2id_output_mini

    word2id_output = word2id.copy()
    word_ori_size = len(word2id)
    # word_mini_size = len(word2id_output)
    # word_size = word_ori_size
    # word_size = word_mini_size

    word_size = 0
    tag_size = 0
    for k in tag2label:
        if tag2label[k] > tag_size:
            tag_size = tag2label[k]
        tag2label[k] += args.max_length
        if tag2label[k] > word_size:
            word_size = tag2label[k]
    # word2id_output.update(tag2label)
    word2id_output = tag2label
    word2id_output["<S>"] = word_size + 1
    word2id_output["<E>"] = word_size + 2
    word_size += 3
    tag_size += 3
    print("output size", word_size, tag_size)

    # # Dictrionaries init
    # word2id = data.read_dictionary("data/pre_trained_word2id.pkl")
    # embeddings = np.load("data/pre_trained_embeddings.npy")
    # word2id_output = word2id.copy()
    # word_mini_size = len(word2id)
    # word_size = 0
    # for k in tag2label:
    #     tag2label[k] += word_mini_size
    #     if tag2label[k] > word_size:
    #         word_size = tag2label[k]
    # tag2label["<S>"] = word_size + 1
    # tag2label["<E>"] = word_size + 2
    # word_size += 3
    # word2id_output.update(tag2label)
    # # print(type(word2id), len(word2id))
    # # print(type(entity2id), len(entity2id))
    # # print(type(pos2id), len(pos2id))
    # # print(type(word2id_output), len(word2id_output))
    id2entity = {}
    for k in entity2id:
        id2entity[entity2id[k]] = k
    id2word = {}
    for k in word2id:
        id2word[word2id[k]] = k
    id2word_output = {}
    for k in word2id_output:
        id2word_output[word2id_output[k]] = k
    src_dict, trg_dict = id2word, id2word_output

    # Load data
    # data_train = data_load("data/train_pos.txt",
    #         data=data, word2id=word2id, entity2id=entity2id,
    #         pos2id=pos2id, word2id_output=word2id_output,
    #         event_args=event_args)
    data_train = data_load("data/ace_data/train.txt",
                           data=data,
                           word2id=word2id,
                           entity2id=entity2id,
                           pos2id=pos2id,
                           word2id_output=word2id_output,
                           event_args=event_args,
                           generate=True)
    data_dev = data_load("data/ace_data/dev.txt",
                         data=data,
                         word2id=word2id,
                         entity2id=entity2id,
                         pos2id=pos2id,
                         word2id_output=word2id_output,
                         event_args=event_args,
                         generate=True)
    data_test = data_load("data/ace_data/test.txt",
                          data=data,
                          word2id=word2id,
                          entity2id=entity2id,
                          pos2id=pos2id,
                          word2id_output=word2id_output,
                          event_args=event_args,
                          generate=True)
    # data_test = data_train

    print("=====Init scores")
    scores = generate_pr(word_dict=id2word_output)
    scores.append_label(data_test)

    # Inference
    net = model.net(
        args.embedding_dim,
        args.encoder_size,
        args.decoder_size,
        word_ori_size,
        word_size,
        tag_size,
        True,
        # False,
        beam_size=args.beam_size,
        max_length=args.max_length,
        source_entity_dim=len(entity2id),
        source_pos_dim=len(pos2id),
        embedding_entity_dim=args.embedding_entity_dim,
        embedding_pos_dim=args.embedding_pos_dim,
        end_id=word2id_output["<E>"])

    # test_batch_generator = paddle.batch(
    #     paddle.reader.shuffle(
    #         paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
    #     batch_size=args.batch_size,
    #     drop_last=False)

    dev_batch_generator = paddle.batch(paddle.reader.buffered(data_dev,
                                                              size=1000),
                                       batch_size=args.batch_size,
                                       drop_last=False)
    test_batch_generator = paddle.batch(paddle.reader.buffered(data_test,
                                                               size=1000),
                                        batch_size=args.batch_size,
                                        drop_last=False)

    print("begin memory optimization ...")
    # fluid.memory_optimize(train_program)
    fluid.memory_optimize(framework.default_main_program())
    print("end memory optimization ...")

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())
    # # exe = fluid.ParallelExecutor(use_cuda=args.use_gpu)
    # os.environ['CPU_NUM'] = "2"
    # exe = fluid.parallel_executor.ParallelExecutor(
    #         use_cuda=args.use_gpu, num_trainers=2,
    #         # loss_name=avg_cost.name,
    #         main_program=fluid.default_main_program())

    # LOAD Model
    model_path = os.path.join(args.save_dir, str(args.load_pass_num))
    fluid.io.load_persistables(executor=exe,
                               dirname=model_path,
                               main_program=framework.default_main_program())
    print("==Model loaded", args.save_dir)

    translation_ids = net.translation_ids
    translation_scores = net.translation_scores
    feed_order = net.feeding_list

    feed_list = [
        framework.default_main_program().global_block().var(var_name)
        for var_name in feed_order
    ]
    # print(feed_list)
    feeder = fluid.DataFeeder(feed_list, place)
    scores.reset()
    for batch_id, _data in enumerate(test_batch_generator()):
        print("=====", batch_id, len(_data))
        # The value of batch_size may vary in the last batch
        batch_size = len(_data)

        # Setup initial ids and scores lod tensor
        # init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64')
        init_ids_data = np.array(
            [word2id_output["<S>"] for _ in range(batch_size)], dtype='int64')
        init_scores_data = np.array([1. for _ in range(batch_size)],
                                    dtype='float32')
        init_ids_data = init_ids_data.reshape((batch_size, 1))
        init_scores_data = init_scores_data.reshape((batch_size, 1))
        init_recursive_seq_lens = [1] * batch_size
        init_recursive_seq_lens = [
            init_recursive_seq_lens, init_recursive_seq_lens
        ]
        init_ids = fluid.create_lod_tensor(init_ids_data,
                                           init_recursive_seq_lens, place)
        init_scores = fluid.create_lod_tensor(init_scores_data,
                                              init_recursive_seq_lens, place)
        # print(init_ids_data.shape)
        # print(init_recursive_seq_lens)
        # print(init_ids.lod())
        # print(init_scores.lod())

        # Feed dict for inference
        feed_dict = feeder.feed([x for x in _data])
        feed_dict['init_ids'] = init_ids
        feed_dict['init_scores'] = init_scores

        print("=====")
        fetch_outs = exe.run(
            framework.default_main_program(),
            feed=feed_dict,
            fetch_list=[translation_ids, translation_scores],
            # fetch_list=[translation_ids],
            return_numpy=False)
        # print(np.array(fetch_outs[0]))
        # print(np.array(fetch_outs[0]).shape)
        print("=====Update scores")
        scores.update(preds=fetch_outs[0],
                      labels=[_[-1] for _ in _data],
                      words_list=[_[0] for _ in _data],
                      for_generate=True)
        # Split the output words by lod levels
        end_id = word2id_output["<E>"]
        result = []
        paragraphs = []
        for ids in np.array(fetch_outs[0]):
            # print("##", ids.shape)
            # print("##", ids)
            new_ids = []
            new_words = []
            pre_id = -1
            for _id in ids:
                if _id == end_id or \
                        _id == pre_id:
                    break
                pre_id = _id
                new_ids.append(_id)
                if _id < args.max_length:
                    new_words.append(str(_id))
                else:
                    new_words.append(trg_dict[_id])
            result.append(new_ids)
            paragraphs.append(new_words)

        # lod_level_1 = fetch_outs[0].lod()[1]
        # token_array = np.array(fetch_outs[0])
        # result = []
        # for i in six.moves.xrange(len(lod_level_1) - 1):
        #     sentence_list = [
        #         trg_dict[token]
        #         for token in token_array[lod_level_1[i]:lod_level_1[i + 1]]
        #     ]
        #     sentence = " ".join(sentence_list[1:-1])
        #     result.append(sentence)
        # lod_level_0 = fetch_outs[0].lod()[0]
        # paragraphs = [
        #     result[lod_level_0[i]:lod_level_0[i + 1]]
        #     for i in six.moves.xrange(len(lod_level_0) - 1)
        # ]

        # target_sentence_list = [" ".join(
        #         [trg_dict[__]
        #         for __ in _[-1]])
        #         for _ in _data]
        target_sentence_list = []
        for item in _data:
            target_words = []
            for _id in item[-1]:
                if _id < args.max_length:
                    target_words.append(str(_id))
                else:
                    target_words.append(trg_dict[_id])
            target_sentence_list.append(" ".join(target_words))
        source_sentence_list = []
        source_entity_list = []
        for item in _data:
            target_words = []
            for _id in item[0]:
                target_words.append(src_dict[_id])
            source_sentence_list.append(target_words)
            entity_tag = []
            for _id in item[1]:
                entity_tag.append(id2entity[_id])
            source_entity_list.append(entity_tag)

        print("=====Print text")
        for paragraph, sentence, source , entities in \
                zip(paragraphs, target_sentence_list, \
                source_sentence_list, source_entity_list):
            print("-----")
            new_words = []
            indexes = range(len(source))
            for i, word, entity in zip(indexes, source, entities):
                new_words.append(word + "(" + str(i) + " " + entity + ")")
            print(" ".join(new_words))
            print("=Predict:", " ".join(paragraph[1:]))
            print("=Label:", sentence)

    scores.eval_show()
Exemplo n.º 14
0
# Restore the saved inference program, the names of its feed variables and
# its fetch targets.
(infer_program,
 feeded_var_names,
 target_var) = fluid.io.load_inference_model(dirname=model_save_dir,
                                             executor=exe)
print("加载模型完成")

# Sentences to classify, each one passed through get_data() first.
data1 = get_data("在获得诺贝尔文学奖7年之后,莫言15日晚间在山西汾阳贾家庄如是说")
data2 = get_data("综合'今日美国'、《世界日报》等当地媒体报道,芝加哥河滨警察局表示")
data3 = get_data("中国队无缘2020年世界杯")
data4 = get_data("中国人民银行今日发布通知,提高准备金率,预计释放4000亿流动性")
data5 = get_data("10月20日,第六届世界互联网大会正式开幕")
texts = [data1, data2, data3, data4, data5]

# A single LoD level holding the length of every entry in `texts`.
base_shape = [[len(sentence) for sentence in texts]]

# Pack all sentences into one LoD tensor on `place`.
tensor_words = fluid.create_lod_tensor(texts, base_shape, place)

# Run the inference program on the packed sentences.
result = exe.run(infer_program,
                 feed={feeded_var_names[0]: tensor_words},
                 fetch_list=target_var)

# Category names, indexed by predicted label.
names = [
    "文化", "娱乐", "体育", "财经", "房产",
    "汽车", "教育", "科技", "国际", "证券",
]

# For every sentence report the label that sorts last (highest score).
for i in range(len(texts)):
    ranking = np.argsort(result)[0][i]
    lab = ranking[-1]
    probability = result[0][i][lab]
    print("预测结果:{},名称:{},概率:{}".format(lab, names[lab], probability))
Exemplo n.º 15
0
import numpy as np
import paddle.fluid as fluid

# Build a LoD tensor from a 5x30 array split into two sequences (lengths 2
# and 3) on GPU 0, then rebuild a CUDAPlace from the device id it reports.
t = fluid.create_lod_tensor(
    data=np.ndarray([5, 30]),
    recursive_seq_lens=[[2, 3]],
    place=fluid.CUDAPlace(0))
p = t._place()
new_place = fluid.CUDAPlace(p.gpu_device_id())
Exemplo n.º 16
0
    def test_errors(self):
        """Check the exceptions paddle.addmm raises for invalid arguments.

        Expects TypeError for LoDTensor arguments and for int32 variables,
        and ValueError for the remaining shape combinations declared below.
        """

        def declare(name, shape, dtype):
            # Data variable with exactly `shape` (no batch dimension added).
            return fluid.layers.data(name=name,
                                     shape=shape,
                                     dtype=dtype,
                                     append_batch_size=False)

        def lod_tensor():
            # 2x2 LoDTensor on the CPU; deliberately not a Variable.
            return fluid.create_lod_tensor(np.array([[-1, -1], [-1, -1]]),
                                           [[2]], fluid.CPUPlace())

        with program_guard(Program(), Program()):
            # The inputs of addmm must be Variables, not LoDTensors.
            bias = lod_tensor()
            x1 = lod_tensor()
            x2 = lod_tensor()
            self.assertRaises(TypeError, paddle.addmm, bias, x1, x2)

            # int32 variables are expected to be rejected.
            bias = declare('input', [4, 4], "int32")
            x3 = declare('x3', [4, 4], "int32")
            x4 = declare('x4', [4, 4], "int32")
            self.assertRaises(TypeError, paddle.addmm, bias, x3, x4)

            # x and y dimension mismatch (`bias` is still the int32
            # variable declared just above).
            x5 = declare('x5', [4, 5], "float32")
            x6 = declare('x6', [4, 4], "float32")
            self.assertRaises(ValueError, paddle.addmm, bias, x5, x6)

            # 4x4 float32 operands combined with an `input` of the listed
            # shape; every combination is expected to raise ValueError.
            shape_cases = (
                ('x7', 'x8', 'input1', [2, 4]),
                ('x9', 'x10', 'input2', [1, 2]),
                ('x11', 'x12', 'input3', [4, 2]),
                ('x13', 'x14', 'input4', [3, 1]),
            )
            for x_name, y_name, bias_name, bias_shape in shape_cases:
                x = declare(x_name, [4, 4], "float32")
                y = declare(y_name, [4, 4], "float32")
                bias = declare(bias_name, bias_shape, "float32")
                self.assertRaises(ValueError, paddle.addmm, bias, x, y)
Exemplo n.º 17
0
#!/usr/bin/env python
# encoding: utf-8
"""
@author: coffee
@license: (C) Copyright 2017-2020, Node Supply Chain Manager Corporation Limited.
@contact: [email protected]
@file: softmax.py
@time: 2020/10/18 3:31 下午
@desc:
"""

from paddle import fluid
import numpy as np

place = fluid.CPUPlace()

# Graph: a data variable named 'x' fed through sequence_softmax.
x = fluid.data(name='x', shape=[5])
out = fluid.layers.sequence_softmax(x)

# Executor on the same place; run the startup program once before feeding.
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# Input: random float32 values, one per step of the three sequences.
# Disabled alternative kept from the original:
# a = fluid.create_random_int_lodtensor([[1, 2, 3]], base_shape=[5], place=place, low=0, high=1)
seq_lens = [2, 4, 4]
a = np.random.rand(sum(seq_lens)).astype('float32')
x1 = fluid.create_lod_tensor(data=a,
                             recursive_seq_lens=[seq_lens],
                             place=place)

# Fetch the output by name and keep it as a LoDTensor rather than numpy.
res = exe.run(
    fluid.default_main_program(),
    feed={'x': x1},
    fetch_list=[out.name],
    return_numpy=False)
print(res[0])
Exemplo n.º 18
0
 def test_x_type():
     # Hands a LoDTensor built on the CPU to self.reshape; presumably the
     # enclosing test expects this call to raise (caller not visible here).
     values = np.array([[-1]])
     seq_lens = [[1]]
     lod_input = fluid.create_lod_tensor(values, seq_lens, paddle.CPUPlace())
     self.reshape(lod_input, shape=[1])
Exemplo n.º 19
0
def infer(use_cuda, save_dirname=None):
    """Run the saved inference model on one hard-coded sample and print it.

    Args:
        use_cuda: when truthy run on ``fluid.CUDAPlace(0)``, otherwise on
            ``fluid.CPUPlace()``.
        save_dirname: directory handed to ``fluid.io.load_inference_model``.
            When it is ``None`` the function returns without doing anything.
    """
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # One row per feed target, in the order the model must expose them:
    # (expected feed name, list of index sequences, lod).
    # `data` is a list of sequences of index numbers and `lod` is the level
    # of detail info for it, e.g. data = [[10, 2, 3], [2, 3]] goes with
    # lod = [[3, 2]]: two sequences of length 3 and 2.
    sample = (
        ("user_id", [[1]], [[1]]),
        ("gender_id", [[1]], [[1]]),
        ("age_id", [[0]], [[1]]),
        ("job_id", [[10]], [[1]]),
        ("movie_id", [[783]], [[1]]),
        ("category_id", [[10, 8, 9]], [[3]]),
        ("movie_title", [[1069, 4140, 2923, 710, 988]], [[5]]),
    )

    with fluid.scope_guard(fluid.core.Scope()):
        # load_inference_model yields the inference program, the names of
        # the variables to feed, and the variables to fetch.
        inference_program, feed_target_names, fetch_targets = \
            fluid.io.load_inference_model(save_dirname, exe)

        # Check each feed name, then build its LoD tensor, one at a time.
        feed = {}
        for position, (expected_name, values, seq_lens) in enumerate(sample):
            assert feed_target_names[position] == expected_name
            feed[feed_target_names[position]] = fluid.create_lod_tensor(
                values, seq_lens, place)

        # `results` holds one entry per fetch target.
        results = exe.run(inference_program,
                          feed=feed,
                          fetch_list=fetch_targets,
                          return_numpy=False)
        print("inferred score: ", np.array(results[0]))
Exemplo n.º 20
0
    def dyanmic_gru_op(self, **kwargs):
        """Run fluid and pfl_mpc dynamic_gru layers and collect their outputs.

        Builds one ``fluid.layers.dynamic_gru`` and two
        ``pfl_mpc.layers.dynamic_gru`` layers (the second with
        ``gate_activation='sigmoid_chebyshev'``), runs them once on the CPU
        and appends each fetched result, as a numpy array, to a caller list.

        Keyword Args:
            role: value passed to ``pfl_mpc.init``; also used to index
                ``data_share`` and ``weight_share``.
            data: input for the fluid layer.
            data_share: indexable by ``role``; the selected entry is the
                input for the pfl_mpc layers.
            weight: initial weight of the fluid layer.
            weight_share: indexable by ``role``; the selected entry is the
                initial weight of both pfl_mpc layers.
            return_results: list receiving the default pfl_mpc output.
            return_results_cheb: list receiving the chebyshev pfl_mpc output.
            expect_results: list receiving the fluid output.

        Raises:
            KeyError: if one of the keys above is missing.
        """
        # Lookups stay in this order so a missing key surfaces at the same
        # point as before.
        party = kwargs['role']
        plain_input = kwargs['data']
        input_share = kwargs['data_share'][party]
        plain_weight = kwargs['weight']
        share_of_weight = kwargs['weight_share'][party]
        mpc_sink = kwargs['return_results']
        cheb_sink = kwargs['return_results_cheb']
        plain_sink = kwargs['expect_results']

        pfl_mpc.init("aby3", party, "localhost", self.server, int(self.port))

        def weight_attr(name, initializer):
            # All three GRU weights share these ParamAttr settings.
            return fluid.ParamAttr(name=name,
                                   learning_rate=0.5,
                                   initializer=initializer,
                                   trainable=True)

        gru_size = 1

        # fluid branch.
        plain_var = fluid.data(
            name='input_paddle', shape=[3, 3], dtype='float32', lod_level=1)
        plain_feed = fluid.create_lod_tensor(plain_input, [[3]],
                                             fluid.CPUPlace())
        plain_attr = weight_attr(
            'gru_weight',
            fluid.initializer.NumpyArrayInitializer(plain_weight))
        plain_hidden = fluid.layers.dynamic_gru(
            input=plain_var,
            size=gru_size,
            param_attr=plain_attr,
            gate_activation='sigmoid',
            candidate_activation='relu')

        # pfl_mpc branch.
        mpc_var = fluid.data(
            name='input_mpc', shape=[3, 2, 3], dtype='int64', lod_level=1)
        # Swap the first two axes of the share so the batch information
        # ends up in shape[0].
        mpc_feed = fluid.create_lod_tensor(
            np.transpose(input_share, [1, 0, 2]), [[3]], fluid.CPUPlace())
        mpc_attr = weight_attr(
            'mpc_gru_weight',
            pfl_mpc.initializer.NumpyArrayInitializer(share_of_weight))
        cheb_attr = weight_attr(
            'mpc_gru_weight_cheb',
            pfl_mpc.initializer.NumpyArrayInitializer(share_of_weight))
        mpc_hidden = pfl_mpc.layers.dynamic_gru(
            input=mpc_var, size=gru_size, param_attr=mpc_attr)
        cheb_hidden = pfl_mpc.layers.dynamic_gru(
            input=mpc_var,
            size=gru_size,
            param_attr=cheb_attr,
            gate_activation='sigmoid_chebyshev')

        exe = fluid.Executor(place=fluid.CPUPlace())
        exe.run(fluid.default_startup_program())
        fetched = exe.run(
            feed={'input_paddle': plain_feed, 'input_mpc': mpc_feed},
            fetch_list=[plain_hidden, mpc_hidden, cheb_hidden],
            return_numpy=False)

        # `fetched` follows fetch_list order: fluid, mpc, mpc-chebyshev.
        mpc_sink.append(np.array(fetched[1]))
        cheb_sink.append(np.array(fetched[2]))
        plain_sink.append(np.array(fetched[0]))
Exemplo n.º 21
0
 def test_x_type():
     # Hands a LoDTensor built on the CPU to fluid.layers.reshape;
     # presumably the enclosing test expects this call to raise (caller
     # not visible here).
     values = np.array([[-1]])
     seq_lens = [[1]]
     lod_input = fluid.create_lod_tensor(values, seq_lens, fluid.CPUPlace())
     fluid.layers.reshape(lod_input, shape=[1])
Exemplo n.º 22
0
        data.append(np.int64(dict_txt[s]))
    return data


data = []
# Run the sentences to classify through get_data(). (The original comment
# said "get the image data", but get_data is called on text here.)
data1 = get_data('京城最值得你来场文化之旅的博物馆')
data2 = get_data('谢娜为李浩菲澄清网络谣言,之后她的两个行为给自己加分')
data.append(data1)
data.append(data2)

# Length of each entry in `data`, used as the single LoD level.
base_shape = [[len(c) for c in data]]

# Build the prediction input tensor.
tensor_words = fluid.create_lod_tensor(data, base_shape, place)

# Run the prediction.
result = exe.run(program=infer_program,
                 feed={feeded_var_names[0]: tensor_words},
                 fetch_list=target_var)

# Category names.
names = [
    '民生', '文化', '娱乐', '体育', '财经', '房产', '汽车', '教育', '科技', '军事', '旅游', '国际',
    '证券', '农业', '游戏'
]

# Pick, for every input, the label that sorts last (highest score).
# NOTE(review): `lab` is never used afterwards, and the `with` block below
# calls infer_model() once per iteration - this looks like two unrelated
# snippets spliced together; confirm against the original sources.
for i in range(len(data)):
    lab = np.argsort(result)[0][i][-1]
    with fluid.unique_name.guard():
        translation_ids, translation_scores = infer_model()

# Batched reader over the WMT16 test set, built with the configured source
# and target dictionary sizes.
test_data = paddle.batch(
    paddle.dataset.wmt16.test(source_dict_size, target_dict_size),
    batch_size=batch_size)
# Dictionaries for "en" and "de" requested with reverse=True; presumably
# these map ids back to words, as the variable names suggest - confirm.
src_idx2word = paddle.dataset.wmt16.get_dict(
    "en", source_dict_size, reverse=True)
trg_idx2word = paddle.dataset.wmt16.get_dict(
    "de", target_dict_size, reverse=True)

# Load the parameters stored under model_save_dir for infer_prog.
fluid.io.load_params(exe, model_save_dir, main_program=infer_prog)

for data in test_data():
    src_word_id = fluid.create_lod_tensor(
        data=[x[0] for x in data],
        recursive_seq_lens=[[len(x[0]) for x in data]],
        place=place)
    # init_ids内容为start token
    init_ids = fluid.create_lod_tensor(
        data=np.array([[0]] * len(data), dtype='int64'),
        recursive_seq_lens=[[1] * len(data)] * 2,
        place=place)
    # init_scores为beam search过程累积得分的初值
    init_scores = fluid.create_lod_tensor(
        data=np.array([[0.]] * len(data), dtype='float32'),
        recursive_seq_lens=[[1] * len(data)] * 2,
        place=place)
    seq_ids, seq_scores = exe.run(
        infer_prog,
        feed={
            'src_word_id': src_word_id,