示例#1
0
 def test_all_positive(self):
     """Check paddle.abs against np.abs on strictly positive inputs.

     Runs once per dtype in self._dtypes and per place in self._places.
     """
     for dtype in self._dtypes:
         # random() is in [0, 1), so every sample is at least 1.
         samples = 1 + 10 * np.random.random([13, 3, 3]).astype(dtype)
         expected = np.abs(samples)
         for place in self._places:
             with dg.guard(place):
                 actual = paddle.abs(paddle.to_tensor(samples)).numpy()
                 self.assertTrue(np.allclose(expected, actual))
    def test_backward_accumulator_with_init_grad(self):
        """Check gradient accumulation in paddle.autograd.backward with initial grads.

        Builds y = x**2 and z = y**3, backpropagates from both y and z with
        explicit initial gradients, and compares x.grad with the chain-rule
        value 2 * x * (y_grad + 3 * y**2 * z_grad).
        """
        for dtype in self._dtypes:
            x = np.random.random([
                10,
            ]).astype(dtype)
            y_grad = np.random.random([
                10,
            ]).astype(dtype)
            z_grad = np.random.random([
                10,
            ]).astype(dtype)
            # NOTE(review): this overwrites self._places, so only the CPU
            # place is exercised below; kept as in the original test.
            self._places = [paddle.CPUPlace()]
            for place in self._places:
                with dg.guard(place):
                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
                    y_tensor = x_tensor**2
                    z_tensor = y_tensor**3

                    y_grad_tensor = paddle.to_tensor(y_grad)
                    z_grad_tensor = paddle.to_tensor(z_grad)
                    paddle.autograd.backward([y_tensor, z_tensor],
                                             [y_grad_tensor, z_grad_tensor])

                    # Expected gradient: dy/dx = 2x contributes y_grad, and
                    # dz/dy = 3y**2 routes z_grad back through y.
                    x_grad = 2 * x_tensor * (y_grad_tensor + 3 * y_tensor *
                                             y_tensor * z_grad_tensor)

                    self.assertTrue(
                        np.allclose(x_grad.numpy(), x_tensor.grad.numpy()))
示例#3
0
    def test_textcnn_siamese_infer(self):
        """Load the saved best TextCNN siamese model and rank the source texts.

        The model is built from a fixed configuration, restored from the
        "textcnn_best" path under the test output directory, used to embed
        TestTextSimilarity.src_text_ids, and the embeddings are handed to
        self.distance_rank together with the result path and top-k value.
        """
        model_kwargs = {
            "vocab_size": TestTextSimilarity.tokenizer.size(),
            "emb_dim": 512,
            "num_filters": 256,
            "num_channels": 1,
            "win_size_list": [3],
            "is_sparse": True,
            "use_cudnn": True,
            "triplet_margin": 1.1,
        }

        infer_settings = {
            "best_model_save_path": os.path.join(
                TestTextSimilarity.test_output_dir, "textcnn_best"),
            "batch_size": 32,
            "max_seq_len": 300,
            "print_step": 200,
        }

        top_k = 10
        result_path = os.path.join(
            TestTextSimilarity.test_output_dir, "textcnn_infer_res.txt")

        with D.guard():
            siamese_model = TextCNNSiameseModel()
            siamese_model.build(**model_kwargs)
            siamese_model.load_model(infer_settings["best_model_save_path"])
            # Only the first element of batch_infer's result is used.
            infer_outputs = siamese_model.batch_infer(
                TestTextSimilarity.src_text_ids)
            embeddings = infer_outputs[0]

        self.distance_rank(
            embeddings,
            TestTextSimilarity.src_text_list,
            TestTextSimilarity.src_label_list,
            result_path,
            top_k)
示例#4
0
    def test_textcnn_siamese_train(self):
        """Train the TextCNN siamese model and log the score returned by train().

        The model is built from a fixed configuration and trained on
        TestTextSimilarity.train_data / eval_data; checkpoints go to the
        "textcnn" and "textcnn_best" paths under the test output directory.
        """
        textcnn_config = {
                "vocab_size": TestTextSimilarity.tokenizer.size(),
                "emb_dim" : 512,
                "num_filters": 256,
                "num_channels":1,
                "win_size_list": [3],
                "is_sparse": True,
                "use_cudnn": True,
                "triplet_margin": 1.1,
                }

        run_config = {
                "model_save_path": os.path.join(TestTextSimilarity.test_output_dir, "textcnn"),
                "best_model_save_path": os.path.join(TestTextSimilarity.test_output_dir, "textcnn_best"),
                "epochs": 15,
                "batch_size": 32,
                "max_seq_len": 300,
                "print_step": 200,
                "learning_rate": 5e-5,
                "load_best_model": False,
                }

        with D.guard():
            textcnn_siamese = TextCNNSiameseModel()
            textcnn_siamese.build(**textcnn_config)
            best_acc = textcnn_siamese.train(
                    TestTextSimilarity.train_data, TestTextSimilarity.eval_data,
                    **run_config)
        # The message previously said "ernie siamese est train score", which
        # named the wrong model and misspelled "best".
        logging.info("textcnn siamese best train score: {}".format(best_acc))
示例#5
0
 def functional_imperative(self, place):
     """Run F.row_conv on self.input / self.weight under `place`.

     Returns the result of the call converted with .numpy().
     """
     with dg.guard(place):
         inputs = dg.to_variable(self.input)
         kernel = dg.to_variable(self.weight)
         result = F.row_conv(inputs, kernel, act=self.act).numpy()
     return result
示例#6
0
    def test_ernie_siamese_infer(self):
        """Load the saved best ERNIE siamese model and rank the source texts.

        The model is built from a fixed configuration, restored from the
        "ernie_best" path under the test output directory, used to embed
        TestTextSimilarity.src_text_ids, and the embeddings are handed to
        self.distance_rank together with the result path and top-k value.
        """
        model_kwargs = {
            "pretrain_dir_or_url": "ernie-1.0",
            "triplet_margin": 1.1,
        }

        infer_settings = {
            "best_model_save_path": os.path.join(
                TestTextSimilarity.test_output_dir, "ernie_best"),
            "batch_size": 32,
            "max_seq_len": 300,
            "print_step": 200,
        }

        top_k = 10
        result_path = os.path.join(
            TestTextSimilarity.test_output_dir, "ernie_infer_res.txt")

        with D.guard():
            siamese_model = ErnieSiameseModel()
            siamese_model.build(**model_kwargs)
            siamese_model.load_model(infer_settings["best_model_save_path"])
            # Only the first element of batch_infer's result is used.
            infer_outputs = siamese_model.batch_infer(
                TestTextSimilarity.src_text_ids)
            embeddings = infer_outputs[0]

        self.distance_rank(
            embeddings,
            TestTextSimilarity.src_text_list,
            TestTextSimilarity.src_label_list,
            result_path,
            top_k)
示例#7
0
def synthesize(config):
    """Build a WaveFlow model in inference mode and run model.infer().

    The iteration passed to infer() is chosen as follows:
      * if config.checkpoint is set, the integer after the last '-' in the
        last '/'-separated component of that path;
      * otherwise config.iteration if it is set;
      * otherwise whatever io.load_latest_checkpoint(checkpoint_dir) returns.
    """
    pprint(vars(config))

    # Checkpoints live under runs/<model>/<name>/checkpoint.
    checkpoint_dir = os.path.join(
        os.path.join("runs", config.model, config.name), "checkpoint")

    # Pick the device to run on.
    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with dg.guard(place):
        # Seed every random source with the configured seed.
        seed = config.seed
        random.seed(seed)
        np.random.seed(seed)
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        print("Random Seed: ", seed)

        # Build the model in inference mode.
        model = WaveFlow(config, checkpoint_dir)
        model.build(training=False)

        # Work out which iteration to run inference for.
        if config.checkpoint is not None:
            checkpoint_name = config.checkpoint.split('/')[-1]
            iteration = int(checkpoint_name.split('-')[-1])
        elif config.iteration is not None:
            iteration = config.iteration
        else:
            iteration = io.load_latest_checkpoint(checkpoint_dir)

        # Run inference.
        model.infer(iteration)
示例#8
0
    def check_identity(self, place):
        """Run F.glu on self.x along self.dim under `place` and compare with self.out."""
        with dg.guard(place):
            glu_out = F.glu(paddle.to_tensor(self.x), self.dim).numpy()

        np.testing.assert_allclose(glu_out, self.out)
示例#9
0
def train_gru(num_class, vocab_size, data):
    """Train a GRU classifier on `data` and log the score and elapsed time.

    Args:
        num_class: forwarded to GRUClassifier as "num_class".
        vocab_size: forwarded to GRUClassifier as "vocab_size".
        data: passed to train() as both the training and the evaluation set.
    """
    logging.warning("gru train start")

    model_kwargs = {
        "num_class": num_class,
        "vocab_size": vocab_size,
        "emb_dim": 512,
        "gru_dim": 256,
        "fc_hid_dim": 512,
        "is_sparse": True,
        "bi_direction": True,
    }

    train_kwargs = {
        "epochs": 2,
        "batch_size": 32,
        "max_seq_len": 100,
        "print_step": 1,
        "learning_rate": 5e-4,
        "load_best_model": False,
    }

    start_time = time.time()

    class GRUModel(ClassificationModel):
        """ClassificationModel whose build() creates a GRUClassifier."""

        def build(self, **model_config):
            self.model = GRUClassifier(**model_config)
            self.built = True

    with D.guard():
        classifier = GRUModel()
        classifier.build(**model_kwargs)
        best_acc = classifier.train(data, data, label_encoder=None, **train_kwargs)

    logging.warning("gru best train score: {}, cost time: {}s".format(best_acc, time.time() - start_time))
示例#10
0
    def test_textcnn(self):
        """Build a TextCNN, run it through self.model_train_infer and log the score."""
        model_kwargs = {
            "num_class": TestDygraphModels.label_encoder.size(),
            "vocab_size": TestDygraphModels.tokenizer.size(),
            "emb_dim": 512,
            "num_filters": 256,
            "fc_hid_dim": 512,
            "num_channels": 1,
            "win_size_list": [3],
            "is_sparse": True,
            "use_cudnn": True,
        }

        train_settings = {
            "model_save_path": os.path.join(
                TestDygraphModels.test_output_dir, "textcnn"),
            "best_model_save_path": os.path.join(
                TestDygraphModels.test_output_dir, "textcnn_best"),
            "epochs": 2,
            "batch_size": 32,
            "max_seq_len": 300,
            "print_step": 200,
            "learning_rate": 5e-4,
        }

        with D.guard():
            best_acc = self.model_train_infer(TextCNN(**model_kwargs), train_settings)

        logging.info("textcnn best train score: {}".format(best_acc))
示例#11
0
def benchmark(config):
    """Build a WaveFlow model in inference mode and run model.benchmark()."""
    pprint(vars(config))

    # Checkpoints live under runs/<model>/<name>/checkpoint.
    checkpoint_dir = os.path.join(
        os.path.join("runs", config.model, config.name), "checkpoint")

    # Pick the device to run on.
    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with dg.guard(place):
        # Seed every random source with the configured seed.
        seed = config.seed
        random.seed(seed)
        np.random.seed(seed)
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        print("Random Seed: ", seed)

        # Build the model in inference mode, then benchmark it.
        wave_flow = WaveFlow(config, checkpoint_dir)
        wave_flow.build(training=False)
        wave_flow.benchmark()
示例#12
0
    def check_identity(self, place):
        """Run fluid.nets.glu on self.x along self.dim under `place` and compare with self.out."""
        with dg.guard(place):
            glu_out = fluid.nets.glu(dg.to_variable(self.x), self.dim).numpy()

        np.testing.assert_allclose(glu_out, self.out)
示例#13
0
    def test_ernie_parallelized(self):
        """Train an ERNIE classifier whose build() is wrapped by model_parallelized.

        Runs on the CUDA device given by D.ParallelEnv().dev_id and logs the
        score returned by train() together with the elapsed wall-clock time.
        """
        model_kwargs = {
            "pretrain_dir_or_url": "ernie-1.0",
            "num_labels": TestDygraphModelsParallelized.label_encoder.size(),
        }

        train_settings = {
            "model_save_path": os.path.join(
                TestDygraphModelsParallelized.test_output_dir, "ernie"),
            "best_model_save_path": os.path.join(
                TestDygraphModelsParallelized.test_output_dir, "ernie_best"),
            "epochs": 2,
            "batch_size": 32,
            "max_seq_len": 300,
            "print_step": 100,
            "learning_rate": 5e-5,
            "load_best_model": False,
        }

        start_time = time.time()

        class ErnieClassificationModel(ClassificationModel):
            """ClassificationModel whose build() loads a pretrained ERNIE classifier."""

            @model_parallelized(TestDygraphModelsParallelized.strategy)
            def build(self, **model_config):
                self.model = ErnieSequenceClassificationCustomized.from_pretrained(**model_config)
                self.built = True

        with D.guard(F.CUDAPlace(D.ParallelEnv().dev_id)):
            classifier = ErnieClassificationModel()
            classifier.build(**model_kwargs)
            best_acc = classifier.train(
                TestDygraphModelsParallelized.train_data,
                TestDygraphModelsParallelized.eval_data,
                label_encoder=TestDygraphModelsParallelized.label_encoder,
                **train_settings)

        logging.warning("ernie parallelized best train score: {}, cost time: {}s".format(best_acc, time.time() - start_time))
示例#14
0
 def run_gelu_op(approximate):
     """Run F.gelu and return (output, gradient w.r.t. the input) as numpy arrays.

     x_np and y_g_np are free variables resolved outside this function;
     y_g_np is passed to paddle.grad as the gradient of the output.
     """
     with dg.guard():
         inputs = paddle.to_tensor(x_np)
         inputs.stop_gradient = False
         outputs = F.gelu(inputs, approximate=approximate)
         grads = paddle.grad([outputs], [inputs], [paddle.to_tensor(y_g_np)])
         forward_np = outputs.numpy()
         backward_np = grads[0].numpy()
     return forward_np, backward_np
示例#15
0
    def infer(self, infer_file, output_file, batch_size=32):
        """Run inference over a file and write the predictions out.

        Unlike check, which predicts a single text, infer predicts over a
        whole file.

        Args:
            infer_file:     file to predict on; must have at least two
                            tab-separated columns, the second being the text
            output_file:    file the predictions are written to; the first
                            column is the model's prediction
            batch_size:     forwarded to batch_infer
        """
        def line_processor(line):
            """Turn one line into (first element of tokenizer.encode(text), text)."""
            text = line.strip("\n").split("\t")[1]
            return (self.tokenizer.encode(text)[0], text)

        # When predicting with batch_infer, the data iterator may carry label
        # information; here that slot holds the raw text instead, which makes
        # writing the results out easier afterwards.
        data_iter = get_data(infer_file, read_func=line_processor)
        with D.guard():
            logits, texts = batch_infer(self.model, data_iter, batch_size=batch_size)

        label_names = [
            self.label_encoder.inverse_transform(label_id)
            for label_id in np.argmax(logits, axis=-1)
        ]

        with codecs.open(output_file, "w", 'gb18030') as wf:
            for label_and_text in zip(label_names, texts):
                wf.write("%s\t%s\n" % label_and_text)
示例#16
0
 def _test_equivalence(self, place):
     """Check that the fluid layer, functional and paddle.nn layer results agree.

     NOTE(review): the incoming `place` is immediately overwritten with
     fluid.CPUPlace(), so the argument has no effect on what is run.
     """
     place = fluid.CPUPlace()
     fluid_out = self.fluid_layer(place)
     functional_out = self.functional(place)
     with dg.guard(place):
         nn_layer_out = self.paddle_nn_layer()
     # Compare pairwise: fluid vs functional, then functional vs nn layer.
     np.testing.assert_array_almost_equal(fluid_out, functional_out)
     np.testing.assert_array_almost_equal(functional_out, nn_layer_out)
示例#17
0
 def _test_case(self, place):
     """Compare fluid.layers.erf with the reference erf on random float64 data."""
     samples = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float64)
     expected = erf(samples)
     with dg.guard(place):
         actual = fluid.layers.erf(dg.to_variable(samples)).numpy()
     self.assertTrue(np.allclose(expected, actual))
示例#18
0
 def compare(self, x, y):
     """Check paddle.complex.matmul against np.matmul on every place.

     Args:
         x: left operand, given to both np.matmul and dg.to_variable.
         y: right operand, given to both np.matmul and dg.to_variable.
     """
     # The reference does not depend on the place, so compute it once.
     np_result = np.matmul(x, y)
     for place in self._places:
         with dg.guard(place):
             x_var = dg.to_variable(x)
             y_var = dg.to_variable(y)
             result = paddle.complex.matmul(x_var, y_var)
         # Assert per place: previously the check ran once after the loop,
         # so only the last place was verified and an empty self._places
         # raised NameError on `result`.
         self.assertTrue(np.allclose(result.numpy(), np_result))
 def compare_op_by_basic_api(self, x, y):
     """Check +, -, * and / on dygraph variables against the same ops on x and y.

     Each result is handed to self.assert_check together with the place.
     """
     binary_ops = (
         lambda lhs, rhs: lhs + rhs,
         lambda lhs, rhs: lhs - rhs,
         lambda lhs, rhs: lhs * rhs,
         lambda lhs, rhs: lhs / rhs,
     )
     for place in self._places:
         with dg.guard(place):
             var_x = dg.to_variable(x)
             var_y = dg.to_variable(y)
             for apply_op in binary_ops:
                 self.assert_check(
                     apply_op(var_x, var_y).numpy(), apply_op(x, y), place)
示例#20
0
    def init_infer(self, model_type, model_dir, model_conf):
        """Initialize the state needed for the prediction stage.

        Creates self.label_encoder from the path model_conf["label_id_path"]
        under model_dir, then sets self.model and self.tokenizer from
        self.init_model, called inside D.guard().
        """
        label_id_path = os.path.join(model_dir, model_conf["label_id_path"])
        self.label_encoder = LabelEncoder(label_id_path)

        with D.guard():
            label_count = self.label_encoder.size()
            model_and_tokenizer = self.init_model(
                model_type, model_dir, model_conf, label_count)
            self.model, self.tokenizer = model_and_tokenizer
示例#21
0
 def test_conj_api_real_number(self):
     """Check paddle.conj on real-valued inputs against np.conj per dtype and place."""
     for dtype in self._dtypes:
         real_data = rand([2, 20, 2, 3]).astype(dtype)
         expected = np.conj(real_data)
         for place in self._places:
             with dg.guard(place):
                 actual = paddle.conj(paddle.to_tensor(real_data)).numpy()
                 self.assertTrue(np.array_equal(actual, expected))
示例#22
0
 def test_complex_x(self):
     """Check cpx.trace on a complex input against np.trace on every place."""
     shape = [2, 20, 2, 3]
     real_part = rand(shape).astype(self._dtype)
     imag_part = rand(shape).astype(self._dtype)
     complex_data = real_part + 1j * imag_part
     expected = np.trace(complex_data, offset=1, axis1=0, axis2=2)
     for place in self._places:
         with dg.guard(place):
             traced = cpx.trace(
                 dg.to_variable(complex_data), offset=1, axis1=0, axis2=2)
             self.assertTrue(np.allclose(traced.numpy(), expected))
示例#23
0
 def test_complex_x(self):
     """Check cpx.sum over dims 1 and 2 of a complex input against np.sum."""
     shape = [2, 10, 10]
     real_part = rand(shape).astype(self._dtype)
     imag_part = rand(shape).astype(self._dtype)
     complex_data = real_part + 1j * imag_part
     expected = np.sum(complex_data, axis=(1, 2))
     for place in self._places:
         with dg.guard(place):
             summed = cpx.sum(dg.to_variable(complex_data), dim=[1, 2])
             self.assertTrue(np.allclose(summed.numpy(), expected))
示例#24
0
 def test_dygraph(self):
     """Check paddle.nn.RReLU in dygraph mode on every place in self.places.

     The layer output for self.x_np is validated with check_output using
     the bounds self.lower_0 and self.upper_0.
     """
     for place in self.places:
         paddle.disable_static(place=place)
         try:
             with dygraph.guard():
                 rrelu = paddle.nn.RReLU(self.lower_0, self.upper_0)
                 out_np = rrelu(paddle.to_tensor(self.x_np))
             self.assertTrue(
                 check_output(self.x_np, out_np.numpy(), self.lower_0,
                              self.upper_0))
         finally:
             # Always switch back, so a failing assertion cannot leave the
             # process in dygraph mode for whatever runs next.
             paddle.enable_static()
示例#25
0
    def test_case(self):
        """Check paddle.kron on two random 10x10 float64 matrices against np.kron on CPU."""
        lhs = np.random.randn(10, 10).astype(np.float64)
        rhs = np.random.randn(10, 10).astype(np.float64)

        with dg.guard(fluid.CPUPlace()):
            product = paddle.kron(dg.to_variable(lhs), dg.to_variable(rhs))
            np.testing.assert_allclose(product.numpy(), np.kron(lhs, rhs))
示例#26
0
    def test_case6(self):
        """Check chained slicing [0][1:3][0:4:2] on a complex variable against numpy."""
        def take_slice(arr):
            # The same chained indexing is applied to both representations.
            return arr[0][1:3][0:4:2]

        complex_data = np.random.randn(2, 3, 4) + 1j * np.random.randn(2, 3, 4)
        expected = take_slice(complex_data)

        for place in self._places:
            with dg.guard(place):
                sliced_var = take_slice(dg.to_variable(complex_data))

            np.testing.assert_allclose(sliced_var.numpy(), expected)
示例#27
0
    def _test_case1_gpu(self, approximate):
        """Compare fluid.layers.gelu on CUDAPlace(0) with the reference gelu.

        Args:
            approximate: forwarded to both the reference gelu and the layer.
        """
        samples = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
        expected = gelu(samples, approximate)

        with dg.guard(fluid.CUDAPlace(0)):
            actual = fluid.layers.gelu(
                dg.to_variable(samples), approximate).numpy()
        self.assertTrue(np.allclose(expected, actual, rtol=1e-05, atol=1e-08))
示例#28
0
    def _test_case1_cpu(self):
        """Compare fluid.layers.gelu on CPU with 0.5 * x * (1 + erf(x / sqrt(2)))."""
        samples = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
        expected = 0.5 * samples * (1 + erf(samples / np.sqrt(2)))

        with dg.guard(fluid.CPUPlace()):
            actual = fluid.layers.gelu(dg.to_variable(samples)).numpy()
        self.assertTrue(np.allclose(expected, actual, rtol=1e-05, atol=1e-08))
示例#29
0
def train(config):
    """Train a WaveFlow model according to `config`.

    Reads the process rank and the number of ranks from dg.parallel.Env(),
    creates runs/<model>/<name>/checkpoint, and runs training steps until
    config.max_iterations is reached. Validation runs every
    config.test_every iterations. Rank 0 additionally prints the config,
    owns the log writer and saves parameters every config.save_every
    iterations.
    """
    use_gpu = config.use_gpu

    # Get the rank of the current training process.
    env = dg.parallel.Env()
    rank = env.local_rank
    nranks = env.nranks
    parallel = nranks > 1

    if rank == 0:
        # Print the whole config setting.
        pprint(vars(config))

    # Make checkpoint directory. exist_ok avoids the check-then-create race
    # when several ranks start at the same time.
    run_dir = os.path.join("runs", config.model, config.name)
    checkpoint_dir = os.path.join(run_dir, "checkpoint")
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Create the log writer (rank 0 only).
    vdl = LogWriter(os.path.join(run_dir, "logs")) \
          if rank == 0 else None

    # Configurate device
    place = fluid.CUDAPlace(rank) if use_gpu else fluid.CPUPlace()

    try:
        with dg.guard(place):
            # Fix random seed.
            seed = config.seed
            random.seed(seed)
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            print("Random Seed: ", seed)

            # Build model; build() returns the iteration to start from.
            model = WaveFlow(config, checkpoint_dir, parallel, rank, nranks,
                             vdl)
            iteration = model.build()

            while iteration < config.max_iterations:
                # Run one single training step.
                model.train_step(iteration)

                iteration += 1

                if iteration % config.test_every == 0:
                    # Run validation step.
                    model.valid_step(iteration)

                if rank == 0 and iteration % config.save_every == 0:
                    # Save parameters.
                    model.save(iteration)
    finally:
        # Close the log writer even if training raised.
        if vdl is not None:
            vdl.close()
示例#30
0
 def test_identity(self):
     """Check paddle.complex.transpose against np.transpose on every place.

     Uses a random complex array of shape (2, 3, 4, 5) and the permutation
     [3, 2, 0, 1].
     """
     data = np.random.random(
         (2, 3, 4, 5)).astype("float32") + 1J * np.random.random(
             (2, 3, 4, 5)).astype("float32")
     perm = [3, 2, 0, 1]
     np_trans = np.transpose(data, perm)
     for place in self._places:
         with dg.guard(place):
             var = dg.to_variable(data)
             trans = paddle.complex.transpose(var, perm=perm)
         # Assert per place: previously the check ran once after the loop,
         # so only the last place was verified and an empty self._places
         # raised NameError on `trans`.
         self.assertTrue(np.allclose(trans.numpy(), np_trans))