Example No. 1
    def test_pserver(self):
        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.SERVER,
            worker_num=2,
            server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])

        fleet.init(role)

        batch_size = 128
        is_sparse = True
        is_distribute = False

        strategy = DistributeTranspilerConfig()
        strategy.sync_mode = False
        strategy.geo_sgd_mode = True
        strategy.geo_sgd_need_push_nums = 5

        avg_cost, _, _ = train_network(batch_size, is_distribute, is_sparse)

        optimizer = fluid.optimizer.SGD(0.1)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        pserver_startup_program = fleet.startup_program
        pserver_main_program = fleet.main_program
Example No. 2
 def run_trainer(self, args):
     """
     Run the trainer process; you don't need to implement it.
     Args:
         args (ArgumentParser): runtime arguments used to configure the distributed fleet.
     """
     from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
     if args.role.upper() != "TRAINER":
         raise ValueError("args role must be TRAINER")
     role = role_maker.UserDefinedRoleMaker(
         current_id=args.current_id,
         role=role_maker.Role.WORKER,
         worker_num=args.trainers,
         server_endpoints=args.endpoints.split(","))
     fleet.init(role)
     self._set_strategy(args)
     avg_cost = self.net(args)
     optimizer = fluid.optimizer.SGD(LEARNING_RATE)
     optimizer = fleet.distributed_optimizer(optimizer, self.strategy)
     optimizer.minimize(avg_cost)
     if args.run_params.get("run_from_dataset", False):
         losses = self.do_training_from_dataset(fleet, args)
     else:
         losses = self.do_training(fleet, args)
     losses = "" if not losses else losses
     print(losses)
Example No. 3
    def run_pserver(self, args):
        if args.role.upper() != "PSERVER":
            raise ValueError("args role must be PSERVER")

        role = role_maker.UserDefinedRoleMaker(
            current_id=args.current_id,
            role=role_maker.Role.SERVER,
            worker_num=args.trainers,
            server_endpoints=args.endpoints.split(","))

        fleet.init(role)

        strategy = DistributeTranspilerConfig()
        strategy.sync_mode = args.sync_mode
        strategy.geo_sgd_mode = args.geo_sgd_mode
        strategy.geo_sgd_need_push_nums = args.geo_sgd_need_push_nums

        avg_cost = self.net()

        optimizer = fluid.optimizer.SGD(LEARNING_RATE)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        fleet.init_server()
        fleet.run_server()
Example No. 4
def distribute_train(args):
    # Determine the role this machine/process plays in distributed training from environment variables,
    # then initialize the node with the fleet API's init() method.
    role = role_maker.PaddleCloudRoleMaker()
    fleet.init(role)

    # The distributed run mode can be configured further through DistributeTranspilerConfig.
    # Below we set the run mode to asynchronous (async) and split the parameters across the nodes.
    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = False
    strategy.runtime_split_send_recv = True

    ctr_model = CTR()
    inputs = ctr_model.input_data(args)
    avg_cost, auc_var = ctr_model.net(inputs, args)

    # Configure the distributed optimizer with the chosen strategy and build the programs.
    optimizer = fluid.optimizer.Adam(args.learning_rate)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    # Run different logic depending on the node's role.
    if fleet.is_server():
        # Initialize and run the parameter server node.
        fleet.init_server()
        fleet.run_server()

    elif fleet.is_worker():
        # Initialize the worker node.
        fleet.init_worker()

        exe = fluid.Executor(fluid.CPUPlace())
        # Run fleet.startup_program, which contains the distributed initialization steps.
        exe.run(fleet.startup_program)
        dataset, file_list = get_dataset(inputs, args)
        for epoch in range(args.epochs):
            # Shuffle at file granularity.
            random.shuffle(file_list)
            dataset.set_filelist(file_list)

            # Trainer nodes run fleet.main_program, which has been pruned for distributed training.
            start_time = time.time()
            exe.train_from_dataset(program=fleet.main_program,
                                   dataset=dataset,
                                   fetch_list=[auc_var],
                                   fetch_info=["Epoch {} auc ".format(epoch)],
                                   print_period=100,
                                   debug=False)
            end_time = time.time()
            logger.info("epoch %d finished, use time=%d\n" %
                        ((epoch), end_time - start_time))

            # By default, worker 0 saves the model.
            if args.save_model and fleet.is_first_worker():
                model_path = os.path.join(str(args.model_path),
                                          "epoch_" + str(epoch))
                fleet.save_persistables(executor=exe, dirname=model_path)

        fleet.stop_worker()
        logger.info("Distribute Train Success!")
Example No. 5
    def test(self):
        endpoints = [
            "127.0.0.1:36004", "127.0.0.1:36005", "127.0.0.1:36006",
            "127.0.0.1:36007"
        ]

        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.SERVER,
            worker_num=2,
            server_endpoints=endpoints)

        fleet.init(role)
        loss, acc, _ = self.net()

        optimizer = fluid.optimizer.Adagrad(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=base_lr,
                decay_steps=500,
                decay_rate=0.969,
                staircase=True))

        strategy = StrategyFactory.create_async_strategy()
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(loss)
Example No. 6
    def run_pserver(self, role, strategy):
        fleet.init(role)
        avg_cost, x, y = self.net()
        optimizer = fluid.optimizer.SGD(0.01)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        fleet.init_server()
        fleet.run_server()
Example No. 7
    def test_default_strategy(self):
        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.WORKER,
            worker_num=2,
            server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
        fleet.init(role)

        optimizer = fluid.optimizer.SGD(0.0001)
        optimizer = fleet.distributed_optimizer(optimizer)
Example No. 8
    def set_optimizer(self, FLAGS, net_output):
        """
        set optimizer
        """
        optimizer = net_output['optimizer']
        strategy = DistributeTranspilerConfig()
        strategy.sync_mode = (FLAGS.data_reader != "dataset")
        #pslib, strategy = {"use_cvm": True}
 
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        return optimizer.minimize(net_output['loss'])
Example No. 9
 def optimize(self, loss, optimizer_type, lr):
     log.info('learning rate:%f' % lr)
     if optimizer_type == "sgd":
         optimizer = F.optimizer.SGD(learning_rate=lr)
     elif optimizer_type == "adam":
         # Don't slice the tensor, to ensure convergence
         optimizer = F.optimizer.Adam(learning_rate=lr, lazy_mode=True)
     else:
         raise ValueError("Unknown Optimizer %s" % optimizer_type)
     # create the distributed training strategy
     self.strategy = StrategyFactory.create_sync_strategy()
     optimizer = tfleet.distributed_optimizer(optimizer, self.strategy)
     optimizer.minimize(loss)
Example No. 10
    def test_dist_geo_server_transpiler(self):
        num_voc = 128
        embed_dim = 64
        x_shape, x_lod = [16, 10], [[3, 5, 2, 6]]
        x = fluid.data(name='x', shape=x_shape, dtype='int32', lod_level=1)
        hash_embd = fluid.contrib.layers.search_pyramid_hash(
            input=x,
            num_emb=embed_dim,
            space_len=num_voc * embed_dim,
            pyramid_layer=4,
            rand_len=16,
            drop_out_percent=0.5,
            is_training=True,
            use_filter=False,
            white_list_len=6400,
            black_list_len=2800,
            seed=3,
            lr=0.002,
            param_attr=fluid.ParamAttr(
                name="PyramidHash_emb_0",
                learning_rate=0,
            ),
            param_attr_wl=fluid.ParamAttr(
                name="Filter",
                learning_rate=0,
            ),
            param_attr_bl=None,
            distribute_update_vars=["PyramidHash_emb_0"],
            name=None)

        cost = fluid.layers.reduce_sum(hash_embd)

        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.SERVER,
            worker_num=2,
            server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])

        fleet.init(role)

        strategy = DistributeTranspilerConfig()
        strategy.sync_mode = False
        strategy.geo_sgd_mode = True
        strategy.geo_sgd_need_push_nums = 5

        optimizer = fluid.optimizer.SGD(0.1)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(cost)

        pserver_startup_program = fleet.startup_program
        pserver_main_program = fleet.main_program
Example No. 11
    def test_half_async_strategy(self):
        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.WORKER,
            worker_num=2,
            server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
        fleet.init(role)

        half_async_config = DistributeTranspilerConfig()

        half_async_config.sync_mode = False
        half_async_config.geo_sgd_mode = False
        half_async_config.runtime_split_send_recv = False

        optimizer = fluid.optimizer.SGD(0.0001)
        optimizer = fleet.distributed_optimizer(optimizer, half_async_config)
Example No. 12
    def build_optimizer(self, avg_cost, strategy):
        use_grad_clip = int(os.getenv('GRAD_CLIP', 0))
        if use_grad_clip:
            # 1: clip_by_value; 2: clip_by_norm; 3: clip_by_global_norm
            if use_grad_clip == 1:
                fluid.clip.set_gradient_clip(
                    clip=fluid.clip.GradientClipByValue(2.0))
            elif use_grad_clip == 2:
                fluid.clip.set_gradient_clip(
                    clip=fluid.clip.GradientClipByNorm(2.0))
            elif use_grad_clip == 3:
                fluid.clip.set_gradient_clip(
                    clip=fluid.clip.GradientClipByGlobalNorm(2.0))

        optimizer = fluid.optimizer.SGD(LEARNING_RATE)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)
Example No. 13
    def test(self):
        endpoints = [
            "127.0.0.1:36004", "127.0.0.1:36005", "127.0.0.1:36006",
            "127.0.0.1:36007"
        ]

        role = role_maker.UserDefinedRoleMaker(current_id=0,
                                               role=role_maker.Role.SERVER,
                                               worker_num=2,
                                               server_endpoints=endpoints)

        fleet.init(role)
        loss, acc, _ = self.net()
        optimizer = fluid.optimizer.SGD(base_lr)
        strategy = StrategyFactory.create_geo_strategy(20)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(loss)
Example No. 14
    def test_communicator_async(self):
        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.WORKER,
            worker_num=2,
            server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])

        fleet.init(role)
        avg_cost = self.net()

        optimizer = fluid.optimizer.SGD(0.01)
        strategy = StrategyFactory.create_async_strategy()
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        fleet.init_worker()
        time.sleep(10)
        fleet.stop_worker()
Example No. 15
def optimization(base_lr, loss, optimizer='adam'):
    if optimizer == 'sgd':
        optimizer = F.optimizer.SGD(base_lr)
    elif optimizer == 'adam':
        optimizer = F.optimizer.Adam(base_lr, lazy_mode=True)
    else:
        raise ValueError

    log.info('learning rate:%f' % (base_lr))
    # create the DistributeTranspilerConfig
    config = DistributeTranspilerConfig()
    config.sync_mode = False
    #config.runtime_split_send_recv = False

    config.slice_var_up = False
    #create the distributed optimizer
    optimizer = fleet.distributed_optimizer(optimizer, config)
    optimizer.minimize(loss)
Example No. 16
def optimization(base_lr, loss, train_steps, optimizer='sgd'):
    decayed_lr = L.learning_rate_scheduler.polynomial_decay(
        learning_rate=base_lr,
        decay_steps=train_steps,
        end_learning_rate=0.0001 * base_lr,
        power=1.0,
        cycle=False)
    if optimizer == 'sgd':
        optimizer = F.optimizer.SGD(decayed_lr)
    elif optimizer == 'adam':
        optimizer = F.optimizer.Adam(decayed_lr, lazy_mode=True)
    else:
        raise ValueError

    log.info('learning rate:%f' % (base_lr))
    # create the distributed training strategy
    strategy = StrategyFactory.create_async_strategy()
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(loss)
Example No. 17
    def init(self, context):
        self.model.train_net()
        optimizer = self.model.optimizer()
        strategy = self.build_strategy()
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(self.model.get_cost_op())

        if fleet.is_server():
            context['status'] = 'server_pass'
        else:
            self.fetch_vars = []
            self.fetch_alias = []
            self.fetch_period = self.model.get_fetch_period()

            metrics = self.model.get_metrics()
            if metrics:
                self.fetch_vars = metrics.values()
                self.fetch_alias = metrics.keys()
            context['status'] = 'train_pass'
Example No. 18
    def run_trainer(self, role, strategy):
        place = fluid.core.CPUPlace()
        exe = fluid.Executor(place)

        fleet.init(role)
        avg_cost, x, y = self.net()
        optimizer = fluid.optimizer.SGD(0.01)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        exe.run(fleet.startup_program)
        fleet.init_worker()

        train_reader = paddle.batch(self.fake_reader(), batch_size=24)
        feeder = fluid.DataFeeder(place=place, feed_list=[x, y])

        for batch_id, data in enumerate(train_reader()):
            exe.run(fleet.main_program, feed=feeder.feed(data), fetch_list=[])

        fleet.stop_worker()
Example No. 19
    def test_communicator_init_and_start(self):
        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.WORKER,
            worker_num=2,
            server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])

        fleet.init(role)
        avg_cost = self.net()

        optimizer = fluid.optimizer.SGD(0.01)

        strategy = DistributeTranspilerConfig()
        strategy.sync_mode = True
        strategy.wait_port = False
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        comm = Communicator(fleet.main_program)
        comm.start()
        time.sleep(10)
        comm.stop()
Example No. 20
 def run_trainer(self, args):
     """run trainer"""
     from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
     import paddle.fluid as fluid
     from paddle.fluid.transpiler.ps_dispatcher import RoundRobin
     from paddle.fluid.transpiler.ps_dispatcher import HashName
     fluid.default_startup_program().random_seed = 1
     fluid.default_main_program().random_seed = 1
     if args.role.upper() != "TRAINER":
         raise ValueError("args role must be TRAINER")
     role = role_maker.UserDefinedRoleMaker(
         current_id=args.current_id,
         role=role_maker.Role.WORKER,
         worker_num=args.trainers,
         server_endpoints=args.endpoints.split(","))
     fleet.init(role)
     strategy = DistributeTranspilerConfig()
     strategy.sync_mode = args.run_params["sync_mode"]
     strategy.async_mode = args.run_params["async_mode"]
     strategy.mode = "pserver"
     strategy.slice_var_up = args.run_params['slice_var_up']
     strategy.enable_dc_asgd = args.run_params['enable_dc_asgd']
     # choose the parameter-to-pserver dispatch method
     if args.run_params['split_method']:
         strategy.split_method = HashName
     else:
         strategy.split_method = RoundRobin
     strategy.wait_port = args.run_params['wait_port']
     strategy.runtime_split_send_recv = args.run_params['runtime_split_send_recv']
     strategy.use_hierarchical_allreduce = args.run_params['use_hierarchical_allreduce']
     # strategy.hierarchical_allreduce_exter_nranks = args.run_params['hierarchical_allreduce_exter_nranks']
     # strategy.hierarchical_allreduce_inter_nranks = args.run_params['hierarchical_allreduce_inter_nranks']
     strategy.geo_sgd_mode = args.run_params['geo_sgd']
     strategy.geo_sgd_need_push_nums = args.run_params['push_nums']
     avg_cost = self.net()
     optimizer = fluid.optimizer.SGD(LEARNING_RATE)
     optimizer = fleet.distributed_optimizer(optimizer, strategy)
     optimizer.minimize(avg_cost)
     losses = self.do_training(fleet, args)
     losses = "" if not losses else losses
     print(losses)
Example No. 21
    def test_debug_info(self):
        x = fluid.layers.data(name='x', shape=[1], dtype='float32')
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        y_predict = fluid.layers.fc(input=x, size=1, act=None)
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = fluid.layers.mean(cost)

        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.WORKER,
            worker_num=2,
            server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
        fleet.init(role)

        optimizer = fluid.optimizer.SGD(0.0001)
        strategy = StrategyFactory.create_sync_strategy()
        strategy.set_debug_opt({
            "dump_param": ["fc_0.tmp_0"],
            "dump_fields": ["fc_0.tmp_0", "fc_0.tmp_0@GRAD"],
            "dump_fields_path": "dump_text/"
        })
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
Example No. 22
    def test_communicator_async(self):
        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.WORKER,
            worker_num=2,
            server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])

        fleet.init(role)
        avg_cost = self.net()

        optimizer = fluid.optimizer.SGD(0.01)

        strategy = DistributeTranspilerConfig()
        strategy.sync_mode = False
        strategy.runtime_split_send_recv = True
        strategy.wait_port = False
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        fleet.init_worker()
        time.sleep(10)
        fleet.stop_worker()
Example No. 23
 def run_pserver(self, args):
     """
     Run the pserver process; you don't need to implement it.
     Args:
         args (ArgumentParser): runtime arguments used to configure the distributed fleet.
     """
     from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
     if args.role.upper() != "PSERVER":
         raise ValueError("args role must be PSERVER")
     role = role_maker.UserDefinedRoleMaker(
         current_id=args.current_id,
         role=role_maker.Role.SERVER,
         worker_num=args.trainers,
         server_endpoints=args.endpoints.split(","))
     fleet.init(role)
     self._set_strategy(args)
     avg_cost = self.net(args)
     optimizer = fluid.optimizer.SGD(LEARNING_RATE)
     optimizer = fleet.distributed_optimizer(optimizer, self.strategy)
     optimizer.minimize(avg_cost)
     fleet.init_server(model_dir=args.run_params.get("model_dir", ""))
     fleet.run_server()
Example No. 24
    def test_pserver(self):
        role = role_maker.UserDefinedRoleMaker(
            current_id=0,
            role=role_maker.Role.SERVER,
            worker_num=2,
            server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])

        fleet.init(role)

        batch_size = 128
        is_sparse = True
        is_distribute = False

        strategy = StrategyFactory.create_geo_strategy(5)

        avg_cost, _, _, _ = train_network(batch_size, is_distribute, is_sparse)

        optimizer = fluid.optimizer.SGD(0.1)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        pserver_startup_program = fleet.startup_program
        pserver_main_program = fleet.main_program
Example No. 25
    def run_trainer(self, args):
        if args.role.upper() != "TRAINER":
            raise ValueError("args role must be TRAINER")

        role = role_maker.UserDefinedRoleMaker(
            current_id=args.current_id,
            role=role_maker.Role.WORKER,
            worker_num=args.trainers,
            server_endpoints=args.endpoints.split(","))

        fleet.init(role)

        strategy = DistributeTranspilerConfig()
        strategy.sync_mode = args.sync_mode

        avg_cost = self.net()

        optimizer = fluid.optimizer.SGD(LEARNING_RATE)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)

        out = self.do_training(fleet)
Example No. 26
    def init(self, context):
        self.model.train_net()
        optimizer = self.model.optimizer()
        optimizer_name = envs.get_global_env("hyper_parameters.optimizer",
                                             None, "train.model")
        if optimizer_name not in ["", "sgd", "SGD", "Sgd"]:
            os.environ["FLAGS_communicator_is_sgd_optimizer"] = '0'

        strategy = self.build_strategy()
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(self.model.get_avg_cost())

        if fleet.is_server():
            context['status'] = 'server_pass'
        else:
            self.fetch_vars = []
            self.fetch_alias = []
            self.fetch_period = self.model.get_fetch_period()

            metrics = self.model.get_metrics()
            if metrics:
                self.fetch_vars = metrics.values()
                self.fetch_alias = metrics.keys()
            context['status'] = 'startup_pass'
Example No. 27
def optimization(base_lr, loss, train_steps, optimizer='sgd'):
    decayed_lr = L.learning_rate_scheduler.polynomial_decay(
        learning_rate=base_lr,
        decay_steps=train_steps,
        end_learning_rate=0.0001 * base_lr,
        power=1.0,
        cycle=False)
    if optimizer == 'sgd':
        optimizer = F.optimizer.SGD(decayed_lr)
    elif optimizer == 'adam':
        optimizer = F.optimizer.Adam(decayed_lr, lazy_mode=True)
    else:
        raise ValueError

    log.info('learning rate:%f' % (base_lr))
    # create the DistributeTranspilerConfig
    config = DistributeTranspilerConfig()
    config.sync_mode = False
    #config.runtime_split_send_recv = False

    config.slice_var_up = False
    #create the distributed optimizer
    optimizer = fleet.distributed_optimizer(optimizer, config)
    optimizer.minimize(loss)
Example No. 28
 def set_program(self, avg_cost, strategy):
     with fluid.scope_guard(fluid.Scope()):
         optimizer = fluid.optimizer.SGD(0.1)
         optimizer = fleet.distributed_optimizer(optimizer, strategy)
         optimizer.minimize(avg_cost)
Example No. 29
    def test_dataset_fleet2(self):
        """
        Testcase for InMemoryDataset from create to run.
        """
        with open("test_in_memory_dataset2_run2_a.txt", "w") as f:
            data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
            data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
            data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
            f.write(data)
        with open("test_in_memory_dataset2_run2_b.txt", "w") as f:
            data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
            data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
            data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
            data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
            f.write(data)

        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
        with fluid.program_guard(train_program, startup_program):
            slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"]
            slots_vars = []
            for slot in slots:
                var = fluid.layers.data(\
                    name=slot, shape=[1], dtype="float32", lod_level=1)
                slots_vars.append(var)
            fake_cost = \
                fluid.layers.elementwise_sub(slots_vars[0], slots_vars[-1])
            fake_cost = fluid.layers.mean(fake_cost)
        with fluid.scope_guard(scope):
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            try:
                fleet.init()
            except ImportError as e:
                print("warning: no mpi4py")
            adam = fluid.optimizer.Adam(learning_rate=0.000005)
            try:
                adam = fleet.distributed_optimizer(adam,
                                                   strategy={
                                                       "fs_uri":
                                                       "fs_uri_xxx",
                                                       "fs_user":
                                                       "******",
                                                       "fs_passwd":
                                                       "fs_passwd_xxx",
                                                       "fs_hadoop_bin":
                                                       "fs_hadoop_bin_xxx"
                                                   })
                adam.minimize([fake_cost], [scope])
            except AttributeError as e:
                print("warning: no mpi")
            except ImportError as e:
                print("warning: no mpi4py")
            exe.run(startup_program)
            dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
                "InMemoryDataset")
            dataset.set_batch_size(32)
            dataset.set_thread(3)
            dataset.set_filelist([
                "test_in_memory_dataset2_run2_a.txt",
                "test_in_memory_dataset2_run2_b.txt"
            ])
            dataset.set_pipe_command("cat")
            dataset.set_use_var(slots_vars)
            dataset.load_into_memory()
            try:
                dataset.global_shuffle(fleet)
            except:
                print("warning: catch expected error")
            fleet._opt_info = None
            fleet._fleet_ptr = None
            dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
                "InMemoryDataset")
            dataset.set_rank_offset("")
            dataset.set_pv_batch_size(1)
            dataset.set_hdfs_config("", "")
            d = paddle.distributed.fleet.DatasetBase()
            try:
                dataset.set_feed_type("MultiSlotInMemoryDataFeed")
            except:
                print("warning: catch expected error")
            dataset.thread_num = 0
            try:
                dataset._prepare_to_run()
            except:
                print("warning: catch expected error")
            dataset.set_parse_logkey(True)
            dataset.set_merge_by_sid(True)
            dataset.set_enable_pv_merge(True)
            try:
                dataset.preprocess_instance()
            except:
                print("warning: catch expected error")
            try:
                dataset.set_current_phase(1)
            except:
                print("warning: catch expected error")
            try:
                dataset.postprocess_instance()
            except:
                print("warning: catch expected error")
            dataset.set_fleet_send_batch_size(1024)
            try:
                dataset.global_shuffle()
            except:
                print("warning: catch expected error")
            dataset.get_pv_data_size()
            dataset.get_memory_data_size()
            dataset.get_shuffle_data_size()
            dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
                "QueueDataset")
            try:
                dataset.local_shuffle()
            except:
                print("warning: catch expected error")
            try:
                dataset.global_shuffle()
            except:
                print("warning: catch expected error")
            dataset = paddle.distributed.fleet.FileInstantDataset()
            try:
                dataset.local_shuffle()
            except:
                print("warning: catch expected error")
            try:
                dataset.global_shuffle()
            except:
                print("warning: catch expected error")

        os.remove("./test_in_memory_dataset2_run2_a.txt")
        os.remove("./test_in_memory_dataset2_run2_b.txt")
Example No. 30
    def test_dataset_fleet(self):
        """
        Testcase for InMemoryDataset from create to run.
        """

        self.skipTest("parameter server will add pslib UT later")

        with open("test_in_memory_dataset2_run_a.txt", "w") as f:
            data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
            data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
            data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
            f.write(data)
        with open("test_in_memory_dataset2_run_b.txt", "w") as f:
            data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
            data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
            data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
            data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
            f.write(data)

        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
        with fluid.program_guard(train_program, startup_program):
            slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"]
            slots_vars = []
            for slot in slots:
                var = fluid.layers.data(\
                    name=slot, shape=[1], dtype="float32", lod_level=1)
                slots_vars.append(var)
            fake_cost = \
                fluid.layers.elementwise_sub(slots_vars[0], slots_vars[-1])
            fake_cost = fluid.layers.mean(fake_cost)
        with fluid.scope_guard(scope):
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            try:
                fleet.init()
            except ImportError as e:
                print("warning: no mpi4py")
            adam = fluid.optimizer.Adam(learning_rate=0.000005)
            try:
                adam = fleet.distributed_optimizer(adam)
                adam.minimize([fake_cost], [scope])
            except AttributeError as e:
                print("warning: no mpi")
            except ImportError as e:
                print("warning: no mpi4py")
            exe.run(startup_program)
            dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
                "InMemoryDataset")
            dataset.set_batch_size(32)
            dataset.set_thread(3)
            dataset.set_filelist([
                "test_in_memory_dataset2_run_a.txt",
                "test_in_memory_dataset2_run_b.txt"
            ])
            dataset.set_pipe_command("cat")
            dataset.set_use_var(slots_vars)
            dataset.load_into_memory()
            fleet._opt_info = None
            fleet._fleet_ptr = None

        os.remove("./test_in_memory_dataset2_run_a.txt")
        os.remove("./test_in_memory_dataset2_run_b.txt")