Exemplo n.º 1
0
    def test_fleet_init(self):
        """Check the call shapes ``fleet.init`` accepts and rejects.

        The process environment is configured as a parameter server
        (``TRAINING_ROLE=PSERVER``) listening on 127.0.0.1:36001 before any
        role maker is built.
        """
        os.environ.update({
            "TRAINING_ROLE": "PSERVER",
            "POD_IP": "127.0.0.1",
            "PADDLE_PORT": "36001",
        })

        # Accepted forms: explicit role maker, no arguments, keyword flag only.
        fleet.init(fleet.PaddleCloudRoleMaker(is_collective=False))
        fleet.init()
        fleet.init(is_collective=False)

        # A plain string is not a valid value for either keyword; each call
        # is expected to raise.
        for bad_kwargs in ({"is_collective": "F"}, {"role_maker": "F"}):
            self.assertRaises(Exception, fleet.init, **bad_kwargs)
Exemplo n.º 2
0
    def test_ps_minimize(self):
        """Run minimize, save and load on a parameter-server trainer role.

        Steps:
          1. Configure the environment as trainer #1.
          2. Build a small static network (dense input concatenated with a
             sparse embedding, two tanh FC layers, softmax head, mean
             cross-entropy loss).
          3. Initialise fleet in non-collective mode and minimize the loss
             through the fleet distributed optimizer with ``a_sync`` off.
          4. Run the startup program, start the worker, then call
             ``fleet.fleet.save`` in three argument shapes and
             ``fleet.load_model`` with ``mode=0`` and ``mode=1``.

        All artifacts go to a private temporary directory that is removed
        when the test finishes, so the test neither pollutes the shared
        system temp directory nor collides with concurrent runs.
        """
        import shutil
        import tempfile

        import paddle
        import paddle.distributed.fleet as fleet

        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_TRAINER_ID"] = "1"

        input_x = paddle.fluid.layers.data(
            name="x", shape=[32], dtype='float32')
        input_slot = paddle.fluid.layers.data(
            name="slot", shape=[1], dtype='int64')
        input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')

        emb = paddle.fluid.layers.embedding(
            input=input_slot, size=[10, 9], is_sparse=True)
        # NOTE: from here on ``input_x`` is the concat output, not the raw
        # "x" data layer; the second save call below passes this variable.
        input_x = paddle.concat(x=[input_x, emb], axis=1)
        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
        cost = paddle.fluid.layers.cross_entropy(
            input=prediction, label=input_y)
        avg_cost = paddle.fluid.layers.mean(x=cost)

        role = fleet.PaddleCloudRoleMaker(is_collective=False)
        fleet.init(role)

        strategy = paddle.distributed.fleet.DistributedStrategy()
        strategy.a_sync = False
        strategy.a_sync_configs = {"launch_barrier": False}

        optimizer = paddle.optimizer.SGD(learning_rate=0.001)
        optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        optimizer.minimize(avg_cost)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(paddle.static.default_startup_program())
        # NOTE(review): ``pe`` and ``compiled_prog`` are not used afterwards;
        # they are kept because their construction may have side effects on
        # the program -- confirm before removing.
        pe = fluid.ParallelExecutor(use_cuda=False, loss_name=avg_cost.name)
        compiled_prog = fluid.compiler.CompiledProgram(
            fluid.default_main_program())

        fleet.init_worker()

        # Use an isolated scratch directory instead of the shared "/tmp".
        model_dir = tempfile.mkdtemp()
        try:
            # Save with feed given as names, as variables, and with no
            # feed/fetch at all.
            fleet.fleet.save(
                dirname=model_dir, feed=['x', 'y'], fetch=[avg_cost])
            fleet.fleet.save(
                dirname=model_dir, feed=[input_x, input_y], fetch=[avg_cost])
            fleet.fleet.save(dirname=model_dir)

            fleet.load_model(path=model_dir, mode=0)
            fleet.load_model(path=model_dir, mode=1)
        finally:
            # Best-effort cleanup; a failure here must not mask the result.
            shutil.rmtree(model_dir, ignore_errors=True)
Exemplo n.º 3
0
def test_paddlecloudrolemaker():
    """test_paddlecloudrolemaker

    Configure the environment for a single parameter server, initialise
    fleet with a default ``PaddleCloudRoleMaker`` and verify fixed character
    windows of the role maker's ``to_string()`` output.
    """
    os.environ.update({
        "PADDLE_PSERVER_NUMS": "1",
        "PADDLE_TRAINERS_NUM": "1",
        "POD_IP": "127.0.0.1",
        "PADDLE_PORT": "36001",
        "TRAINING_ROLE": "PSERVER",
        "PADDLE_PSERVERS_IP_PORT_LIST": "127.0.0.1:36001,127.0.0.1:36001",
        "PADDLE_TRAINER_ID": "0",
    })

    role = fleet.PaddleCloudRoleMaker()
    fleet.init(role)

    def rendered():
        # Re-render on every use, exactly as many times as it is inspected.
        return str(role.to_string())

    print(rendered())

    # (character window, expected text) pairs checked against the rendering.
    expectations = (
        (slice(0, 7), "role: 2"),
        (slice(44, 53), "127.0.0.1"),
        (slice(102, 111), "127.0.0.1"),
    )
    for window, expected in expectations:
        assert rendered()[window] == expected

    current_name = sys._getframe().f_code.co_name
    print("{} ... ok".format(current_name))
Exemplo n.º 4
0
import paddle
from paddle.fluid.layer_helper import LayerHelper
from paddle.distributed import fleet
from paddle.distributed.fleet.meta_optimizers.ascend import ascend_parser, ascend_optimizer
from collections import namedtuple

# Lightweight record types: a Block exposes a single `program` field and a
# Loss exposes a single `block` field.
Block = namedtuple('Block', ['program'])
Loss = namedtuple('Loss', ['block'])

# Static-graph mode is enabled at import time, before fleet is initialised.
paddle.enable_static()

# Short aliases for the operator-role enum and its attribute-name keys.
# NOTE(review): `core` is not imported in the visible part of this file --
# confirm it is brought into scope elsewhere.
OpRole = core.op_proto_and_checker_maker.OpRole
OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName()
OP_ROLE_VAR_KEY = core.op_proto_and_checker_maker.kOpRoleVarAttrName()

# Module-level side effect: fleet is initialised with a collective role maker
# as soon as this module is imported.
role = fleet.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)


def init_communicator(startup_program, main_program, current_endpoint,
                      endpoints, ring_id):
    """Begin setting up a communicator in ``startup_program``.

    NOTE(review): only the opening of this function is visible here; the
    remainder (including any use of ``main_program``, ``ring_id``,
    ``nranks`` and ``other_endpoints``) is not shown.

    Args:
        startup_program: program whose global block receives the new
            variable.
        main_program: not used in the visible portion.
        current_endpoint: this process's endpoint; must be an element of
            ``endpoints``.
        endpoints: list of all endpoints in the group.
        ring_id: not used in the visible portion.

    Raises:
        ValueError: if ``current_endpoint`` is not in ``endpoints``.
    """
    nranks = len(endpoints)
    # Work on a copy so the caller's list is not mutated by remove().
    other_endpoints = endpoints[:]
    other_endpoints.remove(current_endpoint)
    group_rank = endpoints.index(current_endpoint)
    # list.index() raises rather than returning a negative value, so this
    # assertion cannot fail once the line above has succeeded.
    assert group_rank >= 0

    block = startup_program.global_block()
    # Persistable RAW-typed variable with a uniquely generated 'nccl_id' name.
    # NOTE(review): `unique_name` and `core` are not imported in the visible
    # part of this file -- confirm they are in scope.
    nccl_id_var = block.create_var(name=unique_name.generate('nccl_id'),
                                   persistable=True,
                                   type=core.VarDesc.VarType.RAW)
Exemplo n.º 5
0
    def test_ps_minimize(self):
        """Verify the fleet save APIs raise for unsupported arguments.

        With the environment set up as a parameter server, a small
        two-hidden-layer classifier is built and minimized through the fleet
        distributed optimizer. ``fleet.save_inference_model`` and
        ``fleet.save_persistables`` are then each called with three argument
        combinations, and every call is expected to raise.
        """
        import paddle
        import paddle.distributed.fleet as fleet

        os.environ.update({
            "TRAINING_ROLE": "PSERVER",
            "POD_IP": "127.0.0.1",
            "PADDLE_PORT": "36001",
        })

        layers = paddle.fluid.layers
        features = layers.data(name="x", shape=[32], dtype='float32')
        labels = layers.data(name="y", shape=[1], dtype='int64')

        hidden_1 = layers.fc(input=features, size=64, act='tanh')
        hidden_2 = layers.fc(input=hidden_1, size=64, act='tanh')
        prediction = layers.fc(input=[hidden_2], size=2, act='softmax')
        per_sample_cost = layers.cross_entropy(input=prediction, label=labels)
        avg_cost = layers.mean(x=per_sample_cost)

        fleet.init(fleet.PaddleCloudRoleMaker(is_collective=False))
        strategy = paddle.distributed.fleet.DistributedStrategy()
        strategy.a_sync = False
        sgd = paddle.optimizer.SGD(learning_rate=0.001)
        dist_optimizer = fleet.distributed_optimizer(sgd, strategy=strategy)
        dist_optimizer.minimize(avg_cost)

        exe = fluid.Executor(fluid.CPUPlace())
        pe = fluid.ParallelExecutor(use_cuda=False, loss_name=avg_cost.name)
        compiled_prog = fluid.compiler.CompiledProgram(
            fluid.default_main_program())

        # Argument combinations each save API is expected to reject: a
        # ParallelExecutor, a non-executor string, and a plain Executor
        # paired with a CompiledProgram as main_program.
        rejected_arguments = (
            {"executor": pe},
            {"executor": "exe"},
            {"executor": exe, "main_program": compiled_prog},
        )

        for bad_kwargs in rejected_arguments:
            self.assertRaises(Exception,
                              fleet.save_inference_model,
                              dirname='/tmp/',
                              feeded_var_names=['x', 'y'],
                              target_vars=[avg_cost],
                              **bad_kwargs)

        for bad_kwargs in rejected_arguments:
            self.assertRaises(Exception,
                              fleet.save_persistables,
                              dirname='/tmp/',
                              **bad_kwargs)
Exemplo n.º 6
0
def test_fleet_init_role():
    """test_fleet_init_role

    Initialise fleet with a default-constructed ``PaddleCloudRoleMaker`` and
    report success on stdout.
    """
    fleet.init(fleet.PaddleCloudRoleMaker())
    # Report using this function's own name taken from the current frame.
    current_name = sys._getframe().f_code.co_name
    print("{} ... ok".format(current_name))
Exemplo n.º 7
0
def get_role_maker():
    """Return a new ``PaddleCloudRoleMaker`` created with ``is_collective=True``."""
    collective_role_maker = fleet.PaddleCloudRoleMaker(is_collective=True)
    return collective_role_maker