Example #1
def run_trainer(use_cuda, sync_mode, ip, port, trainers, trainer_id):
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    # loss function
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)
    # optimizer
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)
    with open("trainer_recv_program.dms", "rb") as f:
        trainer_recv_program_desc_str = f.read()
    with open("trainer_main_program.dms", "rb") as f:
        trainer_main_program_desc_str = f.read()
    with open("trainer_send_program.dms", "rb") as f:
        trainer_send_program_desc_str = f.read()
    recv_program = Program.parse_from_string(trainer_recv_program_desc_str)
    main_program = Program.parse_from_string(trainer_main_program_desc_str)
    send_program = Program.parse_from_string(trainer_send_program_desc_str)

    trainer_startup_program = fluid.default_startup_program()
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    exe.run(trainer_startup_program)
    # each round: pull the latest parameters from the pserver (recv_program),
    # run one local training step, then push updates back (send_program)
    for i in range(5):
        exe.run(recv_program)
        exe.run(fluid.default_main_program(),
                feed={
                    "x": numpy.array([1, 2]).astype('float32').reshape(2, 1),
                    "y": numpy.array([2, 3]).astype('float32').reshape(2, 1)
                })
        exe.run(send_program)
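The three *.dms files read above hold serialized ProgramDesc protobufs. Below is a minimal sketch, not part of the original snippet, of how such a file could be produced; it relies on the fluid convention that a Program exposes its ProgramDesc via program.desc:

import paddle.fluid as fluid

def dump_program(program, path):
    # serialize_to_string() emits the protobuf bytes that
    # Program.parse_from_string() reconstructs above
    with open(path, "wb") as f:
        f.write(program.desc.serialize_to_string())

# e.g. dump_program(fluid.default_main_program(), "trainer_main_program.dms")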
Example #2
def run_trainer(use_cuda, sync_mode, ip, port, trainers, trainer_id):
    '''
    Run a trainer process.

    Args:
        use_cuda (bool): whether to use CUDA.
        sync_mode (unused): sync-mode flag; not used in this snippet.
        ip (string): the IP address.
        port (string): the listening port.
        trainers (int): the number of trainers.
        trainer_id (int): the id of this trainer.

    Returns:
        None
    '''
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    # loss function
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)
    # optimizer
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)
    with open("{}/trainer_recv_program.dms".format(cache_path), "rb") as f:
        trainer_recv_program_desc_str = f.read()
    with open("{}/trainer_main_program.dms".format(cache_path), "rb") as f:
        trainer_main_program_desc_str = f.read()
    with open("{}/trainer_send_program.dms".format(cache_path), "rb") as f:
        trainer_send_program_desc_str = f.read()
    recv_program = Program.parse_from_string(trainer_recv_program_desc_str)
    main_program = Program.parse_from_string(trainer_main_program_desc_str)
    send_program = Program.parse_from_string(trainer_send_program_desc_str)

    trainer_startup_program = fluid.default_startup_program()
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    exe.run(trainer_startup_program)
    for i in range(5):
        exe.run(recv_program)
        exe.run(fluid.default_main_program(),
                feed={
                    "x": numpy.array([1, 2]).astype('float32').reshape(2, 1),
                    "y": numpy.array([2, 3]).astype('float32').reshape(2, 1)
                })
        exe.run(send_program)
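A hypothetical single-machine launcher, not from the original source, showing one way run_pserver and run_trainer could be wired together with the standard library (the port value and trainer count are illustrative):

from multiprocessing import Process

def launch(use_cuda=False, ip="127.0.0.1", port="6174", trainers=1):
    # start the pserver first: it blocks in exe.run(main_program),
    # listening for trainer connections
    ps = Process(target=run_pserver,
                 args=(use_cuda, True, ip, port, trainers, 0))
    ps.start()
    tr = Process(target=run_trainer,
                 args=(use_cuda, True, ip, port, trainers, 0))
    tr.start()
    tr.join()       # the trainer returns after its five recv/train/send rounds
    ps.terminate()  # the listening pserver never exits on its own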
Example #3
def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id):
    '''
    Run a parameter server process.

    Args:
        use_cuda (bool): whether to use CUDA.
        sync_mode (unused): sync-mode flag; not used in this snippet.
        ip (string): the IP address.
        port (string): the port to listen on.
        trainers (int): the number of trainers.
        trainer_id (int): the id of this trainer.

    Returns:
        None
    '''
    remove_ps_flag(os.getpid())
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    # loss function
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)
    # optimizer
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)
    with open("{}/pserver_startup_program.dms".format(cache_path), "rb") as f:
        pserver_startup_program_desc_str = f.read()
    with open("{}/pserver_main_program.dms".format(cache_path), "rb") as f:
        pserver_main_program_desc_str = f.read()

    startup_program = Program.parse_from_string(
        pserver_startup_program_desc_str)
    main_program = Program.parse_from_string(pserver_main_program_desc_str)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    exe.run(main_program)
Example #4
def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id):
    remove_ps_flag(os.getpid())
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    # loss function
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)
    # optimizer
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)
    with open("pserver_startup_program.dms", "rb") as f:
        pserver_startup_program_desc_str = f.read()
    with open("pserver_main_program.dms", "rb") as f:
        pserver_main_program_desc_str = f.read()

    startup_program = Program.parse_from_string(
        pserver_startup_program_desc_str)
    main_program = Program.parse_from_string(pserver_main_program_desc_str)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    exe.run(main_program)
Example #5
def deserialize_program(data):
    """
    :api_attr: Static Graph

    Deserialize the given data into a Program.

    Args:
        data(bytes): serialized program.

    Returns:
        Program: deserialized program.

    Examples:
        .. code-block:: python

            import paddle

            paddle.enable_static()

            path_prefix = "./infer_model"

            # User-defined network; here, a softmax regression example
            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
            predict = paddle.static.nn.fc(image, 10, activation='softmax')

            loss = paddle.nn.functional.cross_entropy(predict, label)

            exe = paddle.static.Executor(paddle.CPUPlace())
            exe.run(paddle.static.default_startup_program())

            # serialize the default main program to bytes.
            serialized_program = paddle.static.serialize_program([image], [predict])

            # deserialize bytes to program
            deserialized_program = paddle.static.deserialize_program(serialized_program)

    """
    program = Program.parse_from_string(data)
    if not core._is_program_version_supported(program._version()):
        raise ValueError("Unsupported program version: %d\n" %
                         program._version())
    return program
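A short follow-on sketch, assuming the variables from the docstring example above are still in scope: the deserialized Program should be directly runnable, since Executor.run inserts the needed feed and fetch operators itself.

import numpy as np

# feed a dummy batch and fetch the prediction by variable name
out, = exe.run(deserialized_program,
               feed={'img': np.zeros((1, 28, 28), dtype='float32')},
               fetch_list=[predict.name])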
Example #6
def load_persistables_for_inference(dirname, executor, program,
                                    lookup_table_var_name):
    """
    WARNING: this function is only meant for inference with a distributed lookup table.
    Inference with a distributed lookup table is somewhat involved: this function loads
    the distributed lookup table shards into a single sparse (SELECTED_ROWS) variable,
    so the model can be used in local inference mode.

    :param dirname(str): The directory path.
    :param executor(Executor): The executor used to load the inference model.
    :param program(Program): The parameter server program, which runs on the pserver.
    :param lookup_table_var_name(str): The name of the distributed lookup table variable.
    :return: None
    """

    def __load_lookup_table_vars(executor, dirname, main_program,
                                 lookup_table_vars):
        if not os.path.isdir(dirname):
            raise ValueError("There is no directory named '%s'" % dirname)

        lookup_table_dirname = os.path.join(dirname, lookup_table_dir)

        emb_var_name = lookup_table_vars[0]
        emb_var = main_program.global_block().var(emb_var_name)

        emb_files = []
        for emb_name in os.listdir(lookup_table_dirname):
            if emb_var_name in emb_name:
                emb_files.append(emb_name)

        convert_program = Program()
        global_block = convert_program.global_block()

        emb_var = global_block.create_var(
            name=emb_var.name,
            shape=emb_var.shape,
            dtype=emb_var.dtype,
            type=core.VarDesc.VarType.SELECTED_ROWS,
            persistable=True)
        emb_var.desc.set_type(core.VarDesc.VarType.SELECTED_ROWS)

        sums = []

        for i, emb_file in enumerate(emb_files):
            var_name = "{}_{}".format(emb_var.name, i)
            param_var = global_block.create_var(
                name=var_name,
                shape=emb_var.shape,
                dtype=emb_var.dtype,
                type=core.VarDesc.VarType.SELECTED_ROWS,
                persistable=True)
            param_var.desc.set_type(core.VarDesc.VarType.SELECTED_ROWS)
            global_block.append_op(
                type='load',
                inputs={},
                outputs={'Out': [param_var]},
                attrs={
                    'file_path': os.path.join(lookup_table_dirname, var_name)
                })
            sums.append(param_var)
        global_block.append_op(
            type='sum', inputs={"X": sums}, outputs={'Out': emb_var}, attrs={})
        global_block.append_op(type='delete_var', inputs={'X': sums})
        executor.run(convert_program)

    if not os.path.isdir(dirname):
        raise ValueError("There is no directory named '%s'" % dirname)

    if program:
        if not isinstance(program, Program):
            raise ValueError("program must be an instance of fluid.Program")
    else:
        local_model = os.path.join(dirname, model_filename)

        with open(local_model, "rb") as f:
            program_desc_str = f.read()

        program = Program.parse_from_string(program_desc_str)

        if not core._is_program_version_supported(program._version()):
            raise ValueError("Unsupported program version: %d\n" %
                             program._version())

    _logger.info("Start Load Sparse Program With "
                 "Distributed Lookup Table Vars from {}, time = {}".format(
                     dirname, time.ctime()))

    _load_persistable_vars(executor, dirname, program, [lookup_table_var_name])
    __load_lookup_table_vars(executor, dirname, program,
                             [lookup_table_var_name])

    _logger.info("Finish Load Sparse Program With "
                 "Distributed Lookup Table Vars from {}, time = {}".format(
                     dirname, time.ctime()))

    return program
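A hypothetical call site for the function above (the directory and variable names are illustrative, not from the source):

exe = fluid.Executor(fluid.CPUPlace())
program = load_persistables_for_inference(
    dirname="./dist_model",          # directory the distributed job saved into
    executor=exe,
    program=None,                    # None: read model_filename from dirname
    lookup_table_var_name="emb")     # name of the distributed embedding variable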
Example #7
def load_persistables_for_inference(dirname, executor, program,
                                    lookup_table_var_name):
    """
    WARNING: this function is only meant for inference with a distributed lookup table.
    Inference with a distributed lookup table is somewhat involved: this function loads
    the distributed lookup table shards into a single sparse (SELECTED_ROWS) variable,
    so the model can be used in local inference mode.

    Args:
        dirname(str): The directory path.
        executor(Executor): The executor used to load the inference model.
        program(Program): The parameter server program, which runs on the pserver.
        lookup_table_var_name(str): The name of the distributed lookup table variable.
    Returns:
        None
    """

    def _load_persistable_vars(executor, dirname, program, lookup_table_vars):
        def _is_checkpoint_var(exclude_fluid_vars=None):
            """
            the checkpoint will not save or load all the variables.
            var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded.

            : param var(Variable)
            """

            if exclude_fluid_vars is None:
                exclude_fluid_vars = []

            def is_valid(var):
                if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
                        var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
                        var.desc.type() == core.VarDesc.VarType.RAW:
                    return False
                # variables named with @GRAD are gradients; the checkpoint does not save them.
                if "@GRAD" in var.name:
                    return False
                # .trainer_ marks distributed-training variables; the checkpoint does not save them.
                if ".trainer_" in var.name:
                    return False

                # .block marks distributed-training variables; the checkpoint does not save them.
                if ".block" in var.name:
                    return False

                if "tmp_" in var.name:
                    return False

                if var.name in exclude_fluid_vars:
                    return False

                return var.persistable

            return is_valid

        io.load_vars(
            executor,
            dirname=dirname,
            main_program=program,
            predicate=_is_checkpoint_var(lookup_table_vars),
            filename=None)

    def _load_lookup_table_vars(executor, dirname, main_program,
                                lookup_table_vars):
        if not os.path.isdir(dirname):
            raise ValueError("There is no directory named '%s'" % dirname)

        lookup_table_dirname = os.path.join(dirname, lookup_table_dir)

        emb_var_name = lookup_table_vars[0]
        emb_var = main_program.global_block().var(emb_var_name)

        emb_files = []
        for emb_name in os.listdir(lookup_table_dirname):
            if emb_var_name in emb_name:
                emb_files.append(emb_name)

        convert_program = Program()
        global_block = convert_program.global_block()

        emb_var = global_block.create_var(
            name=emb_var.name,
            shape=emb_var.shape,
            dtype=emb_var.dtype,
            type=core.VarDesc.VarType.SELECTED_ROWS,
            persistable=True)
        emb_var.desc.set_type(core.VarDesc.VarType.SELECTED_ROWS)

        sums = []

        for i, emb_file in enumerate(emb_files):
            var_name = "{}_{}".format(emb_var.name, i)
            param_var = global_block.create_var(
                name=var_name,
                shape=emb_var.shape,
                dtype=emb_var.dtype,
                type=core.VarDesc.VarType.SELECTED_ROWS,
                persistable=True)
            param_var.desc.set_type(core.VarDesc.VarType.SELECTED_ROWS)
            global_block.append_op(
                type='load',
                inputs={},
                outputs={'Out': [param_var]},
                attrs={
                    'file_path': os.path.join(lookup_table_dirname, var_name)
                })
            sums.append(param_var)
        global_block.append_op(
            type='merge_sparse_lookup_table',
            inputs={"X": sums},
            outputs={'Out': emb_var},
            attrs={})
        global_block.append_op(
            type='save',
            inputs={"X": [emb_var]},
            outputs={},
            attrs={
                'file_path': os.path.join(lookup_table_dirname, emb_var.name)
            })
        global_block.append_op(type='delete_var', inputs={'X': sums})
        executor.run(convert_program)

    if not os.path.isdir(dirname):
        raise ValueError("There is no directory named '%s'" % dirname)

    if program:
        if not isinstance(program, Program):
            raise ValueError("program must be an instance of fluid.Program")
    else:
        local_model = os.path.join(dirname, model_filename)

        with open(local_model, "rb") as f:
            program_desc_str = f.read()

        program = Program.parse_from_string(program_desc_str)

        if not core._is_program_version_supported(program._version()):
            raise ValueError("Unsupported program version: %d\n" %
                             program._version())

    _logger.info("Start Load Sparse Program With "
                 "Distributed Lookup Table Vars from {}, time = {}".format(
                     dirname, time.ctime()))

    _load_persistable_vars(executor, dirname, program, [lookup_table_var_name])
    _load_lookup_table_vars(executor, dirname, program, [lookup_table_var_name])

    _logger.info("Finish Load Sparse Program With "
                 "Distributed Lookup Table Vars from {}, time = {}".format(
                     dirname, time.ctime()))

    return program