Example #1
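These snippets are excerpted from a PaddlePaddle (Fluid 1.x) CTR-DNN training and inference script for the Criteo dataset and omit their module-level setup. A minimal sketch of the imports and globals they rely on is given below; the logging configuration and the local reader module name are assumptions, and ctr_dnn_model / parse_args / get_cards are defined elsewhere in the same script and are not reproduced here.

import logging
import os
import time
from multiprocessing import cpu_count

import numpy as np
import paddle
import paddle.fluid as fluid

import reader  # local module providing CriteoDataset (assumed name)

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
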
def train_loop(args, train_program, feed_vars, loss, auc_var, batch_auc_var,
               trainer_num, trainer_id):
    dataset = reader.CriteoDataset(args.sparse_feature_dim)
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            dataset.train([args.train_data_path], trainer_num, trainer_id),
            buf_size=args.batch_size * 100),
        batch_size=args.batch_size)
    feed_var_names = [var.name for var in feed_vars]

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())
    total_time = 0

    pass_id = 0
    batch_id = 0

    feeder = fluid.DataFeeder(feed_var_names, place)
    # This variant runs only a single batch; its metrics are logged below.
    for data in train_reader():
        loss_val, auc_val, batch_auc_val = exe.run(
            fluid.default_main_program(),
            feed=feeder.feed(data),
            fetch_list=[loss.name, auc_var.name, batch_auc_var.name])
        break

    loss_val = np.mean(loss_val)
    auc_val = np.mean(auc_val)
    batch_auc_val = np.mean(batch_auc_val)

    logger.info(
        "TRAIN --> pass: {} batch: {} loss: {} auc: {}, batch_auc: {}".format(
            pass_id, batch_id, loss_val / args.batch_size, auc_val,
            batch_auc_val))
Example #2
def train_loop(args, train_program, data_list, loss, auc_var, batch_auc_var,
               trainer_num, trainer_id):
    dataset = reader.CriteoDataset(args.sparse_feature_dim)
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            dataset.train([args.train_data_path], trainer_num, trainer_id),
            buf_size=args.batch_size * 100),
        batch_size=args.batch_size)
    place = fluid.CPUPlace()

    feeder = fluid.DataFeeder(feed_list=data_list, place=place)
    data_name_list = [var.name for var in data_list]

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    for pass_id in range(args.num_passes):
        for batch_id, data in enumerate(train_reader()):
            loss_val, auc_val, batch_auc_val = exe.run(
                train_program,
                feed=feeder.feed(data),
                fetch_list=[loss, auc_var, batch_auc_var])
            logger.info(
                "TRAIN --> pass: {} batch: {} loss: {} auc: {}, batch_auc: {}".
                format(pass_id, batch_id, loss_val / args.batch_size, auc_val,
                       batch_auc_val))
            if batch_id % 1000 == 0 and batch_id != 0:
                # Periodic checkpoint: only trainer 0 writes the inference model.
                model_dir = args.model_output_dir + '/batch-' + str(batch_id)
                if args.trainer_id == 0:
                    fluid.io.save_inference_model(model_dir, data_name_list,
                                                  [loss, auc_var], exe)
        # Save once more at the end of every pass.
        model_dir = args.model_output_dir + '/pass-' + str(pass_id)
        if args.trainer_id == 0:
            fluid.io.save_inference_model(model_dir, data_name_list,
                                          [loss, auc_var], exe)
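
For context, a hypothetical single-process invocation of the train_loop above, assuming the four-value ctr_dnn_model signature used in Example #4 below and an Adam optimizer (both choices are illustrative assumptions):

if __name__ == '__main__':
    args = parse_args()
    # Build the network on the default main/startup programs.
    loss, data_list, auc_var, batch_auc_var = ctr_dnn_model(
        args.embedding_size, args.sparse_feature_dim)
    optimizer = fluid.optimizer.Adam(learning_rate=1e-3)  # assumed optimizer
    optimizer.minimize(loss)
    # Single trainer: trainer_num=1, trainer_id=0.
    train_loop(args, fluid.default_main_program(), data_list, loss, auc_var,
               batch_auc_var, trainer_num=1, trainer_id=0)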
Example #3
def infer():
    args = parse_args()

    place = fluid.CPUPlace()
    inference_scope = fluid.Scope()

    dataset = reader.CriteoDataset(args.sparse_feature_dim)
    test_reader = paddle.batch(dataset.test([args.data_path]),
                               batch_size=args.batch_size)

    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()
    with fluid.scope_guard(inference_scope):
        with fluid.framework.program_guard(test_program, startup_program):
            loss, auc_var, batch_auc_var, _, data_list, auc_states = ctr_dnn_model(
                args.embedding_size, args.sparse_feature_dim, False)

            exe = fluid.Executor(place)

            feeder = fluid.DataFeeder(feed_list=data_list, place=place)

            # Load the persistable parameters saved during training.
            fluid.io.load_persistables(
                executor=exe,
                dirname=args.model_path,
                main_program=fluid.default_main_program())

            def set_zero(var_name):
                param = inference_scope.var(var_name).get_tensor()
                param_array = np.zeros(param._get_dims()).astype("int64")
                param.set(param_array, place)

            # Reset the accumulated AUC state tensors before evaluation.
            for var in auc_states:
                set_zero(var.name)

            for batch_id, data in enumerate(test_reader()):
                loss_val, auc_val = exe.run(test_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[loss, auc_var])
                if batch_id % 100 == 0:
                    logger.info("TEST --> batch: {} loss: {} auc: {}".format(
                        batch_id, loss_val / args.batch_size, auc_val))
Example #4
def infer():
    args = parse_args()

    place = fluid.CPUPlace()
    inference_scope = fluid.core.Scope()

    dataset = reader.CriteoDataset(args.sparse_feature_dim)
    test_reader = paddle.batch(dataset.test([args.data_path]),
                               batch_size=args.batch_size)

    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()
    with fluid.framework.program_guard(test_program, startup_program):
        loss, data_list, auc_var, batch_auc_var = ctr_dnn_model(
            args.embedding_size, args.sparse_feature_dim)

    exe = fluid.Executor(place)

    feeder = fluid.DataFeeder(feed_list=data_list, place=place)

    with fluid.scope_guard(inference_scope):
        [inference_program, _,
         fetch_targets] = fluid.io.load_inference_model(args.model_path, exe)

        def set_zero(var_name):
            param = inference_scope.var(var_name).get_tensor()
            param_array = np.zeros(param._get_dims()).astype("int64")
            param.set(param_array, place)

        # Reset the auto-generated AUC state variables before evaluation.
        auc_states_names = ['_generated_var_2', '_generated_var_3']
        for name in auc_states_names:
            set_zero(name)

        for batch_id, data in enumerate(test_reader()):
            loss_val, auc_val = exe.run(inference_program,
                                        feed=feeder.feed(data),
                                        fetch_list=fetch_targets)
            if batch_id % 100 == 0:
                logger.info("TEST --> batch: {} loss: {} auc: {}".format(
                    batch_id, loss_val / args.batch_size, auc_val))
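
Both infer() variants above call a parse_args() helper that is not reproduced in these examples. A hypothetical sketch covering only the attributes those snippets read (flag names and defaults are assumptions):

import argparse

def parse_args():
    # Hypothetical argument parser for the inference snippets above.
    parser = argparse.ArgumentParser(description='PaddlePaddle CTR-DNN inference')
    parser.add_argument('--data_path', type=str, required=True,
                        help='path of the evaluation data file')
    parser.add_argument('--model_path', type=str, required=True,
                        help='directory of the saved model to evaluate')
    parser.add_argument('--batch_size', type=int, default=1000)
    parser.add_argument('--embedding_size', type=int, default=10)
    parser.add_argument('--sparse_feature_dim', type=int, default=1000001)
    return parser.parse_args()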
Example #5
def train_loop(args, train_program, py_reader, loss, auc_var, batch_auc_var,
               trainer_num, trainer_id):

    if args.enable_ce:
        # Fix random seeds so continuous-evaluation (CE) runs are reproducible.
        SEED = 102
        train_program.random_seed = SEED
        fluid.default_startup_program().random_seed = SEED

    dataset = reader.CriteoDataset(args.sparse_feature_dim)
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            dataset.train([args.train_data_path], trainer_num, trainer_id),
            buf_size=args.batch_size * 100),
        batch_size=args.batch_size)

    py_reader.decorate_paddle_reader(train_reader)
    data_name_list = []

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    exec_strategy = fluid.ExecutionStrategy()
    build_strategy = fluid.BuildStrategy()

    if os.getenv("NUM_THREADS", ""):
        exec_strategy.num_threads = int(os.getenv("NUM_THREADS"))

    cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
    # Use Reduce when running on multiple CPU cores, otherwise AllReduce.
    build_strategy.reduce_strategy = (
        fluid.BuildStrategy.ReduceStrategy.Reduce
        if cpu_num > 1 else fluid.BuildStrategy.ReduceStrategy.AllReduce)

    exe.run(fluid.default_startup_program())
    pe = fluid.ParallelExecutor(use_cuda=False,
                                loss_name=loss.name,
                                main_program=train_program,
                                build_strategy=build_strategy,
                                exec_strategy=exec_strategy)

    total_time = 0
    for pass_id in range(args.num_passes):
        pass_start = time.time()
        batch_id = 0
        py_reader.start()

        # Drain the reader until py_reader signals the end of the pass.
        try:
            while True:
                loss_val, auc_val, batch_auc_val = pe.run(
                    fetch_list=[loss.name, auc_var.name, batch_auc_var.name])
                loss_val = np.mean(loss_val)
                auc_val = np.mean(auc_val)
                batch_auc_val = np.mean(batch_auc_val)

                logger.info(
                    "TRAIN --> pass: {} batch: {} loss: {} auc: {}, batch_auc: {}"
                    .format(pass_id, batch_id, loss_val / args.batch_size,
                            auc_val, batch_auc_val))
                if batch_id % 1000 == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/batch-' + str(
                        batch_id)
                    if args.trainer_id == 0:
                        fluid.io.save_persistables(
                            executor=exe,
                            dirname=model_dir,
                            main_program=fluid.default_main_program())
                batch_id += 1
        except fluid.core.EOFException:
            py_reader.reset()
        print("pass_id: %d, pass_time_cost: %f" %
              (pass_id, time.time() - pass_start))

        total_time += time.time() - pass_start

        model_dir = args.model_output_dir + '/pass-' + str(pass_id)
        if args.trainer_id == 0:
            fluid.io.save_persistables(
                executor=exe,
                dirname=model_dir,
                main_program=fluid.default_main_program())

    # Emit KPI lines only for continuous-evaluation (CE) runs.
    if args.enable_ce:
        threads_num, cpu_num = get_cards(args)
        epoch_idx = args.num_passes
        print("kpis\teach_pass_duration_cpu%s_thread%s\t%s" %
              (cpu_num, threads_num, total_time / epoch_idx))
        print("kpis\ttrain_loss_cpu%s_thread%s\t%s" %
              (cpu_num, threads_num, loss_val / args.batch_size))
        print("kpis\ttrain_auc_val_cpu%s_thread%s\t%s" %
              (cpu_num, threads_num, auc_val))
        print("kpis\ttrain_batch_auc_val_cpu%s_thread%s\t%s" %
              (cpu_num, threads_num, batch_auc_val))
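
Example #5 also calls a get_cards(args) helper that is not shown. A minimal sketch, assuming it simply reports the thread and CPU counts from the same environment variables used above:

def get_cards(args):
    # Assumed helper for the CE KPI lines: read counts from the environment.
    threads_num = int(os.environ.get('NUM_THREADS', 1))
    cpu_num = int(os.environ.get('CPU_NUM', 1))
    return threads_num, cpu_num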
Example #6
def train_loop(args, train_program, py_reader, loss, auc_var, batch_auc_var,
               trainer_num, trainer_id):
    dataset = reader.CriteoDataset(args.sparse_feature_dim)
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            dataset.train([args.train_data_path], trainer_num, trainer_id),
            buf_size=args.batch_size * 100),
        batch_size=args.batch_size)

    py_reader.decorate_paddle_reader(train_reader)
    # NOTE: data_name_list stays empty in this variant, so the saved
    # inference model records no feed variable names.
    data_name_list = []

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    exec_strategy = fluid.ExecutionStrategy()
    build_strategy = fluid.BuildStrategy()

    if os.getenv("NUM_THREADS", ""):
        exec_strategy.num_threads = int(os.getenv("NUM_THREADS"))

    cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
    # Use Reduce when running on multiple CPU cores, otherwise AllReduce.
    build_strategy.reduce_strategy = (
        fluid.BuildStrategy.ReduceStrategy.Reduce
        if cpu_num > 1 else fluid.BuildStrategy.ReduceStrategy.AllReduce)

    pe = fluid.ParallelExecutor(use_cuda=False,
                                loss_name=loss.name,
                                main_program=train_program,
                                build_strategy=build_strategy,
                                exec_strategy=exec_strategy)

    exe.run(fluid.default_startup_program())

    for pass_id in range(args.num_passes):
        pass_start = time.time()
        batch_id = 0
        py_reader.start()

        try:
            while True:
                loss_val, auc_val, batch_auc_val = pe.run(
                    fetch_list=[loss.name, auc_var.name, batch_auc_var.name])
                loss_val = np.mean(loss_val)
                auc_val = np.mean(auc_val)
                batch_auc_val = np.mean(batch_auc_val)

                logger.info(
                    "TRAIN --> pass: {} batch: {} loss: {} auc: {}, batch_auc: {}"
                    .format(pass_id, batch_id, loss_val / args.batch_size,
                            auc_val, batch_auc_val))
                if batch_id % 1000 == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/batch-' + str(
                        batch_id)
                    if args.trainer_id == 0:
                        fluid.io.save_inference_model(model_dir,
                                                      data_name_list,
                                                      [loss, auc_var], exe)
                batch_id += 1
        except fluid.core.EOFException:
            py_reader.reset()
        print("pass_id: %d, pass_time_cost: %f" %
              (pass_id, time.time() - pass_start))

        model_dir = args.model_output_dir + '/pass-' + str(pass_id)
        if args.trainer_id == 0:
            fluid.io.save_inference_model(model_dir, data_name_list,
                                          [loss, auc_var], exe)
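
Examples #5 and #6 receive an already-built py_reader. For completeness, a minimal sketch of how such a reader could be created with the old Fluid API; the capacity, shapes, lod levels and dtypes below are illustrative assumptions (13 dense features, 26 sparse slots and a label, as in the Criteo format) rather than the actual ctr_dnn_model definition:

py_reader = fluid.layers.py_reader(
    capacity=64,
    shapes=[[-1, 13]] + [[-1, 1]] * 27,   # dense block, 26 sparse slots, label (assumed)
    lod_levels=[0] + [1] * 26 + [0],      # sparse slots are variable-length LoD tensors
    dtypes=['float32'] + ['int64'] * 27,
    use_double_buffer=True)

# The model would unpack the inputs with fluid.layers.read_file(py_reader) before
# building the network; train_loop then pipes batches in through
# py_reader.decorate_paddle_reader(train_reader), as shown above.
inputs = fluid.layers.read_file(py_reader)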