def main(data_args, use_cuda, num_passes, lr):
    """Build a VGG-style regression network, restore pre-trained conv
    weights from an npy file, and export it as an inference model.

    Args:
        data_args: dataset settings (not used in this function body).
        use_cuda: run on GPU 0 when True, otherwise on CPU.
        num_passes: number of passes (not used in this function body).
        lr: learning rate for the Adam optimizer.
    """
    # Network input: a 3-channel 224x224 image.
    i_shape = [3, 224, 224]
    image = fluid.layers.data(name="image", shape=i_shape, dtype='float32')
    Net = fvnet.Net()
    out = Net.inference(input=image)

    # 2-dim float target regressed with a squared-error cost.
    label = fluid.layers.data(name="label", shape=[2], dtype='float32')

    sec = fluid.layers.square_error_cost(input=out, label=label)

    avg_cost = fluid.layers.mean(x=sec)

    optimizer = fluid.optimizer.Adam(learning_rate=lr)  #Adam
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Conv layers whose weights are restored from the pre-trained file.
    layer_load = [
        "conv1_1", "conv1_2", "conv2_1", "conv2_2", "conv3_1", "conv3_2",
        "conv3_3", "conv4_1", "conv4_2", "conv4_3"
    ]
    # NOTE(review): `pretrained_npy` is not defined in this function —
    # presumably a module-level constant; confirm it exists at call time.
    Net.load_weights(pretrained_npy, exe, place, layer_load)

    #    if pretrained_model:
    #        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
    # Export the (untrained-head) network for later inference.
    model_path = "./pre_weight/"
    fluid.io.save_inference_model(model_path, ['image'], [out], exe)
# Exemplo n.º 2
def infer(args):
    """Run single-sample inference with a classification model, or export it.

    Builds the architecture named by ``args.model``, restores persistable
    weights from ``args.pretrained_model``, then either saves an inference
    model (when ``args.save_inference`` is set) or scores the test reader
    one sample at a time, printing the top-1 prediction per batch.
    """
    # Unpack the argument namespace into locals.
    num_classes = args.class_dim
    net_name = args.model
    export_only = args.save_inference
    weights_dir = args.pretrained_model
    use_mem_opt = args.with_mem_opt
    in_shape = [int(dim) for dim in args.image_shape.split(",")]

    # Every public attribute of `models` is a candidate architecture.
    available = [name for name in dir(models) if "__" not in name]
    assert net_name in available, "{} is not in lists: {}".format(
        args.model, available)

    image = fluid.layers.data(name='image', shape=in_shape, dtype='float32')

    # Instantiate the requested architecture by name.
    net = models.__dict__[net_name]()
    if net_name == "GoogleNet":
        # GoogleNet also returns auxiliary heads; keep only the main output.
        out, _, _ = net.net(input=image, class_dim=num_classes)
    else:
        out = net.net(input=image, class_dim=num_classes)
        out = fluid.layers.softmax(out)

    test_program = fluid.default_main_program().clone(for_test=True)

    fetch_list = [out.name]
    # Keep the fetched output out of the reuse pool when optimizing memory.
    if use_mem_opt and not export_only:
        fluid.memory_optimize(fluid.default_main_program(),
                              skip_opt_set=set(fetch_list))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    fluid.io.load_persistables(exe, weights_dir)
    if export_only:
        fluid.io.save_inference_model(dirname=net_name,
                                      feeded_var_names=['image'],
                                      main_program=test_program,
                                      target_vars=out,
                                      executor=exe,
                                      model_filename='model',
                                      params_filename='params')
        print("model: ", net_name, " is already saved")
        exit(0)

    batch_size = 1
    test_reader = reader.test(settings=args, batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    topk = 1
    for batch_id, data in enumerate(test_reader()):
        outputs = exe.run(test_program,
                          fetch_list=fetch_list,
                          feed=feeder.feed(data))
        scores = outputs[0][0]
        # Highest-scoring class first.
        pred_label = np.argsort(scores)[::-1][:topk]
        print("Test-{0}-score: {1}, class {2}".format(batch_id,
                                                      scores[pred_label],
                                                      pred_label))
        sys.stdout.flush()
    def check_decay_with_place(self, place, python_decay_fn, fluid_decay_fn,
                               kwargs):
        """Compare a fluid LR-decay op against its pure-Python reference.

        Builds ``fluid_decay_fn(**kwargs)`` in a fresh program, runs it for
        ten steps, and asserts each fetched learning rate matches
        ``python_decay_fn`` evaluated at the same global step.
        """
        prog = fluid.Program()
        init_prog = fluid.Program()

        with fluid.program_guard(prog, init_prog):
            lr_var = fluid_decay_fn(**kwargs)

        # NOTE: the `place` argument is deliberately overwritten — the
        # check always runs on CPU (matches the original behaviour).
        place = fluid.CPUPlace()
        runner = fluid.Executor(place)

        runner.run(init_prog)

        fluid.memory_optimize(prog)

        for step in range(10):
            fetched, = runner.run(prog, feed={}, fetch_list=[lr_var])
            expected = python_decay_fn(global_step=float(step), **kwargs)
            self.assertAlmostEqual(
                expected,
                fetched[0],
                msg='Failed fn is {0}, Python result is {1}, Fluid result is {2}'
                .format(python_decay_fn.__name__, str(expected),
                        str(fetched[0])))
# Exemplo n.º 4
def infer():
    """Classify one image with a ResNet50 indoor-scene model.

    Hard-coded configuration: CPU execution, 5 classes, weights loaded
    from ``./output_indoor/ResNet50/61``.

    Returns:
        numpy array with the top-1 class index of the FIRST sample yielded
        by the test reader (the function returns from inside the batch
        loop), or None when the reader yields nothing.
    """
    # parameters from arguments
    use_gpu = False
    class_dim = 5
    model_name = "ResNet50"
    pretrained_model = "./output_indoor/ResNet50/61"
    with_memory_optimization = True
    image_shape = [3, 224, 224]

    #    assert model_name in model_list, "{} is not in lists: {}".format(args.model,
    #                                                                     model_list)

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')

    # model definition
    model = mo.__dict__[model_name]()

    # BUG FIX: the original compared strings with `is`, which tests object
    # identity, is implementation-dependent, and raises a SyntaxWarning on
    # Python >= 3.8. Use equality instead.
    if model_name == "GoogleNet":
        out, _, _ = model.net(input=image, class_dim=class_dim)
    else:
        out = model.net(input=image, class_dim=class_dim)

    test_program = fluid.default_main_program().clone(for_test=True)

    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            # Restore only variables that have a checkpoint file on disk.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    test_batch_size = 1
    test_reader = paddle.batch(reader.test(), batch_size=test_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    fetch_list = [out.name]

    TOPK = 1
    for batch_id, data in enumerate(test_reader()):
        result = exe.run(test_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        result = result[0][0]

        pred_label = np.argsort(result)[::-1][:TOPK]

        #print("Test-{0}-score: {1}, class {2}"
        #      .format(batch_id, result[pred_label], pred_label))
        result = pred_label
        sys.stdout.flush()
        # NOTE: returning inside the loop is intentional — only the first
        # sample is classified.
        return result
def main():
    """Entry point: parse arguments, build the model, and dispatch to the
    matching training path (pserver / nccl2 / single- or multi-GPU)."""
    args = parse_args()
    print_arguments(args)
    print_paddle_envs()
    if args.no_random:
        # Fix the seed for reproducible parameter initialization.
        fluid.default_startup_program().random_seed = 1

    # the unique trainer id, starting from 0, needed by trainer
    # only
    nccl_id_var, num_trainers, trainer_id = (None, 1,
                                             int(
                                                 os.getenv(
                                                     "PADDLE_TRAINER_ID",
                                                     "0")))

    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()
    # Dynamically import the model module named by --model; get_model
    # yields a list whose slot 0 is the loss and slot 2 the optimizer.
    model_def = __import__("%s" % args.model, fromlist=["models"])
    train_args = list(model_def.get_model(args))
    train_args.append(args)
    # Run optimizer.minimize(avg_loss)
    train_args[2].minimize(train_args[0])
    if args.memory_optimize:
        fluid.memory_optimize(fluid.default_main_program())

    if args.update_method == "pserver":
        train_prog, startup_prog = dist_transpile(trainer_id, args)
        if not train_prog:
            raise Exception(
                "Must configure correct environments to run dist train.")
        train_args.extend([train_prog, startup_prog])
        # Multi-GPU trainer processes take the parallel path and exit.
        if args.gpus > 1 and os.getenv("TRAINING_ROLE") == "TRAINER":
            train_args.extend([nccl_id_var, num_trainers, trainer_id])
            train_parallel(*train_args)
            exit(0)
        train(*train_args)
        exit(0)

    # for other update methods, use default programs
    train_args.append(fluid.default_main_program())
    train_args.append(fluid.default_startup_program())

    if args.update_method == "nccl2":
        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(
            trainer_id)
    if args.gpus == 1:
        # NOTE: parallel executor use profiler interanlly
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(*train_args)
        else:
            train(*train_args)
    else:
        if args.device == "CPU":
            raise Exception("Only support GPU perf with parallel exe")
        train_args.extend([nccl_id_var, num_trainers, trainer_id])
        train_parallel(*train_args)
def main(data_args,use_cuda,num_passes,lr,json_path1,json_path2):
    """Train a fluid VGG regression network on landmark-style targets.

    Args:
        data_args: dataset settings forwarded to the provider reader.
        use_cuda: run on GPU 0 when True, otherwise CPU.
        num_passes: number of passes over the training reader.
        lr: Adam learning rate.
        json_path1, json_path2: annotation files given to provider.Pandas_fm.

    Side effects: saves per-pass loss curves and an inference snapshot of
    the model under ./models/ each pass.
    """
    i_shape=[3,224, 224]
    image = fluid.layers.data(name="image", shape=i_shape, dtype='float32')
    Net = fvnet.vgg_fluid()
    out = Net.net(input=image)

    # Flat 28*28 float target regressed with squared error.
    label = fluid.layers.data(name="label", shape=[28*28], dtype='float32')#28*28 2

    sec = fluid.layers.square_error_cost(input=out, label=label)

    avg_cost = fluid.layers.mean(x=sec)

    optimizer = fluid.optimizer.Adam(learning_rate=lr)#Adam
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())


    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # NOTE(review): `pretrained_model` and `if_exist` are not defined in
    # this function — presumably module-level globals; verify they exist.
    if pretrained_model:
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
    panda_cl = provider.Pandas_fm(json_path1,json_path2)
    train_reader = paddle.batch(provider.read_train_for(data_args,panda_cl), batch_size=1)

    Loss = []   # mean loss per pass
    ttime = 0   # seconds accumulated since the last progress report

    for pass_id in range(num_passes):
        Loss_A = []   # per-batch losses within the current pass
        for batch_id , data in enumerate(train_reader()):
            start = datetime.datetime.now()

            i_datas = np.array(data[0][0])
            i_labels = np.array(data[0][1])

            loss  = exe.run(fluid.default_main_program(),
                                  feed={"image":i_datas,"label":i_labels},fetch_list = [avg_cost])

            loss = np.mean(np.array(loss))

            Loss_A.append(loss)
            end = datetime.datetime.now()
            ttime += (end-start).total_seconds()
            # Report progress (and reset the timer) every 100 batches.
            if batch_id % 100 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, time {3}".format(pass_id, \
                       batch_id, loss,ttime))
                sys.stdout.flush()
                ttime = 0
        Loss.append(np.mean(Loss_A))
        # Persist the loss history and an inference snapshot every pass.
        np.save('./models/loss/'+str(pass_id)+'_loss.npy',np.array(Loss))
        model_path = os.path.join("./models/",str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_inference_model(model_path, ['image'], [out], exe)
    np.save('./models/loss.npy',np.array(Loss))
# Exemplo n.º 7
def train(config):
    """Build the knowledge-grounded seq2seq model and run the training loop.

    Args:
        config: configuration object with vocab_path, grad_clip, lr, stage,
            use_gpu, data paths and batch settings.
    """
    # Vocabulary size is the number of lines in the vocab file.
    config.vocab_size = len(open(config.vocab_path).readlines())
    bow_loss, kl_loss, nll_loss, final_loss = knowledge_seq2seq(config)

    # Keep every loss alive through memory optimization so all of them can
    # still be fetched during training/validation.
    bow_loss.persistable = True
    kl_loss.persistable = True
    nll_loss.persistable = True
    final_loss.persistable = True

    main_program = fluid.default_main_program()
    # Clone for inference BEFORE minimize() adds backward/optimizer ops.
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
        clip_norm=config.grad_clip))
    optimizer = fluid.optimizer.Adam(learning_rate=config.lr)

    # Stage 0 pre-trains on the bag-of-words loss; stage 1 optimizes the
    # full objective.
    if config.stage == 0:
        print("stage 0")
        optimizer.minimize(bow_loss)
    else:
        print("stage 1")
        optimizer.minimize(final_loss)

    fluid.memory_optimize(main_program)
    opt_var_name_list = optimizer.get_opti_var_name_list()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    # Names of all trainable parameters, used for (re)initialization.
    param_list = main_program.block(0).all_parameters()
    param_name_list = [p.name for p in param_list]

    init_model(config, param_name_list, place)

    processors = KnowledgeCorpus(data_dir=config.data_dir,
                                 data_prefix=config.data_prefix,
                                 vocab_path=config.vocab_path,
                                 min_len=config.min_len,
                                 max_len=config.max_len)
    train_generator = processors.data_generator(batch_size=config.batch_size,
                                                phase="train",
                                                shuffle=True)
    valid_generator = processors.data_generator(batch_size=config.batch_size,
                                                phase="dev",
                                                shuffle=False)

    model_handle = [exe, place, bow_loss, kl_loss, nll_loss, final_loss]

    # Delegate the actual epoch loop to train_loop.
    train_loop(config, train_generator, valid_generator, main_program,
               inference_program, model_handle, param_name_list,
               opt_var_name_list)
# Exemplo n.º 8
def eval(args):
    """Extract embeddings over the test set and report top-1 recall.

    Runs the metric-learning network named by ``args.model`` over the
    whole test reader, stacks the produced features and labels, and
    prints ``recall_topk`` at k=1.
    """
    # parameters from arguments
    arch = args.model
    weights_dir = args.pretrained_model
    use_mem_opt = args.with_mem_opt
    input_shape = [int(dim) for dim in args.image_shape.split(",")]

    assert arch in model_list, "{} is not in lists: {}".format(args.model,
                                                               model_list)

    image = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Build the embedding network for the requested architecture.
    net = models.__dict__[arch]()
    out = net.net(input=image, embedding_size=args.embedding_size)

    test_program = fluid.default_main_program().clone(for_test=True)

    if use_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if weights_dir:

        def if_exist(var):
            # Restore only variables present in the checkpoint directory.
            return os.path.exists(os.path.join(weights_dir, var.name))

        fluid.io.load_vars(exe, weights_dir, predicate=if_exist)

    test_reader = paddle.batch(
        reader.test(args), batch_size=args.batch_size, drop_last=False)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    fetch_list = [out.name]

    feats_all, labels_all = [], []
    for batch_id, data in enumerate(test_reader()):
        t1 = time.time()
        [feas] = exe.run(test_program, fetch_list=fetch_list,
                         feed=feeder.feed(data))
        batch_labels = np.asarray([sample[1] for sample in data])
        feats_all.append(feas)
        labels_all.append(batch_labels)

        elapsed = time.time() - t1
        if batch_id % 20 == 0:
            print("[%s] testbatch %d, time %2.2f sec" % \
                    (fmt_time(), batch_id, elapsed))

    feats = np.vstack(feats_all)
    labels = np.hstack(labels_all)
    recall = recall_topk(feats, labels, k=1)
    print("[%s] End test %d, test_recall %.5f" % (fmt_time(), len(feats), recall))
    sys.stdout.flush()
# Exemplo n.º 9
def infer(args):
    """Run video classification with a TSN-ResNet model and print the
    top-1 class per sample.

    Args:
        args: namespace providing seg_num, class_dim, num_layers,
            test_model, image_shape and with_mem_opt.
    """
    # parameters from arguments
    seg_num = args.seg_num
    class_dim = args.class_dim
    num_layers = args.num_layers
    test_model = args.test_model

    # FIX: compare against None with `is`, not `==` (PEP 8; `==` invokes
    # arbitrary __eq__).
    if test_model is None:
        print('Please specify the test model ...')
        return

    image_shape = [int(m) for m in args.image_shape.split(",")]
    # Each sample stacks `seg_num` frames along a leading segment axis.
    image_shape = [seg_num] + image_shape

    # model definition
    model = TSN_ResNet(layers=num_layers, seg_num=seg_num)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')

    out = model.net(input=image, class_dim=class_dim)

    # for test
    inference_program = fluid.default_main_program().clone(for_test=True)

    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    def is_parameter(var):
        # FIX: return a plain bool (the original returned True or None).
        return isinstance(var, Parameter)

    # `test_model` is known to be non-None here (checked above). FIX: the
    # local was renamed from `vars` to avoid shadowing the builtin, and
    # materialized to a list so it can be consumed more than once.
    params = list(filter(is_parameter, inference_program.list_vars()))
    fluid.io.load_vars(exe, test_model, vars=params)

    # reader
    test_reader = paddle.batch(reader.infer(seg_num), batch_size=1)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    fetch_list = [out.name]

    # test
    TOPK = 1
    for batch_id, data in enumerate(test_reader()):
        # Each reader entry is (frames, video_id).
        data, vid = data[0]
        data = [[data]]
        result = exe.run(inference_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        result = result[0][0]
        pred_label = np.argsort(result)[::-1][:TOPK]
        print("Test sample: {0}, score: {1}, class {2}".format(
            vid, result[pred_label], pred_label))
        sys.stdout.flush()
# Exemplo n.º 10
def infer(model):
    """Webcam loop: detect faces, crop each face, and draw predicted
    landmark points on it until 'q' is pressed.

    Args:
        model: model identifier passed to create_model/load_model.

    NOTE(review): relies on module-level globals not visible here —
    `cap` (cv2.VideoCapture), `face_cascade` (cv2.CascadeClassifier),
    `create_model`, `load_model` and `draw_landmark_point`.
    """
    predict = create_model(model=model)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())
    fluid.memory_optimize(fluid.default_main_program())
    load_model(exe,fluid.default_main_program(),model=model)
    print("load model succeed")


    while True:
        ret,frame = cap.read()
        cv2.imshow('frame',frame)# window showing the raw camera frame
        gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
        # Detect faces in the grayscale frame.
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor = 1.5,
            minNeighbors = 5,
            minSize = (5,5)
        )
        for (x, y, w, h) in faces:
            # Enlarge the detected box slightly before cropping.
            w = w*1.1
            h = h*1.3
            w = int(w)
            h = int(h)
            frame = cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
            xmin = x
            xmax = x + w
            ymin = y
            ymax = y + h
            slip_image = frame[ymin:ymax,xmin:xmax]
            # Resize the crop to the 224x224 CHW float input the net expects.
            slip_image_224   = cv2.resize(slip_image, (224,224), interpolation=cv2.INTER_CUBIC)
            slip_image_224   = slip_image_224.transpose((2,0,1))
            imgs = []
            imgs.append(slip_image_224)
            imgs = np.array(imgs)
            imgs   = imgs.astype(np.float32)
            imgs   /= 255.0
            result = exe.run(fluid.default_main_program(),
                            feed={'img': imgs},
                            fetch_list=[predict])
            
            # Reshape the flat prediction into (num_points, 2) coordinates.
            points = result[0]
            points = points.reshape(-1,2)
            draw_landmark_point(slip_image,points)
            cv2.imshow("image!",slip_image)

        cv2.imshow('origin image',frame)# window showing the annotated frame
        if cv2.waitKey(1) &0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
# Exemplo n.º 11
def add_optimizer(args, avg_cost):
    """Attach a Momentum optimizer (piecewise-decay LR, L2 weight decay)
    to the default program, optionally followed by memory optimization."""
    #optimizer = fluid.optimizer.SGD(learning_rate=0.002)
    lr_schedule = fluid.layers.piecewise_decay(boundaries=[100],
                                               values=[0.1, 0.2])
    momentum_opt = fluid.optimizer.Momentum(
        learning_rate=lr_schedule,
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(1e-4))
    momentum_opt.minimize(avg_cost)

    if args.use_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())
# Exemplo n.º 12
def main():
    """Smoke-train an encoder-decoder translation model on WMT14.

    Deliberately stops after batch 2 (exit code 0) and aborts with an
    error message if the loss becomes NaN.
    """
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
    xent = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(xent)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())
    # fluid.release_memory(fluid.default_main_program())

    # fix the order of training data (no shuffling)
    train_data = paddle.batch(
        paddle.dataset.wmt14.train(dict_size), batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    feed_order = [
        'src_word_id', 'target_language_word', 'target_language_next_word'
    ]

    # Resolve the feed names to variables of the default main program.
    feed_list = [
        fluid.default_main_program().global_block().var(name)
        for name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    batch_id = 0
    for pass_id in range(10):
        for data in train_data():
            outs = exe.run(fluid.default_main_program(),
                           feed=feeder.feed(data),
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            if batch_id > 2:
                exit(0)
            if math.isnan(float(avg_cost_val)):
                sys.exit("got NaN loss, training failed.")
            batch_id += 1
# Exemplo n.º 13
def main():
    """Entry point: parse arguments, build the model, and dispatch to the
    matching training path (pserver / nccl2 / single- or multi-GPU)."""
    args = parse_args()
    print_arguments(args)

    # the unique trainer id, starting from 0, needed by trainer
    # only
    nccl_id_var, num_trainers, trainer_id = (
        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))

    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()
    # Dynamically import models.<name>; get_model yields a list whose
    # slot 0 is the loss and slot 2 the optimizer.
    model_def = __import__("models.%s" % args.model, fromlist=["models"])
    train_args = list(model_def.get_model(args))
    train_args.append(args)
    # Run optimizer.minimize(avg_loss)
    train_args[2].minimize(train_args[0])
    if args.memory_optimize:
        fluid.memory_optimize(fluid.default_main_program())

    if args.update_method == "pserver":
        train_prog, startup_prog = dist_transpile(trainer_id)
        if not train_prog:
            raise Exception(
                "Must configure correct environments to run dist train.")
        train_args.extend([train_prog, startup_prog])
        if args.gpus > 1 and os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER":
            train_args.extend([nccl_id_var, num_trainers, trainer_id])
            train_parallel(*train_args)
            # NOTE(review): unlike the sibling main() in this file, there is
            # no exit(0) here, so execution falls through to train(*train_args)
            # below after train_parallel returns — confirm this is intended.
        train(*train_args)
        exit(0)

    # for other update methods, use default programs
    train_args.append(fluid.default_main_program())
    train_args.append(fluid.default_startup_program())

    if args.update_method == "nccl2":
        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(trainer_id)
    if args.gpus == 1:
        # NOTE: parallel executor use profiler interanlly
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(*train_args)
        else:
            train(*train_args)
    else:
        if args.device == "CPU":
            raise Exception("Only support GPU perf with parallel exe")
        train_args.extend([nccl_id_var, num_trainers, trainer_id])
        train_parallel(*train_args)
# Exemplo n.º 14
def get_model(args, is_train, main_prog, startup_prog):
    """Build the MNIST CNN benchmark model inside the given programs.

    Args:
        args: benchmark arguments (use_reader_op, data_path, batch_size,
            gpus, memory_optimize).
        is_train: when True, attach the Adam optimizer and use the train
            dataset; otherwise the test dataset.
        main_prog / startup_prog: fluid programs to populate.

    Returns:
        (avg_cost, opt, [batch_acc], batched_reader, data_file_handle);
        `opt` is None when is_train is False, and data_file_handle is None
        unless args.use_reader_op is set.
    """
    # NOTE: mnist is small, we don't implement data sharding yet.
    opt = None
    data_file_handle = None
    with fluid.program_guard(main_prog, startup_prog):
        if args.use_reader_op:
            filelist = [
                os.path.join(args.data_path, f)
                for f in os.listdir(args.data_path)
            ]
            data_file_handle = fluid.layers.open_files(
                filenames=filelist,
                shapes=[[-1, 1, 28, 28], (-1, 1)],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                thread_num=1,
                pass_num=1)
            data_file = fluid.layers.double_buffer(
                fluid.layers.batch(
                    data_file_handle, batch_size=args.batch_size))
        with fluid.unique_name.guard():
            if args.use_reader_op:
                # BUG FIX: this branch previously unpacked into `input`
                # (also shadowing the builtin), leaving `images` undefined,
                # so cnn_model(images) below raised NameError whenever
                # use_reader_op was set.
                images, label = fluid.layers.read_file(data_file)
            else:
                images = fluid.layers.data(
                    name='pixel', shape=[1, 28, 28], dtype='float32')
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')

            predict = cnn_model(images)
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            # Evaluator
            batch_acc = fluid.layers.accuracy(input=predict, label=label)
            # Optimization
            if is_train:
                opt = fluid.optimizer.AdamOptimizer(
                    learning_rate=0.001, beta1=0.9, beta2=0.999)
                opt.minimize(avg_cost)
                if args.memory_optimize:
                    fluid.memory_optimize(main_prog)

    # Reader
    if is_train:
        reader = paddle.dataset.mnist.train()
    else:
        reader = paddle.dataset.mnist.test()
    batched_reader = paddle.batch(
        reader, batch_size=args.batch_size * args.gpus)
    return avg_cost, opt, [batch_acc], batched_reader, data_file_handle
# Exemplo n.º 15
def infer(args):
    """Print the first five embedding values for every inference sample."""
    # parameters from arguments
    arch = args.model
    weights_dir = args.pretrained_model
    use_mem_opt = args.with_mem_opt
    input_shape = [int(dim) for dim in args.image_shape.split(",")]

    assert arch in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.layers.data(name='image', shape=input_shape, dtype='float32')

    # Build the metric-learning network for the requested architecture.
    net = models.__dict__[arch]()
    out = net.net(input=image, embedding_size=args.embedding_size)

    test_program = fluid.default_main_program().clone(for_test=True)

    if use_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if weights_dir:

        def if_exist(var):
            # Restore only variables present in the checkpoint directory.
            return os.path.exists(os.path.join(weights_dir, var.name))

        fluid.io.load_vars(exe, weights_dir, predicate=if_exist)

    infer_reader = paddle.batch(
        reader.infer(args), batch_size=args.batch_size, drop_last=False)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    fetch_list = [out.name]

    for batch_id, data in enumerate(infer_reader()):
        fetched = exe.run(test_program,
                          fetch_list=fetch_list,
                          feed=feeder.feed(data))
        embedding = fetched[0][0].reshape(-1)
        print("Test-{0}-feature: {1}".format(batch_id, embedding[:5]))
        sys.stdout.flush()
# Exemplo n.º 16
    def construct_resnet(self, depth, learning_rate, momentum):
        """Build ResNet train/inference programs on the default program.

        Populates self.avg_cost, self.accuracy, self.inference_program and
        self.train_program.
        """
        data_in = fluid.layers.data(
            name='data', shape=self.data_shape, dtype='float32')
        target = fluid.layers.data(name='label', shape=[1], dtype='int64')
        logits = self.model_dict[self.data_set](data_in, self.class_dim, depth)
        xent = fluid.layers.cross_entropy(input=logits, label=target)
        self.avg_cost = fluid.layers.mean(x=xent)

        self.accuracy = fluid.layers.accuracy(input=logits, label=target)
        # inference program: cloned for test BEFORE the optimizer adds
        # backward ops to the default main program.
        self.inference_program = fluid.default_main_program().clone(
            for_test=True)

        momentum_opt = fluid.optimizer.Momentum(
            learning_rate=learning_rate, momentum=momentum)
        momentum_opt.minimize(self.avg_cost)
        fluid.memory_optimize(fluid.default_main_program())
        self.train_program = fluid.default_main_program().clone()
# Exemplo n.º 17
def test_main(use_cuda, use_py_func_op, use_parallel_executor):
    """Train a small FC net for two epochs and return the per-step losses.

    Args:
        use_cuda: run on GPU 0; returns None immediately when CUDA is
            requested but paddle was built without it.
        use_py_func_op: forwarded to simple_fc_net to toggle the py_func op.
        use_parallel_executor: wrap the compiled program with data
            parallelism.

    Returns:
        numpy array of per-step loss values, or None when skipped.
    """
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return None

    with fluid.program_guard(fluid.Program(), fluid.Program()):
        with fluid.scope_guard(fluid.core.Scope()):
            # Fix all seeds so runs with different executor settings are
            # comparable.
            fluid.default_main_program().random_seed = 1
            fluid.default_startup_program().random_seed = 1
            np.random.seed(1)

            img = fluid.layers.data(name='image', shape=[784], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            loss = simple_fc_net(img, label, use_py_func_op)
            optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
            optimizer.minimize(loss)

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
            r = paddle.batch(reader, batch_size=10)

            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            #FIXME force use old memory optimzie strategy here to pass the unittest
            #since open the new strategy will crash the unittest
            fluid.memory_optimize(fluid.default_main_program())

            train_cp = compiler.CompiledProgram(fluid.default_main_program())
            if use_parallel_executor:
                train_cp = train_cp.with_data_parallel(loss_name=loss.name)
                # ParallelExecutor path fetches by name, not by variable.
                fetch_list = [loss.name]
            else:
                fetch_list = [loss]

            ret = []
            for epoch_id in six.moves.range(2):
                for d in r():
                    L, = exe.run(train_cp,
                                 feed=feeder.feed(d),
                                 fetch_list=fetch_list)
                    ret.append(L)
            return np.array(ret)
# Exemplo n.º 18
    def construct_vgg16_net(self, learning_rate):
        """Build VGG16-BN train/inference programs on the default program.

        Populates self.avg_cost, self.accuracy, self.inference_program and
        self.train_program.
        """
        pixels = fluid.layers.data(name='pixel',
                                   shape=self.data_shape,
                                   dtype='float32')
        target = fluid.layers.data(name='label', shape=[1], dtype='int64')
        features = self.vgg16_bn_drop(pixels)
        probs = fluid.layers.fc(input=features,
                                size=self.class_dim,
                                act='softmax')
        xent = fluid.layers.cross_entropy(input=probs, label=target)
        self.avg_cost = fluid.layers.mean(x=xent)
        self.accuracy = fluid.layers.accuracy(input=probs, label=target)

        # Clone for inference BEFORE minimize() adds backward ops.
        self.inference_program = fluid.default_main_program().clone(
            for_test=True)

        adam = fluid.optimizer.Adam(learning_rate=learning_rate)
        adam.minimize(self.avg_cost)
        fluid.memory_optimize(fluid.default_main_program())

        self.train_program = fluid.default_main_program().clone()
# Exemplo n.º 19
    # lr = fluid.layers.polynomial_decay(base_lr, total_step, end_learning_rate=0, power=0.9)
    # area = fluid.layers.elementwise_max(
    #     fluid.layers.reduce_mean(mask),
    #     fluid.layers.assign(np.array(
    #         [0.1], dtype=np.float32)))
    # loss_mean = fluid.layers.reduce_mean(loss) / area
    # opt = fluid.optimizer.Adam(learning_rate=2e-4)
    # opt = fluid.optimizer.Momentum(
    #     lr,
    #     momentum=0.9,
    #     regularization=fluid.regularizer.L2DecayRegularizer(
    #         regularization_coeff=weight_decay), )
    # retv = opt.minimize(loss_mean, startup_program=sp, no_grad_set=no_grad_set)
# tp = tp.clone(True)

# NOTE(review): top-level script fragment — `tp`, `sp`, `pred`, `use_gpu`,
# `recover_path`, `parallel_flag`, `loss_mean` and `load_model` are defined
# earlier in the full script (not visible in this chunk).
# Optimize memory on the training program; keep the prediction tensor out
# of the reuse pool so it can still be fetched.
fluid.memory_optimize(tp, print_log=False, skip_opt_set=[pred.name], level=1)

place = fluid.CPUPlace()
if use_gpu:
    place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(sp)  # run the startup program once to initialize parameters

load_model(recover_path)

# Multi-device path: wrap the training program in a ParallelExecutor.
if parallel_flag:
    exe_p = fluid.ParallelExecutor(use_cuda=True,
                                   loss_name=loss_mean.name,
                                   main_program=tp)

early_stopcount = 0
# Exemplo n.º 20
def train(args):
    """Train a matching Network on the train set and periodically evaluate.

    Builds separate train/test fluid Programs for the same `Network`,
    runs epochs over `args.train_path`, and saves an inference model
    whenever the validation 1_in_10 recall improves.

    Args:
        args: parsed CLI namespace; uses vocab_size, emb_size, hidden_size,
            loss_type, learning_rate, use_cuda, batch_size, save_path,
            save_step, print_step, num_scan_data, train_path, val_path,
            max_len, sample_pro, word_emb_init.
    """
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    net = Network(args.vocab_size, args.emb_size, args.hidden_size)

    # Build the training graph under program/unique-name guards so the
    # test graph below can recreate identically-named parameters.
    train_program = fluid.Program()
    train_startup = fluid.Program()
    # CE_MODE_X: continuous-evaluation mode — fix seeds for reproducibility.
    if "CE_MODE_X" in os.environ:
        train_program.random_seed = 110
        train_startup.random_seed = 110
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            logits, loss = net.network(args.loss_type)
            # Keep fetch targets from being reused by memory optimization.
            loss.persistable = True
            logits.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))

            optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
            optimizer.minimize(loss)
            print("begin memory optimization ...")
            fluid.memory_optimize(train_program)
            print("end memory optimization ...")

    # Build a parallel test graph sharing parameter names with train.
    test_program = fluid.Program()
    test_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        test_program.random_seed = 110
        test_startup.random_seed = 110
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            logits, loss = net.network(args.loss_type)
            loss.persistable = True
            logits.persistable = True

    test_program = test_program.clone(for_test=True)
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    print("device count %d" % dev_count)
    print("theoretical memory usage: ")
    print(
        fluid.contrib.memory_usage(program=train_program,
                                   batch_size=args.batch_size))

    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=loss.name,
                                       main_program=train_program)

    # share_vars_from reuses the trained parameters for evaluation.
    test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                      main_program=test_program,
                                      share_vars_from=train_exe)

    # Optional pre-trained word-embedding initialization (pickled ndarray).
    if args.word_emb_init is not None:
        print("start loading word embedding init ...")
        if six.PY2:
            word_emb = np.array(pickle.load(open(args.word_emb_init,
                                                 'rb'))).astype('float32')
        else:
            word_emb = np.array(
                pickle.load(open(args.word_emb_init, 'rb'),
                            encoding="bytes")).astype('float32')
        net.set_word_embedding(word_emb, place)
        print("finish init word embedding  ...")

    print("start loading data ...")

    def train_with_feed(batch_data):
        """
        Train on one batch; returns the per-device loss array.
        """
        # TODO: get_feed_names
        feed_dict = dict(zip(net.get_feed_names(), batch_data))

        cost = train_exe.run(feed=feed_dict, fetch_list=[loss.name])
        return cost[0]

    def test_with_feed(batch_data):
        """
        Test on one batch; returns the fetched logits array.
        """
        feed_dict = dict(zip(net.get_feed_names(), batch_data))

        score = test_exe.run(feed=feed_dict, fetch_list=[logits.name])
        return score[0]

    def evaluate():
        """
        Evaluate on the full validation set to choose the model.
        Returns the recall dict produced by eva.evaluate_Recall.
        """
        val_batches = reader.batch_reader(args.val_path, args.batch_size,
                                          place, args.max_len, 1)
        scores = []
        labels = []
        for batch in val_batches:
            scores.extend(test_with_feed(batch))
            # NOTE(review): assumes batch[2] holds the label column — confirm
            # against reader.batch_reader's output layout.
            labels.extend([x[0] for x in batch[2]])

        return eva.evaluate_Recall(zip(scores, labels))

    def save_exe(step, best_recall):
        """
        Evaluate and save an inference model when 1_in_10 recall improves.
        Returns the (possibly updated) best recall.
        """
        recall_dict = evaluate()
        print('evaluation recall result:')
        print('1_in_2: %s\t1_in_10: %s\t2_in_10: %s\t5_in_10: %s' %
              (recall_dict['1_in_2'], recall_dict['1_in_10'],
               recall_dict['2_in_10'], recall_dict['5_in_10']))

        if recall_dict['1_in_10'] > best_recall and step != 0:
            fluid.io.save_inference_model(args.save_path,
                                          net.get_feed_inference_names(),
                                          logits,
                                          exe,
                                          main_program=train_program)

            print("Save model at step %d ... " % step)
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))
            best_recall = recall_dict['1_in_10']
        return best_recall

    # train over different epoches
    global_step, train_time = 0, 0.0
    best_recall = 0
    for epoch in six.moves.xrange(args.num_scan_data):
        train_batches = reader.batch_reader(args.train_path, args.batch_size,
                                            place, args.max_len,
                                            args.sample_pro)

        begin_time = time.time()
        sum_cost = 0
        ce_cost = 0
        for batch in train_batches:
            # Evaluate/save every save_step steps (step 0 is skipped inside).
            if (args.save_path is not None) and (global_step % args.save_step
                                                 == 0):
                best_recall = save_exe(global_step, best_recall)

            cost = train_with_feed(batch)
            global_step += 1
            sum_cost += cost.mean()
            ce_cost = cost.mean()

            if global_step % args.print_step == 0:
                print('training step %s avg loss %s' %
                      (global_step, sum_cost / args.print_step))
                sum_cost = 0

        pass_time_cost = time.time() - begin_time
        train_time += pass_time_cost
        print("Pass {0}, pass_time_cost {1}".format(
            epoch, "%2.2f sec" % pass_time_cost))
        # Emit CE tracking KPIs on the last epoch only.
        if "CE_MODE_X" in os.environ and epoch == args.num_scan_data - 1:
            card_num = get_cards()
            print("kpis\ttrain_duration_card%s\t%s" %
                  (card_num, pass_time_cost))
            print("kpis\ttrain_loss_card%s\t%s" % (card_num, ce_cost))
Exemplo n.º 21
0
def finetune(args):
    """Finetune a matching Network starting from a pre-trained model.

    Same training skeleton as train(), but with an exponential-decay
    learning rate and optional warm-start from args.init_model. Saves an
    inference model whenever validation correlation improves.

    Args:
        args: parsed CLI namespace; uses vocab_size, emb_size, hidden_size,
            loss_type, learning_rate, use_cuda, batch_size, save_path,
            save_step, print_step, num_scan_data, train_path, val_path,
            max_len, sample_pro, init_model.
    """
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    net = Network(args.vocab_size, args.emb_size, args.hidden_size)

    # Build the training graph under program/unique-name guards so the
    # test graph below can recreate identically-named parameters.
    train_program = fluid.Program()
    train_startup = fluid.Program()
    # CE_MODE_X: continuous-evaluation mode — fix seeds for reproducibility.
    if "CE_MODE_X" in os.environ:
        train_program.random_seed = 110
        train_startup.random_seed = 110
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            logits, loss = net.network(args.loss_type)
            # Keep fetch targets from being reused by memory optimization.
            loss.persistable = True
            logits.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))

            optimizer = fluid.optimizer.Adam(
                learning_rate=fluid.layers.exponential_decay(
                    learning_rate=args.learning_rate,
                    decay_steps=400,
                    decay_rate=0.9,
                    staircase=True))
            optimizer.minimize(loss)
            print("begin memory optimization ...")
            fluid.memory_optimize(train_program)
            print("end memory optimization ...")

    # Build a parallel test graph sharing parameter names with train.
    test_program = fluid.Program()
    test_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        test_program.random_seed = 110
        test_startup.random_seed = 110
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            logits, loss = net.network(args.loss_type)
            loss.persistable = True
            logits.persistable = True

    test_program = test_program.clone(for_test=True)
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    print("device count %d" % dev_count)
    print("theoretical memory usage: ")
    print(
        fluid.contrib.memory_usage(program=train_program,
                                   batch_size=args.batch_size))

    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=loss.name,
                                       main_program=train_program)

    # share_vars_from reuses the trained parameters for evaluation.
    test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                      main_program=test_program,
                                      share_vars_from=train_exe)

    if args.init_model:
        # NOTE(review): passing the *startup* program here looks suspicious —
        # sibling code uses the main program for loading; confirm that
        # init.init_pretraining_params really expects train_startup.
        init.init_pretraining_params(exe,
                                     args.init_model,
                                     main_program=train_startup)
        # Fixed typo in log message ("sccuess" -> "success").
        print('success init %s' % args.init_model)

    print("start loading data ...")

    def train_with_feed(batch_data):
        """
        Train on one batch; returns the per-device loss array.
        """
        # TODO: get_feed_names
        feed_dict = dict(zip(net.get_feed_names(), batch_data))

        cost = train_exe.run(feed=feed_dict, fetch_list=[loss.name])
        return cost[0]

    def test_with_feed(batch_data):
        """
        Test on one batch; returns the fetched logits array.
        """
        feed_dict = dict(zip(net.get_feed_names(), batch_data))

        score = test_exe.run(feed=feed_dict, fetch_list=[logits.name])
        return score[0]

    def evaluate():
        """
        Evaluate on the full validation set; returns the correlation score.
        """
        val_batches = reader.batch_reader(args.val_path, args.batch_size,
                                          place, args.max_len, 1)
        scores = []
        labels = []
        for batch in val_batches:
            scores.extend(test_with_feed(batch))
            labels.extend([x[0] for x in batch[2]])
        scores = [x[0] for x in scores]
        return eva.evaluate_cor(scores, labels)

    def save_exe(step, best_cor):
        """
        Evaluate and save an inference model when correlation improves.
        Returns the (possibly updated) best correlation.
        """
        cor = evaluate()
        print('evaluation cor relevance %s' % cor)
        if cor > best_cor and step != 0:
            fluid.io.save_inference_model(args.save_path,
                                          net.get_feed_inference_names(),
                                          logits,
                                          exe,
                                          main_program=train_program)
            print("Save model at step %d ... " % step)
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))
            best_cor = cor
        return best_cor

    # train over different epoches
    global_step, train_time = 0, 0.0
    best_cor = 0.0
    pre_index = -1
    for epoch in six.moves.xrange(args.num_scan_data):
        train_batches = reader.batch_reader(args.train_path, args.batch_size,
                                            place, args.max_len,
                                            args.sample_pro)

        begin_time = time.time()
        sum_cost = 0
        for batch in train_batches:
            # Evaluate/save every save_step steps (step 0 is skipped inside).
            if (args.save_path is not None) and (global_step % args.save_step
                                                 == 0):
                best_cor = save_exe(global_step, best_cor)

            cost = train_with_feed(batch)
            global_step += 1
            sum_cost += cost.mean()

            if global_step % args.print_step == 0:
                print('training step %s avg loss %s' %
                      (global_step, sum_cost / args.print_step))
                sum_cost = 0

        pass_time_cost = time.time() - begin_time
        train_time += pass_time_cost
        print("Pass {0}, pass_time_cost {1}".format(
            epoch, "%2.2f sec" % pass_time_cost))
Exemplo n.º 22
0
def main():
    """Benchmark a hand-built DynamicRNN LSTM sentiment classifier on IMDB.

    Builds the graph, trains for args.pass_num passes (breaking after the
    first timed pass), and records throughput into the matching tracking
    KPI if one is registered.

    Fix: `train_speed_kpi` (and `train_acc_kpi`) stay None when no KPI
    name matches the current batch size; the original then crashed with
    AttributeError on `.add_record(...)` / `.persist()`. Now guarded.
    """
    args = parse_args()
    lstm_size = args.hidden_dim

    data = fluid.layers.data(name="words",
                             shape=[1],
                             lod_level=1,
                             dtype='int64')
    sentence = fluid.layers.embedding(input=data,
                                      size=[len(word_dict), args.emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    # Hand-rolled LSTM cell inside a DynamicRNN block.
    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            """One LSTM gate pre-activation: W*x + U*h (bias only on W*x)."""
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            return fluid.layers.sums(input=[gate0, gate1])

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        # c_t = f * c_{t-1} + i * g
        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        ])

        # h_t = o * tanh(c_t)
        hidden = fluid.layers.elementwise_mul(x=output_gate,
                                              y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    # NOTE(review): a 'label' data layer is declared here and again below for
    # the accuracy op — presumably tolerated by this fluid version; confirm.
    loss = fluid.layers.cross_entropy(input=logit,
                                      label=fluid.layers.data(name='label',
                                                              shape=[1],
                                                              dtype='int64'))
    loss = fluid.layers.mean(x=loss)

    # add acc
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=logit, label=fluid.layers.data(name='label', \
                shape=[1], dtype='int64'), total=batch_size_tensor)

    adam = fluid.optimizer.Adam()
    adam.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    train_reader = batch(paddle.reader.shuffle(crop_sentence(
        imdb.train(word_dict), args.crop_size),
                                               buf_size=25000),
                         batch_size=args.batch_size)

    # Look up the KPIs registered for this batch size; either may be absent.
    train_acc_kpi = None
    for kpi in tracking_kpis:
        if kpi.name == 'imdb_%s_train_acc' % (args.batch_size):
            train_acc_kpi = kpi
    train_speed_kpi = None
    for kpi in tracking_kpis:
        if kpi.name == 'imdb_%s_train_speed' % (args.batch_size):
            train_speed_kpi = kpi

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            # Restart the clock after the warm-up batches.
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            tensor_words = to_lodtensor([x[0] for x in data], place)
            label = numpy.array([x[1] for x in data]).astype("int64")
            label = label.reshape((-1, 1))
            loss_np, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={
                    "words": tensor_words,
                    "label": label
                },
                fetch_list=[loss, batch_acc, batch_size_tensor])
            iters += 1
            for x in data:
                num_samples += len(x[0])
            print(
                "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                (pass_id, iters, loss_np, acc)
            )  # The accuracy is the accumulation of batches, but not the current batch.

        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
              (num_samples, train_elapsed, examples_per_sec))
        # Guard: no matching KPI registered -> skip recording instead of crashing.
        if train_speed_kpi is not None:
            train_speed_kpi.add_record(
                np.array(examples_per_sec, dtype='float32'))
        break
    if train_speed_kpi is not None:
        train_speed_kpi.persist()
Exemplo n.º 23
0
def run_benchmark(model, args):
    """Benchmark an image-classification `model` with ParallelExecutor.

    Builds train/inference programs, trains for args.pass_num passes of
    args.iterations batches, evaluates after each pass, and prints
    throughput statistics.

    Fixes vs. original:
    - `np.array(map(...))` is Python-2-only (under Python 3 `map` returns an
      iterator and numpy wraps it as a 0-d object array); replaced with list
      comprehensions, matching the Python-3 style used elsewhere in the file.
    - Typo "Accuray" -> "Accuracy" in the per-pass log line.
    """
    dshape, class_dim = get_data_shape(args)

    input = fluid.layers.data(name='data', shape=dshape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    predict = model(input, class_dim)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Fix seeds for run-to-run comparability of the benchmark.
    fluid.default_main_program().seed = 1
    fluid.default_startup_program().seed = 1

    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=predict,
                                      label=label,
                                      total=batch_size_tensor)

    # Clone the program for inference before attaching the optimizer ops.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
    opts = optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Init ParallelExecutor
    train_exe, test_exe = get_parallel_executor(args, avg_cost,
                                                fluid.default_main_program(),
                                                inference_program)

    # Prepare reader
    train_reader, test_reader = init_reader(args)

    def test(test_exe):
        """Run the test set and return weighted-average accuracy."""
        test_accuracy = fluid.average.WeightedAverage()
        for batch_id, data in enumerate(test_reader()):
            if batch_id == args.iterations:
                break
            img_data = np.array([x[0].reshape(dshape)
                                 for x in data]).astype("float32")
            y_data = np.array([x[1]
                               for x in data]).astype("int64").reshape([-1, 1])

            acc, weight = test_exe.run(
                fetch_list=[batch_acc.name, batch_size_tensor.name],
                feed={
                    "data": img_data,
                    "label": y_data
                })
            # Per-device results: combine into a single weighted accuracy.
            acc = float((acc * weight).sum() / weight.sum())
            weight = int(weight.sum())
            test_accuracy.add(value=acc, weight=weight)

        return test_accuracy.eval()

    im_num, total_train_time, total_iters = 0, 0.0, 0
    accuracy = fluid.average.WeightedAverage()
    fetch_list = [avg_cost.name, batch_acc.name, batch_size_tensor.name]

    for pass_id in range(args.pass_num):
        every_pass_loss = []
        accuracy.reset()
        iter, pass_duration = 0, 0.0
        for batch_id, data in enumerate(train_reader()):
            batch_start = time.time()
            if iter == args.iterations:
                break

            image = np.array([x[0].reshape(dshape)
                              for x in data]).astype('float32')
            label = np.array([x[1]
                              for x in data]).astype('int64').reshape([-1, 1])

            loss, acc, weight = train_exe.run(fetch_list=fetch_list,
                                              feed={
                                                  'data': image,
                                                  'label': label
                                              })

            acc = float((acc * weight).sum() / weight.sum())
            loss = (loss * weight).sum() / weight.sum()
            weight = int(weight.sum())
            accuracy.add(value=acc, weight=weight)

            # Skip warm-up batches of the first pass when timing.
            if iter >= args.skip_batch_num or pass_id != 0:
                batch_duration = time.time() - batch_start
                pass_duration += batch_duration
                im_num += label.shape[0]

            every_pass_loss.append(loss)
            # print("Pass: %d, Iter: %d, loss: %s, acc: %s" %
            #      (pass_id, iter, str(loss), str(acc)))
            iter += 1
            total_iters += 1

        total_train_time += pass_duration
        pass_train_acc = accuracy.eval()
        pass_test_acc = test(test_exe)
        print(
            "Pass:%d, Loss:%f, Train Accuracy:%f, Test Accuracy:%f, Handle Images Duration: %f\n"
            % (pass_id, np.mean(every_pass_loss), pass_train_acc,
               pass_test_acc, pass_duration))

    record_kpi(pass_id, iter, pass_train_acc, total_train_time, im_num)

    examples_per_sec = im_num / total_train_time
    sec_per_batch = total_train_time / \
            (iter * args.pass_num - args.skip_batch_num)

    print('\nTotal examples: %d, total time: %.5f' %
          (im_num, total_train_time))
    print('%.5f examples/sec, %.5f sec/batch \n' %
          (examples_per_sec, sec_per_batch))
Exemplo n.º 24
0
    def check_network_convergence(self,
                                  method,
                                  memory_opt=True,
                                  iter=50,
                                  batch_size=None,
                                  allow_op_delay=False,
                                  feed_dict=None,
                                  seed=None,
                                  use_parallel_executor=True,
                                  balance_parameter_opt_between_cards=False):
        """Build the network from `method`, train `iter` steps, and return
        (first_loss, last_loss) as numpy arrays.

        Fix: the original used Python-2 `print` statements and `xrange`,
        inconsistent with the Python-3 `print()` calls used elsewhere in
        this file; converted to Python-3-compatible forms.

        Args:
            method: callable building the loss (takes use_feed=bool).
            memory_opt: apply fluid.memory_optimize to the main program.
            iter: number of training steps between loss measurements.
                  (Shadows the builtin; kept for interface compatibility.)
            batch_size: per-device batch size, used only for throughput log.
            allow_op_delay: forwarded to ExecutionStrategy.
            feed_dict: optional feed for each run.
            seed: optional random seed for the startup program.
            use_parallel_executor: use ParallelExecutor instead of Executor.
            balance_parameter_opt_between_cards: use Reduce strategy.
        """
        def run_executor(exe, feed, fetch_list, program=None):
            """Dispatch a run() call for either executor flavor."""
            if isinstance(exe, fluid.ParallelExecutor):
                res = exe.run(fetch_list=fetch_list, feed=feed)
            elif isinstance(exe, fluid.Executor):
                if program is None:
                    program = fluid.default_main_program()
                res = exe.run(program=program, feed=feed, fetch_list=fetch_list)
            else:
                raise ValueError('Unkown type exe')
            return res

        main = fluid.Program()
        startup = fluid.Program()
        startup.random_seed = 1  # Fix random seed
        with fluid.program_guard(main, startup):
            if seed is not None:
                startup.random_seed = seed
            loss = method(use_feed=feed_dict is not None)
            adam = fluid.optimizer.Adam()
            adam.minimize(loss)
            if memory_opt:
                fluid.memory_optimize(main)
            place = fluid.CUDAPlace(0)
            startup_exe = fluid.Executor(place)
            startup_exe.run(startup)
            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.allow_op_delay = allow_op_delay

            build_strategy = fluid.BuildStrategy()
            build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce if balance_parameter_opt_between_cards else fluid.BuildStrategy.ReduceStrategy.AllReduce

            if use_parallel_executor:
                exe = fluid.ParallelExecutor(
                    True,
                    loss_name=loss.name,
                    exec_strategy=exec_strategy,
                    build_strategy=build_strategy)
            else:
                exe = fluid.Executor(place=place)

            if batch_size is not None:
                # Scale up to the aggregate batch size across all devices.
                batch_size *= fluid.core.get_cuda_device_count()
            begin = time.time()
            first_loss, = run_executor(
                exe=exe, feed=feed_dict, fetch_list=[loss.name])
            first_loss = np.array(first_loss)

            for i in range(iter):
                run_executor(exe=exe, feed=feed_dict, fetch_list=[])

            last_loss, = run_executor(
                exe=exe, feed=feed_dict, fetch_list=[loss.name])
            end = time.time()

            if batch_size is not None:
                print("%.4f Instance per second" % (
                    (batch_size * iter + 2) / (end - begin)))

            last_loss = np.array(last_loss)

            print(first_loss, last_loss)
            # self.assertGreater(first_loss[0], last_loss[0])
            return first_loss, last_loss
Exemplo n.º 25
0
def train(args):
    """Train a video-classification model named by args.model_name.

    Builds train and valid programs from the model zoo config, optionally
    restores/pretrains weights, then dispatches to the pyreader or feeder
    training loop.
    """
    # parse config
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    train_model = models.get_model(args.model_name, train_config, mode='train')
    valid_model = models.get_model(args.model_name, valid_config, mode='valid')

    # build model
    startup = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup):
        with fluid.unique_name.guard():
            train_model.build_input(not args.no_use_pyreader)
            train_model.build_model()
            # for the input, has the form [data1, data2,..., label], so train_feeds[-1] is label
            train_feeds = train_model.feeds()
            train_feeds[-1].persistable = True
            # for the output of classification model, has the form [pred]
            train_outputs = train_model.outputs()
            for output in train_outputs:
                output.persistable = True
            train_loss = train_model.loss()
            train_loss.persistable = True
            # outputs, loss, label should be fetched, so set persistable to be true
            optimizer = train_model.optimizer()
            optimizer.minimize(train_loss)
            train_pyreader = train_model.pyreader()

    if not args.no_memory_optimize:
        fluid.memory_optimize(train_prog)

    # Valid program shares `startup` so parameters are created only once.
    valid_prog = fluid.Program()
    with fluid.program_guard(valid_prog, startup):
        with fluid.unique_name.guard():
            valid_model.build_input(not args.no_use_pyreader)
            valid_model.build_model()
            valid_feeds = valid_model.feeds()
            valid_outputs = valid_model.outputs()
            valid_loss = valid_model.loss()
            valid_pyreader = valid_model.pyreader()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)

    if args.resume:
        # if resume weights is given, load resume weights directly
        assert os.path.exists(args.resume), \
                "Given resume weight dir {} not exist.".format(args.resume)
        def if_exist(var):
            # Only load variables that actually have a file in the resume dir.
            return os.path.exists(os.path.join(args.resume, var.name))
        fluid.io.load_vars(exe, args.resume, predicate=if_exist, main_program=train_prog)
    else:
        # if not in resume mode, load pretrain weights
        if args.pretrain:
            assert os.path.exists(args.pretrain), \
                    "Given pretrain weight dir {} not exist.".format(args.pretrain)
        pretrain = args.pretrain or train_model.get_pretrain_weights()
        if pretrain:
            train_model.load_pretrain_params(exe, pretrain, train_prog, place)

    train_exe = fluid.ParallelExecutor(
        use_cuda=args.use_gpu,
        loss_name=train_loss.name,
        main_program=train_prog)
    # Valid executor shares the trained parameters via share_vars_from.
    valid_exe = fluid.ParallelExecutor(
        use_cuda=args.use_gpu,
        share_vars_from=train_exe,
        main_program=valid_prog)

    # get reader: with pyreader on GPU, batch size is per-device.
    bs_denominator = 1
    if (not args.no_use_pyreader) and args.use_gpu:
        bs_denominator = train_config.TRAIN.num_gpus
    train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                        bs_denominator)
    valid_config.VALID.batch_size = int(valid_config.VALID.batch_size /
                                        bs_denominator)
    train_reader = get_reader(args.model_name.upper(), 'train', train_config)
    valid_reader = get_reader(args.model_name.upper(), 'valid', valid_config)

    # get metrics
    train_metrics = get_metrics(args.model_name.upper(), 'train', train_config)
    valid_metrics = get_metrics(args.model_name.upper(), 'valid', valid_config)

    # Fetch loss, every output, and the label (train_feeds[-1]) by name.
    train_fetch_list = [train_loss.name] + [x.name for x in train_outputs
                                            ] + [train_feeds[-1].name]
    valid_fetch_list = [valid_loss.name] + [x.name for x in valid_outputs
                                            ] + [valid_feeds[-1].name]

    epochs = args.epoch_num or train_model.epoch_num()

    # Dispatch to the feeder- or pyreader-based training loop.
    if args.no_use_pyreader:
        train_feeder = fluid.DataFeeder(place=place, feed_list=train_feeds)
        valid_feeder = fluid.DataFeeder(place=place, feed_list=valid_feeds)
        train_without_pyreader(exe, train_prog, train_exe, train_reader, train_feeder,
                               train_fetch_list, train_metrics, epochs = epochs,
                               log_interval = args.log_interval, valid_interval = args.valid_interval,
                               save_dir = args.save_dir, save_model_name = args.model_name,
                               test_exe = valid_exe, test_reader = valid_reader, test_feeder = valid_feeder,
                               test_fetch_list = valid_fetch_list, test_metrics = valid_metrics)
    else:
        train_pyreader.decorate_paddle_reader(train_reader)
        valid_pyreader.decorate_paddle_reader(valid_reader)
        train_with_pyreader(exe, train_prog, train_exe, train_pyreader, train_fetch_list, train_metrics,
                            epochs = epochs, log_interval = args.log_interval,
                            valid_interval = args.valid_interval,
                            save_dir = args.save_dir, save_model_name = args.model_name,
                            test_exe = valid_exe, test_pyreader = valid_pyreader,
                            test_fetch_list = valid_fetch_list, test_metrics = valid_metrics)
Exemplo n.º 26
0
def train(conf_dict):
    """
    Train a text-matching model described by ``conf_dict``.

    The network, loss, and optimizer classes are resolved dynamically from
    the configuration, the model is trained on CPU for
    ``conf_dict["epoch_num"]`` epochs via a ParallelExecutor, and an
    inference model is saved after every epoch under
    ``conf_dict["model_path"]/<epoch_id>``.

    Supports two task modes:
      * "pairwise"  — ranking with a positive and a negative candidate;
      * otherwise   — pointwise classification with an explicit label.
    """
    # Get data layer
    data = layers.DataLayer()
    # Load network structure dynamically
    net = utils.import_class(
        "nets", conf_dict["net"]["module_name"], conf_dict["net"]["class_name"])(conf_dict)
    # Load loss function dynamically
    loss = utils.import_class(
        "losses", conf_dict["loss"]["module_name"], conf_dict["loss"]["class_name"])(conf_dict)
    # Load optimization method dynamically
    optimizer = utils.import_class(
        "optimizers", "paddle_optimizers", conf_dict["optimizer"]["class_name"])(conf_dict)

    # Training always runs on CPU in this example.
    place = fluid.core.CPUPlace()
    if conf_dict["task_mode"] == "pairwise":
        # Pairwise ranking: one query ("left") with a positive and a
        # negative candidate; the loss compares the two scores.
        left = data.ops(name="left", shape=[1], dtype="int64", lod_level=1)
        pos_right = data.ops(name="right", shape=[
                             1], dtype="int64", lod_level=1)
        neg_right = data.ops(name="neg_right", shape=[
                             1], dtype="int64", lod_level=1)
        left_feat, pos_score = net.predict(left, pos_right)
        _, neg_score = net.predict(left, neg_right)
        avg_cost = loss.compute(pos_score, neg_score)
        # Get Feeder and Reader
        feeder = fluid.DataFeeder(place=place, feed_list=[
                                  left.name, pos_right.name, neg_right.name])
        reader = data_reader.get_reader(conf_dict, False, None)
    else:
        # Pointwise classification: a (left, right) pair plus a label.
        left = data.ops(name="left", shape=[1], dtype="int64", lod_level=1)
        right = data.ops(name="right", shape=[1], dtype="int64", lod_level=1)
        label = data.ops(name="label", shape=[1], dtype="int64", lod_level=0)
        left_feat, pred = net.predict(left, right)
        avg_cost = loss.compute(pred, label)
        # Get Feeder and Reader
        feeder = fluid.DataFeeder(place=place, feed_list=[
                                  left.name, right.name, label.name])
        reader = data_reader.get_reader(conf_dict, False, None)
    # Clone the forward-only program BEFORE the optimizer appends backward
    # ops, so the saved inference model contains no training-only operators.
    infer_program = fluid.default_main_program().clone()
    # operate Optimization
    optimizer.ops(avg_cost)
    # optimize memory
    fluid.memory_optimize(fluid.default_main_program())
    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())
    # Get and run executor
    parallel_executor = fluid.ParallelExecutor(
        use_cuda=False, loss_name=avg_cost.name,
        main_program=fluid.default_main_program())
    # Get device number
    device_count = parallel_executor.device_count
    logging.info("device count: %d" % device_count)
    # run train
    logging.info("start train process ...")
    for epoch_id in range(conf_dict["epoch_num"]):
        losses = []
        # Get batch data iterator
        batch_data = paddle.batch(reader, conf_dict["batch_size"], drop_last=False)
        start_time = time.time()
        # NOTE: loop vars renamed from ``iter``/``data`` so they no longer
        # shadow the ``iter`` builtin and the DataLayer object above.
        for batch_id, batch in enumerate(batch_data()):
            # ParallelExecutor needs at least one sample per device.
            if len(batch) < device_count:
                continue
            avg_loss = parallel_executor.run(
                [avg_cost.name], feed=feeder.feed(batch))
            print("epoch: %d, iter: %d, loss: %f" %
                (epoch_id, batch_id, np.mean(avg_loss[0])))
            losses.append(np.mean(avg_loss[0]))
        end_time = time.time()
        print("epoch: %d, loss: %f, used time: %d sec" %
              (epoch_id, np.mean(losses), end_time - start_time))
        model_save_dir = conf_dict["model_path"]
        model_path = os.path.join(model_save_dir, str(epoch_id))
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)
        if conf_dict["task_mode"] == "pairwise":
            # Inference only needs the positive branch of the network.
            feed_var_names = [left.name, pos_right.name]
            target_vars = [left_feat, pos_score]
        else:
            feed_var_names = [left.name, right.name]
            target_vars = [left_feat, pred]
        fluid.io.save_inference_model(
            model_path, feed_var_names, target_vars, executor, infer_program)
Exemplo n.º 27
0
def train():
    """
    Benchmark training of the WMT14 seq-to-seq model.

    Hyper-parameters are read from the module-level ``args`` namespace.
    NOTE(review): Python 2 only — relies on ``xrange`` and on ``map``
    returning a list; confirm before running under Python 3.
    """
    avg_cost, feeding_list = seq_to_seq_net(args.embedding_dim,
                                            args.encoder_size,
                                            args.decoder_size,
                                            args.dict_size,
                                            args.dict_size,
                                            False,
                                            beam_size=args.beam_size,
                                            max_length=args.max_length)

    # clone from default main program (before backward ops are appended)
    inference_program = fluid.default_main_program().clone()

    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    train_batch_generator = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.wmt14.train(args.dict_size), buf_size=1000),
                                         batch_size=args.batch_size)

    test_batch_generator = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
                                        batch_size=args.batch_size)

    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def do_validation():
        # Mean loss over the whole test set, using the forward-only program.
        total_loss = 0.0
        count = 0
        for batch_id, data in enumerate(test_batch_generator()):
            src_seq = to_lodtensor(map(lambda x: x[0], data), place)[0]
            trg_seq = to_lodtensor(map(lambda x: x[1], data), place)[0]
            lbl_seq = to_lodtensor(map(lambda x: x[2], data), place)[0]

            fetch_outs = exe.run(inference_program,
                                 feed={
                                     feeding_list[0]: src_seq,
                                     feeding_list[1]: trg_seq,
                                     feeding_list[2]: lbl_seq
                                 },
                                 fetch_list=[avg_cost],
                                 return_numpy=False)

            total_loss += lodtensor_to_ndarray(fetch_outs[0])[0]
            count += 1

        return total_loss / count

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in xrange(args.pass_num):
        # NOTE(review): neither accumulator is ever appended to — dead code.
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_batch_generator()):
            # Restart the throughput clock after the warm-up batches.
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            src_seq, word_num = to_lodtensor(map(lambda x: x[0], data), place)
            num_samples += word_num
            trg_seq, word_num = to_lodtensor(map(lambda x: x[1], data), place)
            num_samples += word_num
            lbl_seq, _ = to_lodtensor(map(lambda x: x[2], data), place)

            fetch_outs = exe.run(framework.default_main_program(),
                                 feed={
                                     feeding_list[0]: src_seq,
                                     feeding_list[1]: trg_seq,
                                     feeding_list[2]: lbl_seq
                                 },
                                 fetch_list=[avg_cost])

            iters += 1
            loss = np.array(fetch_outs[0])
            print(
                "Pass = %d, Iter = %d, Loss = %f" % (pass_id, iters, loss)
            )  # The accuracy is the accumulation of batches, but not the current batch.

        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
              (num_samples, train_elapsed, examples_per_sec))
        # evaluation
        if args.with_test:
            test_loss = do_validation()
        # NOTE(review): exits after the first pass — the benchmark runs once
        # regardless of args.pass_num.
        exit(0)
Exemplo n.º 28
0
def train(args):
    """
    ERNIE pre-training loop (optionally distributed via NCCL2).

    Builds separate train/test programs sharing one startup program,
    restores an optional checkpoint, then trains until
    ``args.num_train_steps``, periodically logging metrics, saving
    persistables, and validating on ``args.valid_filelist``.
    """
    print("pretraining start")
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # --- Build the training program: model + lr schedule/optimizer ---
    train_program = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            train_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='train_reader', ernie_config=ernie_config)
            scheduled_lr = optimization(loss=total_loss,
                                        warmup_steps=args.warmup_steps,
                                        num_train_steps=args.num_train_steps,
                                        learning_rate=args.learning_rate,
                                        train_program=train_program,
                                        startup_prog=startup_prog,
                                        weight_decay=args.weight_decay,
                                        scheduler=args.lr_scheduler,
                                        use_fp16=args.use_fp16,
                                        loss_scaling=args.loss_scaling)

            # Keep the fetched metric variables out of the memory-reuse pass.
            fluid.memory_optimize(input_program=train_program,
                                  skip_opt_set=[
                                      next_sent_acc.name, mask_lm_loss.name,
                                      total_loss.name
                                  ])

    # --- Build the evaluation program (same startup program) ---
    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='test_reader', ernie_config=ernie_config)

    test_prog = test_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    print("Device count %d" % dev_count)
    print("theoretical memory usage: ")
    if args.in_tokens:
        # In token mode batch_size counts tokens; convert to sentences.
        print(
            fluid.contrib.memory_usage(program=train_program,
                                       batch_size=args.batch_size //
                                       args.max_seq_len))
    else:
        print(
            fluid.contrib.memory_usage(program=train_program,
                                       batch_size=args.batch_size))

    # --- Optional multi-node setup via the NCCL2 transpiler ---
    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    print("args.is_distributed:", args.is_distributed)
    if args.is_distributed:
        worker_endpoints_env = os.getenv("worker_endpoints")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)
        current_endpoint = os.getenv("current_endpoint")
        trainer_id = worker_endpoints.index(current_endpoint)
        if trainer_id == 0:
            # Give the other workers time to come up before rank 0 starts.
            print("train_id == 0, sleep 60s")
            time.sleep(60)
        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
              trainer_id:{}".format(worker_endpoints, trainers_num,
                                    current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Warm-start from a checkpoint when one is provided.
    if args.init_checkpoint and args.init_checkpoint != "":
        init_checkpoint(exe, args.init_checkpoint, train_program,
                        args.use_fp16)

    data_reader = ErnieDataReader(filelist=args.train_filelist,
                                  batch_size=args.batch_size,
                                  vocab_path=args.vocab_path,
                                  voc_size=ernie_config['vocab_size'],
                                  epoch=args.epoch,
                                  max_seq_len=args.max_seq_len,
                                  generate_neg_sample=args.generate_neg_sample,
                                  in_tokens=args.in_tokens,
                                  is_bidirection=args.is_bidirection)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_fast_executor:
        exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = dev_count
    exec_strategy.num_iteration_per_drop_scope = min(10, args.skip_steps)

    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = False

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=total_loss.name,
                                       build_strategy=build_strategy,
                                       exec_strategy=exec_strategy,
                                       main_program=train_program,
                                       num_trainers=nccl2_num_trainers,
                                       trainer_id=nccl2_trainer_id)

    if args.valid_filelist and args.valid_filelist != "":
        predict = predict_wrapper(args,
                                  exe,
                                  ernie_config,
                                  test_prog=test_prog,
                                  pyreader=test_pyreader,
                                  fetch_list=[
                                      next_sent_acc.name, mask_lm_loss.name,
                                      total_loss.name
                                  ])

    # --- Main loop: run until num_train_steps or the reader signals EOF ---
    train_pyreader.decorate_tensor_provider(data_reader.data_generator())
    train_pyreader.start()
    steps = 0
    cost = []
    lm_cost = []
    acc = []
    time_begin = time.time()
    while steps < args.num_train_steps:
        try:
            steps += nccl2_num_trainers
            skip_steps = args.skip_steps * nccl2_num_trainers

            # Only rank 0 fetches metrics; other ranks just step.
            if nccl2_trainer_id != 0:
                train_exe.run(fetch_list=[])
                continue

            if steps % skip_steps != 0:
                train_exe.run(fetch_list=[])
            else:
                each_next_acc, each_mask_lm_cost, each_total_cost, np_lr = train_exe.run(
                    fetch_list=[
                        next_sent_acc.name, mask_lm_loss.name, total_loss.name,
                        scheduled_lr.name
                    ])
                acc.extend(each_next_acc)
                lm_cost.extend(each_mask_lm_cost)
                cost.extend(each_total_cost)

                print("feed_queue size", train_pyreader.queue.size())
                time_end = time.time()
                used_time = time_end - time_begin
                epoch, current_file_index, total_file, current_file, mask_type = data_reader.get_progress(
                )
                print("current learning_rate:%f" % np_lr[0])
                print(
                    "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                    "ppl: %f, next_sent_acc: %f, speed: %f steps/s, file: %s, mask_type: %s"
                    %
                    (epoch, current_file_index, total_file, steps,
                     np.mean(np.array(cost)), np.mean(np.exp(
                         np.array(lm_cost))), np.mean(np.array(acc)),
                     skip_steps / used_time, current_file, mask_type))
                # Reset accumulators after each logging window.
                cost = []
                lm_cost = []
                acc = []
                time_begin = time.time()

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)

            if args.valid_filelist and steps % args.validation_steps == 0:
                vali_cost, vali_lm_cost, vali_acc, vali_steps, vali_speed = predict(
                )
                print("[validation_set] epoch: %d, step: %d, "
                      "loss: %f, global ppl: %f, batch-averged ppl: %f, "
                      "next_sent_acc: %f, speed: %f steps/s" %
                      (epoch, steps, np.mean(np.array(vali_cost) / vali_steps),
                       np.exp(np.mean(np.array(vali_lm_cost) / vali_steps)),
                       np.mean(np.exp(np.array(vali_lm_cost) / vali_steps)),
                       np.mean(np.array(vali_acc) / vali_steps), vali_speed))

        except fluid.core.EOFException:
            # Reader exhausted all epochs — stop training.
            train_pyreader.reset()
            break
Exemplo n.º 29
0
# ParallelDo example: replicate a linear-regression net across devices.
# device_count=0 means "use every available device of device_type".
places = fluid.layers.get_places(device_count=0, device_type=device_type)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do():
    # Per-device replica: read this device's shard, build fc + loss.
    x_ = pd.read_input(x)
    y_ = pd.read_input(y)
    y_predict = fluid.layers.fc(input=x_, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y_)
    avg_cost = fluid.layers.mean(x=cost)
    pd.write_output(avg_cost)

# Gather per-device losses and average them into the global loss.
cost = pd()
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
sgd_optimizer.minimize(avg_cost)

fluid.memory_optimize(fluid.default_main_program(), print_log=True)
# fluid.release_memory(fluid.default_main_program())

BATCH_SIZE = 200

# fix the order of training data
train_reader = paddle.batch(
    paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE)

# train_reader = paddle.batch(
#     paddle.reader.shuffle(
#         paddle.dataset.uci_housing.train(), buf_size=500),
#     batch_size=BATCH_SIZE)

feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
# NOTE(review): the next line is a stray fragment (an orphaned keyword
# argument whose enclosing call was lost) — this snippet is not runnable
# as-is; confirm against the original example before use.
                                        main_program=program)


if __name__ == '__main__':
    # Inference driver: load a named model, run it over the test set, and
    # save the per-pixel argmax as segmentation images.
    parse = argparse.ArgumentParser(description='')
    parse.add_argument('--model', help='model name', nargs='?')
    args = parse.parse_args()
    model = args.model

    DataSet = create_reader(model)

    predict = create_model(model=model)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    fluid.memory_optimize(fluid.default_main_program())
    load_model(exe, fluid.default_main_program(), model=model)

    # Batch size 1 with fixed seed 1234 — presumably for a reproducible
    # traversal order; verify against get_batch_generator's signature.
    batches = DataSet.get_batch_generator(1, 1234)
    for i, imgs, names in batches:

        result = exe.run(fluid.default_main_program(),
                         feed={'img': imgs},
                         fetch_list=[predict])
        print(i)
        # NOTE(review): ``path`` is read before it is ever assigned in this
        # block — unless a module-level ``path`` exists this raises
        # NameError, and even then it keeps concatenating across iterations.
        path = path + 'data/unet/test/ColorImage/' + names[0].split(
            "image/")[1]
        # Class index per pixel, reshaped to a 1024x1024 label image.
        picture = np.argmax(result[0], axis=1)
        picture = picture.reshape((1024, 1024))
        saveImage(picture, path)
Exemplo n.º 31
0
def main(args):
    """
    Fine-tune / evaluate an ERNIE classifier.

    Depending on ``args.do_train`` / ``do_val`` / ``do_test``: builds the
    training and/or test programs, restores a checkpoint or pretrained
    parameters, runs the training loop with periodic evaluation and
    checkpointing, and finishes with a final eval on dev/test sets.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # In token mode batch_size counts tokens, so convert to sentences
        # before deriving the step budget.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        # --- Build the training program: model + lr schedule/optimizer ---
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                scheduled_lr = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    loss_scaling=args.loss_scaling)

                # Keep the fetched variables out of the memory-reuse pass.
                fluid.memory_optimize(
                    input_program=train_program,
                    skip_opt_set=[
                        graph_vars["loss"].name,
                        graph_vars["probs"].name,
                        graph_vars["accuracy"].name,
                        graph_vars["num_seqs"].name,
                    ])

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    # --- Build the evaluation program (shared startup program) ---
    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)

        test_prog = test_prog.clone(for_test=True)

    exe.run(startup_prog)

    # --- Parameter initialization: checkpoint takes priority over
    #     pretrained params when both are given ---
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    # --- Training loop: runs until the pyreader raises EOFException ---
    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        if args.save_log and args.log_path:
            if os.path.exists(args.log_path):
                raise FileExistsError("Logging file already exists!")
            with open(args.log_path, 'w') as logfile:
                logfile.write('%s\n' % time.asctime())
            print('Writing logs into %s' % args.log_path)

        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Fetch/print metrics only every skip_steps steps.
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(train_exe, train_program, train_pyreader,
                                       graph_vars, "train")

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("epoch: %d, progress: %d/%d, step: %d, "
                          "ave loss: %.4f, micro_f1: %.4f, micro_p: %.4f, micro_r: %.4f, "
                          "speed: %f steps/s" %
                          (current_epoch, current_example, num_train_examples, steps,
                           outputs["loss"], outputs["micro_f"], outputs["micro_p"], outputs["micro_r"],
                           args.skip_steps / used_time))

                    # Todo: complete logging function
                    # Todo: print more useful metrics: f1/p/r instead of acc
                    if args.save_log and args.log_path:
                        with open(args.log_path, 'a') as logfile:
                            logfile.write("epoch: %d, progress: %d/%d, step: %d, "
                          "ave loss: %.4f, ave_acc: %.4f, micro_f1: %.4f, micro_p: %.4f, micro_r: %.4f, "
                          "speed: %f steps/s\n" %
                          (current_epoch, current_example, num_train_examples, steps,
                           outputs["loss"], outputs["accuracy"], outputs["micro_f"], outputs["micro_p"], outputs["micro_r"],
                           args.skip_steps / used_time))

                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(
                                args.dev_set,
                                batch_size=args.batch_size,
                                epoch=1,
                                shuffle=False))
                        evaluate(exe, test_prog, test_pyreader, graph_vars,
                                 "dev")
                    # evaluate test set
                    if args.do_test:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(
                                args.test_set,
                                batch_size=args.batch_size,
                                epoch=1,
                                shuffle=False))
                        evaluate(exe, test_prog, test_pyreader, graph_vars,
                                 "test")
            except fluid.core.EOFException:
                # Data exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.dev_set,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        print("Final validation result:")
        evaluate(exe, test_prog, test_pyreader, graph_vars, "dev")

    # final eval on test set
    if args.do_test:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.test_set,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        print("Final test result:")
        evaluate(exe, test_prog, test_pyreader, graph_vars, "test")
Exemplo n.º 32
0
def main():
    """Train a VGG16 (batch-norm + dropout) classifier with PaddlePaddle
    Fluid and report per-pass loss/accuracy and training throughput.

    Configuration is read from the module-level ``args`` namespace:
    ``data_set``, ``data_format``, ``learning_rate``, ``batch_size``,
    ``pass_num``, ``iterations``, ``skip_batch_num``, ``device`` and
    ``with_test``.
    """
    # Class count and input layout depend on the chosen data set/format.
    if args.data_set == "cifar10":
        classdim = 10
        if args.data_format == 'NCHW':
            data_shape = [3, 32, 32]
        else:
            data_shape = [32, 32, 3]
    else:
        classdim = 102
        if args.data_format == 'NCHW':
            data_shape = [3, 224, 224]
        else:
            data_shape = [224, 224, 3]

    # Input data
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    net = vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator: batch accuracy weighted by actual batch size.
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=predict,
                                      label=label,
                                      total=batch_size_tensor)

    # Inference program: cloned before optimizer ops are added.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # Data readers
    train_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.cifar.train10()
        if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
        buf_size=5120),
                                batch_size=args.batch_size)
    test_reader = paddle.batch(paddle.dataset.cifar.test10() if args.data_set
                               == 'cifar10' else paddle.dataset.flowers.test(),
                               batch_size=args.batch_size)

    def _feed_arrays(data):
        # Build the feed ndarrays for one batch. List comprehensions (not
        # bare map()) keep this correct on Python 3, where map() returns
        # an iterator and np.array(map(...)) yields a 0-d object array.
        img_data = np.array([x[0].reshape(data_shape)
                             for x in data]).astype("float32")
        y_data = np.array([x[1] for x in data]).astype("int64")
        return img_data, y_data.reshape([-1, 1])

    def test(exe):
        # Evaluate on the full test set, weighting each batch's accuracy
        # by its size.
        test_accuracy = fluid.average.WeightedAverage()
        for batch_id, data in enumerate(test_reader()):
            img_data, y_data = _feed_arrays(data)
            acc, weight = exe.run(inference_program,
                                  feed={
                                      "pixel": img_data,
                                      "label": y_data
                                  },
                                  fetch_list=[batch_acc, batch_size_tensor])
            test_accuracy.add(value=acc, weight=weight)
        return test_accuracy.eval()

    iters, num_samples, start_time = 0, 0, time.time()
    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                # Restart the clock after the warm-up batches so the
                # throughput numbers exclude startup overhead.
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            img_data, y_data = _feed_arrays(data)

            loss, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={
                    "pixel": img_data,
                    "label": y_data
                },
                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
            accuracy.add(value=acc, weight=weight)
            iters += 1
            num_samples += len(y_data)
            print(
                "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                (pass_id, iters, loss, acc)
            )  # The accuracy is the accumulation of batches, but not the current batch.

        # Only the last batch's loss/acc are recorded per pass (kept from
        # the original; 'accuracy' already holds the weighted average).
        train_losses.append(loss)
        train_accs.append(acc)
        print("Pass: %d, Loss: %f, Train Accuracy: %f\n" %
              (pass_id, np.mean(train_losses), np.mean(train_accs)))
        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
              (num_samples, train_elapsed, examples_per_sec))
        # evaluation
        if args.with_test:
            pass_test_acc = test(exe)
        # Benchmark mode: terminate after the first pass (original behavior).
        exit(0)
# ----- Exemplo n.º 33 -----
    def _build_env(self):
        """Build and cache the Fluid programs for the current phase.

        Idempotent: returns immediately if ``self.env`` is already
        initialized. Otherwise it clones the base main program, builds the
        task network (plus labels/loss/metrics in train/test phases),
        optionally fronts the program with a ``py_reader``, marks fetched
        variables persistable for memory optimization, lets the training
        strategy add its ops, compiles for data parallelism if configured,
        and finally runs the startup program.
        """
        if self.env.is_inititalized:
            return

        self._build_env_start_event()
        self.env.is_inititalized = True
        # Fresh trainable clone of the base program for this phase.
        self.env.main_program = clone_program(self._base_main_program,
                                              for_test=False)

        self.env.startup_program = fluid.Program()
        with fluid.program_guard(self.env.main_program,
                                 self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.env.outputs = self._build_net()
                if self.is_train_phase or self.is_test_phase:
                    self.env.labels = self._add_label()
                    self.env.loss = self._add_loss()
                    self.env.metrics = self._add_metrics()

        # Predict/test phases run a test-mode clone with is_test set on
        # every op (disables train-only behavior such as dropout).
        if self.is_predict_phase or self.is_test_phase:
            self.env.main_program = clone_program(self.env.main_program,
                                                  for_test=True)
            hub.common.paddle_helper.set_op_attr(self.env.main_program,
                                                 is_test=True)

        if self.config.use_pyreader:
            # Build a separate program holding a py_reader and connect it
            # in front of the main program so input is fed asynchronously.
            t_program = fluid.Program()
            with fluid.program_guard(t_program, self.env.startup_program):
                self.env.py_reader = fluid.layers.py_reader(
                    capacity=64,
                    shapes=[var.shape for var in self.feed_var_list],
                    dtypes=[
                        dtype_map[var.dtype] for var in self.feed_var_list
                    ],
                    lod_levels=[var.lod_level for var in self.feed_var_list],
                    use_double_buffer=False)

                feed_var_list = self.feed_var_list
                py_vars = fluid.layers.read_file(self.env.py_reader)
                py_vars = to_list(py_vars)
                # Map each feed variable's name to the tensor read from
                # the py_reader, in feed order.
                input_dict = {
                    feed_var_list[index].name: py_var
                    for index, py_var in enumerate(py_vars)
                }

                hub.connect_program(pre_program=t_program,
                                    next_program=self.env.main_program,
                                    input_dict=input_dict,
                                    need_log=False)

            self.env.main_program = t_program
            if not self.is_predict_phase:
                # Re-resolve loss/metrics variables by name inside the new
                # (connected) program so later code fetches the right vars.
                self.env.loss = self.env.main_program.global_block().vars[
                    self.env.loss.name]
                metrics_name = [var.name for var in self.env.metrics]
                self.env.metrics = [
                    self.env.main_program.global_block().vars[name]
                    for name in metrics_name
                ]

            # Outputs are re-resolved in every phase.
            outputs_name = [var.name for var in self.env.outputs]
            self.env.outputs = [
                self.env.main_program.global_block().vars[name]
                for name in outputs_name
            ]

        if self.config.enable_memory_optim:
            # Fetched variables must be persistable so memory_optimize
            # does not reuse their buffers.
            for var_name in self.fetch_list:
                var = self.env.main_program.global_block().vars[var_name]
                var.persistable = True

        if self.is_train_phase:
            # Let the fine-tuning strategy append its ops (optimizer,
            # regularization, etc.) under the same unique-name guard.
            with fluid.program_guard(self.env.main_program,
                                     self._base_startup_program):
                with fluid.unique_name.guard(self.env.UNG):
                    self.config.strategy.execute(self.loss,
                                                 self._base_data_reader,
                                                 self.config)

        if self.is_train_phase:
            loss_name = self.env.loss.name
            share_vars_from = None
        else:
            loss_name = None

        # Share parameters with the first compiled program, if one exists
        # (overrides the assignment above either way).
        if self._base_compiled_program is None:
            share_vars_from = None
        else:
            share_vars_from = self._base_compiled_program

        if not self.config.use_data_parallel:
            if self.config.enable_memory_optim:
                fluid.memory_optimize(self.env.main_program)
            self.env.main_program_compiled = None
        else:
            self.env.main_program_compiled = fluid.CompiledProgram(
                self.env.main_program).with_data_parallel(
                    loss_name=loss_name,
                    share_vars_from=share_vars_from,
                    build_strategy=self.build_strategy)

            # First compiled program becomes the var-sharing base for
            # subsequent phases.
            if self._base_compiled_program is None:
                self._base_compiled_program = self.env.main_program_compiled

        self.exe.run(self.env.startup_program)
        self._build_env_end_event()