def run_main_with_place(self, places, use_compiled_program=True):
    """Feed batches from the loader through the executor and validate them.

    The network is built with ``self.build_network(places)`` inside a fresh
    scope and fresh programs. ``self.epoch_num`` epochs are run; each epoch
    stops once ``self.break_num`` batches were consumed, and the number of
    consumed batches is compared with
    ``min(self.break_num, int(self.batch_num / len(places)))``.

    Args:
        places: Devices to run on; ``places[0]`` hosts the executor.
        use_compiled_program: Wrap the main program in a data-parallel
            ``CompiledProgram``. Asserted to be true when more than one
            place is given.
    """

    def check_consumed(consumed, upper_bound, device_num):
        # A single device must consume exactly the expected number of
        # batches; with several devices only the upper bound is asserted.
        if device_num == 1:
            self.assertEqual(consumed, upper_bound)
        else:
            self.assertLessEqual(consumed, upper_bound)

    with fluid.scope_guard(fluid.Scope()), \
            fluid.program_guard(fluid.Program(), fluid.Program()):
        input_data, loss, loader = self.build_network(places)
        fetch_list = [input_data]

        exe = fluid.Executor(places[0])
        exe.run(fluid.default_startup_program())

        device_num = len(places)
        if device_num > 1:
            self.assertTrue(use_compiled_program)

        program = fluid.default_main_program()
        if use_compiled_program:
            program = fluid.CompiledProgram(program).with_data_parallel(
                loss_name=loss.name, places=places)

        upper_bound = min(self.break_num, int(self.batch_num / device_num))

        if loader.iterable:
            stopped_early = False
            for _ in six.moves.range(self.epoch_num):
                stopped_early = False
                self.clear_visited()
                consumed = 0
                for batch in loader():
                    if consumed >= self.break_num:
                        stopped_early = True
                        break
                    self.assertInputData(
                        consumed, batch, device_num, check_visited=False)
                    [fetched] = exe.run(program=program,
                                        feed=batch,
                                        fetch_list=fetch_list)
                    self.assertInputData(consumed, fetched, device_num)
                    consumed += 1
                check_consumed(consumed, upper_bound, device_num)

            # NOTE(review): the reset is placed after the epoch loop, which
            # is what the pre-loop initialisation of the flag suggests --
            # confirm against the original indentation.
            if stopped_early:
                loader._reset()
            return

        # Non-iterable loader: it is started/reset explicitly and signals
        # exhaustion by raising EOFException from exe.run().
        for _ in six.moves.range(self.epoch_num):
            consumed = 0
            self.clear_visited()
            loader.start()
            try:
                while consumed < self.break_num:
                    [fetched] = exe.run(program=program,
                                        fetch_list=fetch_list)
                    self.assertInputData(consumed, fetched, device_num)
                    consumed += 1
                else:
                    # Reached the batch limit before the data ran out.
                    loader.reset()
            except fluid.core.EOFException:
                loader.reset()
            check_consumed(consumed, upper_bound, device_num)
def run_func_with_guard(self, func):
    """Call ``func()`` with new programs, a unique-name guard and a new scope.

    The guards are entered in this order: program guard (fresh main and
    startup ``fluid.Program``), unique-name guard, scope guard (fresh
    ``fluid.Scope``).

    Args:
        func: Zero-argument callable to run inside the guards.
    """
    with fluid.program_guard(fluid.Program(), fluid.Program()), \
            fluid.unique_name.guard(), \
            fluid.scope_guard(fluid.Scope()):
        func()
def test_pslib_2(self):
    """Test cases for pslib.

    Exercises the public and private helpers of ``GeneralRoleMaker``,
    ``RoleMakerBase``, ``MPISymetricRoleMaker`` and the ``Fleet`` base class.
    The test returns early (after printing a message) when ``fleet.init`` or
    the distributed optimizer setup raises.
    """
    import paddle.fluid as fluid
    from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
    from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
    from paddle.fluid.incubate.fleet.base.role_maker import RoleMakerBase

    os.environ["POD_IP"] = "127.0.0.1"
    os.environ["PADDLE_PORT"] = "36001"
    os.environ["TRAINING_ROLE"] = "TRAINER"
    os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
    os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
    os.environ["PADDLE_TRAINER_ID"] = "0"
    os.environ["PADDLE_TRAINERS_NUM"] = "1"

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    # `except Exception` (rather than a bare `except`) keeps the skip
    # behaviour while letting KeyboardInterrupt/SystemExit propagate.
    try:
        fleet.init(None)
    except Exception:
        print("no mpi4py, skip test_pslib_2")
        return

    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    with fluid.program_guard(train_program, startup_program):
        show = fluid.layers.data(name="show", shape=[-1, 1],
                                 dtype="float32", lod_level=1,
                                 append_batch_size=False)
        fc = fluid.layers.fc(input=show, size=1, act=None)
        label = fluid.layers.data(name="click", shape=[-1, 1],
                                  dtype="int64", lod_level=1,
                                  append_batch_size=False)
        label_cast = fluid.layers.cast(label, dtype='float32')
        cost = fluid.layers.log_loss(fc, label_cast)
    try:
        adam = fluid.optimizer.Adam(learning_rate=0.000005)
        adam = fleet.distributed_optimizer(adam)
        adam.minimize([cost], [scope])
        fleet.run_server()
    except Exception:
        print("do not support pslib test, skip")
        return

    # An invalid TRAINING_ROLE is expected to make generate_role() fail.
    os.environ["TRAINING_ROLE"] = "wrong"
    try:
        role1 = GeneralRoleMaker(path="./test_gloo_1")
        role1.generate_role()
    except Exception:
        print("catch expected error of wrong TRAINING_ROLE")

    os.environ["TRAINING_ROLE"] = "PSERVER"
    os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001"
    role2 = GeneralRoleMaker(path="./test_gloo_2")
    role2._finalize()
    role2._all_gather(1)
    role2._all_gather(1)
    role2._barrier_server()
    role2._all_gather(1)
    role3 = GeneralRoleMaker(path="./test_gloo_3")
    role3._worker_gather(1)
    role3._worker_gather(1)

    os.environ["TRAINING_ROLE"] = "TRAINER"
    os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
    role4 = GeneralRoleMaker(path="./test_gloo_4")
    role4._worker_gather(1)
    role4._get_rank()
    role4._get_size()
    role4._all_comm.init()
    role5 = GeneralRoleMaker(path="./test_gloo_5")
    role5.get_local_endpoint()
    role5.get_local_endpoint()
    role6 = GeneralRoleMaker(path="./test_gloo_6")
    role6.get_trainer_endpoints()
    role6.get_trainer_endpoints()
    role7 = GeneralRoleMaker(path="./test_gloo_7")
    role7.get_pserver_endpoints()
    role7.get_pserver_endpoints()
    role8 = GeneralRoleMaker(path="./test_gloo_8")
    role8.is_worker()
    role8.is_worker()
    role9 = GeneralRoleMaker(path="./test_gloo_9")
    role9.is_server()
    role9.is_server()
    role10 = GeneralRoleMaker(path="./test_gloo_10")
    role10.is_first_worker()
    role10.is_first_worker()
    role11 = GeneralRoleMaker(path="./test_gloo_11")
    role11.worker_index()
    role11.worker_index()
    role12 = GeneralRoleMaker(path="./test_gloo_12")
    role12.server_index()
    role12.server_index()
    role13 = GeneralRoleMaker(path="./test_gloo_13")
    role13.worker_num()
    role13.worker_num()
    role14 = GeneralRoleMaker(path="./test_gloo_14")
    role14.server_num()
    role14.server_num()
    role15 = GeneralRoleMaker(path="./test_gloo_15")
    role15._barrier_worker()
    role15._barrier_worker()
    role16 = GeneralRoleMaker(path="./test_gloo_16")
    role16._barrier_all()
    role16._barrier_all()
    role17 = GeneralRoleMaker(path="./test_gloo_17")
    role17._barrier_server()
    role17._barrier_server()
    role18 = GeneralRoleMaker(path="./test_gloo_18")
    role18._worker_num()
    role18._worker_num()
    role19 = GeneralRoleMaker(path="./test_gloo_19")
    role19._server_num()
    role19._server_num()
    role20 = GeneralRoleMaker(path="./test_gloo_20")
    a = [1]
    b = [0]
    role20._all_reduce(a, b)
    role21 = GeneralRoleMaker(path="./test_gloo_21")
    role21.all_reduce_worker([], [])
    role21.all_reduce_worker([], [])
    role21.barrier_worker()
    role21.barrier_all()
    role22 = GeneralRoleMaker(path="./test_gloo_22")
    role22._get_rank()
    role22._get_rank()

    os.environ["PADDLE_PSERVER_ID"] = "0"
    role23 = GeneralRoleMaker(path="./test_gloo_23")
    role23._get_size()
    role23._get_size()

    with open("test_fleet_gloo_role_maker_1.txt", "w") as f:
        data = "1 1 1 1\n"
        f.write(data)

    # Remove the temporary data file even when a dataset call raises, so a
    # failing run does not leave it behind in the working directory.
    try:
        dataset = paddle.distributed.InMemoryDataset()
        dataset.set_filelist(["test_fleet_gloo_role_maker_1.txt"])
        dataset._set_use_var([show, label])
        dataset.load_into_memory()
        dataset.get_memory_data_size(fleet)
        dataset.get_shuffle_data_size(fleet)
    finally:
        os.remove("./test_fleet_gloo_role_maker_1.txt")

    class TmpClass():
        """
        dummy tmp class
        """

        def __init__(self):
            pass

        def all_reduce_worker(self, input, output):
            """
            dummy all reduce worker

            Args:
                input(None): fake input
                output(None): fake output
            """
            pass

        def barrier_worker(self):
            """
            dummy barrier worker
            """
            pass

    from paddle.fluid.incubate.fleet.base.fleet_base import Fleet

    class TmpFleet(Fleet):
        """
        dummy tmp fleet
        """

        def __init__(self):
            super(TmpFleet, self).__init__()
            self._role_maker = None

        def init_worker(self):
            """
            dummy init worker
            """
            pass

        def init_server(self, model_dir=None):
            """
            dummy init server

            Args:
                model_dir(None): fake model_dir
            """
            pass

        def run_server(self):
            """
            dummy run server
            """
            pass

        def stop_worker(self):
            """
            dummy stop worker
            """
            pass

        def distributed_optimizer(self, optimizer, strategy=None):
            """
            dummy distributed optimizer

            Args:
                optimizer(None): fake optimizer
                strategy(None): fake strategy
            """
            pass

        def save_inference_model(self):
            """
            dummy save inference model
            """
            pass

        def save_persistables(self):
            """
            dummy save persistables
            """
            pass

    os.environ["TRAINING_ROLE"] = "TRAINER"
    tmp = TmpFleet()
    tmp._role_maker = TmpClass()
    tmp.all_reduce_worker([], [])
    tmp.barrier_worker()

    tmp = RoleMakerBase()
    tmp.all_gather(1)
    tmp.all_reduce_worker([], [])
    tmp.barrier_worker()
    tmp.barrier_all()

    from paddle.fluid.incubate.fleet.base.role_maker import \
        MPISymetricRoleMaker
    tmp1 = MPISymetricRoleMaker()
    tmp1.all_gather(1)
    tmp1.all_gather(1)
    tmp2 = MPISymetricRoleMaker()
    tmp2.all_reduce_worker([], [])
    tmp3 = MPISymetricRoleMaker()
    tmp3.barrier_worker()
    tmp3.barrier_worker()
    tmp4 = MPISymetricRoleMaker()
    tmp4.barrier_all()
    tmp4.barrier_all()
def build_network(self, context):
    """Build the training network for the first phase and register it.

    Only ``context["env"]["phase"][0]`` is used to build the model; a
    ``UserWarning`` is issued when more phases are configured. The programs,
    scope and model instance are stored under
    ``context["model"][<phase name>]``. On a server node ``self._server`` is
    called; on a worker the data loader / datasets are prepared and
    ``context["status"]`` is set to ``"startup_pass"``.

    Args:
        context: Mutable trainer context dict. Reads ``context["env"]`` and
            ``context["fleet"]``; writes ``context["model"]``,
            ``context["dataset"]`` and ``context["status"]``.
    """
    context["model"] = {}
    if len(context["env"]["phase"]) > 1:
        warnings.warn("Cluster Train Only Support One Phase.",
                      category=UserWarning,
                      stacklevel=2)
    model_dict = context["env"]["phase"][0]
    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    dataset_name = model_dict["dataset_name"]

    with fluid.program_guard(train_program, startup_program):
        with fluid.unique_name.guard():
            with fluid.scope_guard(scope):
                context["model"][model_dict["name"]] = {}
                model_path = envs.os_path_adapter(
                    envs.workspace_adapter(model_dict["model"]))
                model = envs.lazy_instance_by_fliename(
                    model_path, "Model")(context["env"])
                model._data_var = model.input_data(
                    dataset_name=model_dict["dataset_name"])
                if envs.get_global_env("dataset." + dataset_name +
                                       ".type") == "DataLoader":
                    model._init_dataloader(is_infer=False)
                model.net(model._data_var, False)
                optimizer = model.optimizer()

                optimizer = context["fleet"].distributed_optimizer(optimizer)
                optimizer.minimize([model._cost], [fluid.global_scope()])

                model_entry = context["model"][model_dict["name"]]
                model_entry["main_program"] = train_program
                model_entry["startup_program"] = startup_program
                model_entry["scope"] = scope
                model_entry["model"] = model
                model_entry["default_main_program"] = train_program.clone()
                model_entry["compile_program"] = None

    # NOTE(review): the server/worker split is placed outside the guards and
    # the status update inside the worker branch -- confirm against the
    # original indentation.
    if context["fleet"].is_server():
        self._server(context)
    else:
        context["dataset"] = {}
        for phase in context["env"]["phase"]:
            # The original read `dataset["name"]` here, but no `dataset`
            # variable is bound in this function; use the dataset that the
            # phase itself refers to.
            phase_dataset = phase["dataset_name"]
            dataset_type = envs.get_global_env("dataset." + phase_dataset +
                                               ".type")
            if dataset_type == "DataLoader":
                data_loader = DataLoader(context)
                data_loader.get_dataloader(
                    context, dataset_name,
                    context["model"][model_dict["name"]]["model"]
                    ._data_loader)
            elif dataset_type == "QueueDataset":
                dataset_class = QueueDataset(context)
                context["dataset"][phase_dataset] = \
                    dataset_class.create_dataset(phase_dataset, context)
        context["status"] = "startup_pass"
def freeze_graph(self,
                 use_cuda,
                 seed,
                 activation_quant_type,
                 bias_correction=False,
                 weight_quant_type='abs_max',
                 for_ci=True,
                 quant_skip_pattern='skip_quant'):
    """Quantize, train, freeze and export a small conv net.

    Steps, in order: build train/test programs, apply
    ``QuantizationTransformPass`` to both graphs, train for a few
    iterations, evaluate the quantized test program, apply
    ``QuantizationFreezePass`` and compare the loss, apply
    ``ConvertToInt8Pass`` and save/load the int8 model, then apply
    ``TransformForMobilePass`` and save the mobile model.

    Args:
        use_cuda: Run on ``CUDAPlace(0)`` when true, else ``CPUPlace``.
        seed: Random seed assigned to the main and startup programs.
        activation_quant_type: Passed to the transform pass as
            ``activation_quantize_type``; also part of output file names.
        bias_correction: Forwarded to ``QuantizationFreezePass``.
        weight_quant_type: Weight quantize type for the transform and
            freeze passes; also part of output file names.
        for_ci: When false, graphs are drawn and intermediate values are
            printed.
        quant_skip_pattern: Forwarded to ``conv_net`` and to the transform
            pass as ``skip_pattern``.
    """

    def build_program(main, startup, is_test):
        main.random_seed = seed
        startup.random_seed = seed
        with fluid.unique_name.guard(), fluid.program_guard(main, startup):
            img = fluid.layers.data(
                name='image', shape=[1, 28, 28], dtype='float32')
            label = fluid.layers.data(
                name='label', shape=[1], dtype='int64')
            loss = conv_net(img, label, quant_skip_pattern)
            if not is_test:
                fluid.optimizer.Adam(learning_rate=0.001).minimize(loss)
        return [img, label], loss

    def tagged(prefix):
        # Every artifact name carries device + quantization type suffixes.
        return (prefix + dev_name + activation_quant_type + '_' +
                weight_quant_type)

    def draw_quantize_ops(graph, title):
        # Highlight all ops whose name contains 'quantize' in the drawing.
        highlighted = {
            op
            for op in graph.all_op_nodes()
            if op.name().find('quantize') > -1
        }
        graph.draw('.', title, highlighted)

    random.seed(0)
    np.random.seed(0)

    main = fluid.Program()
    startup = fluid.Program()
    test_program = fluid.Program()
    feeds, loss = build_program(main, startup, False)
    build_program(test_program, startup, True)
    test_program = test_program.clone(for_test=True)
    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)

    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type,
        skip_pattern=quant_skip_pattern)
    for graph in (main_graph, test_graph):
        transform_pass.apply(graph)

    dev_name = '_gpu_' if use_cuda else '_cpu_'
    if not for_ci:
        draw_quantize_ops(main_graph, tagged('main'))
        draw_quantize_ops(test_graph, tagged('test'))

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)
    quantized_test_program = test_graph.to_program()

    iters = 5
    batch_size = 8
    shuffled_train = paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500)
    train_reader = paddle.batch(shuffled_train, batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(feed_list=feeds, place=place)

    with fluid.scope_guard(scope):
        for _ in range(iters):
            # A fresh reader is created on every iteration, as before.
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            if not for_ci:
                print('{}: {}'.format(tagged('loss'), loss_v))

    test_data = next(test_reader())
    with fluid.program_guard(quantized_test_program):
        w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                         quantized_test_program)
    # Testing
    with fluid.scope_guard(scope):
        test_loss1, w_quant = exe.run(program=quantized_test_program,
                                      feed=feeder.feed(test_data),
                                      fetch_list=[loss, w_var])

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope,
        place=place,
        bias_correction=bias_correction,
        weight_quantize_type=weight_quant_type)
    freeze_pass.apply(test_graph)
    if not for_ci:
        draw_quantize_ops(test_graph, tagged('test_freeze'))
    server_program = test_graph.to_program()
    with fluid.scope_guard(scope):
        [test_loss2] = exe.run(program=server_program,
                               feed=feeder.feed(test_data),
                               fetch_list=[loss])
    self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
    if not for_ci:
        print('{}: {}'.format(tagged('test_loss1'), test_loss1))
        print('{}: {}'.format(tagged('test_loss2'), test_loss2))
    w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
    # Maybe failed, this is due to the calculation precision
    # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
    if not for_ci:
        print('{}: {}'.format(tagged('w_freeze'), np.sum(w_freeze)))
        print('{}: {}'.format(tagged('w_quant'), np.sum(w_quant)))

    # Convert parameter to 8-bit.
    convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
    convert_int8_pass.apply(test_graph)
    if not for_ci:
        draw_quantize_ops(test_graph, tagged('test_int8'))
    server_program_int8 = test_graph.to_program()
    # Save the 8-bit parameter and model file.
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            tagged('server_int8'), ['image', 'label'], [loss], exe,
            server_program_int8)
        # Test whether the 8-bit parameter and model file can be loaded successfully.
        [infer, feed, fetch] = fluid.io.load_inference_model(
            tagged('server_int8'), exe)
    # Check the loaded 8-bit weight.
    w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
    self.assertEqual(w_8bit.dtype, np.int8)
    self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
    if not for_ci:
        print('{}: {}'.format(tagged('w_8bit'), np.sum(w_8bit)))
        print('{}: {}'.format(tagged('w_freeze'), np.sum(w_freeze)))

    mobile_pass = TransformForMobilePass()
    mobile_pass.apply(test_graph)
    if not for_ci:
        draw_quantize_ops(test_graph, tagged('test_mobile'))

    mobile_program = test_graph.to_program()
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            tagged('mobile_int8'), ['image', 'label'], [loss], exe,
            mobile_program)
def quan(self, config_file):
    """Run the slim ``Compressor`` on a MobileNet built for 1x28x28 input.

    Does nothing when Paddle was not compiled with CUDA. Otherwise builds a
    10-class MobileNet with top-1/top-5 accuracy and mean cross-entropy
    loss, initialises it on ``CUDAPlace(0)``, and runs a ``Compressor``
    configured from ``config_file`` with MNIST train/test readers.

    Args:
        config_file: Path handed to ``Compressor.config``.
    """
    if not fluid.core.is_compiled_with_cuda():
        return

    class_dim = 10
    image_shape = [1, 28, 28]

    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program), \
            fluid.unique_name.guard():
        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')

        backbone = MobileNet(name='quan')
        out = backbone.net(input=image, class_dim=class_dim)
        print("out: {}".format(out.name))

        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)

        # The evaluation program is a non-test clone, as in the original.
        val_program = train_program.clone(for_test=False)

        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))

    scope = fluid.Scope()
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program, scope=scope)

    # Evaluation side: MNIST test split, reporting both accuracies.
    val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
    val_feed_list = [('img', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name),
                      ('acc_top5', acc_top5.name)]

    # Training side: MNIST train split, reporting the mean loss.
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=128)
    train_feed_list = [('img', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    compressor = Compressor(
        place,
        scope,
        train_program,
        train_reader=train_reader,
        train_feed_list=train_feed_list,
        train_fetch_list=train_fetch_list,
        eval_program=val_program,
        eval_reader=val_reader,
        eval_feed_list=val_feed_list,
        eval_fetch_list=val_fetch_list,
        train_optimizer=optimizer)
    compressor.config(config_file)
    compressor.run()
def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):
    """Evaluate word-analogy accuracy for every saved epoch model.

    Every sub-directory of ``model_dir`` whose name satisfies ``is_number``
    is treated as one epoch model. For each of them the embedding variables
    are loaded, ``test_reader`` is run once, and the accuracy is printed.
    A prediction counts as correct when the first predicted id that is not
    one of the question's input words equals the label.

    NOTE(review): ``model_dir``, ``is_number``, ``_load_emb`` and
    ``infer_network`` are not defined in this function; presumably they are
    module-level names -- confirm. The per-epoch accuracies are collected in
    a local ``OrderedDict`` that is neither returned nor stored; the
    function still returns ``None`` as before.

    Args:
        args: Object providing ``emb_size``.
        vocab_size: Vocabulary size; also the length of the ``all_label``
            feed.
        test_reader: Callable returning an iterable of batches. Each sample
            is indexed as ``(a, b, c, label, input_words)``.
        use_cuda: Run on ``CUDAPlace(0)`` when true, else ``CPUPlace``.
        i2w: Unused here.
    """
    # Sorted so that the epoch index of each model is deterministic;
    # os.listdir() returns entries in arbitrary order.
    epoch_model_path_list = []
    for entry in sorted(os.listdir(model_dir)):
        entry_path = os.path.join(model_dir, entry)
        # hard code for epoch model folder
        if os.path.isdir(entry_path) and is_number(entry):
            epoch_model_path_list.append(entry_path)

    if not epoch_model_path_list:
        return

    print("Save model len {}".format(len(epoch_model_path_list)))

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    emb_size = args.emb_size
    result_dict = collections.OrderedDict()
    # The candidate label ids are identical for every batch.
    all_label = np.arange(vocab_size).reshape(vocab_size).astype("int64")

    with fluid.scope_guard(fluid.Scope()):
        main_program = fluid.Program()
        with fluid.program_guard(main_program):
            values, pred = infer_network(vocab_size, emb_size)
            for epoch, model_path in enumerate(epoch_model_path_list):
                print("Begin infer model: {}".format(model_path))
                copy_program = main_program.clone()
                fluid.io.load_vars(
                    executor=exe, dirname=model_path, predicate=_load_emb)

                accum_num = 0
                accum_num_sum = 0.0
                step_id = 0
                for data in test_reader():
                    step_id += 1
                    b_size = len(data)
                    wa = np.array([dat[0] for dat in data]).astype(
                        "int64").reshape(b_size)
                    wb = np.array([dat[1] for dat in data]).astype(
                        "int64").reshape(b_size)
                    wc = np.array([dat[2] for dat in data]).astype(
                        "int64").reshape(b_size)
                    label = [dat[3] for dat in data]
                    input_word = [dat[4] for dat in data]

                    para = exe.run(copy_program,
                                   feed={
                                       "analogy_a": wa,
                                       "analogy_b": wb,
                                       "analogy_c": wc,
                                       "all_label": all_label,
                                   },
                                   fetch_list=[pred.name, values],
                                   return_numpy=False)
                    pre = np.array(para[0])
                    for ii, answer in enumerate(label):
                        accum_num_sum += 1
                        for idx in pre[ii]:
                            # Skip predictions that merely repeat one of
                            # the question's own words.
                            if int(idx) in input_word[ii]:
                                continue
                            if int(idx) == int(answer[0]):
                                accum_num += 1
                            break
                    print("step:%d %d " % (step_id, accum_num))

                # An empty reader used to raise ZeroDivisionError here;
                # report an accuracy of 0.0 when nothing was evaluated.
                if accum_num_sum:
                    epoch_acc = 1.0 * accum_num / accum_num_sum
                else:
                    epoch_acc = 0.0
                print("model: {} \t acc: {} ".format(model_path, epoch_acc))
                result_dict[epoch] = epoch_acc
def quantize_program(self,
                     use_cuda,
                     seed,
                     activation_quant_type='abs_max',
                     weight_quant_type='abs_max',
                     for_ci=False):
    """Quantize, train and convert a small conv net with QuantizeTranspilerV2.

    Args:
        use_cuda: Run on ``CUDAPlace(0)`` when true, else ``CPUPlace``.
        seed: Random seed assigned to the train and startup programs.
        activation_quant_type: ``activation_quantize_type`` for the
            transpiler.
        weight_quant_type: ``weight_quantize_type`` for the transpiler.
        for_ci: When false, program graphs are drawn before and after
            quantization and the converted model is saved to
            ``./infer_model``.
    """

    def build_program(main, startup, is_test):
        main.random_seed = seed
        startup.random_seed = seed
        with fluid.unique_name.guard(), fluid.program_guard(main, startup):
            img = fluid.layers.data(
                name='image', shape=[1, 28, 28], dtype='float32')
            label = fluid.layers.data(
                name='label', shape=[1], dtype='int64')
            loss = conv_net(img, label)
            if not is_test:
                fluid.optimizer.Adam(learning_rate=0.0001).minimize(loss)
        return [img, label], loss

    def draw_program(program, is_test, title):
        # Render the program's IR graph into the current directory.
        IrGraph(core.Graph(program.desc), for_test=is_test).draw('.', title)

    random.seed(0)
    np.random.seed(0)

    # 1 Define program
    train_program = fluid.Program()
    startup_program = fluid.Program()
    test_program = fluid.Program()
    feeds, loss = build_program(train_program, startup_program, False)
    build_program(test_program, startup_program, True)
    test_program = test_program.clone(for_test=True)

    if not for_ci:
        draw_program(train_program, False, 'train_program_1')
        draw_program(test_program, True, 'test_program_1')

    # 2 Apply quantization
    qt = QuantizeTranspilerV2(
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type)
    qt.apply(train_program, startup_program, is_test=False)
    qt.apply(test_program, startup_program, is_test=True)

    # 3 Train
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup_program)

    if not for_ci:
        draw_program(train_program, False, 'train_program_2')
        draw_program(test_program, True, 'test_program_2')

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    binary = fluid.CompiledProgram(train_program).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    iters = 5
    batch_size = 8
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=batch_size)
    feeder = fluid.DataFeeder(feed_list=feeds, place=place)

    with fluid.scope_guard(scope):
        for step in range(iters):
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            if step % 20 == 0 and not for_ci:
                print('{}: {}'.format('loss', np.mean(loss_v)))

    # NOTE(review): the final loss print is placed after the training loop
    # -- confirm against the original indentation.
    print('{}: {}'.format('loss', np.mean(loss_v)))

    # 4 Convert
    qt.convert(test_program, scope)
    if not for_ci:
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                './infer_model', ['image', 'label'], [loss],
                exe,
                test_program,
                clip_extra=True)
def f3_data_reader(box): pass # Initialization place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) startup = fluid.Program() # Feed configure # if you want to shuffle "reader=paddle.reader.shuffle(dataReader(), buf_size)" # load infer model f1_scope = fluid.Scope() f2_scope = fluid.Scope() se_scope = fluid.Scope() with fluid.scope_guard(f1_scope): [infer_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(model_path, exe) with fluid.scope_guard(f2_scope): [infer_program2, feed_target_names2, fetch_targets2] = fluid.io.load_inference_model(model_path2, exe) f1_data_list = f1_data_reader(img_file_path) # Start infer exe.run(startup) s = time.time()
def test_batch_number_with_different_length_files(self):
    """Run ``check_batch_number`` with randomized batch counts on every place.

    Each place from ``self.get_all_places()`` is checked under its own
    fresh ``fluid.Scope``.
    """
    for place in self.get_all_places():
        fresh_scope = fluid.Scope()
        with fluid.scope_guard(fresh_scope):
            self.check_batch_number(place=place, randomize_batch_num=True)
def _test(self,
          place,
          use_tensor=True,
          use_fluid_api=True,
          use_global_beta_pow=False,
          flatten_param_grads=False):
    """Train a two-layer FC net with Adam for 10 steps in static mode.

    Args:
        place: Device on which the executor runs.
        use_tensor: Pass ``beta1``/``beta2``/``epsilon`` as persistable
            global variables instead of Python floats.
        use_fluid_api: Build ``fluid.optimizer.Adam`` when true, otherwise
            ``paddle.optimizer.Adam``.
        use_global_beta_pow: Forwarded to ``fluid.optimizer.Adam`` only.
        flatten_param_grads: Forwarded to ``fluid.optimizer.Adam`` only.

    Returns:
        Tuple ``(pred_res, loss_res)`` fetched in the last training step.
    """
    paddle.enable_static()
    # Always switch back to dynamic mode, even when the run fails, so a
    # failure here does not leave static mode enabled for later tests.
    try:
        main_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        SEED = 2021
        paddle.seed(SEED)
        np.random.seed(SEED)

        a_np = np.random.random(size=(2, 2)).astype('float32')
        b_np = np.random.random(size=(2, 2)).astype('float32')
        label_np = np.random.randint(2, size=(2, 1)).astype('int64')
        weight_attr1 = paddle.ParamAttr(
            name="weight1",
            initializer=fluid.initializer.Constant(value=1.0),
            trainable=True)
        weight_attr2 = paddle.ParamAttr(
            name="weight2",
            initializer=fluid.initializer.Constant(value=2.0),
            trainable=True)
        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)

        with paddle.static.program_guard(main_prog, startup_prog):
            with paddle.utils.unique_name.guard():
                a = paddle.static.data(
                    name="a", shape=[2, 2], dtype='float32')
                b = paddle.static.data(
                    name="b", shape=[2, 2], dtype='float32')
                label = paddle.static.data(
                    name="label", shape=[2, 1], dtype='int64')

                # Named `a_plus_b` to avoid shadowing the builtin `sum`.
                a_plus_b = paddle.add(a, b)
                z = paddle.pow(a_plus_b, 2.0)

                fc_1 = fluid.layers.fc(input=z,
                                       size=2,
                                       param_attr=weight_attr1)
                prediction = fluid.layers.fc(input=fc_1,
                                             size=2,
                                             param_attr=weight_attr2,
                                             act='softmax')

                cost = fluid.layers.cross_entropy(
                    input=prediction, label=label)
                loss = fluid.layers.reduce_mean(cost)

                beta1_init = 0.9
                beta2_init = 0.999
                epsilon_init = 1e-8
                if use_tensor:
                    beta1 = fluid.layers.create_global_var(
                        shape=[1],
                        value=float(beta1_init),
                        dtype='float32',
                        persistable=True,
                        name="beta1")
                    beta2 = fluid.layers.create_global_var(
                        shape=[1],
                        value=float(beta2_init),
                        dtype='float32',
                        persistable=True,
                        name="beta2")
                    epsilon = fluid.layers.create_global_var(
                        shape=[1],
                        value=float(epsilon_init),
                        dtype='float32',
                        persistable=True,
                        name="epsilon")
                else:
                    beta1 = beta1_init
                    beta2 = beta2_init
                    epsilon = epsilon_init

                # The optimizer is chosen by `use_fluid_api` alone. The
                # original built fluid.optimizer.Adam in the float,
                # non-fluid case, so paddle.optimizer.Adam was never run
                # with float hyper-parameters.
                if use_fluid_api:
                    adam = fluid.optimizer.Adam(
                        learning_rate=0.01,
                        beta1=beta1,
                        beta2=beta2,
                        epsilon=epsilon,
                        use_global_beta_pow=use_global_beta_pow,
                        flatten_param_grads=flatten_param_grads,
                        align_size=256,
                        grad_clip=clip)
                else:
                    adam = paddle.optimizer.Adam(
                        learning_rate=0.01,
                        beta1=beta1,
                        beta2=beta2,
                        epsilon=epsilon,
                        grad_clip=clip)

                adam.minimize(loss)

        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe = paddle.static.Executor(place)
            exe.run(startup_prog)

            print("Start run on {}".format(place))
            for epoch in range(10):
                pred_res, loss_res = exe.run(
                    main_prog,
                    feed={"a": a_np,
                          "b": b_np,
                          "label": label_np},
                    fetch_list=[prediction, loss])
                print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                    epoch, pred_res[0], loss_res))
            return pred_res, loss_res
    finally:
        paddle.disable_static()
def quantization_scale(self,
                       use_cuda,
                       seed,
                       activation_quant_type,
                       weight_quant_type='abs_max',
                       for_ci=False):
    """Train a quantized conv net with output-scale passes and export it.

    Applies ``QuantizationTransformPass`` and ``AddQuantDequantPass`` to the
    train and test graphs, ``OutScaleForTrainingPass`` to the train graph,
    trains for a few iterations, then applies ``OutScaleForInferencePass``
    and ``QuantizationFreezePass`` to the test graph, dumps the resulting
    program as text and saves it as an inference model.

    Args:
        use_cuda: Run on ``CUDAPlace(0)`` when true, else ``CPUPlace``.
        seed: Random seed assigned to the main and startup programs.
        activation_quant_type: ``activation_quantize_type`` for the
            transform pass.
        weight_quant_type: Weight quantize type for the transform and
            freeze passes.
        for_ci: When false, graphs are drawn and the training loss is
            printed.
    """

    def build_program(main, startup, is_test):
        main.random_seed = seed
        startup.random_seed = seed
        with fluid.unique_name.guard(), fluid.program_guard(main, startup):
            img = fluid.layers.data(
                name='image', shape=[1, 28, 28], dtype='float32')
            label = fluid.layers.data(
                name='label', shape=[1], dtype='int64')
            loss = conv_net(img, label)
            if not is_test:
                fluid.optimizer.Adam(learning_rate=0.0001).minimize(loss)
        return [img, label], loss

    def draw_quantize_ops(graph, title):
        # Highlight all ops whose name contains 'quantize' in the drawing.
        highlighted = {
            op
            for op in graph.all_op_nodes()
            if op.name().find('quantize') > -1
        }
        graph.draw('.', title, highlighted)

    random.seed(0)
    np.random.seed(0)

    main = fluid.Program()
    startup = fluid.Program()
    test_program = fluid.Program()
    feeds, loss = build_program(main, startup, False)
    build_program(test_program, startup, True)
    test_program = test_program.clone(for_test=True)
    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)

    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type)
    transform_pass.apply(main_graph)
    transform_pass.apply(test_graph)

    add_quant_dequant_pass = AddQuantDequantPass(scope=scope, place=place)
    add_quant_dequant_pass.apply(main_graph)
    add_quant_dequant_pass.apply(test_graph)

    scale_training_pass = OutScaleForTrainingPass(scope=scope, place=place)
    scale_training_pass.apply(main_graph)

    dev_name = '_gpu' if use_cuda else '_cpu'
    if not for_ci:
        draw_quantize_ops(main_graph, 'main_scale' + dev_name)
        draw_quantize_ops(test_graph, 'test_scale' + dev_name)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    iters = 5
    batch_size = 8
    shuffled_train = paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500)
    train_reader = paddle.batch(shuffled_train, batch_size=batch_size)
    feeder = fluid.DataFeeder(feed_list=feeds, place=place)

    with fluid.scope_guard(scope):
        for _ in range(iters):
            # A fresh reader is created on every iteration, as before.
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            if not for_ci:
                print('{}: {}'.format('loss' + dev_name, loss_v))

    scale_inference_pass = OutScaleForInferencePass(scope=scope)
    scale_inference_pass.apply(test_graph)

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quant_type)
    freeze_pass.apply(test_graph)
    server_program = test_graph.to_program()

    if not for_ci:
        draw_quantize_ops(test_graph, 'quant_scale' + dev_name)

    # NOTE(review): the text dump is written regardless of `for_ci` --
    # confirm against the original indentation.
    with open('quant_scale_model' + dev_name + '.txt', 'w') as f:
        f.write(str(server_program))

    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            'quant_scale_model' + dev_name, ['image', 'label'], [loss], exe,
            server_program)
def test_pslib_1(self):
    """Exercise the pslib fleet optimizer and MPISymetricRoleMaker error paths.

    The test returns early (after printing a message) when building the
    distributed optimizer or starting the server raises. Afterwards it calls
    ``_all_reduce`` on freshly constructed ``MPISymetricRoleMaker`` objects
    with several modes and only reports any exception that is raised.
    """
    import paddle.fluid as fluid
    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
    from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib
    from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker

    os.environ["POD_IP"] = "127.0.0.1"
    os.environ["PADDLE_PORT"] = "36001"
    os.environ["TRAINING_ROLE"] = "TRAINER"
    os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
    os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
    os.environ["PADDLE_TRAINER_ID"] = "0"
    role_maker = GeneralRoleMaker()
    #print("init rolemaker")
    #role_maker.generate_role()
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    #fleet.init(role_maker)
    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    with fluid.program_guard(train_program, startup_program):
        show = fluid.layers.data(name="show",
                                 shape=[-1, 1],
                                 dtype="float32",
                                 lod_level=1,
                                 append_batch_size=False)
        fc = fluid.layers.fc(input=show, size=1, act=None)
        label = fluid.layers.data(name="click",
                                  shape=[-1, 1],
                                  dtype="int64",
                                  lod_level=1,
                                  append_batch_size=False)
        label_cast = fluid.layers.cast(label, dtype='float32')
        cost = fluid.layers.log_loss(fc, label_cast)
    try:
        adam = fluid.optimizer.Adam(learning_rate=0.000005)
        adam = fleet.distributed_optimizer(adam)
        adam.minimize([cost], [scope])
        fleet.run_server()
    except Exception:
        # Catch Exception rather than everything so Ctrl-C and SystemExit
        # still stop the test run.
        print("do not support pslib test, skip")
        return
    fleet.clear_one_table(0)
    from paddle.fluid.incubate.fleet.base.role_maker import \
        MPISymetricRoleMaker

    # (extra positional arguments for _all_reduce, message printed on error)
    all_reduce_probes = (
        ((), "catch expected error of not inited"),
        (("min", ), "catch expected error of not inited"),
        (("max", ), "catch expected error of not inited"),
        (("unknown", ), "catch expected error of unknown type"),
    )
    for mode_args, message in all_reduce_probes:
        try:
            role = MPISymetricRoleMaker()
            role._all_reduce([1], [2], *mode_args)
        except Exception:
            print(message)
def test_main(self):
    """Run ``self.run_network`` under a fresh program pair and a fresh scope."""
    fresh_main = fluid.Program()
    fresh_startup = fluid.Program()
    with fluid.program_guard(fresh_main, fresh_startup), \
            fluid.scope_guard(fluid.Scope()):
        self.run_network()
def set_program(self, avg_cost, strategy):
    """Minimize ``avg_cost`` with a fleet-wrapped SGD optimizer in a new scope.

    Args:
        avg_cost: the loss passed to ``minimize``.
        strategy: forwarded to ``fleet.distributed_optimizer``.
    """
    with fluid.scope_guard(fluid.Scope()):
        sgd = fluid.optimizer.SGD(0.1)
        distributed_sgd = fleet.distributed_optimizer(sgd, strategy)
        distributed_sgd.minimize(avg_cost)
def test_prune(self):
    """Prune two parameters and compare selected parameter shapes afterwards."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    #   X       X               O       X              O
    # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
    #     |            ^ |                    ^
    #     |____________| |____________________|
    #
    # X: prune output channels
    # O: prune input channels
    with fluid.program_guard(main_program, startup_program):
        input = fluid.data(name="image", shape=[None, 3, 16, 16])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
        conv7 = fluid.layers.conv2d_transpose(input=conv6,
                                              num_filters=16,
                                              filter_size=2,
                                              stride=2)

    # Shapes before pruning; collected but not consulted below.
    shapes_before = {
        p.name: p.shape
        for p in main_program.global_block().all_parameters()
    }

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    exe.run(startup_program, scope=scope)

    pruner = Pruner()
    main_program, _, _ = pruner.prune(
        main_program,
        scope,
        params=["conv4_weights", "conv2d_transpose_0.w_0"],
        ratios=[0.5, 0.6],
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)

    expected_shapes = {
        "conv1_weights": (4, 3, 3, 3),
        "conv2_weights": (4, 4, 3, 3),
        "conv3_weights": (8, 4, 3, 3),
        "conv4_weights": (4, 8, 3, 3),
        "conv5_weights": (8, 4, 3, 3),
        "conv6_weights": (8, 8, 3, 3),
        "conv2d_transpose_0.w_0": (8, 16, 2, 2),
    }

    for param in main_program.global_block().all_parameters():
        if param.name not in expected_shapes:
            continue
        print("param: {}; param shape: {}".format(param.name, param.shape))
        self.assertTrue(param.shape == expected_shapes[param.name])
def evaluate():
    """Run the DNN model over the evaluation files and log loss and AUC.

    Returns:
        dict with key ``'loss'`` (mean of the per-batch loss divided by
        ``cfg.batch_size``) and key ``'auc'`` (the AUC value fetched for the
        last batch, or 0 when no batch was read).
    """
    place = fluid.CUDAPlace(0) if cfg.use_cuda else fluid.CPUPlace()
    inference_scope = fluid.Scope()

    test_files = [
        os.path.join(cfg.evaluate_file_path, file_name)
        for file_name in os.listdir(cfg.evaluate_file_path)
    ]
    dataset = CriteoDataset()
    test_reader = paddle.batch(dataset.test(test_files),
                               batch_size=cfg.batch_size)

    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()
    model = DNN()
    epoch_dir = model.name + "_epoch_" + str(cfg.test_epoch)
    model_path = os.path.join(cfg.save_path, epoch_dir, "checkpoint")

    with fluid.framework.program_guard(test_program, startup_program), \
            fluid.unique_name.guard():
        inputs = model.input_data()
        loss, auc_var = model.net(inputs)

        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=inputs, place=place)

        fluid.load(fluid.default_main_program(), model_path, exe)

        # Reset the AUC accumulator variables before evaluating.
        for state_name in ('_generated_var_0', '_generated_var_1',
                           '_generated_var_2', '_generated_var_3'):
            set_zero(state_name, scope=inference_scope, place=place)

        infer_auc = 0
        batch_losses = []
        for batch_id, data_test in enumerate(test_reader()):
            loss_val, auc_val = exe.run(test_program,
                                        feed=feeder.feed(data_test),
                                        fetch_list=[loss, auc_var])
            infer_auc = auc_val
            scaled_loss = loss_val / cfg.batch_size
            batch_losses.append(scaled_loss)
            if batch_id % cfg.log_interval == 0:
                logger.info("TEST --> batch: {} loss: {} auc: {}".format(
                    batch_id, scaled_loss, auc_val))

    infer_result = {'loss': np.mean(batch_losses), 'auc': infer_auc}

    if not os.path.isdir(cfg.log_dir):
        os.makedirs(cfg.log_dir)
    log_path = os.path.join(cfg.log_dir, model.name + '_infer_result.log')

    logger.info(str(infer_result))
    with open(log_path, 'w+') as f:
        f.write(str(infer_result))
    logger.info("Done.")
    return infer_result
def test_concat(self):
    """Prune around a concat op, first graph-only and then with parameters."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    #                                 X
    # conv1   conv2-->concat         conv3-->sum-->out
    #     |            ^ |                    ^
    #     |____________| |____________________|
    #
    with fluid.program_guard(main_program, startup_program):
        input = fluid.data(name="image", shape=[None, 3, 16, 16])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(input, 8, 3, "conv2", sync_bn=True)
        tmp = fluid.layers.concat([conv1, conv2], axis=1)
        conv3 = conv_bn_layer(input, 16, 3, "conv3", bias=None)
        out = conv3 + tmp

    # Shapes before pruning; collected but not consulted below.
    shapes_before = {
        p.name: p.shape
        for p in main_program.global_block().all_parameters()
    }

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    exe.run(startup_program, scope=scope)
    pruner = Pruner()

    # test backward search of concat
    pruned_program, _, _ = pruner.prune(main_program,
                                        scope,
                                        params=["conv3_weights"],
                                        ratios=[0.5],
                                        place=place,
                                        lazy=False,
                                        only_graph=True,
                                        param_backup=None,
                                        param_shape_backup=None)
    expected = {
        "conv3_weights": (8, 3, 3, 3),
        "conv2_weights": (4, 3, 3, 3),
        "conv1_weights": (4, 3, 3, 3)
    }
    # NOTE(review): only names containing both "weights" and "conv2d" are
    # compared; none of the expected keys contains "conv2d" - confirm that
    # this filter is what was intended.
    for param in pruned_program.global_block().all_parameters():
        if "weights" not in param.name or "conv2d" not in param.name:
            continue
        self.assertTrue(expected[param.name] == param.shape)

    # test forward search of concat
    pruned_program, _, _ = pruner.prune(
        main_program,
        scope,
        params=["conv1_weights", "conv2_weights"],
        ratios=[0.5, 0.5],
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)
    expected = {
        "conv1_weights": (4, 3, 3, 3),
        "conv1_bn_scale": (4, ),
        "conv1_bn_variance": (4, ),
        "conv1_bn_mean": (4, ),
        "conv1_bn_offset": (4, ),
        "conv2_weights": (4, 3, 3, 3),
        "sync_batch_norm_0.w_0": (4, ),
        "sync_batch_norm_0.w_1": (4, ),
        "conv2_bn_scale": (4, ),
        "conv2_bn_offset": (4, ),
        "conv3_weights": (8, 3, 3, 3),
        "conv3_bn_mean": (8, ),
        "conv3_bn_offset": (8, ),
        "conv3_bn_scale": (8, ),
        "conv3_bn_variance": (8, ),
        "conv3_out.b_0": (8, ),
    }
    for param in pruned_program.global_block().all_parameters():
        if "weights" not in param.name or "conv2d" not in param.name:
            continue
        self.assertTrue(expected[param.name] == param.shape)
def loss_scaling_check(self, use_cuda=True, scope=None):
    """Run ``amp_nn.update_loss_scaling`` once and check all of its outputs.

    The feed has ``found_inf`` False, 999 good steps and
    ``incr_every_n_steps`` 1000, and the assertions expect the inputs to be
    returned unchanged, the loss scaling to be multiplied by ``incr_ratio``
    and both step counters to be zero.

    Args:
        use_cuda: run on ``CUDAPlace(0)`` when truthy, otherwise on CPU.
        scope: scope to run in. ``None`` (the default) creates a new
            ``fluid.Scope()`` for this call; the previous default was built
            once at definition time and shared by every call.
    """
    if scope is None:
        scope = fluid.Scope()

    a = fluid.data(name="a", shape=[1024, 1024], dtype='float32')
    b = fluid.data(name="b", shape=[512, 128], dtype='float32')
    x = [a, b]
    found_inf = fluid.data(name="found_inf", shape=[1], dtype='bool')
    prev_loss_scaling = fluid.data(name="prev_loss_scaling",
                                   shape=[1],
                                   dtype='float32')
    num_good_steps = fluid.data(name="num_good_steps",
                                shape=[1],
                                dtype='int32')
    num_bad_steps = fluid.data(name="num_bad_steps",
                               shape=[1],
                               dtype='int32')

    a_v = np.random.random([1024, 1024]).astype('float32')
    b_v = np.random.random([512, 128]).astype('float32')
    found_inf_v = np.array([False]).astype('bool')
    prev_loss_scaling_v = np.array([2048]).astype('float32')
    num_good_steps_v = np.array([999], dtype=np.int32)
    num_bad_steps_v = np.array([1], dtype=np.int32)

    incr_every_n_steps = 1000
    decr_every_n_nan_or_inf = 2
    incr_ratio = 2
    decr_ratio = 0.8

    result = amp_nn.update_loss_scaling(x,
                                        found_inf,
                                        prev_loss_scaling,
                                        num_good_steps,
                                        num_bad_steps,
                                        incr_every_n_steps,
                                        decr_every_n_nan_or_inf,
                                        incr_ratio,
                                        decr_ratio,
                                        name="update_loss_scaling")

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    with fluid.scope_guard(scope):
        exe.run(fluid.default_startup_program())
        result_v = exe.run(feed={
            'a': a_v,
            'b': b_v,
            'found_inf': found_inf_v,
            'prev_loss_scaling': prev_loss_scaling_v,
            'num_good_steps': num_good_steps_v,
            'num_bad_steps': num_bad_steps_v
        },
                           fetch_list=[
                               result, x, found_inf, prev_loss_scaling,
                               num_good_steps, num_bad_steps
                           ])
    assert np.array_equal(result_v[0], a_v)
    assert np.array_equal(result_v[1], b_v)
    assert np.array_equal(result_v[0], result_v[2])
    assert np.array_equal(result_v[1], result_v[3])
    assert np.array_equal(result_v[4], found_inf_v)
    assert np.array_equal(result_v[5], prev_loss_scaling_v * incr_ratio)
    assert np.array_equal(result_v[6], np.zeros_like(num_good_steps_v))
    assert np.array_equal(result_v[7], np.zeros_like(num_bad_steps_v))
def mkldnn_based_freeze_graph(self,
                              use_cuda,
                              seed,
                              activation_quant_type,
                              weight_quant_type='abs_max',
                              quant_perf=False,
                              for_ci=False):
    """Train a quantized net on CPU, freeze it and apply the MKL-DNN INT8 pass.

    After the passes the conv2d and fc weights are read back from the scope,
    their sums are asserted to be non-integer via ``self.isinteger``, and the
    resulting program is handed to ``self.check_program``.

    Args:
        use_cuda: not read by this method; it always runs on ``CPUPlace``.
        seed: forwarded to ``self.build_program``.
        activation_quant_type: forwarded as ``activation_quantize_type``.
        weight_quant_type: forwarded as ``weight_quantize_type``.
        quant_perf: not read by this method.
        for_ci: when falsy, the test graph is drawn and weight sums printed.
    """
    random.seed(0)
    np.random.seed(0)

    main = fluid.Program()
    startup = fluid.Program()
    test_program = fluid.Program()
    feeds, loss = self.build_program(main, startup, False, seed)
    self.build_program(test_program, startup, True, seed)
    test_program = test_program.clone(for_test=True)
    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)
    # Apply the QuantizationTransformPass
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type)
    transform_pass.apply(main_graph)
    transform_pass.apply(test_graph)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)
    quantized_test_program = test_graph.to_program()
    iters = 5
    batch_size = 8

    train_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500),
                                batch_size=batch_size)
    test_reader = paddle.batch(paddle.dataset.mnist.test(),
                               batch_size=batch_size)
    feeder = fluid.DataFeeder(feed_list=feeds, place=place)

    # Training the model to get the weights value
    with fluid.scope_guard(scope):
        for _ in range(iters):
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss])

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quant_type)
    freeze_pass.apply(test_graph)

    # Transform quantized graph for MKL-DNN INT8 inference
    mkldnn_int8_pass = QuantInt8MkldnnPass(_scope=scope, _place=place)
    mkldnn_int8_pass.apply(test_graph)
    dev_name = '_cpu_'
    # Suffix shared by the drawing file name and the printed labels.
    run_tag = dev_name + activation_quant_type + '_' + weight_quant_type
    if not for_ci:
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_mkldnn' + run_tag, marked_nodes)
    mkldnn_program = test_graph.to_program()

    # Check the transformation weights of conv2d and mul
    conv_w_mkldnn = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
    mul_w_mkldnn = np.array(scope.find_var('fc_0.w_0').get_tensor())
    # Check if weights are still integer
    self.assertFalse(self.isinteger(np.sum(conv_w_mkldnn)))
    self.assertFalse(self.isinteger(np.sum(mul_w_mkldnn)))

    # Check if the conv2d output and mul output are correctly linked to fake_dequantize's
    # output
    self.check_program(mkldnn_program)
    if not for_ci:
        # The original print summed an undefined name (w_mkldnn) and raised
        # NameError; report the two weight tensors that are actually read.
        print('{}: {}'.format('conv_w_mkldnn' + run_tag,
                              np.sum(conv_w_mkldnn)))
        print('{}: {}'.format('mul_w_mkldnn' + run_tag,
                              np.sum(mul_w_mkldnn)))
def build_network(self, context):
    """Build one fine-tuning program per phase and register it in ``context``.

    For every entry of ``context["phases"]`` a model is instantiated and its
    network built; the single variable named by the runner's
    ``finetuning_aspect_varnames`` setting gets ``stop_gradient = True``
    before the optimizer is attached. Programs, scope and model are stored
    under ``context["model"][<phase name>]``. QueueDataset datasets are then
    created and ``context["status"]`` is set to ``"startup_pass"``.

    Raises:
        ValueError: when the number of fine-tuning variable names is not 1.
    """
    context["model"] = {}
    for model_dict in context["phases"]:
        phase_name = model_dict["name"]
        context["model"][phase_name] = {}
        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        dataset_name = model_dict["dataset_name"]

        with fluid.program_guard(train_program, startup_program), \
                fluid.unique_name.guard(), \
                fluid.scope_guard(scope):
            model_path = envs.os_path_adapter(
                envs.workspace_adapter(model_dict["model"]))
            model_cls = envs.lazy_instance_by_fliename(model_path, "Model")
            model = model_cls(context["env"])

            model._data_var = model.input_data(
                dataset_name=model_dict["dataset_name"])

            dataset_kind = envs.get_global_env("dataset." + dataset_name +
                                               ".type")
            if dataset_kind == "DataLoader":
                model._init_dataloader(is_infer=context["is_infer"])
                data_loader = DataLoader(context)
                data_loader.get_dataloader(context, dataset_name,
                                           model._data_loader)

            model.net(model._data_var, context["is_infer"])

            finetuning_varnames = envs.get_global_env(
                "runner." + context["runner_name"] +
                ".finetuning_aspect_varnames",
                default_value=[])

            if len(finetuning_varnames) == 0:
                raise ValueError(
                    "nothing need to be fine tuning, you may use other traning mode"
                )

            if len(finetuning_varnames) != 1:
                raise ValueError(
                    "fine tuning mode can only accept one varname now")

            # Exclude the selected variable from gradient computation.
            varname = finetuning_varnames[0]
            frozen_var = train_program.global_block().vars[varname]
            frozen_var.stop_gradient = True

            optimizer = model.optimizer()
            optimizer.minimize(model._cost)

        phase_entry = context["model"][phase_name]
        phase_entry["main_program"] = train_program
        phase_entry["startup_program"] = startup_program
        phase_entry["scope"] = scope
        phase_entry["model"] = model
        phase_entry["default_main_program"] = train_program.clone()
        phase_entry["compiled_program"] = None

    context["dataset"] = {}
    for dataset in context["env"]["dataset"]:
        dataset_type = envs.get_global_env("dataset." + dataset["name"] +
                                           ".type")
        if dataset_type != "QueueDataset":
            continue
        dataset_class = QueueDataset(context)
        context["dataset"][dataset["name"]] = dataset_class.create_dataset(
            dataset["name"], context)

    context["status"] = "startup_pass"
def run_main(self, use_legacy_py_reader, with_data_parallel, places,
             use_double_buffer):
    """Train ``simple_fc_net`` for ``EPOCH_NUM`` epochs and collect statistics.

    Args:
        use_legacy_py_reader: forwarded to ``simple_fc_net``.
        with_data_parallel: compile the program with data parallelism.
        places: places to run on; ``places[0]`` hosts the executor.
        use_double_buffer: forwarded to ``simple_fc_net``; when falsy the
            reader feeds CPU places instead of ``places``.

    Returns:
        dict with ``"time"`` (elapsed seconds), ``"step"`` (steps per epoch)
        and ``"loss"`` (array of the mean loss of every step).
    """
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        startup_prog, main_prog, py_reader, loss = simple_fc_net(
            places, use_legacy_py_reader, use_double_buffer)

        reader = paddle.batch(random_reader, batch_size=BATCH_SIZE)

        if use_double_buffer:
            ps = places
        else:
            ps = fluid.cpu_places(len(places))

        py_reader.set_sample_list_generator(
            reader, places=ps if py_reader.iterable else None)

        exe = fluid.Executor(place=places[0])
        exe.run(startup_prog)

        prog = fluid.CompiledProgram(main_prog)
        if with_data_parallel:
            prog = prog.with_data_parallel(loss_name=loss.name,
                                           places=places)

        step = 0
        step_list = []
        loss_list = []
        start_t = time.time()
        if py_reader.iterable:
            for _ in six.moves.range(EPOCH_NUM):
                step = 0
                for feed_per_place in py_reader():
                    assert len(feed_per_place) == len(
                        places), "{} != {}".format(len(feed_per_place),
                                                   len(places))
                    # Every per-device feed must have the expected shapes and
                    # live on the matching place.
                    for i, item in enumerate(feed_per_place):
                        image = item['image']
                        label = item['label']
                        assert image.shape() == [BATCH_SIZE, 784]
                        assert label.shape() == [BATCH_SIZE, 1]
                        assert image._place()._equals(ps[i])
                        assert label._place()._equals(ps[i])
                    batch_loss, = exe.run(program=prog,
                                          feed=feed_per_place,
                                          fetch_list=[loss],
                                          use_program_cache=True)
                    loss_list.append(np.mean(batch_loss))
                    step += 1
                step_list.append(step)
        else:
            for _ in six.moves.range(EPOCH_NUM):
                step = 0
                py_reader.start()
                while True:
                    try:
                        batch_loss, = exe.run(program=prog,
                                              fetch_list=[loss],
                                              use_program_cache=True)
                        loss_list.append(np.mean(batch_loss))
                        step += 1
                    except fluid.core.EOFException:
                        # End of epoch: rewind the reader for the next one.
                        py_reader.reset()
                        break
                step_list.append(step)
        end_t = time.time()
        return {
            "time": end_t - start_t,
            "step": step_list,
            "loss": np.array(loss_list)
        }
def build_network(self, context):
    """Build one program per phase and register it in ``context``.

    For every entry of ``context["phases"]`` a model is instantiated and its
    network built from the inference or the training inputs depending on
    ``context["is_infer"]``. Programs, scope and model are stored under
    ``context["model"][<phase name>]``. QueueDataset datasets are then
    created and ``context["status"]`` is set to ``"startup_pass"``.
    """
    context["model"] = {}
    for model_dict in context["phases"]:
        phase_name = model_dict["name"]
        context["model"][phase_name] = {}
        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        dataset_name = model_dict["dataset_name"]

        with fluid.program_guard(train_program, startup_program), \
                fluid.unique_name.guard(), \
                fluid.scope_guard(scope):
            model_path = envs.os_path_adapter(
                envs.workspace_adapter(model_dict["model"]))
            model_cls = envs.lazy_instance_by_fliename(model_path, "Model")
            model = model_cls(context["env"])

            if context["is_infer"]:
                model._infer_data_var = model.input_data(
                    is_infer=context["is_infer"],
                    dataset_name=model_dict["dataset_name"])
            else:
                model._data_var = model.input_data(
                    dataset_name=model_dict["dataset_name"])

            dataset_kind = envs.get_global_env("dataset." + dataset_name +
                                               ".type")
            if dataset_kind == "DataLoader":
                model._init_dataloader(is_infer=context["is_infer"])
                data_loader = DataLoader(context)
                data_loader.get_dataloader(context, dataset_name,
                                           model._data_loader)

            if context["is_infer"]:
                model.net(model._infer_data_var, context["is_infer"])
            else:
                model.net(model._data_var, context["is_infer"])
                optimizer = model.optimizer()
                optimizer.minimize(model._cost)

        phase_entry = context["model"][phase_name]
        phase_entry["main_program"] = train_program
        phase_entry["startup_program"] = startup_program
        phase_entry["scope"] = scope
        phase_entry["model"] = model
        phase_entry["default_main_program"] = train_program.clone()
        phase_entry["compiled_program"] = None

    context["dataset"] = {}
    for dataset in context["env"]["dataset"]:
        dataset_type = envs.get_global_env("dataset." + dataset["name"] +
                                           ".type")
        if dataset_type != "QueueDataset":
            continue
        dataset_class = QueueDataset(context)
        context["dataset"][dataset["name"]] = dataset_class.create_dataset(
            dataset["name"], context)

    context["status"] = "startup_pass"
def test_pslib_1(self):
    """Exercise pslib ``fleet_embedding`` helpers and their error paths.

    The test is skipped (returns after printing) when ``netifaces`` cannot be
    imported or when building the distributed optimizer raises. Every later
    ``try`` block calls a helper with arguments that are expected to be
    rejected and only reports the exception.
    """
    import paddle.fluid as fluid
    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
    # FLEET_GLOBAL_DICT is bound by this import and only mutated in place
    # below, so no ``global`` declaration is needed; declaring it global
    # after the import is a SyntaxError on Python 3.6+.
    from paddle.fluid.incubate.fleet.parameter_server.pslib import \
        fleet_embedding, _prepare_params, _fleet_embedding, \
        _fleet_embedding_v2, FLEET_GLOBAL_DICT
    from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
    try:
        import netifaces
    except ImportError:
        print("warning: no netifaces, skip test_pslib_1")
        return
    os.environ["POD_IP"] = "127.0.0.1"
    os.environ["PADDLE_PORT"] = "36001"
    os.environ["TRAINING_ROLE"] = "TRAINER"
    os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
    os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
    os.environ["PADDLE_TRAINER_ID"] = "0"
    role_maker = GeneralRoleMaker()
    role_maker.generate_role()
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    fleet.init(role_maker)
    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    with fluid.program_guard(train_program, startup_program):
        show = fluid.layers.data(name="show",
                                 shape=[-1, 1],
                                 dtype="int64",
                                 lod_level=1,
                                 append_batch_size=False)
        click = fluid.layers.data(name="click",
                                  shape=[-1, 1],
                                  dtype="int64",
                                  lod_level=1,
                                  append_batch_size=False)
        with fleet_embedding(click_name=click.name):
            emb = fluid.layers.embedding(
                input=show,
                size=[1, 1],
                is_sparse=True,
                is_distributed=True,
                param_attr=fluid.ParamAttr(name="embedding"))
        emb = fluid.layers.data_norm(input=emb,
                                     name="a",
                                     epsilon=1e-4,
                                     param_attr={
                                         "batch_size": 1e4,
                                         "batch_sum_default": 0.0,
                                         "batch_square": 1e4
                                     })
        fc = fluid.layers.fc(input=emb, size=1, act=None)
        label = fluid.layers.data(name="click",
                                  shape=[-1, 1],
                                  dtype="int64",
                                  lod_level=1,
                                  append_batch_size=False)
        label_cast = fluid.layers.cast(label, dtype='float32')
        cost = fluid.layers.log_loss(fc, label_cast)
    try:
        adam = fluid.optimizer.Adam(learning_rate=0.000005)
        adam = fleet.distributed_optimizer(
            adam,
            strategy={
                "embedding": {
                    "sparse_accessor_class": "DownpourSparseValueAccessor"
                }
            })
        adam.minimize([cost], [scope])
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still abort.
        print("do not support pslib test, skip")
        return

    FLEET_GLOBAL_DICT["cur_accessor"] = "DownpourCtrAccessor"
    try:
        _prepare_params(input=show, size=[1, 1])
    except Exception:
        print("catch expected exception of param_attr=None")
    try:
        _prepare_params(input=show,
                        size=[1, 1],
                        param_attr=fluid.ParamAttr())
    except Exception:
        print("catch expected exception of name=None")
    try:
        tmp = fluid.ParamAttr(name="embedding")
        _prepare_params(input=show, size=1, param_attr=tmp)
    except Exception:
        print("catch expected exception of size not list")
    try:
        tmp = fluid.ParamAttr(name="embedding")
        _prepare_params(input=show, size=[-1, 12], param_attr=tmp)
    except Exception:
        print("catch expected exception of size not equal")
    try:
        tmp = fluid.ParamAttr(name="embedding")
        _prepare_params(input=show,
                        size=[-1, 1],
                        param_attr=tmp,
                        is_sparse=False)
    except Exception:
        print("catch expected exception of is_sparse=False")
    try:
        tmp = fluid.ParamAttr(name="embedding")
        _prepare_params(input=show,
                        size=[-1, 1],
                        param_attr=tmp,
                        is_sparse=True,
                        is_distributed=False)
    except Exception:
        print("catch expected exception of is_distributed=False")
    try:
        _prepare_params(input=show,
                        size=[-1, 1],
                        param_attr=fluid.ParamAttr(name="embedding"),
                        is_sparse=True,
                        is_distributed=True,
                        dtype="abc")
    except Exception:
        print("catch expected exception of unknown dtype")
    try:
        FLEET_GLOBAL_DICT["emb_to_accessor"]["embedding"] = "unknown"
        tmp = fluid.ParamAttr(name="embedding")
        _prepare_params(input=show, size=[-1, 1], param_attr=tmp)
    except Exception:
        print("catch expected exception of unknown accessor")

    FLEET_GLOBAL_DICT["cur_accessor"] = "DownpourCtrAccessor"
    try:
        _fleet_embedding(input=show,
                         size=[-1, 1],
                         is_sparse=True,
                         is_distributed=True,
                         dtype="float32",
                         param_attr=fluid.ParamAttr(name="embedding"))
    except Exception:
        print("catch expected exception of unknown accessor")
    try:
        _fleet_embedding_v2(input=show,
                            size=[-1, 1],
                            is_sparse=True,
                            is_distributed=True,
                            dtype="float32",
                            param_attr=fluid.ParamAttr(name="embedding"))
    except Exception:
        print("catch expected exception of unknown accessor")

    adam1 = fluid.optimizer.Adam(learning_rate=0.000005)
    adam1 = fleet.distributed_optimizer(
        adam1,
        strategy={
            "embedding": {
                "sparse_accessor_class": "DownpourSparseValueAccessor"
            }
        })
    # The next two blocks temporarily replace emb_to_table; the saved value
    # is only restored in the handler, i.e. when minimize raises.
    try:
        pre = FLEET_GLOBAL_DICT["emb_to_table"]
        FLEET_GLOBAL_DICT["emb_to_table"] = {}
        adam1.minimize([cost], [scope])
    except Exception:
        FLEET_GLOBAL_DICT["emb_to_table"] = pre
        print("catch expected exception of empty emb_to_table")
    try:
        pre = FLEET_GLOBAL_DICT["emb_to_table"]
        FLEET_GLOBAL_DICT["emb_to_table"] = {}
        FLEET_GLOBAL_DICT["emb_to_table"]["emb1"] = 0
        adam1.minimize([cost], [scope])
    except Exception:
        FLEET_GLOBAL_DICT["emb_to_table"] = pre
        print("catch expected exception of error emb_to_table")

    try:
        adam2 = fluid.optimizer.Adam(learning_rate=0.000005)
        adam2 = fleet.distributed_optimizer(adam2)
        adam2.supported_embedding_types = []
        adam2.minimize([cost], [scope])
    except Exception:
        print("catch expected exception of embedding_types")
    try:
        adam3 = fluid.optimizer.Adam(learning_rate=0.000005)
        adam3 = fleet.distributed_optimizer(
            adam3,
            strategy={
                "embedding": {
                    "sparse_accessor_class": "DownpourSparseValueAccessor",
                    "sparse_embedx_dim": 999
                }
            })
        adam3.minimize([cost], [scope])
    except Exception:
        print("catch expected exception of embedx_dim error")

    try:
        adam4 = fluid.optimizer.Adam(learning_rate=0.000005)
        adam4 = fleet.distributed_optimizer(
            adam4,
            strategy={
                "embedding": {
                    "sparse_accessor_class": "DownpourCtrAccessor",
                    "sparse_embedx_dim": 999
                }
            })
        adam4.minimize([cost], [scope])
    except Exception:
        print("catch expected exception of embedx_dim error")

    train_program1 = fluid.Program()
    startup_program1 = fluid.Program()
    FLEET_GLOBAL_DICT["emb_to_accessor"] = {}
    with fluid.program_guard(train_program1, startup_program1):
        show = fluid.layers.data(name="show",
                                 shape=[-1, 1],
                                 dtype="int64",
                                 lod_level=1,
                                 append_batch_size=False)
        with fleet_embedding(click_name=click.name):
            emb = fluid.layers.embedding(
                input=show,
                size=[1, 1],
                is_sparse=True,
                is_distributed=True,
                param_attr=fluid.ParamAttr(name="embedding"))
        with fleet_embedding(click_name=click.name):
            emb1 = fluid.embedding(
                input=show,
                size=[1, 1],
                is_sparse=True,
                is_distributed=True,
                param_attr=fluid.ParamAttr(name="embedding"))
# Build a one-layer fc network whose summed output is the loss, then attach
# SGD so the program contains gradient variables.
data = fluid.data(name='data', shape=[-1, 10], dtype='float32')
fc = layers.fc(data, size=10)
loss = layers.reduce_sum(fc)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
opt_out = sgd_optimizer.minimize(loss)
print_program(main_program, 'program.before')

# Map gradient name -> gradient variable, taken from the (param, grad) pairs
# in the second element of minimize()'s return value.
grad_dict = {grad.name: grad for param, grad in opt_out[1]}
coalesce_program, grad_out_dict, grad_fused, fused_shape_var = create_coalesce_program(
    grad_dict)
print_program(coalesce_program, 'program.coalesce')

scope = fluid.Scope()
place = fluid.CPUPlace()
exe = fluid.Executor(place)
# initialize parameters
exe.run(start_program, scope=scope)
# pre-allocate coalesce buffer in scope and get the runtime buffer size
# coalesce_program could be merged with startup program
shape_array = exe.run(coalesce_program,
                      fetch_list=[fused_shape_var.name],
                      scope=scope)
runtime_shape = shape_array[0]

# rewrite the main program by replacing all original gradients to sliced variables
# from gradient-fused buffer, and update the buffer size by runtime shape
def create_fake_model(program_config):
    """Build an in-memory Paddle model and its parameters from a config.

    A ``ProgramDesc`` is assembled by hand: one feed op per configured input,
    one persistable variable per weight, the configured ops, and one fetch op
    per output. The weights are created in a separate utility program whose
    ``save_combine`` op writes them to memory.

    Args:
        program_config: object providing ``inputs``, ``weights``, ``ops`` and
            ``outputs``, as read below.

    Returns:
        ``(model, params)``: the serialized program desc and the bytes read
        from the ``out_var_0`` variable after running the utility program.
    """
    paddle.enable_static()
    main_program_desc = core.ProgramDesc()
    util_program = fluid.Program()
    main_block_desc = main_program_desc.block(0)

    feed_var = main_block_desc.var(cpt.to_bytes("feed"))
    feed_var.set_type(core.VarDesc.VarType.FEED_MINIBATCH)
    feed_var.set_persistable(True)

    # One input variable plus one prepended feed op per configured input.
    for col, (name, tensor_config) in enumerate(
            program_config.inputs.items()):
        input_var = main_block_desc.var(cpt.to_bytes(name))
        input_var.set_type(core.VarDesc.VarType.LOD_TENSOR)
        input_var.set_dtype(convert_np_dtype_to_dtype_(tensor_config.dtype))
        input_var.set_shape(tensor_config.shape)
        input_var.set_need_check_feed(True)
        if tensor_config.lod is not None:
            input_var.set_lod_level(len(tensor_config.lod))
        feed_op = main_block_desc._prepend_op()
        feed_op.set_type("feed")
        feed_op.set_input('X', ["feed"])
        feed_op.set_output('Out', [name])
        feed_op._set_attr("col", col)

    # Declare each weight in the main block and create it, initialized from
    # the configured data, in the utility program.
    save_var_map = {}
    for name, tensor_config in program_config.weights.items():
        weight_var = main_block_desc.var(cpt.to_bytes(name))
        weight_var.set_type(core.VarDesc.VarType.LOD_TENSOR)
        weight_var.set_dtype(convert_np_dtype_to_dtype_(tensor_config.dtype))
        weight_var.set_shape(tensor_config.shape)
        weight_var.set_persistable(True)

        save_var_map[name] = util_program.global_block().create_parameter(
            dtype=tensor_config.dtype,
            shape=tensor_config.shape,
            type=core.VarDesc.VarType.LOD_TENSOR,
            name=name,
            initializer=NumpyArrayInitializer(tensor_config.data))

    # Weights are passed to save_combine in sorted-name order.
    in_vars = [save_var_map[name] for name in sorted(save_var_map.keys())]

    out_var = util_program.global_block().create_var(
        type=core.VarDesc.VarType.RAW, name="out_var_0")
    out_var.desc.set_persistable(True)
    util_program.global_block().append_op(type='save_combine',
                                          inputs={'X': in_vars},
                                          outputs={'Y': out_var},
                                          attrs={
                                              'file_path': '',
                                              'save_to_memory': True
                                          })

    for op_config in program_config.ops:
        op_desc = main_block_desc.append_op()
        op_desc.set_type(op_config.type)
        for slot, values in op_config.inputs.items():
            op_desc.set_input(slot, values)
        for attr_name, attr_value in op_config.attrs.items():
            op_desc._set_attr(attr_name, attr_value)
        for slot, values in op_config.outputs.items():
            op_desc.set_output(slot, values)
            for out_name in values:
                produced = main_block_desc.var(cpt.to_bytes(out_name))
                produced.set_type(core.VarDesc.VarType.LOD_TENSOR)
                # float32 unless the op config names a dtype for this output.
                produced.set_dtype(convert_np_dtype_to_dtype_(np.float32))
                has_override = (op_config.outputs_dtype is not None
                                and out_name
                                in op_config.outputs_dtype.keys())
                if has_override:
                    produced.set_dtype(
                        convert_np_dtype_to_dtype_(
                            op_config.outputs_dtype[out_name]))

        op_desc.infer_var_type(main_block_desc)
        op_desc.infer_shape(main_block_desc)
        op_desc.check_attrs()

    for col, name in enumerate(program_config.outputs):
        fetch_var = main_block_desc.var(cpt.to_bytes("fetch"))
        fetch_var.set_type(core.VarDesc.VarType.FETCH_LIST)
        fetch_var.set_need_check_feed(True)
        fetch_op = main_block_desc.append_op()
        fetch_op.set_type("fetch")
        fetch_op.set_input('X', [name])
        fetch_op.set_output('Out', ["fetch"])
        fetch_op._set_attr("col", col)

    main_program_desc._set_version()
    paddle.fluid.core.save_op_version_info(main_program_desc)

    model = main_program_desc.serialize_to_string()

    util_program._sync_with_cpp()
    executor = fluid.Executor(fluid.CPUPlace())
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        executor.run(util_program)
        params = scope.find_var("out_var_0").get_bytes()
    return model, params
def infer_step(args, vocab_size, test_reader, use_cuda, i2w):
    """Evaluate saved checkpoints on the analogy test data and print accuracy.

    For every epoch in ``start_index..last_index`` and every batch id in
    ``args.start_batch..args.end_batch`` a checkpoint is loaded into a clone
    of the inference program and run over ``test_reader``.

    Args:
        args: provides ``emb_size``, ``batch_size``, ``start_batch``,
            ``end_batch`` and ``print_step``.
        vocab_size: vocabulary size passed to ``net.infer_network``.
        test_reader: callable returning an iterable of batches.
        use_cuda: run on ``CUDAPlace(0)`` when truthy, otherwise on CPU.
        i2w: not read by this function.

    NOTE(review): ``start_index``, ``last_index`` and ``model_dir`` are not
    defined in this function; presumably module-level names - confirm.
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    emb_size = args.emb_size
    batch_size = args.batch_size
    with fluid.scope_guard(fluid.Scope()):
        main_program = fluid.Program()
        with fluid.program_guard(main_program):
            values, pred = net.infer_network(vocab_size, emb_size)
            for epoch in range(start_index, last_index + 1):
                for batchid in range(args.start_batch, args.end_batch):
                    # Load each checkpoint into its own clone of the program.
                    copy_program = main_program.clone()
                    model_path = model_dir + "/pass-" + str(epoch) + (
                        '/batch-' + str(batchid * args.print_step))
                    fluid.load(copy_program, model_path, exe)
                    accum_num = 0  # correct predictions so far
                    accum_num_sum = 0.0  # samples scored so far
                    t0 = time.time()
                    step_id = 0
                    for data in test_reader():
                        step_id += 1
                        b_size = len([dat[0] for dat in data])
                        # Columns 0-2 of each sample feed the three analogy
                        # inputs; column 3 is the label and column 4 the
                        # collection used to skip candidates below.
                        wa = np.array([dat[0] for dat in data
                                       ]).astype("int64").reshape(b_size)
                        wb = np.array([dat[1] for dat in data
                                       ]).astype("int64").reshape(b_size)
                        wc = np.array([dat[2] for dat in data
                                       ]).astype("int64").reshape(b_size)

                        label = [dat[3] for dat in data]
                        input_word = [dat[4] for dat in data]
                        para = exe.run(
                            copy_program,
                            feed={
                                "analogy_a": wa,
                                "analogy_b": wb,
                                "analogy_c": wc,
                                "all_label":
                                np.arange(vocab_size).reshape(vocab_size),
                            },
                            fetch_list=[pred.name, values],
                            return_numpy=False)
                        pre = np.array(para[0])
                        val = np.array(para[1])
                        for ii in range(len(label)):
                            top4 = pre[ii]
                            accum_num_sum += 1
                            # Score the first candidate that is not listed in
                            # input_word[ii], then stop.
                            # NOTE(review): the placement of ``break`` was
                            # reconstructed from flattened source - confirm.
                            for idx in top4:
                                if int(idx) in input_word[ii]:
                                    continue
                                if int(idx) == int(label[ii][0]):
                                    accum_num += 1
                                break
                        if step_id % 1 == 0:
                            print("step:%d %d " % (step_id, accum_num))

                    print("epoch:%d \t acc:%.3f " %
                          (epoch, 1.0 * accum_num / accum_num_sum))
                    t1 = time.time()
# Author: Acer Zhang # Datetime:2020/5/10 14:19 # Copyright belongs to the author. # Please indicate the source for reprinting. import paddle.fluid as fluid import numpy as np import PIL.Image as Image img_path = r"D:\DLExample\easy07_visual_feature_map\data\1.jpg" save_model_path = "./model" img1 = Image.open(img_path).convert('L') img1 = np.array(img1).reshape(1, 1, 30, 15).astype(np.float32) # NCHW格式 img1 /= 255 # 归一化以提升训练效果 # 防止Notebook中出现冲突等问题,使用新的ScopeS、来保证程序的健壮性。 new_scope = fluid.Scope() place = fluid.CPUPlace() exe = fluid.Executor(place) with fluid.scope_guard(new_scope): # 读取预测模型 infer_program, feed_name, fetch_list = fluid.io.load_inference_model( save_model_path, exe) outs = exe.run(program=infer_program, feed={feed_name[0]: img1}, fetch_list=fetch_list) print("概率分布:", np.round(outs[0][0], 2)) # 保留2位整数 print("推理结果:", np.argmax(outs[0][0])) # 获取概率最高的标签索引
exe.run(fluid.default_startup_program()) ##开始训练,迭代500次 for i in range(500): outs = exe.run(feed={ 'x': train_data, 'y': y_true }, fetch_list=[y_predict.name, avg_cost.name]) if i % 50 == 0: print('iter={:.0f},cost={}'.format(i, outs[1][0])) # 存储训练结果 params_dirname = "result" fluid.io.save_inference_model(params_dirname, ['x'], [y_predict], exe) # 开始预测 infer_exe = fluid.Executor(cpu) inference_scope = fluid.Scope() # 加载训练好的模型 with fluid.scope_guard(inference_scope): [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(params_dirname, infer_exe) # 生成测试数据 test = np.array([[[9], [5], [2], [10]]]).astype('float32') # 进行预测 results = infer_exe.run(inference_program, feed={"x": test}, fetch_list=fetch_targets) # 给出题目为 【9,5,2,10】 输出y=4*9+6*5+7*2+10*2的值 print("9a+5b+2c+10d={}".format(results[0][0]))
def test_pslib_1(self):
    """Exercise the pslib fleet API, returning early when it is unavailable.

    The test builds a small embedding + fc network, wraps Adam in the pslib
    distributed optimizer, starts the server and then calls the fleet
    save/load/confirm/revert entry points. Any failure in those steps is
    reported with a message and the test simply returns.
    """
    import paddle.fluid as fluid
    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
    from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib
    from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker

    # ``except BaseException`` is the explicit spelling of a bare ``except:``;
    # every handler below swallows any failure and skips the rest of the test.
    try:
        import netifaces
    except BaseException:
        print("warning: no netifaces, skip test_pslib_1")
        return

    # Environment describing a single local trainer and one parameter server.
    env_settings = (
        ("POD_IP", "127.0.0.1"),
        ("PADDLE_PORT", "36001"),
        ("TRAINING_ROLE", "TRAINER"),
        ("PADDLE_TRAINER_ENDPOINTS", "127.0.0.1:36001"),
        ("PADDLE_PSERVERS_IP_PORT_LIST", "127.0.0.1:36002"),
        ("PADDLE_TRAINER_ID", "0"),
    )
    for env_key, env_value in env_settings:
        os.environ[env_key] = env_value

    # The role maker is only constructed; role_maker.generate_role() and
    # fleet.init(role_maker) stay disabled, as in the original test.
    role_maker = GeneralRoleMaker()
    cpu_place = fluid.CPUPlace()
    executor = fluid.Executor(cpu_place)

    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()

    with fluid.program_guard(train_program, startup_program):
        show_ids = fluid.layers.data(
            name="show",
            shape=[-1, 1],
            dtype="int64",
            lod_level=1,
            append_batch_size=False)
        embedded = fluid.layers.embedding(
            input=show_ids,
            size=[1, 1],
            is_sparse=True,
            is_distributed=True,
            param_attr=fluid.ParamAttr(name="embedding"))
        fc_out = fluid.layers.fc(input=embedded, size=1, act=None)
        click = fluid.layers.data(
            name="click",
            shape=[-1, 1],
            dtype="int64",
            lod_level=1,
            append_batch_size=False)
        click_float = fluid.layers.cast(click, dtype='float32')
        loss = fluid.layers.log_loss(fc_out, click_float)

    # Accessor configuration for the table named "embedding".
    strategy = {
        "embedding": {
            "sparse_accessor_class": "DownpourCtrAccessor"
        }
    }
    try:
        optimizer = fluid.optimizer.Adam(learning_rate=0.000005)
        optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        optimizer.minimize([loss], [scope])
        fleet.run_server()
    except BaseException:
        print("do not support pslib test, skip")
        return

    def test_func():
        """Stub that always reports success; used only by this test."""
        return True

    try:
        # Workers, not servers, are meant to call the methods below. The role
        # maker hooks are stubbed so the calls can be exercised when
        # with_pslib=off.
        fleet._role_maker.is_first_worker = test_func
        fleet._role_maker._barrier_worker = test_func
        fleet.save_model("./model_000")
        fleet.save_one_table(0, "./model_001")
        fleet.save_one_table(0, "./model_002", prefix="hahaha")
        fleet.load_model("./model_0003")
        fleet.load_one_table(0, "./model_004")
        fleet.confirm()
        fleet.revert()
    except BaseException:
        print("do not support pslib test, skip")
        return