def get_places(self):
    """Return the device-place configurations to exercise in tests.

    Always includes 1-thread and 4-thread CPU setups; adds single-GPU and
    dual-GPU setups when this Paddle build was compiled with CUDA.
    """
    place_list = [fluid.cpu_places(1), fluid.cpu_places(4)]
    if fluid.is_compiled_with_cuda():
        place_list.extend(
            [fluid.cuda_places(0), fluid.cuda_places([0, 1])])
    return place_list
def run_main(self, reader, use_sample_generator, iterable, drop_last):
    """Feed `reader` through a PyReader in the requested mode and verify that
    every batch produced matches the reference data from
    `generate_all_data`.

    Args:
        reader: sample-level reader callable.
        use_sample_generator: if True decorate with a per-sample generator,
            otherwise with a batched sample-list generator.
        iterable: whether the PyReader is used as a Python iterable or in
            start()/reset() mode.
        drop_last: whether the final partial batch is dropped.
    """
    image = fluid.layers.data(name='image', dtype='float32', shape=[784])
    label = fluid.layers.data(name='label', dtype='int64', shape=[1])
    py_reader = fluid.io.PyReader(feed_list=[image, label],
                                  capacity=16,
                                  iterable=iterable,
                                  use_double_buffer=False)
    batch_reader = paddle.batch(reader, self.batch_size, drop_last)
    all_datas = self.generate_all_data(batch_reader)
    if not use_sample_generator:
        py_reader.decorate_sample_list_generator(batch_reader,
                                                 places=fluid.cpu_places())
    else:
        py_reader.decorate_sample_generator(reader, self.batch_size,
                                            drop_last,
                                            places=fluid.cpu_places())
    # NOTE(review): batch_num is computed but never read below — the checks
    # use len(all_datas) instead. Possibly leftover; confirm before removing.
    if drop_last:
        batch_num = int(self.sample_num / self.batch_size)
    else:
        batch_num = math.ceil(float(self.sample_num) / self.batch_size)
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    for _ in range(self.epoch_num):
        if py_reader.iterable:
            # Iterable mode: the reader is consumed as a Python generator and
            # each batch is fed explicitly.
            step = 0
            for data in py_reader():
                img, lbl = exe.run(feed=data, fetch_list=[image, label])
                self.assertArrayEqual(img, all_datas[step][0])
                self.assertArrayEqual(lbl, all_datas[step][1])
                step += 1
            self.assertEqual(step, len(all_datas))
        else:
            # start()/reset() mode: batches are pulled implicitly until the
            # reader signals exhaustion via EOFException.
            step = 0
            try:
                py_reader.start()
                while True:
                    img, lbl = exe.run(fetch_list=[image, label])
                    self.assertArrayEqual(img, all_datas[step][0])
                    self.assertArrayEqual(lbl, all_datas[step][1])
                    step += 1
            except fluid.core.EOFException:
                py_reader.reset()
                self.assertEqual(step, len(all_datas))
                # Leaves the epoch loop after the first non-iterable epoch.
                break
def main(args):
    """Build and run a single evaluation pass over the validation set.

    Loads the config (with CLI overrides), constructs the eval program,
    restores model weights via `init_model`, and delegates execution to
    `program.run` in 'eval' mode.
    """
    config = get_config(args.config, overrides=args.override, show=True)
    use_gpu = config.get("use_gpu", True)
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    startup_prog = fluid.Program()
    valid_prog = fluid.Program()
    valid_dataloader, valid_fetchs = program.build(config,
                                                   valid_prog,
                                                   startup_prog,
                                                   is_train=False,
                                                   is_distributed=False)
    # Clone for inference so train-only ops (e.g. dropout) behave in test mode.
    valid_prog = valid_prog.clone(for_test=True)
    exe = fluid.Executor(places[0])
    exe.run(startup_prog)
    init_model(config, valid_prog, exe)
    valid_reader = Reader(config, 'valid')()
    valid_dataloader.set_sample_list_generator(valid_reader, places)
    compiled_valid_prog = program.compile(config, valid_prog)
    # epoch id -1 marks a standalone evaluation run.
    program.run(valid_dataloader, exe, compiled_valid_prog, valid_fetchs, -1,
                'eval')
def __init__(self, model_type): self.model_type = model_type # 现有的CV模型都有这个属性,而这个属且也需要在eval时用到 self.num_classes = None self.labels = None self.version = paddlex.__version__ if paddlex.env_info['place'] == 'cpu': self.places = fluid.cpu_places() else: self.places = fluid.cuda_places() self.exe = fluid.Executor(self.places[0]) self.train_prog = None self.test_prog = None self.parallel_train_prog = None self.train_inputs = None self.test_inputs = None self.train_outputs = None self.test_outputs = None self.train_data_loader = None self.eval_metrics = None # 若模型是从inference model加载进来的,无法调用训练接口进行训练 self.trainable = True # 是否使用多卡间同步BatchNorm均值和方差 self.sync_bn = False # 当前模型状态 self.status = 'Normal' # 已完成迭代轮数,为恢复训练时的起始轮数 self.completed_epochs = 0 self.scope = fluid.global_scope() # 线程池,在模型在预测时用于对输入数据以图片为单位进行并行处理 # 主要用于batch_predict接口 thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8 self.thread_pool = mp.pool.ThreadPool(thread_num)
def train(use_cuda):
    """Train the seq2seq model for EPOCH_NUM passes and save its parameters.

    Builds the training program, wires the batch generator to the chosen
    devices, runs data-parallel training, and finally persists the params
    to `model_save_dir`.
    """
    # define program
    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            # For training:
            # inputs = [src, src_sequence_length, trg, trg_sequence_length, label]
            inputs, loader = data_func(is_train=True)
            logits = model_func(inputs, is_train=True)
            loss = loss_func(logits, inputs[-1], inputs[-2])
            optimizer = optimizer_func()
            optimizer.minimize(loss)
    # define data source
    places = fluid.cuda_places() if use_cuda else fluid.cpu_places()
    loader.set_batch_generator(inputs_generator(batch_size,
                                                eos_id,
                                                is_train=True),
                               places=places)
    exe = fluid.Executor(places[0])
    exe.run(startup_prog)
    prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name)
    EPOCH_NUM = 20
    for pass_id in six.moves.xrange(EPOCH_NUM):
        batch_id = 0
        for data in loader():
            loss_val = exe.run(prog, feed=data, fetch_list=[loss])[0]
            print('pass_id: %d, batch_id: %d, loss: %f' %
                  (pass_id, batch_id, loss_val))
            batch_id += 1
    fluid.io.save_params(exe, model_save_dir, main_program=train_prog)
def __init__(self,
             sent_emb_dim,
             word_emb_dim,
             sent_len,
             lr=0.001,
             bidirectional=False,
             dropout_prob=None,
             num_layers=1,
             use_gpu=True,
             emb_size_ratio=1.5):
    """Initialize the SkipThoughts model configuration.

    Args:
        sent_emb_dim: dimensionality of the sentence embedding.
        word_emb_dim: dimensionality of the word embedding.
        sent_len: maximum sentence length, forwarded to the base class.
        lr: learning rate.
        bidirectional: whether the encoder RNN is bidirectional.
        dropout_prob: dropout probability (None disables dropout).
        num_layers: number of RNN layers.
        use_gpu: run on GPU when True, otherwise CPU.
        emb_size_ratio: ratio used to size the embedding table.
    """
    super(SkipThoughts, self).__init__(sent_len=sent_len)
    self.sent_emb_dim = sent_emb_dim
    self.word_emb_dim = word_emb_dim
    self.lr = lr
    self.bidirectional = bidirectional
    self.dropout_prob = dropout_prob
    self.num_layers = num_layers
    # Single place for the executor; multiple places for the dataloader
    # (8 CPU threads when running without a GPU).
    self.place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    self.dataloader_places = fluid.cuda_places(
    ) if use_gpu else fluid.cpu_places(8)
    # Lifecycle flags for build/fit/test phases.
    self.built = False
    self.test_emb_pin = 0
    self.init = False
    self.test_fitted = False
    self.emb_size_ratio = emb_size_ratio
def __init__(self, model_type): self.model_type = model_type # 现有的CV模型都有这个属性,而这个属且也需要在eval时用到 self.num_classes = None self.labels = None self.version = paddlex.__version__ if paddlex.env_info['place'] == 'cpu': self.places = fluid.cpu_places() else: self.places = fluid.cuda_places() self.exe = fluid.Executor(self.places[0]) self.train_prog = None self.test_prog = None self.parallel_train_prog = None self.train_inputs = None self.test_inputs = None self.train_outputs = None self.test_outputs = None self.train_data_loader = None self.eval_metrics = None # 若模型是从inference model加载进来的,无法调用训练接口进行训练 self.trainable = True # 是否使用多卡间同步BatchNorm均值和方差 self.sync_bn = False # 当前模型状态 self.status = 'Normal' # 已完成迭代轮数,为恢复训练时的起始轮数 self.completed_epochs = 0
def test_queue_dataset_run_3(self):
    """
    Testcase for an in-memory dataset from create to run.
    Uses CUDAPlace when this build has CUDA, CPUPlace otherwise.
    Slot ids are int64.
    (NOTE(review): despite the method name, this variant exercises
    paddle.distributed.InMemoryDataset, not QueueDataset.)
    """
    # Write two small text files of slot data to feed the dataset.
    with open("test_queue_dataset_run_a.txt", "w") as f:
        data = "2 1 2 2 5 4 2 2 7 2 1 3\n"
        data += "2 6 2 2 1 4 2 2 4 2 2 3\n"
        data += "2 5 2 2 9 9 2 2 7 2 1 3\n"
        data += "2 7 2 2 1 9 2 3 7 2 5 3\n"
        f.write(data)
    with open("test_queue_dataset_run_b.txt", "w") as f:
        data = "2 1 2 2 5 4 2 2 7 2 1 3\n"
        data += "2 6 2 2 1 4 2 2 4 2 2 3\n"
        data += "2 5 2 2 9 9 2 2 7 2 1 3\n"
        data += "2 7 2 2 1 9 2 3 7 2 5 3\n"
        f.write(data)
    slots = ["slot1", "slot2", "slot3", "slot4"]
    slots_vars = []
    for slot in slots:
        var = fluid.data(name=slot,
                         shape=[None, 1],
                         dtype="int64",
                         lod_level=1)
        slots_vars.append(var)
    dataset = paddle.distributed.InMemoryDataset()
    dataset.init(batch_size=1,
                 thread_num=2,
                 input_type=1,
                 pipe_command="cat",
                 use_var=slots_vars)
    dataset.set_filelist(
        ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
    dataset.load_into_memory()
    exe = fluid.Executor(fluid.CPUPlace(
    ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
    exe.run(fluid.default_startup_program())
    if self.use_data_loader:
        # Drive the dataset through a DataLoader and feed batches manually.
        data_loader = fluid.io.DataLoader.from_dataset(
            dataset, fluid.cpu_places(), self.drop_last)
        for i in range(self.epoch_num):
            for data in data_loader():
                exe.run(fluid.default_main_program(), feed=data)
    else:
        # Let the executor pull directly from the dataset.
        for i in range(self.epoch_num):
            try:
                exe.train_from_dataset(fluid.default_main_program(),
                                       dataset)
            except Exception as e:
                self.assertTrue(False)
    # Clean up the temporary data files.
    if os.path.exists("./test_queue_dataset_run_a.txt"):
        os.remove("./test_queue_dataset_run_a.txt")
    if os.path.exists("./test_queue_dataset_run_b.txt"):
        os.remove("./test_queue_dataset_run_b.txt")
def test_in_memory_dataset_run(self):
    """
    Testcase for InMemoryDataset from create to run: writes sample files,
    exercises shuffle/fea-eval/unique-feasign features, then trains either
    via DataLoader or via train_from_dataset on CPU.
    """
    with open("test_in_memory_dataset_run_a.txt", "w") as f:
        data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
        data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
        data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
        f.write(data)
    with open("test_in_memory_dataset_run_b.txt", "w") as f:
        data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
        data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
        data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
        data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
        f.write(data)
    slots = ["slot1", "slot2", "slot3", "slot4"]
    slots_vars = []
    for slot in slots:
        var = fluid.layers.data(name=slot,
                                shape=[1],
                                dtype="int64",
                                lod_level=1)
        slots_vars.append(var)
    dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
        "InMemoryDataset")
    dataset.set_batch_size(32)
    dataset.set_thread(3)
    dataset.set_filelist([
        "test_in_memory_dataset_run_a.txt",
        "test_in_memory_dataset_run_b.txt"
    ])
    dataset.set_pipe_command("cat")
    dataset.set_use_var(slots_vars)
    dataset.load_into_memory()
    # Exercise feature-evaluation mode and slot/local shuffling.
    dataset.set_fea_eval(1, True)
    dataset.slots_shuffle(["slot1"])
    dataset.local_shuffle()
    # Exercise unique-feasign generation and local table building.
    dataset.set_generate_unique_feasigns(True, 15)
    dataset.generate_local_tables_unlock(0, 11, 1, 25, 15)
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    if self.use_data_loader:
        data_loader = fluid.io.DataLoader.from_dataset(
            dataset, fluid.cpu_places(), self.drop_last)
        for i in range(self.epoch_num):
            for data in data_loader():
                exe.run(fluid.default_main_program(), feed=data)
    else:
        for i in range(self.epoch_num):
            try:
                exe.train_from_dataset(fluid.default_main_program(),
                                       dataset)
            except Exception as e:
                self.assertTrue(False)
    os.remove("./test_in_memory_dataset_run_a.txt")
    os.remove("./test_in_memory_dataset_run_b.txt")
def infer(args):
    """Run beam-search inference with a dygraph Transformer.

    Loads a checkpoint, translates the test set batch by batch, and writes
    the best-beam translation of each sentence to `args.output_file`.
    """
    if not args.use_cpu:
        # Under data parallelism each process owns its env-assigned GPU;
        # otherwise default to GPU 0.
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
            if args.use_data_parallel else fluid.CUDAPlace(0)
    else:
        place = fluid.cpu_places()[0]
    with fluid.dygraph.guard(place):
        transformer = TransFormer(
            'transformer', ModelHyperParams.src_vocab_size,
            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.prepostprocess_dropout,
            ModelHyperParams.attention_dropout,
            ModelHyperParams.relu_dropout, ModelHyperParams.preprocess_cmd,
            ModelHyperParams.postprocess_cmd,
            ModelHyperParams.weight_sharing)
        # load checkpoint
        model_dict, _ = fluid.load_dygraph(args.model_file)
        transformer.load_dict(model_dict)
        print("checkpoint loaded")
        # start evaluate mode
        transformer.eval()
        generator, word_dict = transformer_reader('test')
        reader = paddle.batch(
            generator,
            # wmt16.test(ModelHyperParams.src_vocab_size,
            #            ModelHyperParams.trg_vocab_size),
            batch_size=InferTaskConfig.batch_size)
        id2word = word_dict
        # wmt16.get_dict("de",
        #                ModelHyperParams.trg_vocab_size,
        #                reverse=True)
        f = open(args.output_file, "wb")
        for batch in reader():
            enc_inputs, dec_inputs = prepare_infer_input(
                batch, ModelHyperParams.eos_idx, ModelHyperParams.bos_idx,
                ModelHyperParams.n_head)
            finished_seq, finished_scores = transformer.beam_search(
                enc_inputs,
                dec_inputs,
                bos_id=ModelHyperParams.bos_idx,
                eos_id=ModelHyperParams.eos_idx,
                max_len=InferTaskConfig.max_out_len,
                alpha=InferTaskConfig.alpha)
            finished_seq = finished_seq.numpy()
            finished_scores = finished_scores.numpy()
            for ins in finished_seq:
                for beam in ins:
                    # Strip BOS/EOS then map ids back to words.
                    id_list = post_process_seq(beam,
                                               ModelHyperParams.bos_idx,
                                               ModelHyperParams.eos_idx)
                    word_list = [id2word[id] for id in id_list]
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence.encode("utf8"))
                    break  # only print the best
def default_exe_params(is_distributed, use_cuda, thread_num):
    """
    Set the default execute parameters.

    Args:
        is_distributed: whether to initialize collective fleet training
            (GPU only; CPU distributed mode exits with an error).
        use_cuda: run on GPU places when True, otherwise CPU places.
        thread_num: number of CPU places/threads in non-distributed CPU mode.

    Returns:
        dict with keys 'exe', 'trainer_num', 'trainer_id', 'gpu_id',
        'dist_strategy' (None unless distributed) and 'places'.
    """
    gpu_id = 0
    trainer_num = 1
    trainer_id = 0
    dist_strategy = None
    places = None
    if is_distributed:
        if use_cuda:
            # Collective (NCCL) training: each worker picks its GPU from the
            # environment and shares a tuned DistributedStrategy.
            role = role_maker.PaddleCloudRoleMaker(is_collective=True)
            fleet.init(role)
            gpu_id = int(os.getenv("FLAGS_selected_gpus"))
            trainer_num = fleet.worker_num()
            trainer_id = fleet.worker_index()
            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.use_experimental_executor = True
            exec_strategy.num_threads = 4
            exec_strategy.num_iteration_per_drop_scope = 1
            dist_strategy = DistributedStrategy()
            dist_strategy.exec_strategy = exec_strategy
            dist_strategy.nccl_comm_num = 2
            dist_strategy.fuse_all_reduce_ops = True
            dist_strategy.forward_recompute = True
            dist_strategy.use_amp = True
            dist_strategy.amp_loss_scaling = 12800.0
            places = fluid.cuda_places()
        else:
            print('Only gpu is supported for distributed mode at present.')
            exit(-1)
    else:
        if use_cuda:
            places = fluid.cuda_places()
        else:
            places = fluid.cpu_places(thread_num)
            # CPU_NUM controls how many CPU places Paddle exposes.
            os.environ['CPU_NUM'] = str(thread_num)
    if use_cuda:
        exe = fluid.Executor(fluid.CUDAPlace(gpu_id))
    else:
        exe = fluid.Executor(fluid.CPUPlace())
    return {
        'exe': exe,
        'trainer_num': trainer_num,
        'trainer_id': trainer_id,
        'gpu_id': gpu_id,
        'dist_strategy': dist_strategy,
        'places': places
    }
def build(self, model, data_gen, config):
    """Wire up optimizer, PS worker, compiled program and data feeding.

    Args:
        model: object exposing `loss` and `data_loader`.
        data_gen: batch generator fed to the model's data loader (on CPU
            places).
        config: settings with `optimizer_type`, `lr` and `ckpt_path`.
    """
    self.optimize(model.loss, config.optimizer_type, config.lr)
    self.init_and_run_ps_worker(config.ckpt_path)
    # NOTE(review): `complie_program` looks like a typo of `compile_program`,
    # but it is an external method name — renaming must happen at its
    # definition and all call sites together.
    self.program = self.complie_program(model.loss)
    self.fleet = tfleet
    model.data_loader.decorate_batch_generator(data_gen,
                                               places=F.cpu_places())
    self.config = config
    self.model = model
def test_set_download_cmd(self):
    """
    Testcase for InMemoryDataset with a custom download_cmd, from create
    to run: writes afs-prefixed sample files, loads them through the
    "cat" download/pipe commands, and runs training via DataLoader or
    train_from_dataset.
    """
    filename1 = "afs:test_in_memory_dataset_run_a.txt"
    filename2 = "afs:test_in_memory_dataset_run_b.txt"
    with open(filename1, "w") as f:
        data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
        data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
        data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
        f.write(data)
    with open(filename2, "w") as f:
        data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
        data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
        data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
        data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
        f.write(data)
    slots = ["slot1", "slot2", "slot3", "slot4"]
    slots_vars = []
    for slot in slots:
        var = fluid.layers.data(name=slot,
                                shape=[1],
                                dtype="int64",
                                lod_level=1)
        slots_vars.append(var)
    dataset = paddle.distributed.InMemoryDataset()
    dataset.init(batch_size=32,
                 thread_num=3,
                 pipe_command="cat",
                 download_cmd="cat",
                 use_var=slots_vars)
    dataset.set_filelist([filename1, filename2])
    dataset.load_into_memory()
    paddle.enable_static()
    startup_program = paddle.static.Program()
    main_program = paddle.static.Program()
    # Fix: the original created a paddle.static.Executor here and then
    # immediately shadowed it with this fluid.Executor before any use;
    # the dead assignment has been removed.
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_program)
    if self.use_data_loader:
        data_loader = fluid.io.DataLoader.from_dataset(
            dataset, fluid.cpu_places(), self.drop_last)
        for i in range(self.epoch_num):
            for data in data_loader():
                exe.run(main_program, feed=data)
    else:
        for i in range(self.epoch_num):
            try:
                exe.train_from_dataset(main_program, dataset)
            except Exception as e:
                self.assertTrue(False)
    # Clean up the temporary data files.
    os.remove(filename1)
    os.remove(filename2)
def test_dataset_run_with_stat(self):
    """Run an InMemoryDataset end to end and read the internal feasign
    counter from core int stats afterwards."""
    with open("test_in_memory_dataset_run_a.txt", "w") as f:
        data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
        data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
        data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
        f.write(data)
    with open("test_in_memory_dataset_run_b.txt", "w") as f:
        data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
        data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
        data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
        data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
        f.write(data)
    slots = ["slot1", "slot2", "slot3", "slot4"]
    slots_vars = []
    for slot in slots:
        var = fluid.layers.data(name=slot,
                                shape=[1],
                                dtype="int64",
                                lod_level=1)
        slots_vars.append(var)
    dataset = paddle.distributed.InMemoryDataset()
    # Private setters are used here because this test targets internals.
    dataset._set_batch_size(32)
    dataset._set_thread(3)
    dataset.set_filelist([
        "test_in_memory_dataset_run_a.txt",
        "test_in_memory_dataset_run_b.txt"
    ])
    dataset._set_pipe_command("cat")
    dataset._set_use_var(slots_vars)
    dataset.load_into_memory()
    dataset._set_fea_eval(1, True)
    dataset.slots_shuffle(["slot1"])
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    if self.use_data_loader:
        data_loader = fluid.io.DataLoader.from_dataset(
            dataset, fluid.cpu_places(), self.drop_last)
        for i in range(self.epoch_num):
            for data in data_loader():
                exe.run(fluid.default_main_program(), feed=data)
    else:
        for i in range(self.epoch_num):
            try:
                exe.train_from_dataset(fluid.default_main_program(),
                                       dataset)
            except Exception as e:
                self.assertTrue(False)
    int_stat = core.get_int_stats()
    # total 56 keys
    print(int_stat["STAT_total_feasign_num_in_mem"])
    os.remove("./test_in_memory_dataset_run_a.txt")
    os.remove("./test_in_memory_dataset_run_b.txt")
def test_queue_dataset_run_2(self):
    """
    Testcase for QueueDataset from create to run.
    Use CUDAPlace (when this build has CUDA, CPUPlace otherwise).
    Use float type id.
    """
    with open("test_queue_dataset_run_a.txt", "w") as f:
        data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
        data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
        data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
        f.write(data)
    with open("test_queue_dataset_run_b.txt", "w") as f:
        data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
        data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
        data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
        data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
        f.write(data)
    slots = ["slot1_f", "slot2_f", "slot3_f", "slot4_f"]
    slots_vars = []
    for slot in slots:
        var = fluid.layers.data(name=slot,
                                shape=[1],
                                dtype="float32",
                                lod_level=1)
        slots_vars.append(var)
    dataset = paddle.distributed.QueueDataset()
    dataset.init(batch_size=32,
                 thread_num=3,
                 pipe_command="cat",
                 use_var=slots_vars)
    dataset.set_filelist(
        ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
    exe = fluid.Executor(fluid.CPUPlace(
    ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
    exe.run(fluid.default_startup_program())
    if self.use_data_loader:
        data_loader = fluid.io.DataLoader.from_dataset(
            dataset, fluid.cpu_places(), self.drop_last)
        for i in range(self.epoch_num):
            for data in data_loader():
                exe.run(fluid.default_main_program(), feed=data)
    else:
        for i in range(self.epoch_num):
            try:
                exe.train_from_dataset(fluid.default_main_program(),
                                       dataset)
            except Exception as e:
                self.assertTrue(False)
    # Clean up the temporary data files.
    if os.path.exists("./test_queue_dataset_run_a.txt"):
        os.remove("./test_queue_dataset_run_a.txt")
    if os.path.exists("./test_queue_dataset_run_b.txt"):
        os.remove("./test_queue_dataset_run_b.txt")
def test_main(self):
    """Verify DataLoader constructor argument validation in dygraph mode:
    each invalid combination must raise AssertionError, and a valid
    batch_sampler setup must succeed."""
    place = fluid.cpu_places()[0]
    with fluid.dygraph.guard(place):
        dataset = RandomDataset(100)
        batch_sampler = BatchSampler(dataset=dataset, batch_size=4)

        # dataset is not instance of Dataset
        try:
            loader = DataLoader(dataset=batch_sampler, places=place)
            self.assertTrue(False)
        except AssertionError:
            pass

        # places is None
        try:
            loader = DataLoader(dataset=dataset, places=None)
            self.assertTrue(False)
        except AssertionError:
            pass

        # num_workers < 0
        try:
            loader = DataLoader(dataset=dataset,
                                places=place,
                                num_workers=-1)
            self.assertTrue(False)
        except AssertionError:
            pass

        # timeout < 0
        try:
            loader = DataLoader(dataset=dataset, places=place, timeout=-1)
            self.assertTrue(False)
        except AssertionError:
            pass

        # set batch_sampler and shuffle/batch_size/drop_last
        try:
            loader = DataLoader(dataset=dataset,
                                places=place,
                                batch_sampler=batch_sampler,
                                shuffle=True,
                                drop_last=True)
            self.assertTrue(False)
        except AssertionError:
            pass

        # set batch_sampler correctly
        try:
            loader = DataLoader(dataset=dataset,
                                places=place,
                                batch_sampler=batch_sampler)
            self.assertTrue(True)
        except AssertionError:
            self.assertTrue(False)
def test_main(self):
    """Run the network over every place set and the full cross product of
    persistable/split options.

    Note: `places` is a list of place-lists — each element (4 CPU places,
    or all CUDA places) is passed as one configuration to `run_network`.
    """
    places = [fluid.cpu_places(4)]
    if fluid.is_compiled_with_cuda():
        places.append(fluid.cuda_places())
    for p in places:
        for has_persistable in [False, True]:
            for use_split in [False, True]:
                self.run_network(p,
                                 use_split=use_split,
                                 has_persistable=has_persistable)
def get_data_run_places(args):
    """Decide where the data layer (dataloader) should run.

    Args:
        args: mapping with keys "use_parallel", "use_gpu" and
            "num_of_device".

    Returns:
        A list of fluid places: multiple GPU/CPU places in parallel mode,
        otherwise a single GPU-0 or CPU place.
    """
    USE_PARALLEL = args["use_parallel"]
    USE_GPU = args["use_gpu"]
    NUM_OF_DEVICE = args["num_of_device"]
    if USE_PARALLEL and NUM_OF_DEVICE > 1:
        if USE_GPU:
            # Expose GPUs 0..NUM_OF_DEVICE-1. The original code assigned
            # str(NUM_OF_DEVICE), which made only the single GPU with that
            # index visible instead of NUM_OF_DEVICE GPUs.
            os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
                str(i) for i in range(NUM_OF_DEVICE))
            places = fluid.cuda_places()
        else:
            places = fluid.cpu_places(NUM_OF_DEVICE)
    else:
        if USE_GPU:
            places = fluid.cuda_places(0)
        else:
            places = fluid.cpu_places(1)
    return places
def __init__(self,
             num_classes=2,
             use_bce_loss=False,
             use_dice_loss=False,
             class_weight=None,
             ignore_index=255,
             sync_bn=True):
    """Initialize a segmentation model wrapper.

    Args:
        num_classes: number of segmentation classes.
        use_bce_loss: use binary cross-entropy loss (binary tasks only).
        use_dice_loss: use dice loss (binary tasks only).
        class_weight: per-class loss weights — a list of length
            num_classes, the string 'dynamic', or None.
        ignore_index: label value excluded from the loss.
        sync_bn: synchronize BatchNorm statistics across devices.

    Raises:
        ValueError: if bce/dice loss is requested for a multi-class task,
            or class_weight has the wrong length / wrong string value.
        TypeError: if class_weight is neither a list nor a string.
    """
    self.init_params = locals()
    # bce/dice losses only make sense for binary segmentation.
    if num_classes > 2 and (use_bce_loss or use_dice_loss):
        raise ValueError(
            "dice loss and bce loss is only applicable to binary classfication"
        )
    if class_weight is not None:
        if isinstance(class_weight, list):
            if len(class_weight) != num_classes:
                raise ValueError(
                    "Length of class_weight should be equal to number of classes"
                )
        elif isinstance(class_weight, str):
            if class_weight.lower() != 'dynamic':
                raise ValueError(
                    "if class_weight is string, must be dynamic!")
        else:
            raise TypeError(
                'Expect class_weight is a list or string but receive {}'.
                format(type(class_weight)))
    self.num_classes = num_classes
    self.use_bce_loss = use_bce_loss
    self.use_dice_loss = use_dice_loss
    self.class_weight = class_weight
    self.ignore_index = ignore_index
    self.sync_bn = sync_bn
    self.labels = None
    # Pick devices according to the detected environment.
    self.env_info = get_environ_info()
    if self.env_info['place'] == 'cpu':
        self.places = fluid.cpu_places()
    else:
        self.places = fluid.cuda_places()
    self.exe = fluid.Executor(self.places[0])
    self.train_prog = None
    self.test_prog = None
    self.parallel_train_prog = None
    self.train_inputs = None
    self.test_inputs = None
    self.train_outputs = None
    self.test_outputs = None
    self.train_data_loader = None
    self.eval_metrics = None
    # Current model status.
    self.status = 'Normal'
def piecewise_decay(args):
    """Build a piecewise-decay LR schedule and a Momentum optimizer.

    Steps per epoch are derived from the total image count and the global
    batch size (per-device batch size times device count); the LR is
    divided by 10 at each boundary epoch in `args.step_epochs`.

    Returns:
        (learning_rate, optimizer) pair.
    """
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    step = int(
        math.ceil(float(args.total_images) / (args.batch_size * len(places))))
    bd = [step * e for e in args.step_epochs]
    lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
    learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=args.momentum_rate,
        regularization=fluid.regularizer.L2Decay(args.l2_decay))
    return learning_rate, optimizer
def cosine_decay(args):
    """Build a cosine-decay LR schedule and a Momentum optimizer.

    Steps per epoch are derived from the total image count and the global
    batch size (per-device batch size times device count).

    Returns:
        (learning_rate, optimizer) pair.
    """
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    step = int(
        math.ceil(float(args.total_images) / (args.batch_size * len(places))))
    learning_rate = fluid.layers.cosine_decay(learning_rate=args.lr,
                                              step_each_epoch=step,
                                              epochs=args.num_epochs)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=args.momentum_rate,
        regularization=fluid.regularizer.L2Decay(args.l2_decay))
    return learning_rate, optimizer
def main(args):
    """Train the seq2seq model, export it for inference, then validate.

    Builds train/val programs, trains for `args.epoch` epochs, saves an
    inference model to ./model, and finally runs one validation pass.
    """
    # construct the sample
    input, output, data_size = construct_sample(args)
    # construct the train program
    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        seq2seq_model = SeqModel(args.seq_num, args.batch_size,
                                 args.hidden_size)
        ret_dict = seq2seq_model.build_graph()
    # Clone the program for validation BEFORE optimizer ops are inserted.
    val_program = train_program.clone()
    with fluid.program_guard(train_program, startup_program):
        optimizer = fluid.optimizer.Adam(args.lr)
        optimizer.minimize(ret_dict.loss)
    places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()
    train_loader = fluid.io.DataLoader.from_generator(
        feed_list=ret_dict.feed_list, capacity=3, iterable=True)
    train_loader.set_batch_generator(train_reader(input, output, data_size,
                                                  args.batch_size),
                                     places=places)
    exe = Executor(places[0])
    exe.run(startup_program)
    # train stage: use data_loader as reader
    for _ in range(args.epoch):
        for data in train_loader():
            results = exe.run(train_program,
                              feed=data,
                              fetch_list=ret_dict.fetch_list)
            print("train process loss:{}".format(results[0]))
    # save the model for inferencing
    with fluid.program_guard(train_program, startup_program):
        fluid.io.save_inference_model(dirname="./model", feeded_var_names=['feat', 'lod'], \
            target_vars=[ret_dict.last_predict], executor=exe, export_for_deployment=True)
    # val stage: use data_loader as reader
    val_loader = fluid.io.DataLoader.from_generator(
        feed_list=ret_dict.feed_list, capacity=3, iterable=True)
    val_loader.set_batch_generator(val_reader(input, output, data_size,
                                              output.shape[0]),
                                   places=places)
    for _ in range(1):
        # Fix: the original iterated `train_loader` (the uncalled loader
        # object) and ran `train_program` here, so the cloned `val_program`
        # and `val_loader` were never used. Validation now actually consumes
        # the validation loader and runs the optimizer-free program.
        for data in val_loader():
            results = exe.run(val_program,
                              feed=data,
                              fetch_list=ret_dict.fetch_list)
            print("val process loss:{}".format(results[0]))
def check_multi_card_fetch_var(self):
    """Build identical networks under every (memory_optimize,
    enable_inplace) combination, run them data-parallel on multiple
    cards, and assert that every fetched variable is bitwise identical
    across all compiled programs."""
    if self.is_invalid_test():
        return
    prog1, scope1, exe, loss1 = self.build_program_and_scope()
    scopes = []
    compiled_programs = []
    if self.use_cuda:
        places = fluid.cuda_places()
    else:
        places = fluid.cpu_places(self.device_count)
    for memory_optimize in [False, True]:
        for enable_inplace in [False, True]:
            prog, scope, _, loss = self.build_program_and_scope()
            scopes.append(scope)
            build_strategy = fluid.BuildStrategy()
            build_strategy.memory_optimize = memory_optimize
            build_strategy.enable_inplace = enable_inplace
            build_strategy.fuse_all_optimizer_ops = self.fuse_all_optimizer_ops
            compiled_program = fluid.CompiledProgram(
                prog).with_data_parallel(loss_name=loss.name,
                                         build_strategy=build_strategy,
                                         places=places)
            compiled_programs.append(compiled_program)
    repeated_var_names = self.get_all_vars(prog1) * 2
    random.shuffle(repeated_var_names)  # add some random
    for fetch_var in repeated_var_names:
        for _ in range(4):
            fetch_vals = []
            for scope, compiled_prog in zip(scopes, compiled_programs):
                with fluid.scope_guard(scope):
                    # NOTE(review): feed_dict is presumably a module-level
                    # fixture — it is not defined in this method.
                    fetch_val, = exe.run(compiled_prog,
                                        feed=feed_dict,
                                        fetch_list=[fetch_var])
                    fetch_vals.append(fetch_val)
            for item in fetch_vals:
                # Fix: the original asserted the same condition twice in a
                # row, the first time without a diagnostic message (so a
                # failure there hid the mismatching elements). Keep only
                # the informative assertion.
                self.assertTrue(
                    np.array_equal(fetch_vals[0], item),
                    "error var name: {}, fetch_vals[0]: {}, item: {}".
                    format(fetch_var,
                           fetch_vals[0][~np.equal(fetch_vals[0], item)],
                           item[~np.equal(fetch_vals[0], item)]))
def main(args):
    """Load the saved inference model from ./model and run it on one
    test sample, printing the output shape and final result."""
    input, output, data_size = construct_sample(args)
    places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()
    exe = Executor(places[0])
    [inference_program, feed_target_names,
     fetch_targets] = (fluid.io.load_inference_model(dirname="./model",
                                                     executor=exe))
    feat, lod = test_reader()
    # Feed targets are positional: [0] is the feature tensor, [1] the lod.
    result = exe.run(inference_program,
                     feed={
                         feed_target_names[0]: feat,
                         feed_target_names[1]: lod
                     },
                     fetch_list=fetch_targets)
    print(result[0].shape)
    output_final_result(result[0])
def run_network(self, iterable, use_cuda, drop_last):
    """Run a tiny fc network fed by a DataLoader and validate batch counts.

    In iterable mode batches are fed explicitly; otherwise the loader is
    driven with start()/reset() until EOFException. With drop_last and
    multiple places some trailing batches may be dropped, so only the
    single-place / non-drop cases require an exact batch count.
    """
    x = fluid.data(shape=[None, 1], name='x', dtype='float32')
    places = fluid.cuda_places() if use_cuda else fluid.cpu_places(4)
    loader = fluid.io.DataLoader.from_generator(feed_list=[x],
                                                capacity=16,
                                                iterable=iterable,
                                                drop_last=drop_last)
    y = fluid.layers.fc(x, size=10)
    loss = fluid.layers.reduce_mean(y)
    exe = fluid.Executor(places[0])
    exe.run(fluid.default_startup_program())
    prog = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            places=places, loss_name=loss.name)
    # Non-iterable loaders bind places at creation, so pass None then.
    loader.set_batch_generator(self.create_reader(),
                               places=places if iterable else None)
    for _ in six.moves.range(self.epoch_num):
        actual_batch_num = 0
        if loader.iterable:
            for feed_data in loader():
                x_data, = exe.run(prog, feed=feed_data, fetch_list=[x])
                # Fetched data concatenates per-device batches, so its size
                # must be a non-zero multiple of the batch size.
                self.assertEqual(x_data.shape[0] % self.batch_size, 0)
                self.assertTrue(x_data.shape[0] != 0)
                actual_batch_num += int(x_data.shape[0] / self.batch_size)
        else:
            loader.start()
            try:
                while True:
                    x_data, = exe.run(prog, fetch_list=[x])
                    self.assertEqual(x_data.shape[0] % self.batch_size, 0)
                    self.assertTrue(x_data.shape[0] != 0)
                    actual_batch_num += int(x_data.shape[0] /
                                            self.batch_size)
            except fluid.core.EOFException:
                loader.reset()
        if not drop_last or len(places) == 1:
            self.assertEqual(self.batch_num, actual_batch_num)
        else:
            self.assertGreater(self.batch_num, actual_batch_num)
def test_analysis_helper(self):
    """Train MobileNet on MNIST for a step and exercise VarCollector in
    both abs_max (EMA) mode and plain run mode, including PDF output."""
    image = fluid.layers.data(name='image',
                              shape=[1, 28, 28],
                              dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    model = MobileNet()
    out = model.net(input=image, class_dim=10)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(4e-5))
    optimizer.minimize(avg_cost)
    main_prog = fluid.default_main_program()
    places = fluid.cuda_places() if fluid.is_compiled_with_cuda(
    ) else fluid.cpu_places()
    exe = fluid.Executor(places[0])
    train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(),
                                         batch_size=64)
    train_loader = fluid.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=512,
        use_double_buffer=True,
        iterable=True)
    train_loader.set_sample_list_generator(train_reader, places)
    exe.run(fluid.default_startup_program())
    # Collect abs-max statistics with EMA over a few named activations.
    vars = ['conv2d_0.tmp_0', 'fc_0.tmp_0', 'fc_0.tmp_1', 'fc_0.tmp_2']
    var_collector1 = VarCollector(main_prog, vars, use_ema=True)
    values = var_collector1.abs_max_run(train_loader,
                                        exe,
                                        step=None,
                                        loss_name=avg_cost.name)
    # Collect raw values for every persistable variable (no EMA) and dump
    # the distributions to PDF.
    vars = [v.name for v in main_prog.list_vars() if v.persistable]
    var_collector2 = VarCollector(main_prog, vars, use_ema=False)
    values = var_collector2.run(train_loader,
                                exe,
                                step=None,
                                loss_name=avg_cost.name)
    var_collector2.pdf(values)
def infer(use_cuda):
    """Run beam-search translation with the static-graph seq2seq model.

    Loads trained params, decodes each batch, and prints the source
    sentence plus all beam hypotheses (EOS tokens stripped).
    """
    # define program
    infer_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs, loader = data_func(is_train=False)
            predict_seqs = model_func(inputs, is_train=False)
    # define data source
    places = fluid.cuda_places() if use_cuda else fluid.cpu_places()
    loader.set_batch_generator(inputs_generator(batch_size,
                                                eos_id,
                                                is_train=False),
                               places=places)
    # Reverse dictionaries map token ids back to words for display.
    src_idx2word = paddle.dataset.wmt16.get_dict("en",
                                                 source_dict_size,
                                                 reverse=True)
    trg_idx2word = paddle.dataset.wmt16.get_dict("de",
                                                 target_dict_size,
                                                 reverse=True)
    exe = fluid.Executor(places[0])
    exe.run(startup_prog)
    fluid.io.load_params(exe, model_save_dir, main_program=infer_prog)
    prog = fluid.CompiledProgram(infer_prog).with_data_parallel()
    for data in loader():
        seq_ids = exe.run(prog, feed=data, fetch_list=[predict_seqs])[0]
        for ins_idx in range(seq_ids.shape[0]):
            print("Original sentence:")
            src_seqs = np.array(data[0]["src"])
            # Skip the leading BOS token ([1:]) and trailing EOS ids.
            print(" ".join([
                src_idx2word[idx] for idx in src_seqs[ins_idx][1:]
                if idx != eos_id
            ]))
            print("Translated sentence:")
            for beam_idx in range(beam_size):
                seq = [
                    trg_idx2word[idx]
                    for idx in seq_ids[ins_idx, :, beam_idx]
                    if idx != eos_id
                ]
                print(" ".join(seq).encode("utf8"))
def start(self, places=None):
    """start Pyreader

    Wraps `self.generator` in a batched, iterable PyReader bound to the
    given places (defaults to CUDA places when available, else CPU) and
    returns the reader's iterator.
    """
    if places is None:
        places = F.cuda_places() if F.core.is_compiled_with_cuda(
        ) else F.cpu_places()
    #assert self.pyreader is not None, 'use Dataset.features to build net first, then start dataset'

    def _gen():
        # Re-raise with logging so generator errors are visible instead of
        # being swallowed by the reader thread.
        try:
            for idx, i in enumerate(self.generator()):
                yield i
        except Exception as e:
            log.exception(e)
            raise e

    r = F.io.PyReader(feed_list=self.placeholders(),
                      capacity=50,
                      iterable=True)
    r.decorate_batch_generator(_gen, places=places)
    return r()
def run_without_worker_done(self, use_shared_memory=True):
    """Drive `_worker_loop` directly without signalling worker-done and
    expect it to hit an AssertionError (which the test treats as a pass);
    any other exception fails the test."""
    try:
        place = fluid.cpu_places()[0]
        with fluid.dygraph.guard(place):
            dataset = RandomDataset(800)

            # test init_fn
            def _init_fn(worker_id):
                pass

            # test collate_fn
            def _collate_fn(sample_list):
                return [
                    np.stack(s, axis=0) for s in list(zip(*sample_list))
                ]

            loader = DataLoader(dataset,
                                num_workers=1,
                                places=place,
                                use_shared_memory=use_shared_memory)
            assert loader.num_workers > 0, \
                "go to AssertionError and pass in Mac and Windows"
            loader = iter(loader)
            print("loader length", len(loader))
            indices_queue = multiprocessing.Queue()
            for i in range(10):
                indices_queue.put([i, i + 10])
            indices_queue.put(None)
            # Call the private worker loop directly with the loader's
            # internal queues/events.
            _worker_loop(loader._dataset, 0, indices_queue,
                         loader._data_queue, loader._workers_done_event,
                         True, _collate_fn, _init_fn, 0, 1,
                         loader._use_shared_memory)
            self.assertTrue(False)
    except AssertionError:
        pass
    except Exception as e:
        print("Exception", e)
        import sys
        sys.stdout.flush()
        self.assertTrue(False)
def get_next(self, exe, program):
    """Fetch the next batch, dispatching on how `program` will execute.

    A data-parallel CompiledProgram is fed through the parallel-executor
    path with one feed per device; anything else goes through the plain
    executor path.

    Args:
        exe: a fluid.Executor (asserted).
        program: a fluid.Program or fluid.CompiledProgram.

    Returns:
        Whatever the chosen feed helper returns.
    """
    assert isinstance(exe, fluid.Executor), "exe must be Executor"
    use_cuda = isinstance(exe.place, fluid.CUDAPlace)
    # Fix: removed an unused `result = []` local and merged the two
    # duplicated "plain executor" branches into one.
    if isinstance(program, fluid.CompiledProgram) and program._is_data_parallel:
        use_executor = False
        if program._places is None:
            # No explicit places: infer device count from the build type.
            device_num = len(fluid.cuda_places()) if use_cuda else len(
                fluid.cpu_places())
        else:
            device_num = len(program._places)
    else:
        use_executor = True
        device_num = 1
    if use_executor:
        return self._feed_executor()
    else:
        return self._feed_parallel_executor(device_num)