def test(args):
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    test_prog = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_prog, test_startup):
        with fluid.unique_name.guard():
            test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='test_reader', ernie_config=ernie_config)

    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(test_startup)

    predict = predict_wrapper(
        args,
        exe,
        ernie_config,
        test_prog=test_prog,
        pyreader=test_pyreader,
        fetch_list=[next_sent_acc.name, mask_lm_loss.name, total_loss.name])

    log.info("test begin")
    loss, lm_loss, acc, steps, speed = predict()
    log.info(
        "[test_set] loss: %f, global ppl: %f, next_sent_acc: %f, speed: %f steps/s"
        % (np.mean(np.array(loss) / steps),
           np.exp(np.mean(np.array(lm_loss) / steps)),
           np.mean(np.array(acc) / steps), speed))
def extract_weights(args):
    # add ERNIE to environment
    print('extract weights start'.center(60, '='))
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    args.max_seq_len = 512
    args.use_fp16 = False
    args.num_labels = 2
    args.loss_scaling = 1.0

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            create_model(args, pyreader_name='train', ernie_config=ernie_config)

    fluid.io.load_vars(exe,
                       args.init_pretraining_params,
                       main_program=test_prog,
                       predicate=if_exist)

    state_dict = collections.OrderedDict()
    weight_map = build_weight_map()
    for ernie_name, pytorch_name in weight_map.items():
        fluid_tensor = fluid.global_scope().find_var(ernie_name).get_tensor()
        fluid_array = np.array(fluid_tensor, dtype=np.float32)
        # fully-connected weights ('w_0') are stored transposed relative to PyTorch
        if 'w_0' in ernie_name:
            fluid_array = fluid_array.transpose()
        state_dict[pytorch_name] = fluid_array
        print(f'{ernie_name} -> {pytorch_name} {fluid_array.shape}')

    print('extract weights done!'.center(60, '='))
    return state_dict
def save_model(state_dict, dump_path):
    print('save model start'.center(60, '='))
    if not os.path.exists(dump_path):
        os.makedirs(dump_path)

    # save model weights
    for key in state_dict:
        state_dict[key] = torch.FloatTensor(state_dict[key])
    torch.save(state_dict, os.path.join(dump_path, "pytorch_model.bin"))
    print('finish save model')

    # save config; note: reads the module-level `args` for the ERNIE config path
    ernie_config = ErnieConfig(args.ernie_config_path)._config_dict
    # set layer_norm_eps; for details see: https://github.com/PaddlePaddle/LARK/issues/75
    ernie_config['layer_norm_eps'] = 1e-5
    with open(os.path.join(dump_path, "config.json"), 'wt', encoding='utf-8') as f:
        json.dump(ernie_config, f, indent=4)
    print('finish save config')

    # save vocab.txt (keep only the token column, drop the frequency column)
    with open(os.path.join(dump_path, "vocab.txt"), "wt", encoding='utf-8') as vocab_f, \
            open("./ERNIE/config/vocab.txt", "rt", encoding='utf-8') as f:
        for line in f:
            data = line.strip().split("\t")
            vocab_f.write(data[0] + "\n")
    print('finish save vocab')

    print('save model done!'.center(60, '='))
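# Usage sketch (not part of the original scripts): a minimal driver that chains
# extract_weights() and save_model() to produce a PyTorch-style checkpoint directory
# containing pytorch_model.bin, config.json and vocab.txt. `args` is assumed to be the
# parsed command-line namespace used throughout this file (save_model() also reads the
# module-level `args` for the config path); `dump_path` and the function name below are
# hypothetical, chosen only for illustration.
def convert_to_pytorch(args, dump_path="./ernie_pytorch"):
    state_dict = extract_weights(args)   # Paddle parameter name -> numpy array
    save_model(state_dict, dump_path)    # write weights, config.json and vocab.txt
    return dump_path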
def main(args):
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ExtractEmbeddingReader(
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case)

    startup_prog = fluid.Program()

    data_generator = reader.data_generator(input_file=args.data_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           shuffle=False)

    total_examples = reader.get_num_examples(args.data_set)

    print("Device count: %d" % dev_count)
    print("Total num examples: %d" % total_examples)

    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, startup_prog):
        with fluid.unique_name.guard():
            pyreader, graph_vars = create_model(args,
                                                pyreader_name='reader',
                                                ernie_config=ernie_config)

    infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    if args.init_pretraining_params:
        init_pretraining_params(exe,
                                args.init_pretraining_params,
                                main_program=startup_prog)
    else:
        raise ValueError("args 'init_pretraining_params' must be specified")

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = dev_count

    pyreader.set_batch_generator(data_generator)
    pyreader.start()

    total_cls_emb = []
    total_top_layer_emb = []

    while True:
        try:
            cls_emb, unpad_top_layer_emb = exe.run(
                program=infer_program,
                fetch_list=[
                    graph_vars["cls_embeddings"].name,
                    graph_vars["top_layer_embeddings"].name
                ],
                return_numpy=False)
            # batch_size * embedding_size
            total_cls_emb.append(np.array(cls_emb))
            total_top_layer_emb.append(np.array(unpad_top_layer_emb))
        except fluid.core.EOFException:
            break

    total_cls_emb = np.concatenate(total_cls_emb)
    total_top_layer_emb = np.concatenate(total_top_layer_emb)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    else:
        raise RuntimeError('output dir exists: %s' % args.output_dir)

    with open(os.path.join(args.output_dir, "cls_emb.npy"), "wb") as cls_emb_file:
        np.save(cls_emb_file, total_cls_emb)
    with open(os.path.join(args.output_dir, "top_layer_emb.npy"), "wb") as top_layer_emb_file:
        np.save(top_layer_emb_file, total_top_layer_emb)
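# Usage sketch (assumption, not in the original script): how the arrays written by
# main() above could be read back. cls_emb.npy holds one [CLS] vector per example
# (roughly [num_examples, embedding_size]); top_layer_emb.npy holds the unpadded
# top-layer token embeddings concatenated over all batches. Relies on the module-level
# `np` and `os` imports already used above; the helper name is hypothetical.
def load_extracted_embeddings(output_dir):
    cls_emb = np.load(os.path.join(output_dir, "cls_emb.npy"))
    top_layer_emb = np.load(os.path.join(output_dir, "top_layer_emb.npy"))
    return cls_emb, top_layer_emb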
def train(args):
    log.info("pretraining start")
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            train_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='train_reader', ernie_config=ernie_config)
            scheduled_lr, _ = optimization(
                loss=total_loss,
                warmup_steps=args.warmup_steps,
                num_train_steps=args.num_train_steps,
                learning_rate=args.learning_rate,
                train_program=train_program,
                startup_prog=startup_prog,
                weight_decay=args.weight_decay,
                scheduler=args.lr_scheduler,
                use_fp16=args.use_fp16,
                use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                init_loss_scaling=args.init_loss_scaling,
                incr_every_n_steps=args.incr_every_n_steps,
                decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                incr_ratio=args.incr_ratio,
                decr_ratio=args.decr_ratio)

    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='test_reader', ernie_config=ernie_config)

    test_prog = test_prog.clone(for_test=True)

    if args.use_cuda:
        if len(fluid.cuda_places()) == 0:
            raise RuntimeError('no CUDA device found, check your environment setting')
        place = fluid.cuda_places()[0]
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    log.info("Device count %d" % dev_count)
    log.info("theoretical memory usage: ")
    log.info(
        fluid.contrib.memory_usage(program=train_program,
                                   batch_size=args.batch_size //
                                   args.max_seq_len))

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    log.info("args.is_distributed: %s" % args.is_distributed)
    if args.is_distributed:
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        trainer_id = worker_endpoints.index(current_endpoint)
        if trainer_id == 0:
            log.info("trainer_id == 0, sleep 60s")
            time.sleep(60)
        log.info("worker_endpoints:{} trainers_num:{} current_endpoint:{} trainer_id:{}"
                 .format(worker_endpoints, trainers_num, current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.init_checkpoint and args.init_checkpoint != "":
        init_checkpoint(exe, args.init_checkpoint, train_program, args.use_fp16)

    data_reader = ErnieDataReader(filelist=args.train_filelist,
                                  batch_size=args.batch_size,
                                  vocab_path=args.vocab_path,
                                  voc_size=ernie_config['vocab_size'],
                                  epoch=args.epoch,
                                  max_seq_len=args.max_seq_len,
                                  generate_neg_sample=args.generate_neg_sample)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_fast_executor:
        exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = dev_count
    exec_strategy.num_iteration_per_drop_scope = min(10, args.skip_steps)

    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = False

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=total_loss.name,
                                       build_strategy=build_strategy,
                                       exec_strategy=exec_strategy,
                                       main_program=train_program,
                                       num_trainers=nccl2_num_trainers,
                                       trainer_id=nccl2_trainer_id)

    if args.valid_filelist and args.valid_filelist != "":
        predict = predict_wrapper(args,
                                  exe,
                                  ernie_config,
                                  test_prog=test_prog,
                                  pyreader=test_pyreader,
                                  fetch_list=[
                                      next_sent_acc.name, mask_lm_loss.name,
                                      total_loss.name
                                  ])

    train_pyreader.set_batch_generator(data_reader.data_generator())
    train_pyreader.start()
    steps = 0
    cost = []
    lm_cost = []
    acc = []
    time_begin = time.time()
    while steps < args.num_train_steps:
        try:
            steps += nccl2_num_trainers
            skip_steps = args.skip_steps * nccl2_num_trainers

            if nccl2_trainer_id != 0:
                train_exe.run(fetch_list=[])
                continue

            if steps % skip_steps != 0:
                train_exe.run(fetch_list=[])
            else:
                each_next_acc, each_mask_lm_cost, each_total_cost, np_lr = train_exe.run(
                    fetch_list=[
                        next_sent_acc.name, mask_lm_loss.name,
                        total_loss.name, scheduled_lr.name
                    ])
                acc.extend(each_next_acc)
                lm_cost.extend(each_mask_lm_cost)
                cost.extend(each_total_cost)

                log.info("feed_queue size %d" % train_pyreader.queue.size())
                time_end = time.time()
                used_time = time_end - time_begin
                epoch, current_file_index, total_file, current_file, mask_type = data_reader.get_progress()
                log.info("current learning_rate:%f" % np_lr[0])
                log.info(
                    "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                    "ppl: %f, next_sent_acc: %f, speed: %f steps/s, file: %s, mask_type: %s"
                    % (epoch, current_file_index, total_file, steps,
                       np.mean(np.array(cost)),
                       np.mean(np.exp(np.array(lm_cost))),
                       np.mean(np.array(acc)), skip_steps / used_time,
                       current_file, mask_type))
                cost = []
                lm_cost = []
                acc = []
                time_begin = time.time()

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)

            if args.valid_filelist and steps % args.validation_steps == 0:
                vali_cost, vali_lm_cost, vali_acc, vali_steps, vali_speed = predict()
                log.info(
                    "[validation_set] epoch: %d, step: %d, "
                    "loss: %f, global ppl: %f, batch-averaged ppl: %f, "
                    "next_sent_acc: %f, speed: %f steps/s"
                    % (epoch, steps,
                       np.mean(np.array(vali_cost) / vali_steps),
                       np.exp(np.mean(np.array(vali_lm_cost) / vali_steps)),
                       np.mean(np.exp(np.array(vali_lm_cost) / vali_steps)),
                       np.mean(np.array(vali_acc) / vali_steps), vali_speed))
        except fluid.core.EOFException:
            train_pyreader.reset()
            break
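# Launch sketch (assumption, not part of the original repository): train() only enters the
# nccl2 branch when args.is_distributed is set and the two Paddle environment variables it
# reads are present. For a single-machine, two-worker dry run, each worker process could set
# them like this before calling train(args); the helper name and endpoints are illustrative.
def setup_fake_distributed_env(trainer_index, endpoints=("127.0.0.1:6170", "127.0.0.1:6171")):
    # PADDLE_TRAINER_ENDPOINTS: comma-separated list of all worker endpoints
    os.environ["PADDLE_TRAINER_ENDPOINTS"] = ",".join(endpoints)
    # PADDLE_CURRENT_ENDPOINT: the endpoint belonging to this worker
    os.environ["PADDLE_CURRENT_ENDPOINT"] = endpoints[trainer_index]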