def main(args):
    """Train a static-graph model and save a checkpoint after every epoch.

    The configuration is loaded from ``args.config_yaml`` and may be patched
    with ``key=value`` entries from ``args.opt``. Training runs from epoch
    ``last_epoch + 1`` up to (not including) ``runner.epochs`` using either
    the ``DataLoader`` or the ``QueueDataset`` reader. After each epoch the
    default main program is saved with prefix ``rec_static``; when
    ``runner.use_inference`` is set an inference model is exported as well.

    Args:
        args: parsed command-line namespace; the attributes ``config_yaml``,
            ``abs_dir`` and ``opt`` are read.

    Raises:
        ValueError: if an ``args.opt`` entry contains no ``=``, or if a
            configured inference feed/fetch variable name is not present in
            the default main program's global block.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["yaml_path"] = args.config_yaml
    config["config_abs_dir"] = args.abs_dir

    # modify config from command
    if args.opt:
        for parameter in args.opt:
            # Split on the first "=" only so that the value itself may
            # contain "=" (e.g. a path or an expression). The value is
            # stored as the raw string taken from the command line.
            key, value = parameter.strip().split("=", 1)
            config[key] = value

    # load static model class
    static_model_class = load_static_model_class(config)
    input_data = static_model_class.create_feeds()
    input_data_names = [data.name for data in input_data]
    fetch_vars = static_model_class.net(input_data)
    # Logged before CPU_NUM is overwritten below, so this reports the value
    # inherited from the environment.
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))
    static_model_class.create_optimizer()

    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    use_visual = config.get("runner.use_visual", False)
    use_inference = config.get("runner.use_inference", False)
    auc_num = config.get("runner.auc_num", 1)
    train_data_dir = config.get("runner.train_data_dir", None)
    epochs = config.get("runner.epochs", None)
    print_interval = config.get("runner.print_interval", None)
    model_save_path = config.get("runner.model_save_path", "model_output")
    batch_size = config.get("runner.train_batch_size", None)
    reader_type = config.get("runner.reader_type", "DataLoader")
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, use_visual: {}, train_batch_size: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}"
        .format(use_gpu, use_visual, batch_size, train_data_dir, epochs,
                print_interval, model_save_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    last_epoch_id = config.get("last_epoch", -1)

    # Create a log_visual object and store the data in the path
    if use_visual:
        # Imported lazily so visualdl is only required when it is enabled.
        from visualdl import LogWriter
        log_visual = LogWriter(args.abs_dir + "/visualDL_log/train")
    else:
        log_visual = None
    step_num = 0

    if reader_type == 'QueueDataset':
        dataset, _ = get_reader(input_data, config)
    elif reader_type == 'DataLoader':
        train_dataloader = create_data_loader(config=config, place=place)

    for epoch_id in range(last_epoch_id + 1, epochs):
        epoch_begin = time.time()
        if use_auc:
            reset_auc(auc_num)

        if reader_type == 'DataLoader':
            fetch_batch_var, step_num = dataloader_train(
                epoch_id, train_dataloader, input_data_names, fetch_vars,
                exe, config, use_visual, log_visual, step_num)
            metric_str = "".join(
                "{}: {}, ".format(var_name, fetch_batch_var[var_idx])
                for var_idx, var_name in enumerate(fetch_vars))
            logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                        "epoch time: {:.2f} s".format(time.time() -
                                                      epoch_begin))
        elif reader_type == 'QueueDataset':
            dataset_train(epoch_id, dataset, fetch_vars, exe, config)
            logger.info("epoch: {} done, ".format(epoch_id) +
                        "epoch time: {:.2f} s".format(time.time() -
                                                      epoch_begin))
        else:
            # Unknown reader types are only reported; the epoch still
            # falls through to the save step below.
            logger.info("reader type wrong")

        main_program = paddle.static.default_main_program()
        save_static_model(
            main_program, model_save_path, epoch_id, prefix='rec_static')

        if use_inference:
            feed_var_names = config.get(
                "runner.save_inference_feed_varnames", [])
            fetch_var_names = config.get(
                "runner.save_inference_fetch_varnames", [])
            feedvars = _lookup_program_vars(main_program, feed_var_names,
                                            "Feed")
            fetchvars = _lookup_program_vars(main_program, fetch_var_names,
                                             "Fetch")
            save_inference_model(model_save_path, epoch_id, feedvars,
                                 fetchvars, exe)


def _lookup_program_vars(program, var_names, kind):
    """Resolve variable names against the global block of ``program``.

    Args:
        program: program whose ``global_block().vars`` mapping is searched.
        var_names: iterable of variable names to resolve.
        kind: label placed at the start of the error message
            (``"Feed"`` or ``"Fetch"``).

    Returns:
        A list of the matching variables, in the order of ``var_names``.

    Raises:
        ValueError: if a name is missing from the global block.
    """
    block_vars = program.global_block().vars
    resolved = []
    for var_name in var_names:
        if var_name not in block_vars:
            raise ValueError(
                "{} variable: {} not in default_main_program, global block has follow vars: {}"
                .format(kind, var_name, block_vars.keys()))
        resolved.append(block_vars[var_name])
    return resolved
def main(args):
    """Run static-graph training configured by ``args.config_yaml``.

    Builds the network through the class returned by
    ``load_static_model_class``, runs the startup program, then trains from
    epoch ``last_epoch + 1`` up to (not including) ``runner.epochs`` with the
    reader selected by ``runner.reader_type``. The default main program is
    saved with prefix ``rec_static`` at the end of every epoch.

    Args:
        args: parsed command-line namespace; the attributes ``config_yaml``
            and ``abs_dir`` are read.
    """
    paddle.seed(12345)

    # load config
    cfg = load_yaml(args.config_yaml)
    cfg["yaml_path"] = args.config_yaml
    cfg["config_abs_dir"] = args.abs_dir

    # load static model class
    model = load_static_model_class(cfg)
    feeds = model.create_feeds()
    feed_names = [feed.name for feed in feeds]
    fetch_vars = model.net(feeds)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))
    model.create_optimizer()

    # runner settings
    use_gpu = cfg.get("runner.use_gpu", True)
    use_auc = cfg.get("runner.use_auc", False)
    auc_num = cfg.get("runner.auc_num", 1)
    train_data_dir = cfg.get("runner.train_data_dir", None)
    epochs = cfg.get("runner.epochs", None)
    print_interval = cfg.get("runner.print_interval", None)
    model_save_path = cfg.get("runner.model_save_path", "model_output")
    # The next two values are looked up but not consumed anywhere below.
    model_init_path = cfg.get("runner.model_init_path", None)
    batch_size = cfg.get("runner.train_batch_size", None)
    reader_type = cfg.get("runner.reader_type", "DataLoader")
    os.environ["CPU_NUM"] = str(cfg.get("runner.thread_num", 1))

    banner = "**************common.configs**********"
    summary = (
        "use_gpu: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}"
        .format(use_gpu, train_data_dir, epochs, print_interval,
                model_save_path))
    logger.info(banner)
    logger.info(summary)
    logger.info(banner)

    device = 'gpu' if use_gpu else 'cpu'
    place = paddle.set_device(device)
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    first_epoch = cfg.get("last_epoch", -1) + 1

    if reader_type == 'QueueDataset':
        dataset, file_list = get_reader(feeds, cfg)
    elif reader_type == 'DataLoader':
        train_dataloader = create_data_loader(config=cfg, place=place)

    for epoch_id in range(first_epoch, epochs):
        epoch_begin = time.time()
        if use_auc:
            reset_auc(auc_num)

        if reader_type == 'DataLoader':
            batch_results = dataloader_train(epoch_id, train_dataloader,
                                             feed_names, fetch_vars, exe,
                                             cfg)
            metric_str = "".join(
                "{}: {}, ".format(name, batch_results[idx])
                for idx, name in enumerate(fetch_vars))
            logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                        "epoch time: {:.2f} s".format(time.time() -
                                                      epoch_begin))
        elif reader_type == 'QueueDataset':
            batch_results = dataset_train(epoch_id, dataset, fetch_vars,
                                          exe, cfg)
            logger.info("epoch: {} done, ".format(epoch_id) +
                        "epoch time: {:.2f} s".format(time.time() -
                                                      epoch_begin))
        else:
            # An unrecognised reader type is only logged; the checkpoint
            # below is still written.
            logger.info("reader type wrong")

        save_static_model(
            paddle.static.default_main_program(),
            model_save_path,
            epoch_id,
            prefix='rec_static')
def main(args):
    """Train a static-graph model with an inline, timed batch loop.

    Loads the configuration from ``args.config_yaml``, builds the network
    through the class returned by ``load_static_model_class``, runs the
    startup program and trains from epoch ``last_epoch + 1`` up to (not
    including) ``runner.epochs``. Every ``runner.print_interval`` batches the
    fetched metrics and reader/run timing statistics are logged. The default
    main program is saved with prefix ``rec_static`` after each epoch.

    ``runner.epochs``, ``runner.print_interval`` and
    ``runner.train_batch_size`` default to ``None`` and are used directly in
    ``range``/arithmetic, so they must be set in the configuration.

    Args:
        args: parsed command-line namespace; the attributes ``config_yaml``
            and ``abs_dir`` are read.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir

    # load static model class
    static_model_class = load_static_model_class(config)
    input_data = static_model_class.create_feeds()
    input_data_names = [data.name for data in input_data]
    fetch_vars = static_model_class.net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))
    static_model_class.create_optimizer()

    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    train_data_dir = config.get("runner.train_data_dir", None)
    epochs = config.get("runner.epochs", None)
    print_interval = config.get("runner.print_interval", None)
    model_save_path = config.get("runner.model_save_path", "model_output")
    batch_size = config.get("runner.train_batch_size", None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}".
        format(use_gpu, train_data_dir, epochs, print_interval,
               model_save_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    last_epoch_id = config.get("last_epoch", -1)
    train_dataloader = create_data_loader(config=config, place=place)
    # Looked up once instead of on every batch.
    main_program = paddle.static.default_main_program()

    for epoch_id in range(last_epoch_id + 1, epochs):
        epoch_begin = time.time()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        # Reset per epoch: without this an epoch that yields no batches
        # would either hit an unbound name (first epoch) or silently report
        # the previous epoch's metrics.
        fetch_batch_var = None
        ran_any_batch = False

        if use_auc:
            reset_auc()

        for batch_id, batch_data in enumerate(train_dataloader()):
            train_reader_cost += time.time() - reader_start
            train_start = time.time()

            fetch_batch_var = exe.run(
                program=main_program,
                feed=dict(zip(input_data_names, batch_data)),
                fetch_list=list(fetch_vars.values()))
            ran_any_batch = True
            train_run_cost += time.time() - train_start
            total_samples += batch_size

            if batch_id % print_interval == 0:
                # NOTE: the averages divide by print_interval even for the
                # report at batch 0, where only one batch was accumulated.
                metric_str = _format_metrics(fetch_vars, fetch_batch_var)
                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                    metric_str +
                    "avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
                    format(train_reader_cost / print_interval,
                           (train_reader_cost + train_run_cost) /
                           print_interval,
                           total_samples / print_interval,
                           total_samples /
                           (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

        if ran_any_batch:
            metric_str = _format_metrics(fetch_vars, fetch_batch_var)
        else:
            logger.warning(
                "epoch: {} produced no batches, no metrics to report".format(
                    epoch_id))
            metric_str = ""
        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    "epoch time: {:.2f} s".format(time.time() - epoch_begin))

        save_static_model(
            main_program, model_save_path, epoch_id, prefix='rec_static')


def _format_metrics(fetch_vars, fetch_batch_var):
    """Render fetched values as ``"name: value, "`` pairs in one string.

    Args:
        fetch_vars: mapping whose iteration order gives the metric names.
        fetch_batch_var: sequence of fetched values, indexed by the position
            of the corresponding name in ``fetch_vars``.

    Returns:
        The concatenated ``"name: value, "`` fragments (empty when
        ``fetch_vars`` is empty).
    """
    return "".join(
        "{}: {}, ".format(var_name, fetch_batch_var[var_idx])
        for var_idx, var_name in enumerate(fetch_vars))