# ---------------------------------------------------------------------------
# Remaining CLI flags (the ArgumentParser instance `parser` is created
# earlier in this file), then dispatch to train() with the parsed options.
# ---------------------------------------------------------------------------
parser.add_argument('--eval_memsize', type=int, default=5,
                    help="Approximate amount of avalable memory on GPU, used for calculation of optimal evaluation batch size")
parser.add_argument('--gpu', default=0, type=int,
                    help='GPU instance to use')
parser.add_argument('--gpu_allow_growth', default=False, action='store_true',
                    help='Allow to gradually increase GPU memory usage instead of grabbing all available memory at start')
parser.add_argument('--save_best_model', default=False, action='store_true',
                    help='Save best model during training. Requires do_eval=True')
parser.add_argument('--no_forward_split', default=True, dest='forward_split', action='store_false',
                    help='Use walk-forward split for model evaluation. Requires do_eval=True')
parser.add_argument('--side_split', default=False, action='store_true',
                    help='Use side split for model evaluation. Requires do_eval=True')
parser.add_argument('--no_eval', default=True, dest='do_eval', action='store_false',
                    help="Don't evaluate model quality during training")
parser.add_argument('--no_summaries', default=True, dest='write_summaries', action='store_false',
                    help="Don't Write Tensorflow summaries")
parser.add_argument('--verbose', default=False, action='store_true',
                    help='Print additional information during graph construction')
parser.add_argument('--asgd_decay', type=float,
                    help="EMA decay for averaged SGD. Not use ASGD if not set")
parser.add_argument('--no_tqdm', default=True, dest='tqdm', action='store_false',
                    help="Don't use tqdm for status display during training")
parser.add_argument('--max_steps', type=int,
                    help="Stop training after max steps")
parser.add_argument('--save_from_step', type=int,
                    help="Save model on each evaluation (10 evals per epoch), starting from this step")
parser.add_argument('--predict_window', default=63, type=int,
                    help="Number of days to predict")

args = parser.parse_args()

# Turn the parsed namespace into train() keyword arguments: drop the raw
# 'hparam_set' name and hand over the materialized hparams object instead.
param_dict = {key: value for key, value in vars(args).items() if key != 'hparam_set'}
param_dict['hparams'] = build_from_set(args.hparam_set)

train(**param_dict)

# Example of a direct (non-CLI) invocation, kept for reference:
# hparams = build_hparams()
# result = train("definc_attn", hparams, n_models=1, train_sampling=1.0, eval_sampling=1.0, patience=5, multi_gpu=True,
#                save_best_model=False, gpu=0, eval_memsize=15, seed=5, verbose=True, forward_split=False,
#                write_summaries=True, side_split=True, do_eval=False, predict_window=63, asgd_decay=None, max_steps=11500,
#                save_from_step=10500)
# print("Training result:", result)
# preds = predict('data/cpt/fair_365-15428', 380, hparams, verbose=True, back_offset=60, n_models=3)
# print(preds)
def main(_):
    """Export the trained ucdoc checkpoint as a TensorFlow SavedModel.

    Rebuilds the prediction graph (VarFeeder -> InputPipe -> Model) in
    PREDICT mode, restores the latest checkpoint from FLAGS.ckpt_dir and
    writes a SavedModel with a single serving signature under
    FLAGS.saved_dir/FLAGS.model_version.
    """
    # Basic CLI validation: the three directory flags are required.
    if len(sys.argv) < 3:
        print(
            'Usage: ucdoc_saved_model.py [--model_version=y] --data_dir=xxx --ckpt_dir=xxx --saved_dir=xxx'
        )
        sys.exit(-1)
    if FLAGS.training_iteration <= 0:
        print('Please specify a positive value for training iteration.')
        sys.exit(-1)
    if FLAGS.model_version <= 0:
        print('Please specify a positive value for version number.')
        sys.exit(-1)

    # Create the deploy (inference) graph first.  Input tensors live on the
    # CPU; the feature data comes from the pre-extracted vars directory.
    with tf.variable_scope('input'):
        with tf.device("/cpu:0"):
            inp = VarFeeder.read_vars(FLAGS.data_dir)
            pipe = InputPipe(inp,
                             ucdoc_features(inp),
                             inp.hits.shape[0],
                             mode=ModelMode.PREDICT,
                             batch_size=FLAGS.batch_size,
                             n_epoch=1,
                             verbose=False,
                             train_completeness_threshold=0.01,
                             predict_window=FLAGS.predict_window,
                             predict_completeness_threshold=0.0,
                             train_window=FLAGS.train_window,
                             back_offset=FLAGS.predict_window + 1)

    asgd_decay = 0.99 if FLAGS.asgd else None
    if FLAGS.n_models == 1:
        model = Model(pipe,
                      build_from_set(FLAGS.hparam_set),
                      is_train=False,
                      seed=1,
                      asgd_decay=asgd_decay)
    else:
        # Ensemble: each sub-model is built under its own m_<i> variable
        # scope; only the FLAGS.target_model member is exported.
        models = []
        for i in range(FLAGS.n_models):
            prefix = f"m_{i}"
            with tf.variable_scope(prefix):
                models.append(
                    Model(pipe,
                          build_from_set(FLAGS.hparam_set),
                          is_train=False,
                          seed=1,
                          asgd_decay=asgd_decay,
                          graph_prefix=prefix))
        model = models[FLAGS.target_model]

    # Restore the trained weights from the newest checkpoint.
    print('loading checkpoint model...')
    ckpt_file = tf.train.latest_checkpoint(FLAGS.ckpt_dir)
    saver = tf.train.Saver(name='deploy_saver', var_list=None)
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
            allow_growth=True))) as sess:
        pipe.load_vars(sess)
        pipe.init_iterator(sess)
        saver.restore(sess, ckpt_file)
        print('Done loading checkpoint model')

        # Export directory is <saved_dir>/<model_version>; as_bytes keeps
        # both path components the same (bytes) type for os.path.join.
        export_path = os.path.join(
            tf.compat.as_bytes(FLAGS.saved_dir),
            tf.compat.as_bytes(str(FLAGS.model_version)))
        print('Exporting trained model to', export_path)
        if os.path.isdir(export_path):
            # Remove a stale export so SavedModelBuilder does not fail on an
            # existing directory.
            shutil.rmtree(export_path)
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        # TensorInfo protos for every tensor exposed by the serving signature.
        true_x = tf.saved_model.utils.build_tensor_info(model.inp.true_x)
        time_x = tf.saved_model.utils.build_tensor_info(model.inp.time_x)
        norm_x = tf.saved_model.utils.build_tensor_info(model.inp.norm_x)
        lagged_x = tf.saved_model.utils.build_tensor_info(model.inp.lagged_x)
        true_y = tf.saved_model.utils.build_tensor_info(model.inp.true_y)
        time_y = tf.saved_model.utils.build_tensor_info(model.inp.time_y)
        norm_y = tf.saved_model.utils.build_tensor_info(model.inp.norm_y)
        norm_mean = tf.saved_model.utils.build_tensor_info(model.inp.norm_mean)
        norm_std = tf.saved_model.utils.build_tensor_info(model.inp.norm_std)
        pg_features = tf.saved_model.utils.build_tensor_info(
            model.inp.ucdoc_features)
        page_ix = tf.saved_model.utils.build_tensor_info(model.inp.page_ix)
        pred = tf.saved_model.utils.build_tensor_info(model.predictions)

        labeling_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={
                    "truex": true_x,
                    "timex": time_x,
                    "normx": norm_x,
                    "laggedx": lagged_x,
                    "truey": true_y,
                    "timey": time_y,
                    "normy": norm_y,
                    "normmean": norm_mean,
                    "normstd": norm_std,
                    "page_features": pg_features,
                    "pageix": page_ix,
                },
                outputs={"pred": pred},
                method_name="tensorflow/serving/predict"))

        # main_op initializes any lookup tables when the SavedModel is loaded.
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                labeling_signature
            },
            main_op=tf.tables_initializer(),
            strip_default_attrs=True)
        builder.save()
        print("Build Done")
# ---------------------------------------------------------------------------
# Script entry point: read the trainer section of a YAML config file and
# dispatch to train() with its contents as keyword arguments.
# ---------------------------------------------------------------------------
# Route DEBUG logging through a tqdm-aware stderr wrapper so progress bars
# and log lines do not clobber each other.
logging.basicConfig(level=logging.DEBUG, stream=DummyTqdmFile(sys.stderr))
log = logging.getLogger('trainer')

parser = argparse.ArgumentParser(description='Prepare data')
parser.add_argument('config_file')
args = parser.parse_args()

with open(args.config_file, 'r') as ymlfile:
    # safe_load: config is plain data; the Loader-less yaml.load is
    # deprecated and can execute arbitrary constructors.
    cfg = yaml.safe_load(ymlfile)

# Replace the raw 'hparam_set' name with the materialized hparams object
# before forwarding the trainer section as train() keyword arguments.
param_dict = cfg['trainer']
param_dict['hparams'] = build_from_set(cfg['trainer']['hparam_set'])
del param_dict['hparam_set']

train(**param_dict)

# Example of a direct (non-CLI) invocation, kept for reference:
# hparams = build_hparams()
# ckpt_file = tf.train.latest_checkpoint('data/cpt/s32')
# preds = predict([ckpt_file], hparams, return_x=True, back_offset=11, predict_window=10, verbose=False,
#                 n_models=1, target_model=0, asgd=False, batch_size=1)
# print(preds)
def main(_):
    """Export the trained checkpoint as a TensorFlow SavedModel.

    Rebuilds the prediction graph in PREDICT mode (optionally extending the
    input data into the future when back_offset < predict_window), restores
    the latest checkpoint from FLAGS.ckpt_dir — using EMA shadow variables
    when ASGD was enabled — and writes a SavedModel with a single serving
    signature under FLAGS.saved_dir/FLAGS.model_version.
    """
    # Basic CLI validation: the three directory flags are required.
    if len(sys.argv) < 3:
        print(
            'Usage: saved_model.py [--model_version=y] --data_dir=xxx --ckpt_dir=xxx --saved_dir=xxx'
        )
        sys.exit(-1)
    if FLAGS.training_iteration <= 0:
        print('Please specify a positive value for training iteration.')
        sys.exit(-1)
    if FLAGS.model_version <= 0:
        print('Please specify a positive value for version number.')
        sys.exit(-1)

    with open(FLAGS.config_file, 'r') as ymlfile:
        # safe_load: config is plain data; the Loader-less yaml.load is
        # deprecated and can execute arbitrary constructors.
        cfg = yaml.safe_load(ymlfile)
    holiday_list = cfg['pipeline']['normalization']['holidays']

    # When the requested back offset does not leave room for a full predict
    # window, extend the input data into the future first.
    if FLAGS.back_offset < FLAGS.predict_window:
        extend_inp(FLAGS.data_dir, FLAGS.predict_window, holiday_list)

    # Create the deploy (inference) graph first; input tensors live on CPU.
    back_offset_ = FLAGS.back_offset
    with tf.variable_scope('input'):
        with tf.device("/cpu:0"):
            if FLAGS.back_offset < FLAGS.predict_window:
                # Read the extended ('predict_future') vars and shift the
                # offset so the predict window stays inside the data.
                inp = VarFeeder.read_vars(
                    os.path.join(FLAGS.data_dir, 'predict_future'))
                back_offset_ += FLAGS.predict_window
            else:
                inp = VarFeeder.read_vars(FLAGS.data_dir)
            pipe = InputPipe(inp,
                             ucdoc_features(inp),
                             inp.hits.shape[0],
                             mode=ModelMode.PREDICT,
                             batch_size=FLAGS.batch_size,
                             n_epoch=1,
                             verbose=False,
                             train_completeness_threshold=0.01,
                             predict_window=FLAGS.predict_window,
                             predict_completeness_threshold=0.0,
                             train_window=FLAGS.train_window,
                             back_offset=back_offset_)

    asgd_decay = 0.99 if FLAGS.asgd else None
    if FLAGS.n_models == 1:
        model = Model(pipe,
                      build_from_set(FLAGS.hparam_set),
                      is_train=False,
                      seed=1,
                      asgd_decay=asgd_decay)
    else:
        # Ensemble: each sub-model is built under its own m_<i> variable
        # scope; only the FLAGS.target_model member is exported.
        models = []
        for i in range(FLAGS.n_models):
            prefix = f"m_{i}"
            with tf.variable_scope(prefix):
                models.append(
                    Model(pipe,
                          build_from_set(FLAGS.hparam_set),
                          is_train=False,
                          seed=1,
                          asgd_decay=asgd_decay,
                          graph_prefix=prefix))
        model = models[FLAGS.target_model]

    if FLAGS.asgd:
        # Restore EMA shadow values instead of the raw weights.
        var_list = model.ema.variables_to_restore()
        if FLAGS.n_models > 1:
            # BUG FIX: was f"m_{target_model}" with an undefined name
            # (NameError); the flag value is FLAGS.target_model.
            prefix = f"m_{FLAGS.target_model}"
            # Keep only the target model's EMA variables in the restore map.
            for var in list(var_list.keys()):
                if var.endswith('ExponentialMovingAverage'
                                ) and not var.startswith(prefix):
                    del var_list[var]
    else:
        var_list = None

    # Restore the trained weights from the newest checkpoint.
    print('loading checkpoint model...')
    ckpt_file = tf.train.latest_checkpoint(FLAGS.ckpt_dir)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(name='deploy_saver', var_list=var_list)
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
            allow_growth=True))) as sess:
        # Initialize everything first so variables excluded from var_list
        # (non-EMA weights) still hold defined values, then restore.
        sess.run(init)
        pipe.load_vars(sess)
        pipe.init_iterator(sess)
        saver.restore(sess, ckpt_file)
        print('Done loading checkpoint model')

        # Export directory is <saved_dir>/<model_version>; as_bytes keeps
        # both path components the same (bytes) type for os.path.join.
        export_path = os.path.join(
            tf.compat.as_bytes(FLAGS.saved_dir),
            tf.compat.as_bytes(str(FLAGS.model_version)))
        print('Exporting trained model to', export_path)
        if os.path.isdir(export_path):
            # Remove a stale export so SavedModelBuilder does not fail on an
            # existing directory.
            shutil.rmtree(export_path)
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        # TensorInfo protos for every tensor exposed by the serving signature.
        true_x = tf.saved_model.utils.build_tensor_info(model.inp.true_x)
        time_x = tf.saved_model.utils.build_tensor_info(model.inp.time_x)
        norm_x = tf.saved_model.utils.build_tensor_info(model.inp.norm_x)
        lagged_x = tf.saved_model.utils.build_tensor_info(model.inp.lagged_x)
        true_y = tf.saved_model.utils.build_tensor_info(model.inp.true_y)
        time_y = tf.saved_model.utils.build_tensor_info(model.inp.time_y)
        norm_y = tf.saved_model.utils.build_tensor_info(model.inp.norm_y)
        norm_mean = tf.saved_model.utils.build_tensor_info(model.inp.norm_mean)
        norm_std = tf.saved_model.utils.build_tensor_info(model.inp.norm_std)
        pg_features = tf.saved_model.utils.build_tensor_info(
            model.inp.ucdoc_features)
        page_ix = tf.saved_model.utils.build_tensor_info(model.inp.page_ix)
        pred = tf.saved_model.utils.build_tensor_info(model.predictions)

        labeling_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={
                    "truex": true_x,
                    "timex": time_x,
                    "normx": norm_x,
                    "laggedx": lagged_x,
                    "truey": true_y,
                    "timey": time_y,
                    "normy": norm_y,
                    "normmean": norm_mean,
                    "normstd": norm_std,
                    "page_features": pg_features,
                    "pageix": page_ix,
                },
                outputs={"predictions": pred},
                method_name="tensorflow/serving/predict"))

        # main_op initializes any lookup tables when the SavedModel is loaded.
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                labeling_signature
            },
            main_op=tf.tables_initializer(),
            strip_default_attrs=True)
        builder.save()
        print("Build Done")