# Imports assumed by the functions below (TF1-era TensorFlow API). Local
# helpers such as get_configs, load_train_valid_data, data_utils,
# model_utils, the run_epoch* functions, and NoiseModel come from this
# repository's own modules.
import math
import os
import re
import sys
import time

import tensorflow as tf


def main(_):
    config = get_configs()
    train_data, valid_data = load_train_valid_data(config)
    train_data.cache(verbose=True)
    valid_data.cache(verbose=True)
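# A minimal sketch of the script entry point, assuming the TF1 tf.app.run()
# convention implied by the main(_) signature; wiring beyond this call is an
# assumption, not shown in the source.
if __name__ == "__main__":
    tf.app.run(main)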
def train_model(config):
    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    print("Loading training data from %s ..." % config.datafile)
    train_data = None
    valid_data = None
    if (config.validation_size > 0.0) or (config.split_date is not None):
        train_data, valid_data = data_utils.load_train_valid_data(config)
    else:
        train_data = data_utils.load_all_data(config, is_training_only=True)
        valid_data = train_data

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    tf_config.gpu_options.allow_growth = True

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("Constructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        if config.data_scaler is not None:
            start_time = time.time()
            print("Calculating scaling parameters ...", end=' ')
            sys.stdout.flush()
            scaling_params = train_data.get_scaling_params(config.data_scaler)
            model.set_scaling_params(session, **scaling_params)
            print("done in %.2f seconds." % (time.time() - start_time))
            print("%-10s %-6s %-6s" % ('feature', 'mean', 'std'))
            for i in range(len(train_data.feature_names)):
                center = "%.4f" % scaling_params['center'][i]
                scale = "%.4f" % scaling_params['scale'][i]
                print("%-10s %-6s %-6s" % (train_data.feature_names[i],
                                           center, scale))
            sys.stdout.flush()

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        for i in range(config.max_epoch):
            if config.UQ_model_type == 'MVE':
                # MVE epoch: mean-variance estimation
                (train_mse, train_mse_var,
                 valid_mse, valid_mse_var) = run_epoch_mve(
                     session, model, train_data, valid_data,
                     keep_prob=config.keep_prob, passes=config.passes,
                     verbose=True)
                # Used to stop training if the validation metric is nan
                is_metric_nan = math.isnan(valid_mse)
                print('Epoch: %d Train MSE: %.8f Valid MSE: %.8f '
                      'Learning rate: %.4f' %
                      (i + 1, train_mse, valid_mse, lr))
                print('Epoch: %d Train MSE_w_variance: %.8f '
                      'Valid MSE_w_variance: %.8f Learning rate: %.4f' %
                      (i + 1, train_mse_var, valid_mse_var, lr))
                sys.stdout.flush()
                train_history.append(train_mse_var)
                valid_history.append(valid_mse_var)
            elif config.UQ_model_type == 'PIE':
                # PIE epoch: prediction-interval estimation
                (train_mpiw, train_picp, train_picp_loss,
                 valid_mpiw, valid_picp, valid_picp_loss) = run_epoch_pie(
                     session, model, train_data, valid_data,
                     keep_prob=config.keep_prob, passes=config.passes,
                     verbose=True)
                train_loss = train_mpiw + config.picp_lambda * train_picp_loss
                valid_loss = valid_mpiw + config.picp_lambda * valid_picp_loss
                # Used to stop training if the validation loss is nan
                is_metric_nan = math.isnan(valid_loss)
                print('Epoch: %d Train MPIW: %.8f Valid MPIW: %.8f '
                      'Learning rate: %.4f' %
                      (i + 1, train_mpiw, valid_mpiw, lr))
                print('Epoch: %d Train PICP: %.8f Valid PICP: %.8f' %
                      (i + 1, train_picp, valid_picp))
                print('Epoch: %d Train LOSS: %.8f Valid LOSS: %.8f' %
                      (i + 1, train_loss, valid_loss))
                sys.stdout.flush()
                train_history.append(train_loss)
                valid_history.append(valid_loss)
            else:
                # Guard against is_metric_nan being unbound further down
                raise ValueError("Unknown UQ_model_type: %s" %
                                 config.UQ_model_type)

            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(
                    session, model, lr, config.lr_decay, train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                os.mkdir(config.model_dir)

            if is_metric_nan:
                print("Training failed due to nan.")
                quit()
            elif stop_training(config, valid_history):
                print("Training stopped.")
                quit()
            else:
                if ((config.early_stop is None) or
                        (valid_history[-1] <= min(valid_history))):
                    model_utils.save_model(session, config, i)
def train_model(config):
    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    print("Loading training data from %s ..." % config.datafile)
    train_data = None
    valid_data = None
    if (config.validation_size > 0.0) or (config.split_date is not None):
        train_data, valid_data = data_utils.load_train_valid_data(config)
    else:
        train_data = data_utils.load_all_data(config, is_training_only=True)
        valid_data = train_data

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("Constructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        params = model_utils.get_scaling_params(config, train_data,
                                                verbose=True)
        model.set_scaling_params(session, **params)

        noise_model = None
        if config.training_noise is not None:
            print("Training noise level: %.2f * 1-stdev" %
                  config.training_noise)
            noise_model = NoiseModel(seed=config.seed,
                                     scaling_params=params,
                                     degree=config.training_noise)

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)
        sys.stdout.flush()

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        for i in range(config.max_epoch):
            (train_mse, valid_mse) = run_epoch(
                session, model, train_data, valid_data,
                keep_prob=config.keep_prob, passes=config.passes,
                noise_model=noise_model, verbose=True)
            print('Epoch: %d Train MSE: %.6f Valid MSE: %.6f '
                  'Learning rate: %.4f' % (i + 1, train_mse, valid_mse, lr))
            sys.stdout.flush()

            train_history.append(train_mse)
            valid_history.append(valid_mse)

            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(
                    session, model, lr, config.lr_decay, train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                os.mkdir(config.model_dir)

            if math.isnan(valid_mse):
                print("Training failed due to nan.")
                quit()
            elif stop_training(config, valid_history):
                print("Training stopped.")
                quit()
            else:
                if ((config.early_stop is None) or
                        (valid_history[-1] <= min(valid_history))):
                    model_utils.save_model(session, config, i)
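# A sketch of the NoiseModel constructed above, assuming it perturbs each
# (already scaled) feature with zero-mean Gaussian noise whose stdev is
# `degree` times that feature's stdev from scaling_params. The class body is
# an assumption, not the repository's implementation.
import numpy as np


class NoiseModel(object):
    def __init__(self, seed=None, scaling_params=None, degree=0.0):
        self._rng = np.random.RandomState(seed)
        self._scale = np.asarray(scaling_params['scale'])
        self._degree = degree

    def apply(self, batch_inputs):
        # batch_inputs: array of shape [batch_size, num_features]
        noise = self._rng.normal(0.0, self._degree * self._scale,
                                 size=batch_inputs.shape)
        return batch_inputs + noise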
def train_model(config): print("\nLoading training data ...") train_data, valid_data = data_utils.load_train_valid_data(config) if config.start_date is not None: print("Training start date: ", config.start_date) if config.start_date is not None: print("Training end date: ", config.end_date) tf_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) with tf.Graph().as_default(), tf.Session(config=tf_config) as session: if config.seed is not None: tf.set_random_seed(config.seed) print("\nConstructing model ...") model = model_utils.get_model(session, config, verbose=True) if config.data_scaler is not None: start_time = time.time() print("Calculating scaling parameters ...", end=' ') sys.stdout.flush() scaling_params = train_data.get_scaling_params(config.data_scaler) model.set_scaling_params(session, **scaling_params) print("done in %.2f seconds." % (time.time() - start_time)) #print(scaling_params['center']) #print(scaling_params['scale']) #exit(0) if config.early_stop is not None: print("Training will early stop without " "improvement after %d epochs." % config.early_stop) train_history = list() valid_history = list() lr = model.set_learning_rate(session, config.learning_rate) train_data.cache(verbose=True) valid_data.cache(verbose=True) for i in range(config.max_epoch): (train_mse, valid_mse) = run_epoch(session, model, train_data, valid_data, keep_prob=config.keep_prob, passes=config.passes, verbose=True) print(( 'Epoch: %d Train MSE: %.6f Valid MSE: %.6f Learning rate: %.4f' ) % (i + 1, train_mse, valid_mse, lr)) sys.stdout.flush() train_history.append(train_mse) valid_history.append(valid_mse) if re.match("Gradient|Momentum", config.optimizer): lr = model_utils.adjust_learning_rate(session, model, lr, config.lr_decay, train_history) if not os.path.exists(config.model_dir): print("Creating directory %s" % config.model_dir) os.mkdir(config.model_dir) chkpt_file_prefix = "training.ckpt" if model_utils.stop_training(config, valid_history, chkpt_file_prefix): print("Training stopped.") quit() else: checkpoint_path = os.path.join(config.model_dir, chkpt_file_prefix) if (valid_history[-1] == min(valid_history)): tf.train.Saver().save(session, checkpoint_path, global_step=i)