Example no. 1
0
def main(_):
    """Entry point: build the config, load the train/valid split, and
    pre-cache both datasets so later epochs read from memory."""
    cfg = get_configs()

    training_set, validation_set = load_train_valid_data(cfg)

    # Warm both caches up front (verbose so progress is visible).
    for dataset in (training_set, validation_set):
        dataset.cache(verbose=True)
Example no. 2
0
def train_model(config):
    """Train an uncertainty-quantification model (MVE or PIE variant).

    Loads training/validation data, constructs the model in a fresh TF
    graph/session, runs up to ``config.max_epoch`` epochs, and saves a
    checkpoint whenever validation improves (or every epoch when early
    stopping is disabled). Exits the process when training stops early or
    a NaN validation metric is observed.

    Args:
        config: configuration object carrying data, model, and training
            hyper-parameters (datafile, UQ_model_type, learning_rate, ...).
    """
    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    # FIX: this guard previously re-tested start_date, so the end date was
    # printed (or skipped) based on the wrong field.
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    print("Loading training data from %s ..." % config.datafile)
    train_data = None
    valid_data = None

    # Hold out a validation set when requested; otherwise validate on the
    # training data itself.
    if (config.validation_size > 0.0) or (config.split_date is not None):
        train_data, valid_data = data_utils.load_train_valid_data(config)
    else:
        train_data = data_utils.load_all_data(config, is_training_only=True)
        valid_data = train_data

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    # Grow GPU memory on demand instead of grabbing it all up front.
    tf_config.gpu_options.allow_growth = True

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("Constructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        if config.data_scaler is not None:
            start_time = time.time()
            print("Calculating scaling parameters ...", end=' ')
            sys.stdout.flush()
            scaling_params = train_data.get_scaling_params(config.data_scaler)
            model.set_scaling_params(session, **scaling_params)
            print("done in %.2f seconds." % (time.time() - start_time))
            print("%-10s %-6s %-6s" % ('feature', 'mean', 'std'))
            for i in range(len(train_data.feature_names)):
                center = "%.4f" % scaling_params['center'][i]
                scale = "%.4f" % scaling_params['scale'][i]
                print("%-10s %-6s %-6s" % (train_data.feature_names[i],
                                           center, scale))
            sys.stdout.flush()

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        for i in range(config.max_epoch):
            # FIX: initialize before the branches; previously this name was
            # never bound (NameError at the check below) when UQ_model_type
            # was neither 'MVE' nor 'PIE'.
            is_metric_nan = False

            # MVE epoch: mean-variance estimation; history tracks the
            # variance-weighted MSE.
            if config.UQ_model_type == 'MVE':
                (train_mse, train_mse_var,
                 valid_mse, valid_mse_var) = run_epoch_mve(
                     session, model, train_data, valid_data,
                     keep_prob=config.keep_prob,
                     passes=config.passes,
                     verbose=True)
                # A NaN validation MSE aborts training below.
                is_metric_nan = math.isnan(valid_mse)
                print('Epoch: %d Train MSE: %.8f Valid MSE: %.8f Learning rate: %.4f' %
                      (i + 1, train_mse, valid_mse, lr))
                print('Epoch: %d Train MSE_w_variance: %.8f Valid MSE_w_variance: %.8f Learning rate: %.4f' %
                      (i + 1, train_mse_var, valid_mse_var, lr))
                sys.stdout.flush()

                train_history.append(train_mse_var)
                valid_history.append(valid_mse_var)

            # PIE epoch: prediction-interval estimation; combined loss is
            # MPIW plus the PICP penalty weighted by picp_lambda.
            elif config.UQ_model_type == 'PIE':
                (train_mpiw, train_picp, train_picp_loss,
                 valid_mpiw, valid_picp, valid_picp_loss) = \
                    run_epoch_pie(session, model, train_data, valid_data,
                                  keep_prob=config.keep_prob,
                                  passes=config.passes,
                                  verbose=True)

                train_loss = train_mpiw + config.picp_lambda * train_picp_loss
                valid_loss = valid_mpiw + config.picp_lambda * valid_picp_loss
                # A NaN validation loss aborts training below.
                is_metric_nan = math.isnan(valid_loss)

                print('Epoch: %d Train MPIW: %.8f Valid MPIW: %.8f Learning rate: %.4f' %
                      (i + 1, train_mpiw, valid_mpiw, lr))
                print('Epoch: %d Train PICP: %.8f Valid PICP: %.8f' %
                      (i + 1, train_picp, valid_picp))
                print('Epoch: %d Train LOSS: %.8f Valid LOSS: %.8f' %
                      (i + 1, train_loss, valid_loss))

                sys.stdout.flush()

                train_history.append(train_loss)
                valid_history.append(valid_loss)

            # Only plain gradient/momentum optimizers get manual LR decay.
            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(session, model,
                                                      lr, config.lr_decay,
                                                      train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                os.mkdir(config.model_dir)

            if is_metric_nan:
                print("Training failed due to nan.")
                quit()
            elif stop_training(config, valid_history):
                print("Training stopped.")
                quit()
            else:
                # Save when validation is at its best so far, or on every
                # epoch when early stopping is disabled.
                if ((config.early_stop is None) or
                        (valid_history[-1] <= min(valid_history))):
                    model_utils.save_model(session, config, i)
Example no. 3
0
def train_model(config):
    """Train a model, optionally injecting scaled training noise.

    Loads training/validation data, constructs the model in a fresh TF
    graph/session, applies scaling parameters, optionally wraps inputs in
    a NoiseModel, and runs up to ``config.max_epoch`` epochs, saving a
    checkpoint whenever validation improves (or every epoch when early
    stopping is disabled). Exits the process when training stops early or
    the validation MSE becomes NaN.

    Args:
        config: configuration object carrying data, model, and training
            hyper-parameters (datafile, training_noise, learning_rate, ...).
    """
    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    # FIX: this guard previously re-tested start_date, so the end date was
    # printed (or skipped) based on the wrong field.
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    print("Loading training data from %s ..." % config.datafile)
    train_data = None
    valid_data = None

    # Hold out a validation set when requested; otherwise validate on the
    # training data itself.
    if (config.validation_size > 0.0) or (config.split_date is not None):
        train_data, valid_data = data_utils.load_train_valid_data(config)
    else:
        train_data = data_utils.load_all_data(config, is_training_only=True)
        valid_data = train_data

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("Constructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        params = model_utils.get_scaling_params(config,
                                                train_data,
                                                verbose=True)
        model.set_scaling_params(session, **params)

        # Optional input-noise augmentation, scaled by the data's stdev.
        noise_model = None
        if config.training_noise is not None:
            print("Training noise level: %.2f * 1-stdev" %
                  config.training_noise)
            noise_model = NoiseModel(seed=config.seed,
                                     scaling_params=params,
                                     degree=config.training_noise)

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)
        sys.stdout.flush()

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        for i in range(config.max_epoch):

            (train_mse, valid_mse) = run_epoch(session,
                                               model,
                                               train_data,
                                               valid_data,
                                               keep_prob=config.keep_prob,
                                               passes=config.passes,
                                               noise_model=noise_model,
                                               verbose=True)
            print((
                'Epoch: %d Train MSE: %.6f Valid MSE: %.6f Learning rate: %.4f'
            ) % (i + 1, train_mse, valid_mse, lr))
            sys.stdout.flush()

            train_history.append(train_mse)
            valid_history.append(valid_mse)

            # Only plain gradient/momentum optimizers get manual LR decay.
            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(session, model, lr,
                                                      config.lr_decay,
                                                      train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                os.mkdir(config.model_dir)

            if math.isnan(valid_mse):
                print("Training failed due to nan.")
                quit()
            elif stop_training(config, valid_history):
                print("Training stopped.")
                quit()
            else:
                # Save when validation is at its best so far, or on every
                # epoch when early stopping is disabled.
                if ((config.early_stop is None)
                        or (valid_history[-1] <= min(valid_history))):
                    model_utils.save_model(session, config, i)
Example no. 4
0
def train_model(config):
    """Train a model and checkpoint it whenever validation improves.

    Loads a train/validation split, constructs the model in a fresh TF
    graph/session, optionally applies data scaling, and runs up to
    ``config.max_epoch`` epochs. Saves a "training.ckpt" checkpoint on
    each new best validation MSE; exits the process when the stop
    criterion fires.

    Args:
        config: configuration object carrying data, model, and training
            hyper-parameters (data_scaler, learning_rate, model_dir, ...).
    """
    print("\nLoading training data ...")
    train_data, valid_data = data_utils.load_train_valid_data(config)

    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    # FIX: this guard previously re-tested start_date, so the end date was
    # printed (or skipped) based on the wrong field.
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("\nConstructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        if config.data_scaler is not None:
            start_time = time.time()
            print("Calculating scaling parameters ...", end=' ')
            sys.stdout.flush()
            scaling_params = train_data.get_scaling_params(config.data_scaler)
            model.set_scaling_params(session, **scaling_params)
            print("done in %.2f seconds." % (time.time() - start_time))

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        for i in range(config.max_epoch):

            (train_mse, valid_mse) = run_epoch(session,
                                               model,
                                               train_data,
                                               valid_data,
                                               keep_prob=config.keep_prob,
                                               passes=config.passes,
                                               verbose=True)
            print((
                'Epoch: %d Train MSE: %.6f Valid MSE: %.6f Learning rate: %.4f'
            ) % (i + 1, train_mse, valid_mse, lr))
            sys.stdout.flush()

            train_history.append(train_mse)
            valid_history.append(valid_mse)

            # Only plain gradient/momentum optimizers get manual LR decay.
            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(session, model, lr,
                                                      config.lr_decay,
                                                      train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                os.mkdir(config.model_dir)

            chkpt_file_prefix = "training.ckpt"
            if model_utils.stop_training(config, valid_history,
                                         chkpt_file_prefix):
                print("Training stopped.")
                quit()
            else:
                checkpoint_path = os.path.join(config.model_dir,
                                               chkpt_file_prefix)
                # Save only when this epoch's validation MSE is the best so far.
                if (valid_history[-1] == min(valid_history)):
                    tf.train.Saver().save(session,
                                          checkpoint_path,
                                          global_step=i)