Example #1
0
def run_model(train_data,
              test_data,
              id1_train,
              id1_test,
              num_epochs,
              NUM_CORES,
              debug=False):

    # Get model configurations based on whether in debug mode
    config = get_config(train_data.num_features, num_epochs, debug)

    # Set the max_group_size (which will end up being the batch size)
    config.max_group_size = train_data.max_group_size

    msg = 'Initiating the model with the following params:\n'
    msg += '\n'.join('{0} = {1}'.format(paramName, paramValue)
                     for paramName, paramValue in config.params().iteritems())
    print msg

    # Set number of cores for TensorFlow to use
    tf_config = tf.ConfigProto(inter_op_parallelism_threads=NUM_CORES,
                               intra_op_parallelism_threads=NUM_CORES)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        # Initializer for weights and biases
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.variable_scope('model', reuse=None, initializer=initializer):
            m = LSTM(True, config=config)

        with tf.variable_scope('model', reuse=True, initializer=initializer):
            mtest = LSTM(False, config=config)

        # Path to save everything
        save_path = train_data.config[
            'OutputInSample'][:train_data.config['OutputInSample'].rfind('/')]

        # Writer for tensorboard summaries
        writer = tf.train.SummaryWriter(save_path, session.graph)

        # Prepare the data to be iterated over
        train_data.prepBatches(m.num_steps)
        test_data.prepBatches(m.num_steps)

        tf.initialize_all_variables().run()

        for epoch in range(num_epochs):

            # Decay the learning rate (begins decaying at config.decay_epoch)
            lr_decay = config.lr_decay**max(epoch - config.decay_epoch, 1.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            current_lr = session.run(m.lr)

            # Run epoch and retrieve results
            predictions, mse, summary = run_epoch(session, m, train_data,
                                                  m.train_op)
            r2 = compute_r2(np.reshape(train_data.Y.values, len(train_data.Y)),
                            predictions)
            writer.add_summary(summary, epoch)

            print 'Epoch: {} - learning rate: {:.3f} - train mse: {:.3f}e-03 - train r2: {:.3f}'.format(
                epoch, current_lr, mse * 10**3, r2)

        writer.close()
        m.save_model(session, save_path)

        # Train predictions to write to CSV
        ytrain_pred, _, _ = run_epoch(session, mtest, train_data, tf.no_op())
        print 'Train r2: {:.3f}'.format(
            compute_r2(np.reshape(train_data.Y.values, len(train_data.Y)),
                       ytrain_pred))

        # Test predictions to write to CSV
        ytest_pred, _, _ = run_epoch(session, mtest, test_data, tf.no_op())
        print 'Test r2: {:.3f}'.format(
            compute_r2(np.reshape(test_data.Y.values, len(test_data.Y)),
                       ytest_pred))

        # Write predictions to csv
        pd.DataFrame(
            {
                'id1': id1_train.id1.values,
                '{}_prediction'.format(train_data.config['Target']):
                ytrain_pred
            },
            index=train_data.X.index).to_csv(
                train_data.config['OutputInSample'])

        pd.DataFrame(
            {
                'id1': id1_test.id1.values,
                '{}_prediction'.format(train_data.config['Target']): ytest_pred
            },
            index=test_data.X.index).to_csv(
                test_data.config['OutputOutSample'])

        print 'Wrote predictions to csv'
Example #2
0
File: run.py  Project: trevorlindsay/lstm
def run_model(args,
              train_data,
              test_data,
              id1_train,
              id1_test,
              num_epochs,
              NUM_CORES,
              debug=False,
              final_run=False):
    """Train an LSTM with hyperparameters from `args`, print train/test
    correlation and r2, and return predictions with their targets.

    Args:
        args: dict of hyperparameters; reads 'init_scale', 'lr_decay',
            'decay_epoch', 'learning_rate' and writes 'max_group_size'
            (mutated in place).
        train_data / test_data: project dataset objects exposing
            `max_group_size`, `config`, `Y`, `prepBatches`, `reset` --
            TODO confirm exact interface against the data-loading module.
        id1_train / id1_test: unused in this version -- NOTE(review):
            presumably kept for signature parity with callers; confirm.
        num_epochs: number of training epochs.
        NUM_CORES: thread count for TensorFlow's inter-/intra-op pools.
        debug: unused in this version -- NOTE(review): confirm before removing.
        final_run: when True, print every epoch and save the model; otherwise
            print every 10th epoch and skip saving.

    Returns:
        (ytrain_pred, train_targets, ytest_pred, test_targets) where the
        targets are pd.Series built from column 0 of each dataset's Y.
    """

    # Set the max_group_size (which will end up being the max_steps)
    args['max_group_size'] = train_data.max_group_size

    msg = 'Initiating the model with the following params:\n'
    msg += '\n'.join('{0} = {1}'.format(paramName, paramValue)
                     for paramName, paramValue in args.iteritems())
    print msg

    # Set number of cores for TensorFlow to use
    tf_config = tf.ConfigProto(inter_op_parallelism_threads=NUM_CORES,
                               intra_op_parallelism_threads=NUM_CORES)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        # Initializer for weights and biases
        initializer = tf.random_uniform_initializer(-args['init_scale'],
                                                    args['init_scale'])

        # Training model and a weight-sharing evaluation model (reuse=True)
        # built in the same variable scope.
        with tf.variable_scope('model', reuse=None, initializer=initializer):
            m = LSTM(True, args)

        with tf.variable_scope('model', reuse=True, initializer=initializer):
            mtest = LSTM(False, args)

        # Path to save everything: directory portion of the in-sample output
        save_path = train_data.config[
            'OutputInSample'][:train_data.config['OutputInSample'].rfind('/')]

        # Writer for tensorboard summaries
        writer = tf.train.SummaryWriter(save_path, session.graph)

        # Prepare the data to be iterated over (test data is prepped later,
        # after train_data.reset(), to limit peak memory)
        train_data.prepBatches(m.batch_size)

        tf.initialize_all_variables().run()

        for epoch in range(num_epochs):

            # Decay the learning rate (begins decaying at config.decay_epoch);
            # exponent floored at 0.0 so lr is constant until decay_epoch
            lr_decay = args['lr_decay']**max(epoch - args['decay_epoch'], 0.0)
            m.assign_lr(session, args['learning_rate'] * lr_decay)
            current_lr = session.run(m.lr)

            # Run epoch and retrieve results
            predictions, mse, summary = run_epoch(session, m, train_data,
                                                  m.train_op)
            r2 = compute_r2(pd.Series(train_data.Y.values[:, 0]), predictions)
            writer.add_summary(summary, epoch)

            # Print every epoch on the final run, else every 10th epoch
            if final_run:
                print 'Epoch: {} - learning rate: {:.3f} - train mse: {:.3f}e-03 - train corr: {:.5f}'.format(
                    epoch, current_lr, mse * 10**3, r2)
            elif epoch % 10 == 0:
                print 'Epoch: {} - learning rate: {:.3f} - train mse: {:.3f}e-03 - train corr: {:.5f}'.format(
                    epoch, current_lr, mse * 10**3, r2)

        writer.close()

        # Only persist the trained weights on the final run
        if final_run:
            m.save_model(session, save_path)

        # Train predictions (tf.no_op(): evaluation only, no weight updates)
        ytrain_pred, _, _ = run_epoch(session, mtest, train_data, tf.no_op())
        print 'Train corr: {}'.format(
            compute_r2(pd.Series(train_data.Y.values[:, 0]), ytrain_pred))
        print 'Train r2: {}'.format(
            compute_r2(pd.Series(train_data.Y.values[:, 0]),
                       ytrain_pred,
                       pearson=False))

        # Free up memory then prep batches for testing data
        train_data.reset()
        test_data.prepBatches(m.batch_size)

        # Test predictions
        ytest_pred, _, _ = run_epoch(session, mtest, test_data, tf.no_op())
        print 'Test corr: {}'.format(
            compute_r2(pd.Series(test_data.Y.values[:, 0]), ytest_pred))
        print 'Test r2: {}'.format(
            compute_r2(pd.Series(test_data.Y.values[:, 0]),
                       ytest_pred,
                       pearson=False))

        return ytrain_pred, \
               pd.Series(train_data.Y.values[:, 0]), \
               ytest_pred, \
               pd.Series(test_data.Y.values[:, 0])