Example #1
def tower_loss(scope, maze_ims, maze_labels, config):
    '''
    Compute the loss for each GPU tower.
    Args:
      scope: tower scope
      maze_ims: Tensor of [batch_size, maze_size, maze_size, 1] of maze images
      maze_labels: Tensor of [batch_size, maze_size] with the target labels for the connectivity of the diagonal elements
      config: configuration of the predictron hyperparameters
    Returns:
      total_loss to optimize, the preturn regression loss, and the lambda-preturn loss
    '''
    model = Predictron(maze_ims, maze_labels, config)
    model.build()
    loss_preturns = model.loss_preturns
    loss_lambda_preturns = model.loss_lambda_preturns
    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')
    return total_loss, loss_preturns, loss_lambda_preturns
Example #2
def tower_loss(scope, maze_ims, maze_labels, config):
  '''
  Compute the loss for each GPU tower.
  Args:
    scope: tower scope
    maze_ims: Tensor of [batch_size, maze_size, maze_size, 1] of maze images
    maze_labels: Tensor of [batch_size, maze_size] with the target labels for the connectivity of the diagonal elements
    config: configuration of the predictron hyperparameters
  Returns:
    total_loss to optimize, the preturn regression loss, and the lambda-preturn loss
  '''
  model = Predictron(maze_ims, maze_labels, config)
  model.build()
  loss_preturns = model.loss_preturns
  loss_lambda_preturns = model.loss_lambda_preturns
  losses = tf.get_collection('losses', scope)
  total_loss = tf.add_n(losses, name='total_loss')
  return total_loss, loss_preturns, loss_lambda_preturns
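
tower_loss only builds the graph for a single tower; a minimal wiring sketch (an assumption, not part of the example above) would place one copy on each GPU, assuming FLAGS.num_gpus and the placeholders maze_ims_ph / maze_labels_ph are defined as in the training scripts:

# Hypothetical multi-GPU wiring for tower_loss (not from the example above).
tower_losses = []
with tf.variable_scope(tf.get_variable_scope()):
    for i in range(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('tower_%d' % i) as scope:
                # Each tower rebuilds the model under its own name scope, so
                # tf.get_collection('losses', scope) returns only that tower's losses.
                total_loss, loss_preturns, loss_lambda_preturns = tower_loss(
                    scope, maze_ims_ph, maze_labels_ph, config)
                tower_losses.append(total_loss)
                # Reuse variables so all towers share the same weights.
                tf.get_variable_scope().reuse_variables()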
Example #3
# types and sequence steps for all allowed actions.
action_space = list(chain.from_iterable(my_sim.station_HT_seq.values()))
action_size = len(action_space)
state_size = len(state)
step_counter = 0

# setup of predictron
config = Config_predictron()
config.state_size = state_size
state_queue = list([])
for i in range(config.episode_length):
    state_queue.append(np.zeros(config.state_size))
reward_queue = list(np.zeros(config.episode_length))
replay_buffer = Replay_buffer(memory_size=config.replay_memory_size)

predictron = Predictron(config)
model = predictron.model
model.load_weights("Predictron_CR.h5")
preturn_loss_arr = []
max_preturn_loss = 0
lambda_preturn_loss_arr = []
max_lambda_preturn_loss = 0

DQN_arr = []
predictron_lambda_arr = []
reward_episode_arr = []

# Creating the DQN agent
dqn_agent = DeepQNet.DQN(state_space_dim=state_size,
                         action_space=action_space,
                         epsilon_max=0.,
Example #4
def train():
    config = FLAGS

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    maze_ims_ph = tf.placeholder(tf.float32,
                                 [None, FLAGS.maze_size, FLAGS.maze_size, 1])
    maze_labels_ph = tf.placeholder(tf.float32, [None, FLAGS.maze_size])

    model = Predictron(maze_ims_ph, maze_labels_ph, config)
    model.build()

    loss = model.total_loss
    loss_preturns = model.loss_preturns
    loss_lambda_preturns = model.loss_lambda_preturns

    opt = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
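    # Compute gradients, clip them by global norm, then apply the clipped updates.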
    grad_vars = opt.compute_gradients(loss, tf.trainable_variables())
    grads, vars = zip(*grad_vars)
    grads_clipped, _ = tf.clip_by_global_norm(grads, FLAGS.max_grad_norm)
    grad_vars = zip(grads_clipped, vars)
    apply_gradient_op = opt.apply_gradients(grad_vars, global_step=global_step)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    update_op = tf.group(*update_ops)
    # Group all updates into a single train op.
    train_op = tf.group(apply_gradient_op, update_op)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    saver = tf.train.Saver(tf.global_variables())
    tf.train.start_queue_runners(sess=sess)

    train_dir = os.path.join(FLAGS.train_dir,
                             'max_steps_{}'.format(FLAGS.max_depth))
    summary_merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(train_dir, sess.graph)

    maze_queue = Queue.Queue(100)

    def maze_generator():
        maze_gen = MazeGenerator(height=FLAGS.maze_size,
                                 width=FLAGS.maze_size,
                                 density=FLAGS.maze_density)

        while True:
            maze_ims, maze_labels = maze_gen.generate_labelled_mazes(
                FLAGS.batch_size)
            maze_queue.put((maze_ims, maze_labels))

    for thread_i in xrange(FLAGS.num_threads):
        t = threading.Thread(target=maze_generator)
        t.start()

    for step in xrange(FLAGS.max_steps):
        start_time = time.time()
        maze_ims_np, maze_labels_np = maze_queue.get()

        _, loss_value, loss_preturns_val, loss_lambda_preturns_val, summary_str = sess.run(
            [
                train_op, loss, loss_preturns, loss_lambda_preturns,
                summary_merged
            ],
            feed_dict={
                maze_ims_ph: maze_ims_np,
                maze_labels_ph: maze_labels_np
            })
        duration = time.time() - start_time

        assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

        if step % 10 == 0:
            num_examples_per_step = FLAGS.batch_size
            examples_per_sec = num_examples_per_step / duration
            sec_per_batch = duration

            format_str = (
                '%s: step %d, loss = %.4f, loss_preturns = %.4f, loss_lambda_preturns = %.4f (%.1f examples/sec; %.3f '
                'sec/batch)')
            logger.info(
                format_str %
                (datetime.datetime.now(), step, loss_value, loss_preturns_val,
                 loss_lambda_preturns_val, examples_per_sec, sec_per_batch))

        if step % 100 == 0:
            summary_writer.add_summary(summary_str, step)

        # Save the model checkpoint periodically.
        if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
            checkpoint_path = os.path.join(train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
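
train() only writes checkpoints; a minimal restore sketch (an assumption, not part of the example) that reuses the names created inside train() to evaluate a saved model on a fresh batch might look like this:

# Hypothetical evaluation snippet; assumes access to sess, saver, train_dir,
# maze_queue, the placeholders, and loss_lambda_preturns defined in train().
ckpt_state = tf.train.get_checkpoint_state(train_dir)
saver.restore(sess, ckpt_state.model_checkpoint_path)
maze_ims_np, maze_labels_np = maze_queue.get()
eval_loss = sess.run(loss_lambda_preturns,
                     feed_dict={maze_ims_ph: maze_ims_np,
                                maze_labels_ph: maze_labels_np})
logger.info('lambda-preturn loss after restore: %.4f', eval_loss)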
Example #5
def main():

    parser = argparse.ArgumentParser(description='Predictron on random mazes')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of transitions in each mini-batch')
    parser.add_argument('--max-iter',
                        type=int,
                        default=10000,
                        help='Number of iterations to run')
    parser.add_argument('--n-model-steps',
                        type=int,
                        default=16,
                        help='Number of model steps')
    parser.add_argument('--n-channels',
                        type=int,
                        default=32,
                        help='Number of channels for hidden units')
    parser.add_argument('--maze-size',
                        type=int,
                        default=20,
                        help='Size of random mazes')
    parser.add_argument('--use-reward-gamma',
                        type=bool,
                        default=True,
                        help='Use reward and gamma')
    parser.add_argument('--use-lambda',
                        type=bool,
                        default=True,
                        help='Use lambda-network')
    parser.add_argument('--usage-weighting',
                        type=bool,
                        default=True,
                        help='Enable usage weighting')
    parser.add_argument('--n-unsupervised-updates',
                        type=int,
                        default=0,
                        help='Number of unsupervised updates per supervised '
                        'update')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    # chainer.set_debug(True)
    model = Predictron(n_tasks=args.maze_size,
                       n_channels=args.n_channels,
                       model_steps=args.n_model_steps,
                       use_reward_gamma=args.use_reward_gamma,
                       use_lambda=args.use_lambda,
                       usage_weighting=args.usage_weighting)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)
    opt = optimizers.Adam()
    opt.setup(model)

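    # Each iteration runs one supervised update, optionally followed by
    # unsupervised updates on unlabelled mazes.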
    for i in range(args.max_iter):
        x, t = generate_supervised_batch(maze_size=args.maze_size,
                                         batch_size=args.batchsize)
        if args.gpu >= 0:
            x = chainer.cuda.to_gpu(x)
            t = chainer.cuda.to_gpu(t)
        model.cleargrads()
        g_k_loss, g_lambda_loss = model.supervised_loss(x, t)
        supervised_loss = g_k_loss + g_lambda_loss
        supervised_loss.backward()
        opt.update()
        for _ in range(args.n_unsupervised_updates):
            x = generate_unsupervised_batch(maze_size=args.maze_size,
                                            batch_size=args.batchsize)
            if args.gpu >= 0:
                x = chainer.cuda.to_gpu(x)
            model.cleargrads()
            unsupervised_loss = model.unsupervised_loss(x)
            unsupervised_loss.backward()
            opt.update()
        print(i, g_k_loss.data, g_lambda_loss.data, (g_lambda_loss.data**0.5))
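
The loop above never writes anything to args.out; a minimal saving sketch (an assumption, not in the example), using Chainer's save_npz serializer and the model / args names from main():

# Hypothetical follow-up to main(): persist the trained parameters.
# Assumes `model` and `args` from main() are in scope.
import os
import chainer

os.makedirs(args.out, exist_ok=True)
if args.gpu >= 0:
    model.to_cpu()  # move parameters back to host memory before serializing
chainer.serializers.save_npz(os.path.join(args.out, 'predictron.npz'), model)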