# Exemplo n.º 1 (scraped example header — kept as a comment so the file parses)
# 0
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    os.chdir(DIR)
    os.chdir('runs')
    if not args.run_id:
        run_id = '{}_{}'.format(args.env_name, time())
    else: 
        run_id = args.run_id
    os.mkdir(run_id)
    weights_filename = '{}/weights.h5f'.format(run_id)
    checkpoint_weights_filename = weights_filename + '_{step}.h5f'
 #  model_filename = 'dqn_micropolis_model.hdf5'
    log_filename = '{}/log.json'.format(run_id)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
  # callbacks += [ModelCheckpoint(model_filename)]
    callbacks += [FileLogger(log_filename, interval=250000)]
    callbacks += [TensorBoard(log_dir=run_id)]
#   class TestCallback(Callback):
#       def on_epoch_end(self, epoch, logs=None):
#           test_env = gym.make(args.env_name)
#           test_env.setMapSize(MAP_X,MAP_Y)
#           dqn.test(test_env, nb_episodes=1, visualize=True, nb_max_start_steps=100)
#           test_env.win1.destroy()
#           test_env.close()
#           del(test_env)
#   callbacks += [TestCallback()]
#   if args.loadmodel:
#       dqn.model.load(args.loadmodel)
    if args.weights:
        dqn.load_weights(args.weights)
def training_game():
    """Train a DQN agent on the HallucinIce StarCraft II mini-game.

    Builds the PySC2 environment, Q-network, replay memory and a
    linearly-annealed epsilon-greedy policy, fits the agent while
    periodically checkpointing weights and logging progress to JSON,
    then saves the final weights and runs a short evaluation.
    """
    # Screen/minimap resolutions fix the observation the agent sees.
    interface = features.AgentInterfaceFormat(
        feature_dimensions=features.Dimensions(screen=64, minimap=32))
    env = Environment(map_name="HallucinIce",
                      visualize=True,
                      game_steps_per_episode=150,
                      agent_interface_format=interface)

    n_actions = 12  # Number of actions
    network = neural_network_model((_SIZE, _SIZE, 1), n_actions)
    replay = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    obs_processor = SC2Proc()

    # Policy: epsilon anneals linearly from 1 down to 0.7 over 1e6 steps
    # (greedy at test time, eps = 0).
    explore = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                   attr="eps",
                                   value_max=1,
                                   value_min=0.7,
                                   value_test=.0,
                                   nb_steps=1e6)

    # Agent
    agent = DQNAgent(model=network,
                     nb_actions=n_actions,
                     memory=replay,
                     enable_double_dqn=False,
                     nb_steps_warmup=500,
                     target_model_update=1e-2,
                     policy=explore,
                     batch_size=150,
                     processor=obs_processor)
    agent.compile(Adam(lr=.001), metrics=["mae"])

    # Save the parameters and upload them when needed
    map_id = "HallucinIce"
    weights_file = "dqn_{}_weights.h5f".format(map_id)
    if SAVE_MODEL:
        # "{step}" is filled in by ModelIntervalCheckpoint at save time.
        checkpoint_file = "train_w" + map_id + "_weights_{step}.h5f"
    else:
        checkpoint_file = "train_w" + map_id + "_weights.h5f"
    log_file = "training_w_{}_log.json".format(map_id)

    hooks = [ModelIntervalCheckpoint(checkpoint_file, interval=1000),
             FileLogger(log_file, interval=100)]

    if LOAD_MODEL:
        agent.load_weights(weights_file)

    agent.fit(env,
              callbacks=hooks,
              nb_steps=1e7,
              action_repetition=2,
              log_interval=1e4,
              verbose=2)

    agent.save_weights(weights_file, overwrite=True)
    agent.test(env, action_repetition=2, nb_episodes=30, visualize=False)