Example #1
    def start(self):
        weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
            self.cwd, self.env_name)
        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_name))

        if self.train:
            checkpoint_weights_filename = 'dqn_' + self.env_name + \
                                          '_weights_{step}.h5f'
            checkpoint_weights_filename = '{}/dqn_weights/'.format(self.cwd) + \
                                          checkpoint_weights_filename
            log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
                self.cwd, self.env_name)
            print('FileLogger: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            print('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            print('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
        else:
            print('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
Example #2
def train_model(seed=1, setup=0):
    np.random.seed(seed)

    if setup == 0:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0)
    elif setup == 1:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0.8)
    else:
        env = CameraControlEnv(a_p=0.5, a_r=0.2, e_thres=0.8)

    env.seed(seed)

    model = define_model(actions=7)

    memory = SequentialMemory(limit=10000, window_length=1)

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0, value_min=0.1, value_test=0.05,
                                  nb_steps=95000)
    dqn = DQNAgent(model=model, nb_actions=7, policy=policy, memory=memory, processor=None,
                   nb_steps_warmup=500, gamma=0.95, delta_clip=1, target_model_update=0.001, batch_size=32)
    dqn.compile(RMSprop(lr=.0001), metrics=['mae'])

    log_filename = 'results/drone_camera_control_log_' + str(setup) + '.json'
    model_checkpoint_filename = 'results/drone_camera_cnn_weights_' + str(setup) + '_{step}.model'
    callbacks = [ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)]
    callbacks += [FileLogger(log_filename, interval=1)]

    dqn.fit(env, nb_steps=100000, nb_max_episode_steps=100, verbose=2, visualize=False, log_interval=1,
            callbacks=callbacks)

    # After training is done, save the final weights.
    model_filename = 'models/drone_camera_cnn_' + str(setup) + '.model'
    dqn.save_weights(model_filename, overwrite=True)
Example #3
    def train(self):

        # Okay, now it's time to learn something! We capture the interrupt exception so that training
        # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
        weights_filename = self.get_weight_path(self.stock)
        checkpoint_weights_filename = self.get_weight_path(
            self.stock) + '_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(self.stock)

        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        # callbacks += [WandbLogger(
        #     project = "stock-bot-v0"
        # )]

        self.dqn.fit(self.env,
                     callbacks=callbacks,
                     nb_steps=1750000,
                     log_interval=10000)

        # After training is done, we save the final weights one more time.
        self.dqn.save_weights(weights_filename, overwrite=True)

        # Finally, evaluate our algorithm for 10 episodes.
        self.dqn.test(self.env, nb_episodes=10, visualize=False)
Example #4
    def start(self):
        if self.train:
            weights_filename = 'dqn_{}_weights.h5f'.format(self.env_id)
            if self.weights:
                self.agent.load_weights(weights_filename)
                print('...loading weights for {}'.format(self.env_id))

            checkpoint_weights_filename = 'dqn_' + self.env_id + '_weights_{step}.h5f'
            log_filename = 'dqn_{}_log.json'.format(self.env_id)

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            self.agent.fit(self,
                           callbacks=callbacks,
                           nb_steps=self.training_steps,
                           log_interval=10000)
            self.agent.save_weights(weights_filename, overwrite=True)
            self.agent.test(self, nb_episodes=2, visualize=False)
            self.render()
        else:
            weights_filename = 'dqn_{}_weights.h5f'.format(self.env_id)
            self.agent.load_weights(weights_filename)
            self.agent.test(self, nb_episodes=2, visualize=False)
            self.render()
Example #5
def build_callbacks(env_name):
    checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
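
build_callbacks only assembles the callback list; below is a minimal sketch of how it would typically be wired into a keras-rl training run (the agent, env and nb_steps names are placeholders, not part of the example above):

def train_with_callbacks(agent, env, env_name, nb_steps=50000):
    # Build the checkpoint/logging callbacks and hand them to fit();
    # keras-rl invokes them during training.
    callbacks = build_callbacks(env_name)
    agent.fit(env, callbacks=callbacks, nb_steps=nb_steps, log_interval=10000)
    # Persist the final weights once training finishes.
    agent.save_weights('dqn_{}_weights.h5f'.format(env_name), overwrite=True)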
Example #6
def build_callbacks(env_name):
    checkpoint_weights_filename = 'results/Swimmer/ddpg_' + env_name + '_weights_{step}.h5f'
    log_filename = 'results/Swimmer6/exp_5/ddpg_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=5000)]

    return callbacks
Example #7
def train(index, policy_nb_steps, fit_nb_steps):

    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    np.random.seed(666)
    nb_actions = environment.action_space.shape[0]

    # Build the model.
    v_model, mu_model, l_model = build_models((WINDOW_LENGTH, ) + INPUT_SHAPE,
                                              nb_actions)
    v_model.summary()
    mu_model.summary()
    l_model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = CarRacingProcessor()
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.3,
                                              size=nb_actions)

    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=v_model,
                     L_model=l_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=100,
                     random_process=random_process,
                     gamma=.99,
                     target_model_update=1e-3,
                     processor=processor)
    agent.compile(optimizers.Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    weights_filename = 'naf_{}_{}_weights.h5f'.format(environment_name, index)

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    checkpoint_weights_filename = 'naf_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'naf_{}_log.json'.format(environment_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]
    agent.fit(
        environment,
        callbacks=callbacks,
        #nb_steps=1750000,
        nb_steps=fit_nb_steps,
        log_interval=10000,
        visualize="visualize" in sys.argv)

    # After training is done, we save the final weights one more time.
    agent.save_weights(weights_filename, overwrite=True)
Example #8
def build_callbacks(env_name, filename_pre, filename_exp):
    checkpoint_weights_filename = filename_pre + env_name + '_weights_{step}.h5f'
    log_filename = filename_pre + filename_exp + '/ddpg_{}_log.json'.format(
        env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=50000)
    ]
    callbacks += [FileLogger(log_filename, interval=50000)]

    return callbacks
Example #9
    def train(self):
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=INTERVAL_CALLBACK),
            FileLogger(log_filename, interval=FILE_LOGGER_INTERVAL)
        ]
        self.dqn.fit(self.env,
                     callbacks=callbacks,
                     nb_steps=NB_STEPS,
                     log_interval=FIT_LOG_INTERVAL)
Example #10
    def __init__(self, args):
        """Initialize.

        :param args: namespace of arguments; see --help.
        """

        # Params
        self.resuming = args.cont is not None
        self.initialize_from = args.cont
        self._log_interval = args.log_frequency
        self._max_episode_steps = args.max_episode_steps

        # Dirs
        model_path, log_path = tools.prepare_directories(
            "agent", args.env, resuming=self.resuming, args=args)
        log_filename = "log.json"
        self.log_file = os.path.join(log_path, log_filename)

        # Environment
        self.env = gym.make(args.env)
        self.env_name = args.env

        # Repeatability
        if args.deterministic:
            if "Deterministic" not in self.env_name:
                raise ValueError(
                    "--deterministic only works with deterministic"
                    " environments")
            self.env.seed(30013)
            np.random.seed(30013)
            tf.random.set_seed(30013)

        # Agent
        self.kerasrl_agent, self.atari_agent = self.build_agent(
            tools.Namespace(args, training=True))

        # Tools
        self.saver = CheckpointSaver(agent=self.kerasrl_agent,
                                     path=model_path,
                                     interval=args.saves)
        self.logger = TensorboardLogger(logdir=log_path,
                                        agent=self.atari_agent)

        # Callbacks
        self.callbacks = [
            self.saver,
            self.logger,
            FileLogger(filepath=self.log_file, interval=100),
        ]
        if args.random_epsilon:
            self.callbacks.append(self.kerasrl_agent.test_policy.callback)

        # Save on exit
        tools.QuitWithResources.add("last_save", lambda: self.saver.save())
Example #11
def build_callbacks():
    """
    callbacks for the deep q agent
    """
    checkpoint_weights_filename = 'model_checkpoints/dqn_weights_.h5f'
    log_filename = 'dqn_log.json'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
Example #12
    def start(self) -> None:
        """
        Entry point for agent training and testing

        :return: (void)
        """
        output_directory = os.path.join(self.cwd, 'dqn_weights')
        if not os.path.exists(output_directory):
            LOGGER.info('{} does not exist. Creating Directory.'.format(
                output_directory))
            os.mkdir(output_directory)

        weight_name = 'dqn_{}_{}_weights.h5f'.format(self.env_name,
                                                     self.neural_network_type)
        weights_filename = os.path.join(output_directory, weight_name)
        LOGGER.info("weights_filename: {}".format(weights_filename))

        if self.load_weights:
            LOGGER.info('...loading weights for {} from\n{}'.format(
                self.env_name, weights_filename))
            self.agent.load_weights(weights_filename)

        if self.train:
            step_chkpt = '{step}.h5f'
            step_chkpt = 'dqn_{}_weights_{}'.format(self.env_name, step_chkpt)
            checkpoint_weights_filename = os.path.join(self.cwd, 'dqn_weights',
                                                       step_chkpt)
            LOGGER.info("checkpoint_weights_filename: {}".format(
                checkpoint_weights_filename))
            log_filename = os.path.join(
                self.cwd, 'dqn_weights',
                'dqn_{}_log.json'.format(self.env_name))
            LOGGER.info('log_filename: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            LOGGER.info('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            LOGGER.info("training over.")
            LOGGER.info('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
            LOGGER.info("AGENT weights saved.")
        else:
            LOGGER.info('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
Example #13
    def run(self, steps):
        callbacks = [FileLogger(self.log_filename)]
        self.dqn.fit(
            self.env,
            callbacks=callbacks,
            nb_steps=steps,
            visualize=False,
            verbose=1,
            log_interval=10000,
        )
        # After training is done, we save the final weights.
        self.dqn.save_weights(self.weights_filename, overwrite=True)
Example #14
def fit_dqn(fit_steps: int, cont: bool):
    training_env = gym.make('adver-v0', target_label=1, test_mode=False)

    callbacks = [ModelIntervalCheckpoint(DQN_WEIGHTS_FILENAME, interval=WEIGHTS_CHECKPOINT)]
    callbacks += [FileLogger(DQN_LOG_FILENAME, interval=LOG_CHECKPOINT)]
    dqn = init_dqn(fit_steps)
    if cont:
        dqn.load_weights(DQN_WEIGHTS_FILENAME.format(step="final"))
        print("continue fitting from last checkpoint")

    dqn.fit(training_env, callbacks=callbacks, nb_steps=fit_steps, start_step_policy=start_policy, nb_max_start_steps=3)

    dqn.save_weights(DQN_WEIGHTS_FILENAME.format(step="final"), overwrite=True)
Example #15
def build_callbacks(env_name):
    log_dir = 'logs'
    if not exists(log_dir):
        os.makedirs(log_dir)

    checkpoint_weights_filename = join(
        log_dir, 'dqn_' + env_name + '_weights_{step}.h5f')
    log_filename = join(log_dir, 'dqn_{}_log.json'.format(env_name))
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
Example #16
def main():
    """Build model and train on environment."""
    env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=3)
    #env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4)
    nb_actions = 3      # Keras-RL CEM is a discrete agent

    # Option 1 : Simple model
    model = Sequential([
        Flatten(input_shape=(1,) + env.observation_space.shape),
        Dense(nb_actions),
        Activation('softmax')
    ])

    # Option 2: deep network
    # hidden_nodes = reduce(operator.imul, env.observation_space.shape, 1)
    # model = Sequential([
    #     Flatten(input_shape=(1,) + env.observation_space.shape),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(nb_actions),
    #     Activation('softmax')
    # ])

    print(model.summary())

    param_logger = CEMParamLogger('cem_{}_params.json'.format(env.instrument.symbol))
    callbacks = [
        param_logger,
        FileLogger('cem_{}_log.json'.format(env.instrument.symbol), interval=STEPS_PER_EPISODE)
    ]

    theta_init = param_logger.read_params()     # Start with last saved params if present
    if theta_init is not None:
        print('Starting with parameters from {}:\n{}'.format(param_logger.params_filename, theta_init))

    memory = EpisodeParameterMemory(limit=EPISODES, window_length=1)        # Remember the parameters and rewards for the last `limit` episodes.
    cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=EPISODES, nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE, train_interval=TRAIN_INTERVAL_EPISODES, elite_frac=0.2, theta_init=theta_init, processor=DiscreteProcessor(), noise_decay_const=0, noise_ampl=0)
    """
    :param memory: Remembers the parameters and rewards for the last `limit` episodes.
    :param int batch_size: Randomly sample this many episode parameters from memory before taking the top `elite_frac` to construct the next gen parameters from.
    :param int nb_steps_warmup: Run for this many steps (total) to fill memory before training
    :param int train_interval: Train (update parameters) every this many episodes
    :param float elite_frac: Take this top fraction of the `batch_size` randomly sampled parameters from the episode memory to construct new parameters.
    """
    cem.compile()
    cem.fit(env, nb_steps=STEPS_PER_EPISODE * EPISODES, visualize=True, verbose=2, callbacks=callbacks)
    cem.save_weights('cem_{}_weights.h5f'.format(env.instrument.symbol), overwrite=True)
Example #17
    def train(self, steps=50000, interval=100, visualise=False):
        callbacks = [
            ModelIntervalCheckpoint(self.checkpoint_path, interval=10),
            FileLogger(self.log_path, interval=100),
        ]

        self.dqn.fit(env=self.env,
                     nb_steps=steps,
                     visualize=visualise,
                     callbacks=callbacks,
                     log_interval=interval,
                     nb_max_episode_steps=self.env.total_steps - 1)

        self.dqn.save_weights(self.model_path, overwrite=True)
Example #18
    def train(self, env, steps, log_interval=5000):
        # Note: dueling and double DQN are enabled on the DQNAgent constructor
        # (enable_dueling_network, dueling_type, enable_double_dqn); fit() does
        # not accept those keyword arguments, so they are not passed here.
        self.dqn.fit(
            env,
            callbacks=[FileLogger("dqn_log.json")],
            log_interval=log_interval,
            nb_steps=steps,
            verbose=1,
            visualize=False,
        )

        # After training is done, we save the final weights.
        self.dqn.save_weights("dqn_weights.h5f", overwrite=True)
Example #19
def trainDQN(cfg, env, dqn):
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    callbacks = [
        ModelIntervalCheckpoint(cfg.checkpoint_weights_filename,
                                interval=250000)
    ]
    callbacks += [FileLogger(cfg.log_filename, interval=100)]
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=cfg.nb_steps_dqn_fit,
            log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(cfg.weights_filename, overwrite=True)
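
trainDQN only reads a handful of attributes from cfg; below is a minimal sketch of such a config object, assuming a plain namespace is acceptable (the values are illustrative, not taken from the original project):

from types import SimpleNamespace

# Hypothetical config exposing the attributes trainDQN accesses.
cfg = SimpleNamespace(
    checkpoint_weights_filename='dqn_weights_{step}.h5f',
    log_filename='dqn_log.json',
    nb_steps_dqn_fit=1000000,
    weights_filename='dqn_weights.h5f',
)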
Example #20
    def train(self, env):
        policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, nb_steps=1000)
        amount_memory = 10000000
        memory = SequentialMemory(limit=amount_memory, window_length=WINDOW_LENGTH)
        processor = EmptyProcessor()
        self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, policy=policy, memory=memory, processor=processor, nb_steps_warmup=50, gamma=.99, target_model_update=10000, train_interval=4, delta_clip=1., enable_double_dqn=False)
        self.dqn.compile(Adam(lr=0.01), metrics=['mae'])

        weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
        checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
        callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=2500)]
        callbacks += [FileLogger(log_filename, interval=100)]
        #self.dqn.load_weights(ENV_NAME+'weights.h5f')
        self.dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000, visualize=True, verbose=2)
        # After training is done, we save the final weights one more time.
        self.dqn.save_weights(weights_filename, overwrite=True)
Example #21
    def train(self, env, nb_actions):
        weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
        checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=10000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        self.dqn.fit(env,
                     callbacks=callbacks,
                     nb_steps=1750000,
                     log_interval=10000,
                     visualize=True,
                     verbose=2)
        # After training is done, we save the final weights one more time.
        self.dqn.save_weights(weights_filename, overwrite=True)
Example #22
def main():

    # Get the environment and extract the number of actions.
    environment_name = "lawnmower-medium-obstacles-v0"
    environment = gym.make(environment_name)
    environment.print_description()
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model_cnn((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Create sequential memory for memory replay.
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

    # Process environment inputs and outputs.
    processor = LawnmowerProcessor()

    # Use epsilon-greedy as our policy.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=int(STEPS * 0.8))

    # Instantiate and compile our agent.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
                   train_interval=4, delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    # Set up some callbacks for training.
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [TensorboardCallback(os.path.join("tensorboard", datetime_string))]
    callbacks += [FileLogger(log_filename, interval=100)]

    # Train the agent.
    dqn.fit(environment, callbacks=callbacks, nb_steps=STEPS, log_interval=10000)

    # Save the final network after training.
    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)
    dqn.save_weights(weights_filename, overwrite=True)

    # Run the agent.
    dqn.test(environment, nb_episodes=10, visualize=False)
Example #23
def train_model(seed=1):
    np.random.seed(seed)
    env = CameraControlEnvCont()
    env.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)

    memory = SequentialMemory(limit=10000, window_length=1)

    random_process = GaussianWhiteNoiseProcess(mu=0,
                                               sigma=0.1,
                                               sigma_min=0.01,
                                               n_steps_annealing=49000,
                                               size=3)

    agent = DDPGAgent(nb_actions=3,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=500,
                      nb_steps_warmup_actor=500,
                      random_process=random_process,
                      gamma=.1,
                      target_model_update=1e-3,
                      batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    log_filename = 'results/drone_camera_cont_control_log.json'
    model_checkpoint_filename = 'results/drone_camera_cont_cnn_weights_{step}.model'
    callbacks = [
        ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=1)]

    agent.fit(env,
              nb_steps=50000,
              nb_max_episode_steps=100,
              verbose=2,
              visualize=False,
              log_interval=1,
              callbacks=callbacks)
Example #24
    def train(self, max_steps=100, episodes=100):
        # Okay, now it's time to learn something! Visualization is disabled here because it
        # slows down training quite a lot. You can always safely abort the training prematurely
        # using Ctrl + C.
        self.env._max_steps = max_steps
        #for i in range(episodes):
        self.env.current_step = 0
        n_steps = max_steps * episodes
        logger = FileLogger(
            filepath='{}/{}.json'.format(self.out_path, self.ENV_NAME))
        self.dqn.fit(self.env,
                     nb_steps=n_steps,
                     nb_max_episode_steps=max_steps,
                     visualize=False,
                     verbose=1,
                     callbacks=[logger])
        #self.env.reset()

        # After episode is done, we save the final weights.
        self.dqn.save_weights('{}/{}.h5'.format(self.out_path, self.ENV_NAME),
                              overwrite=True)
Example #25
def create_dqn(model,
               log_interval=50000,
               model_name='dqn_agent_checkpoint',
               file_log_path='./logs/log.txt',
               tensorboard_path='./logs/tensorboard/'):
    model_path = './models/' + model_name + '.h5'
    file_logger = FileLogger(file_log_path, interval=log_interval)
    checkpoint = ModelIntervalCheckpoint(model_path, interval=log_interval)
    tensorboard = TensorboardLogger(tensorboard_path)
    callbacks = [file_logger, checkpoint, tensorboard]

    # Use the last 4 observations (history_length = 4)
    memory = SequentialMemory(limit=500000, window_length=history_length)

    # Use MaxBoltzmannQPolicy, which combines BoltzmannQPolicy and EpsGreedyQPolicy
    policy = MaxBoltzmannQPolicy()

    # Start epsilon at 1.0 and anneal it every step so that random actions taper off once the map has been explored
    policy = LinearAnnealedPolicy(inner_policy=policy,
                                  attr='eps',
                                  value_max=1.0,
                                  value_min=0.1,
                                  value_test=0.04,
                                  nb_steps=NUMBER_OF_STEPS)

    # Create an instance of DQNAgent from keras-rl
    dqn = DQNAgent(model=model,
                   nb_actions=env.action_space.n,
                   memory=memory,
                   policy=policy,
                   processor=CustomProcessor(),
                   nb_steps_warmup=512,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=5e2,
                   batch_size=32)

    dqn.compile(Adam(lr=5e-4), metrics=['mae'])

    return dqn, callbacks
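
create_dqn returns the agent together with its callbacks but does not start training itself; below is a minimal usage sketch, assuming model, env and NUMBER_OF_STEPS are defined as in the snippet above:

# Build the agent plus its logging/checkpoint callbacks, then train.
dqn, callbacks = create_dqn(model, model_name='dqn_agent_checkpoint')
dqn.fit(env, callbacks=callbacks, nb_steps=NUMBER_OF_STEPS, log_interval=50000)
dqn.save_weights('./models/dqn_agent_final.h5', overwrite=True)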
Example #26
    def train(self, env: Env, base_folder: str, nbr_steps: int=1000):
        """
        Trains the agent
        :param env: The environment to train on
        :param base_folder: base directory where weights and logs are written
        :param nbr_steps: number of training steps to run
        """
        timef = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

        # filenames
        weights_out_filename = 'dqn_weights_{t}_trained_{nbr}.h5f'.format(t=timef, nbr=nbr_steps)
        checkpoint_weights_filename = 'dqn_checkpoint_weights_'+timef+'_{step}.h5f'
        log_filename = 'trainlog_dqn_{}.json'.format(timef)

        # full path of files
        template = '{folder}/{filename}'
        weights_out_file        = template.format(folder=base_folder, filename=weights_out_filename)
        checkpoint_weights_file = template.format(folder=base_folder, filename=checkpoint_weights_filename)
        log_file                = template.format(folder=base_folder, filename=log_filename)

        # Make sure the files/folders exists
        make_sure_path_exists(base_folder)
        for fn in [weights_out_file, log_file]:
            if not os.path.exists(fn):
                with open(fn, "w"):
                    pass

        callbacks = [ModelIntervalCheckpoint(checkpoint_weights_file, interval=ceil(nbr_steps / 5))]  # 5 checkpoints
        callbacks += [FileLogger(log_file, interval=ceil(nbr_steps / 100))]  # update 100 times

        # set a LinearAnnealedPolicy so that tau reaches its min value at 80% of nbr_steps
        self.agent.policy = LinearAnnealedPolicy(BoltzmannQPolicy(clip=(-500, 300)), attr='tau', value_max=0.8, value_min=0.3, value_test=0.01, nb_steps=ceil(nbr_steps*0.8))
        # self.agent.policy = BoltzmannQPolicy(clip=(-500, 300), tau=0.1)

        self.agent.fit(env, nb_steps=nbr_steps, visualize=False, verbose=0, nb_max_start_steps=0, callbacks=callbacks)

        logger.info("saving the weights to {}".format(weights_out_file))
        self.agent.save_weights(weights_out_file, overwrite=True)
Example #27
    def train(self, env):
        '''
        Trains the agent
        '''
        checkpoint_weights_filename = 'brain_weights_{step}.h5f'
        log_filename = 'brain_log.json'
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000),
            FileLogger(log_filename, interval=100)
        ]

        # Training our agent
        self.agent.fit(env,
                       callbacks=callbacks,
                       nb_steps=175000,
                       log_interval=10000,
                       verbose=0)  # just to remove the progress bar

        # After training is done, we save the final weights one more time.
        self.agent.save_weights(self.weights_filename, overwrite=True)

        # Finally, evaluate our algorithm for 10 episodes.
        self.agent.test(env, nb_episodes=10, visualize=False)
Example #28
def train_agent(dqn, env, weights_filename):
    """
    Train the agent.
    """
    checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(ENV_NAME)

    callbacks = [
        ModelIntervalCheckpoint(
            checkpoint_weights_filename,
            interval=CALLBACK_INTERVAL
        )
    ]
    callbacks += [FileLogger(log_filename, interval=LOG_INTERVAL)]
    dqn.fit(
        env,
        callbacks=callbacks,
        nb_steps=MAX_STEPS,
        log_interval=LOG_INTERVAL
    )
    dqn.save_weights(
        weights_filename,
        overwrite=True
    )
Example #29
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    weights_filename = '/content/gdrive/My Drive/dqn_{}_weights.h5f'.format(
        args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = '/content/gdrive/My Drive/dqn_{}_log.json'.format(
        args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = '/content/gdrive/My Drive/dqn_{}_weights.h5f'.format(
        args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #30
def main():

    # Get the environment and extract the number of actions.
    environment_name = "FlappyBird-v0"
    environment = gym.make(environment_name)
    np.random.seed(666)
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model((WINDOW_LENGTH, ) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = FlappyBirdProcessor()

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
    # so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=1000000)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=50000,
                   gamma=.99,
                   target_model_update=10000,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(environment,
            callbacks=callbacks,
            nb_steps=1750000,
            log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(environment, nb_episodes=10, visualize=False)