Example No. 1
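# The snippets on this page assume the usual keras-rl / Keras imports, which the
# examples themselves omit. A minimal, assumed import block for this example
# (Logger is presumably a project-local helper, not part of keras-rl):
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory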
    def __init__(self, env: gym.Env, logger=Logger()):
        nb_actions = env.action_space.shape[0]

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))

        policy = BoltzmannQPolicy()
        memory = SequentialMemory(limit=100000, window_length=1)
        agent = DQNAgent(model=model,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         policy=policy)
        agent.compile(Adam(lr=1e-3), metrics=['mae'])
        self.agent = agent
        self.env = env
        super().__init__(env, logger)
Example No. 2
def run():
    env = game_env.MeleeEnv()
    nb_actions = env.action_space.shape[0]
    actor = build_network(env, nb_actions)
    critic, action_input = build_critic(env, nb_actions)
    memory = SequentialMemory(limit=25000, window_length=1)  # window_length is required by SequentialMemory
    # Leftover DDPG-style options are kept below for reference:
    # random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = DQNAgent(
        batch_size=1000,
        nb_actions=nb_actions,
        model=actor,
        memory=memory,
        nb_steps_warmup=100
        # processor=Process(), window_length=4, critic_action_input=action_input,
        # nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
        # random_process=random_process, gamma=.95, target_model_update=1e-1,
        # delta_range=(-10., 10.)
    )
    agent.compile(RMSprop(lr=.0005), metrics=['mae'])

    agent.fit(env,
              nb_steps=100000,
              visualize=True,
              verbose=1,
              nb_max_start_steps=100,
              start_step_policy=lambda x: np.random.randint(nb_actions))
    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_weights.h5f'.format(
        str(random.randrange(0, 100000))),
                       overwrite=True)
Example No. 3
    def agent(self):
        nb_actions = self.env.action_space.n
        model = self.build()
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=32,
                       enable_dueling_network=True,
                       target_model_update=1e-2,
                       policy=InformedBoltzmannGumbelQPolicy(self.env),
                       test_policy=InformedGreedyQPolicy(self.env),
                       batch_size=32,
                       train_interval=32)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if self.initial_weights_file is not None:
            try:
                dqn.load_weights(self.initial_weights_file)
            except Exception:
                # just skip loading
                pass

        return dqn
Example No. 4
def build_agent(model, actions):
    '''Build Agent'''
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000
    )
    memory = SequentialMemory(
        limit=1000000,
        window_length=3
    )
    DQN_agent = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000
    )
    DQN_agent.compile(optimizer=Adam(lr=0.00025), metrics=['mae', 'accuracy'])
    return DQN_agent
Example No. 5
    def _build_dqn(nb_actions, nb_states):
        # build network
        model = Sequential()
        model.add(Flatten(input_shape=(1, nb_states)))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions, activation='linear'))

        # build agent
        memory = SequentialMemory(limit=10240, window_length=1)
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=10,
                       enable_dueling_network=True,
                       dueling_type='avg',
                       target_model_update=1e-2,
                       policy=policy)
        dqn.compile(Adam(), metrics=['mae'])

        return dqn
Example No. 6
    def agent(self):
        nb_actions = self.env.action_space.n
        obs_dim = self.env.observation_space.shape
        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + obs_dim))
        model.add(Dense(nb_actions, activation='linear'))
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=256,
                       enable_dueling_network=True,
                       target_model_update=1e-2,
                       policy=InformedBoltzmannGumbelQPolicy(self.env),
                       test_policy=InformedGreedyQPolicy(self.env),
                       batch_size=128,
                       train_interval=128)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if self.initial_weights_file is not None:
            dqn.load_weights(self.initial_weights_file)
            self.train_episodes = 0

        return dqn
Example No. 7
def test_double_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1, )))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50,
                   target_model_update=1e-1,
                   policy=policy,
                   enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example No. 8
def build_agent(model, nb_actions):
    """
    build an agent
    """
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=MAX_EPSILON,
        value_min=MIN_EPSILON,
        value_test=TEST_EPSILON,
        nb_steps=MAX_STEPS
    )
    memory = SequentialMemory(
        limit=MAX_STEPS,
        window_length=WINDOW_WIDTH
    )
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=nb_actions,
        nb_steps_warmup=WARMUP_STEPS
    )
    dqn.compile(Adam(learning_rate=LEARNING_RATE), metrics=['mae'])
    return dqn
Example No. 9
def init_dqn(env, nb_actions):
    """ Initialize the DQN agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: DQN Agent
    """
    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # compile agent
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.model_name = "DQN"
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
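
# A hedged usage sketch for init_dqn above, assuming a discrete-action gym
# environment such as CartPole-v1 (environment name and step counts are
# illustrative, not from the original):
import gym

env = gym.make('CartPole-v1')
dqn = init_dqn(env, env.action_space.n)
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)
dqn.test(env, nb_episodes=5, visualize=False)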
Example No. 10
def make_dqn_rl_agent(processor: Processor_56x5,
                      nbr_layers=2,
                      enable_dueling_network: bool = False,
                      enable_double_dqn: bool = True):
    """
    
    :param processor: 
    :param nbr_layers: 
    :param enable_dueling_network:
    :param enable_double_dqn:
    :return: 
    """

    model = processor.create_model(nbr_layers=nbr_layers)
    test_policy = GreedyQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)

    dqn_agent = DQNAgent(model=model,
                         nb_actions=NBR_TICHU_ACTIONS,
                         memory=memory,
                         nb_steps_warmup=100,
                         target_model_update=1e-2,
                         test_policy=test_policy,
                         processor=processor,
                         enable_dueling_network=enable_dueling_network,
                         enable_double_dqn=enable_double_dqn)
    dqn_agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn_agent
Example No. 11
def build_model(env, num_actions):
    input = Input(shape=(1, env.observation_space.shape[0]))
    x = Flatten()(input)
    x = Dense(128, activation='relu')(x)  #128
    x = Dense(64, activation='relu')(x)  #64
    x = Dense(32, activation='relu')(x)  #32
    output = Dense(num_actions, activation='linear')(x)
    model = Model(inputs=input, outputs=output)
    print(model.summary())

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=10000)
    # policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=num_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    return dqn
Example No. 12
class DeepAgent:
    """
    This algorithm is trying to use a DQN agent that learns himself just given a gym.
    After quite some trouble with various error messages, this now at least runs and trains.
    It does not yet achieve good results.

    Best result: ???
    """
    def __init__(self, shape, action_count: int):
        super().__init__()

        inp = Input(shape=shape)
        flat = Flatten()(inp)

        # Activation: relu, sigmoid, ...
        hidden1 = Dense(256, activation='relu')(flat)
        hidden2 = Dense(64, activation='relu')(hidden1)
        hidden3 = Dense(16, activation='relu')(hidden2)
        output = Dense(action_count, activation='softmax')(hidden3)

        self.model = Model(inputs=inp, outputs=output)
        print(self.model.summary())

        self.memory = SequentialMemory(limit=50000,
                                       window_length=WINDOW_LENGTH)
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=50,
                            target_model_update=1e-2,
                            policy=self.policy)

        Adam._name = "fix_bug"  # https://github.com/keras-rl/keras-rl/issues/345
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])

    def build_callbacks(self, env_name):
        callbacks = []

        checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
        callbacks += [
            ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
        ]

        log_filename = 'dqn_{}_log.json'.format(env_name)
        callbacks += [FileLogger(log_filename, interval=100)]
        return callbacks
Example No. 13
class QLearningAgent(Agent):
    def __init__(self, state_dim, action_space, epsilon, lr):
        self._model = self._get_model(state_dim, action_space)
        # DQNAgent also needs nb_actions and a replay memory to run; assume a
        # gym-style Discrete action space here.
        memory = SequentialMemory(limit=50000, window_length=1)
        self.agent = DQNAgent(model=self._model,
                              nb_actions=action_space.n,
                              memory=memory,
                              policy=EpsGreedyQPolicy(eps=epsilon),
                              test_policy=EpsGreedyQPolicy(eps=0.01))

        self.agent.compile(Adam(lr))

    def model_summary(self):
        print(self._model.summary())
Example No. 14
def train(learn_rate, model_update_interval, steps):
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50000,
                   target_model_update=model_update_interval,
                   policy=policy,
                   gamma=.99,
                   train_interval=4)
    dqn.compile(Adam(lr=learn_rate), metrics=['mae'])
    dqn.fit(env, nb_steps=steps, verbose=2, visualize=VISUALIZE)
    dqn.save_weights(SAVEFILE_FOLDER + "/dqn_pong_params.h5f", overwrite=True)
Example No. 15
class DqnAgent(Agent):
    def __init__(self,
                 env: gym.Env,
                 memory=SequentialMemory(limit=50000, window_length=1),
                 logger=Logger(),
                 boxes_resolution=10,
                 nb_steps_warmup=20,
                 hidden_layers=[16, 16, 16],
                 policy=BoltzmannQPolicy(),
                 target_model_update=1e-2,
                 optimizer=Adam(lr=1e-3)):

        self.env = env

        if isinstance(boxes_resolution, int):
            boxes_resolution = (boxes_resolution, ) * len(
                env.action_space.shape)

        self.boxes_resolution = boxes_resolution
        self.nb_actions = np.zeros(boxes_resolution).size

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) +
                          env.observation_space.shape))  # TODO check this
        for l in hidden_layers:
            model.add(Dense(l, activation='relu'))
        model.add(Dense(self.nb_actions,
                        activation='linear'))  # TODO move this to util file?

        self.model = model
        print("dqn model summary :{0}".format(model.summary()))

        self.dqn = DQNAgent(model=model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=target_model_update,
                            policy=policy,
                            processor=DqnProcessor(self.boxes_resolution,
                                                   env.action_space.low,
                                                   env.action_space.high))
        self.dqn.compile(optimizer=optimizer, metrics=['mae'])
        super().__init__(env, logger)

    def act(self, state, explore):
        action = self.dqn.processor.process_action(self.dqn.forward(state))
        return action

    def train(self, nb_episodes=1000, verbose=2, visualize=True):
        self.dqn.fit(env=self.env,
                     nb_steps=nb_episodes,
                     verbose=verbose,
                     visualize=visualize)
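
# DqnProcessor is referenced above but not included in the snippet. A hypothetical
# reconstruction of what it could look like, assuming process_action maps the flat
# discrete index back onto an evenly spaced grid over [action_space.low, action_space.high]:
import numpy as np
from rl.core import Processor

class DqnProcessor(Processor):
    """Hypothetical sketch: turn a flat discrete action index into a continuous action."""

    def __init__(self, boxes_resolution, low, high):
        self.boxes_resolution = boxes_resolution
        # one evenly spaced grid per continuous action dimension
        self.grids = [np.linspace(lo, hi, n)
                      for lo, hi, n in zip(low, high, boxes_resolution)]

    def process_action(self, action):
        # unravel the flat index into one bin index per dimension, then look up
        # the corresponding continuous value on each grid
        idx = np.unravel_index(action, self.boxes_resolution)
        return np.array([grid[i] for grid, i in zip(self.grids, idx)])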
Example No. 16
def setupDQN(cfg, nb_actions, processor):
    image_in = Input(shape=cfg.input_shape, name='main_input')
    input_perm = Permute((2, 3, 1), input_shape=cfg.input_shape)(image_in)
    conv1 = Conv2D(32, (8, 8), activation="relu", strides=(4, 4),
                   name='conv1')(input_perm)
    conv2 = Conv2D(64, (4, 4), activation="relu", strides=(2, 2),
                   name='conv2')(conv1)
    conv3 = Conv2D(64, (3, 3), activation="relu", strides=(1, 1),
                   name='conv3')(conv2)
    conv_out = Flatten(name='flat_feat')(conv3)
    dense_out = Dense(512, activation='relu')(conv_out)
    q_out = Dense(nb_actions, activation='linear')(dense_out)
    model = Model(inputs=[image_in], outputs=[q_out])
    print(model.summary())
    # hstate_size = int(np.prod(conv3.shape[1:]))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=cfg.memory_limit,
                              window_length=cfg.WINDOW_LENGTH)

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
    # so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=cfg.nb_steps_annealed_policy)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=cfg.nb_steps_warmup_dqn_agent,
                   gamma=.99,
                   target_model_update=cfg.target_model_update_dqn_agent,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    return dqn
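
# A hedged usage sketch for setupDQN above, assuming cfg is a simple namespace
# carrying the fields referenced in the function and that nb_actions and an
# Atari-style processor are created elsewhere (all values here are illustrative):
from types import SimpleNamespace

cfg = SimpleNamespace(
    input_shape=(4, 84, 84),            # WINDOW_LENGTH stacked 84x84 frames
    WINDOW_LENGTH=4,
    memory_limit=1000000,
    nb_steps_annealed_policy=1000000,
    nb_steps_warmup_dqn_agent=50000,
    target_model_update_dqn_agent=10000,
)
dqn = setupDQN(cfg, nb_actions=nb_actions, processor=processor)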
Example No. 17
class DeepAgentConvolution:
    """
    This algorithm is trying to use a DQN agent that learns himself just given a gym.
    At the moment, it cannot successfully work with convolution:
    Error when checking input: expected input_1 to have 4 dimensions, but got array with shape (1, 1, 20, 10, 3)

    Best result: ???
    """

    def __init__(self, shape, action_count: int):
        super().__init__()

        inp = Input(shape=shape)

        # Convolution part (image recognition / feature extraction)
        conv = Conv2D(16, kernel_size=2, padding="same")(inp)
        conv = Conv2D(8, kernel_size=2)(conv)

        # Classification (decision making)
        flat = Flatten()(conv)
        # Activation: relu, sigmoid, ...
        hidden = Dense(256, activation='relu')(flat)
        hidden = Dense(64, activation='relu')(hidden)
        hidden = Dense(16, activation='relu')(hidden)
        output = Dense(action_count, activation='softmax')(hidden)

        self.model = Model(inputs=inp, outputs=output)
        print(self.model.summary())

        self.memory = SequentialMemory(limit=50000, window_length=WINDOW_LENGTH)
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                                           nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model, nb_actions=action_count, memory=self.memory,  nb_steps_warmup=20,
                            target_model_update=1e-2, policy=self.policy)

        Adam._name = "fix_bug"  # https://github.com/keras-rl/keras-rl/issues/345
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])

    def build_callbacks(self, env_name):
        callbacks = []

        checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
        callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)]

        log_filename = 'dqn_{}_log.json'.format(env_name)
        callbacks += [FileLogger(log_filename, interval=100)]
        return callbacks
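
# The shape error quoted in the DeepAgentConvolution docstring comes from keras-rl
# prepending the window dimension to every state batch. A minimal sketch of a
# Processor that drops that extra axis, assuming window_length=1 (the same
# np.squeeze idea as the CustomProcessor classes in later examples):
import numpy as np
from rl.core import Processor

class SqueezeWindowProcessor(Processor):
    """Drop the window axis so state batches reach Conv2D as (batch, H, W, C)."""

    def process_state_batch(self, batch):
        # (batch, 1, H, W, C) -> (batch, H, W, C)
        return np.squeeze(batch, axis=1)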
Example No. 18
    def build_agent(self, mem_file=None, w_file=None):
        #Create a dummy env to get size of input/output.
        #Makes it simpler if we ever choose to update env shapes.
        env = TradingEnv([], "", [])
        np.random.seed(314)
        env.seed(314)

        nb_actions = env.action_space.n
        obs_dim = env.observation_space.shape[0]
        model = Sequential()
        model.add(
            LSTM(5, input_shape=(7, 4),
                 return_sequences=True))  # 4 features + 1 bias term. 5 neurons
        model.add(Activation('tanh'))
        model.add(LSTM(4))
        model.add(Activation('tanh'))
        model.add(Dropout(0.2))
        model.add(Dense(4))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))  # best activation for the Boltzmann policy

        #policy = EpsGreedyQPolicy(eps=EPS_VAL) #Off policy
        policy = BoltzmannQPolicy()  #Off-policy
        test_policy = MaxBoltzmannQPolicy()  #On-policy
        memory = None
        if mem_file is None:
            memory = SequentialMemory(
                limit=50000,
                window_length=7)  ## returns observations of len (7,)
        else:
            (memory, memory.actions, memory.rewards, memory.terminals,
             memory.observations) = pickle.load(open(mem_file, "rb"))

        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       gamma=GAMMA_VAL,
                       nb_steps_warmup=100,
                       policy=policy,
                       test_policy=test_policy)
        dqn.compile("adam", metrics=['mse'])

        if w_file is not None:
            model.load_weights(w_file)

        return dqn, env, memory
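
    # A hedged counterpart to the mem_file branch above: persist the replay memory
    # in the same 5-tuple layout the loader unpacks (the memory object first, then
    # its ring buffers). The method name and file path are illustrative; pickle is
    # already used by build_agent above.
    def save_memory(self, memory, mem_file):
        with open(mem_file, "wb") as handle:
            pickle.dump((memory, memory.actions, memory.rewards,
                         memory.terminals, memory.observations), handle)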
Example No. 19
class KerasDQNAgent(Agent):
    """ Wrapper on Keras DQN agent """

    _internal_agent: DQNAgent

    def __init__(self) -> None:
        super().__init__()

    def set_num_states(self, state_dimension: int, num_actions: int) -> None:
        model = self._build_model(state_dimension, num_actions)
        memory = SequentialMemory(limit=10000, window_length=1)
        self._internal_agent = DQNAgent(model=model,
                                        nb_actions=num_actions,
                                        memory=memory,
                                        nb_steps_warmup=1000,
                                        target_model_update=1000,
                                        gamma=0.99,
                                        delta_clip=1)

        self._internal_agent.compile(Adam(lr=0.0001), metrics=['mae'])

    def act(self, state: StateT) -> int:
        return self._internal_agent.forward(state)

    def update(self, state: StateT, action: int, reward: float,
               new_state: StateT) -> None:
        # keras-rl's backward() expects the last reward and a terminal flag;
        # update() receives no done flag, so assume a non-terminal step here.
        self._internal_agent.backward(reward, terminal=False)

    def _build_model(self, state_dimension: int,
                     num_actions: int) -> Sequential:
        model = Sequential()
        model.add(
            Dense(units=64,
                  input_shape=(1, state_dimension),
                  activation='relu'))
        model.add(Dense(units=64, activation='relu'))
        model.add(Flatten())
        model.add(Dense(num_actions, activation='softmax'))
        return model
Example No. 20
def test_duel_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=False, enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example No. 21
def create(env):
  nb_actions = env.action_space.n

  # Next, we build a very simple model.
  model = Sequential()
  model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
  model.add(Dense(config.current.agent_vfn_complexity))
  model.add(Activation('relu'))
  model.add(Dense(config.current.agent_vfn_complexity))
  model.add(Activation('relu'))
  model.add(Dense(config.current.agent_vfn_complexity))
  model.add(Activation('relu'))
  model.add(Dense(nb_actions))
  model.add(Activation('linear'))
  #print(model.summary())

  # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
  # even the metrics!
  memory = SequentialMemory(limit=50000, window_length=1)
  policy = BoltzmannQPolicy()
  dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                 target_model_update=1e-2, policy=policy)
  dqn.compile(Adam(lr=1e-3), metrics=['mae'])
  return dqn
Example No. 22
model.add(Dropout(0.5))
model.add(Dense(1000, activation="relu"))
model.summary()

# %%
# Finally, we configure and compile our agent. You can use every built-in tensorflow.keras optimizer and
# even the metrics!
policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=5000, window_length=1)
agent = DQNAgent(model=model,
                 memory=memory,
                 policy=policy,
                 nb_actions=nb_actions,
                 nb_steps_warmup=500,
                 target_model_update=1e-2)
agent.compile(Adam(lr=1e-3), metrics=['mse'])

# %%
# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env,
          nb_steps=50000,
          visualize=False,
          verbose=1,
          nb_max_episode_steps=1000)

# %%
# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
Example No. 23
# Policies and callbacks
class EpsDecayCallback(Callback):
    def __init__(self, eps_policy, decay_rate=0.95):
        self.eps_policy = eps_policy
        self.decay_rate = decay_rate

    def on_episode_begin(self, episode, logs={}):
        self.eps_policy.eps *= self.decay_rate


policy = EpsGreedyQPolicy(eps=1.0)
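# The EpsDecayCallback used in agent.fit below multiplies eps by 0.975 at the start
# of every episode, so from eps=1.0 the exploration rate falls below 0.1 after
# roughly 91 episodes (0.975 ** 91 ≈ 0.0997); most of the 300000 training steps
# therefore run almost greedily.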
memory = SequentialMemory(limit=500000, window_length=1)

agent = DQNAgent(model=Network(),
                 policy=policy,
                 memory=memory,
                 enable_double_dqn=False,
                 nb_actions=env.action_space.n,
                 nb_steps_warmup=10,
                 target_model_update=1e-2)

agent.compile(optimizer=Adam(lr=0.002, decay=2.25e-05), metrics=['mse'])

agent.fit(env=env,
          callbacks=[EpsDecayCallback(eps_policy=policy, decay_rate=0.975)],
          verbose=2,
          nb_steps=300000)
agent.save_weights('model.hdf5')

agent.test(env=env, nb_episodes=100, visualize=True)
Example No. 24
def build_model(state_size, num_actions):
    # inputs = layers.Input(shape=(84, 84, 4,))
    inputs = layers.Input(shape=(4, ) + state_size)
    layer1 = layers.Conv2D(32, 8, strides=4, activation="relu")(inputs)
    layer2 = layers.Conv2D(64, 4, strides=2, activation="relu")(layer1)
    layer3 = layers.Conv2D(64, 3, strides=1, activation="relu")(layer2)
    layer4 = layers.Flatten()(layer3)
    layer5 = layers.Dense(512, activation="relu")(layer4)
    action = layers.Dense(num_actions, activation="linear")(layer5)
    return k.Model(inputs=inputs, outputs=action)


model = build_model(state_size, num_actions)
model.summary()

policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                              attr='eps',
                              value_max=1.,
                              value_min=.1,
                              value_test=0.1,
                              nb_steps=1000000)
memory = SequentialMemory(limit=1000000, window_length=4)
agent = DQNAgent(model=model,
                 policy=policy,
                 nb_actions=num_actions,
                 memory=memory,
                 nb_steps_warmup=50000)
agent.compile(k.optimizers.Adam(learning_rate=.00025), metrics=['mae'])

agent.fit(env, nb_steps=100000, log_interval=1000, visualize=False, verbose=2)
agent.save_weights('policy.h5', overwrite=True)
Example No. 25
def build_arg_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--reset-weights',
                        action='store_true',
                        help='reset weights on current model')
    parser.add_argument('--train',
                        action='store_true',
                        help='train with existing model')
    parser.add_argument('--visualize',
                        action='store_true',
                        help='visualize model')

    return parser.parse_args()


if __name__ == '__main__':
    args = build_arg_parser()
    env = gym.make('MountainCar-v0')
    model = build_model(env, args.reset_weights)

    dqn = DQNAgent(model=model,
                   nb_actions=env.action_space.n,
                   memory=SequentialMemory(limit=50000, window_length=1),
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=BoltzmannQPolicy())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    if args.train:
        dqn.fit(env, nb_steps=150000, visualize=False, verbose=2)
        dqn.save_weights('model.mdl', overwrite=True)

    if args.visualize:
        dqn.test(env, nb_episodes=5, visualize=True)

Example No. 26
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=1000000, window_length=10)
processor = AtariProcessor()

# Select a policy. We use eps-greedy action selection, which means that a random action is selected
# with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
# the agent initially explores the environment (high eps) and then gradually sticks to what it knows
# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
# so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                              nb_steps=1000000)
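
# For reference, a small sketch of the linear schedule this policy follows (it
# mirrors LinearAnnealedPolicy.get_current_value with the settings above; the
# helper name annealed_eps is illustrative):
def annealed_eps(step, value_max=1.0, value_min=0.1, nb_steps=1000000):
    # eps falls linearly from value_max to value_min over nb_steps, then stays there
    return max(value_min, value_max - (value_max - value_min) * step / nb_steps)

# annealed_eps(0)       -> 1.0
# annealed_eps(500000)  -> 0.55
# annealed_eps(2000000) -> 0.1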

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

dqn.fit(env, nb_steps=1750000, log_interval=10000, nb_max_episode_steps=50)



Example No. 27
class Player:
    """Mandatory class with the player methods"""
    def __init__(self, name='DQN', load_model=None, env=None):
        """Initiaization of an agent"""
        self.equity_alive = 0
        self.actions = []
        self.last_action_in_stage = ''
        self.temp_stack = []
        self.name = name
        self.autoplay = True

        self.dqn = None
        self.model = None
        self.env = env

        # if load_model:
        #     self.model = self.load_model(load_model)

    def initiate_agent(self,
                       env,
                       model_name=None,
                       load_memory=None,
                       load_model=None,
                       load_optimizer=None,
                       load_dqn=None,
                       batch_size=500,
                       learn_rate=1e-3):
        """initiate a deep Q agent"""
        # tf.compat.v1.disable_eager_execution()

        self.env = env

        nb_actions = self.env.action_space.n

        if load_model:
            pass
        #     self.model, trainable_model, target_model = self.load_model(load_model)
        #     print(self.model.history)

        else:
            pass

        self.model = Sequential()
        self.model.add(
            Dense(512, activation='relu', input_shape=env.observation_space))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(nb_actions, activation='linear'))

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        if load_memory:
            # print(load_memory)
            # exit()
            try:
                memory = self.load_memory(load_memory)

            except Exception:
                # fall back to a fresh replay memory if loading fails
                memory = SequentialMemory(limit=memory_limit,
                                          window_length=window_length)

        else:
            memory = SequentialMemory(limit=memory_limit,
                                      window_length=window_length)

        self.batch_size = batch_size
        self.policy = CustomEpsGreedyQPolicy()
        self.policy.env = self.env

        self.test_policy = CustomEpsGreedyQPolicy()
        self.test_policy.eps = 0.05
        self.test_policy.env = self.env

        self.reduce_lr = ReduceLROnPlateau(monitor='loss',
                                           factor=0.2,
                                           patience=5,
                                           min_lr=1e-4)

        nb_actions = env.action_space.n
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2,
                            policy=self.policy,
                            test_policy=self.test_policy,
                            processor=CustomProcessor(),
                            batch_size=self.batch_size,
                            train_interval=train_interval,
                            enable_double_dqn=enable_double_dqn)

        # timestr = time.strftime("%Y%m%d-%H%M%S") + "_" + str(model_name)
        # self.tensorboard = MyTensorBoard(log_dir='./Graph/{}'.format(timestr), player=self)
        self.dqn.compile(Adam(lr=learn_rate), metrics=['mae'])

        if load_model:
            self.load_model(load_model)
            # self.dqn.trainable_model = trainable_model
            # self.dqn.target_model = target_model

        # self.reduce_lr = ReduceLROnPlateau

        if load_optimizer:
            self.load_optimizer_weights(load_optimizer)

    def start_step_policy(self, observation):
        """Custom policy for random decisions for warm up."""
        log.info("Random action")
        _ = observation
        legal_moves_limit = [
            move.value for move in self.env.info['legal_moves']
        ]
        action = np.random.choice(legal_moves_limit)

        return action

    def train(self, env_name, batch_size=500, policy_epsilon=0.2):
        """Train a model"""
        # initiate training loop

        train_vars = {
            'batch_size': batch_size,
            'policy_epsilon': policy_epsilon
        }

        timestr = time.strftime("%Y%m%d-%H%M%S") + "_" + str(env_name)
        tensorboard = TensorBoard(log_dir='./Graph/{}'.format(timestr),
                                  histogram_freq=0,
                                  write_graph=True,
                                  write_images=False)
        self.dqn.fit(self.env,
                     nb_max_start_steps=nb_max_start_steps,
                     nb_steps=nb_steps,
                     visualize=False,
                     verbose=2,
                     start_step_policy=self.start_step_policy,
                     callbacks=[tensorboard])

        self.policy.eps = policy_epsilon

        self.dqn.save_weights("dqn_{}_model.h5".format(env_name),
                              overwrite=True)

        # Save memory
        pickle.dump(self.dqn.memory,
                    open("train_memory_{}.p".format(env_name), "wb"))

        # Save optimizer weights
        symbolic_weights = getattr(self.dqn.trainable_model.optimizer,
                                   'weights')
        optim_weight_values = K.batch_get_value(symbolic_weights)
        pickle.dump(optim_weight_values,
                    open('optimizer_weights_{}.p'.format(env_name), "wb"))

        # # Dump dqn
        # pickle.dump(self.dqn, open( "dqn_{}.p".format(env_name), "wb" ))

        # Finally, evaluate our algorithm for 5 episodes.
        self.dqn.test(self.env, nb_episodes=5, visualize=False)

    def load_model(self, env_name):
        """Load a model"""

        # Load the architecture
        # with open('dqn_{}_json.json'.format(env_name), 'r') as architecture_json:
        #     dqn_json = json.load(architecture_json)

        self.dqn.load_weights("dqn_{}_model.h5".format(env_name))
        # model = keras.models.load_model("dqn_{}_model.h5".format(env_name))
        # trainable_model = keras.models.load_model("dqn_{}_trainable_model.h5".format(env_name))
        # target_model = keras.models.load_model("dqn_{}_target_model.h5".format(env_name), overwrite=True)

        # return model, trainable_model, target_model

    def load_memory(self, model_name):
        memory = pickle.load(open('train_memory_{}.p'.format(model_name),
                                  "rb"))
        return memory

    def load_optimizer_weights(self, env_name):
        optim_weights = pickle.load(
            open('optimizer_weights_{}.p'.format(env_name), "rb"))
        self.dqn.trainable_model.optimizer.set_weights(optim_weights)

    def play(self, nb_episodes=5, render=False):
        """Let the agent play"""
        memory = SequentialMemory(limit=memory_limit,
                                  window_length=window_length)
        policy = CustomEpsGreedyQPolicy()

        class CustomProcessor(Processor):  # pylint: disable=redefined-outer-name
            """The agent and the environment"""
            def process_state_batch(self, batch):
                """
                Given a state batch, I want to remove the second dimension, because it's
                useless and prevents me from feeding the tensor into my CNN
                """
                return np.squeeze(batch, axis=1)

            def process_info(self, info):
                processed_info = info['player_data']
                if 'stack' in processed_info:
                    processed_info = {'x': 1}
                return processed_info

        nb_actions = self.env.action_space.n

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2,
                            policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size,
                            train_interval=train_interval,
                            enable_double_dqn=enable_double_dqn)
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])  # pylint: disable=no-member

        self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)

    def action(self, action_space, observation, info):  # pylint: disable=no-self-use
        """Mandatory method that calculates the move based on the observation array and the action space."""
        _ = observation  # not using the observation for random decision
        _ = info

        this_player_action_space = {
            Action.FOLD, Action.CHECK, Action.CALL, Action.RAISE_POT,
            Action.RAISE_HALF_POT, Action.RAISE_2POT
        }
        _ = this_player_action_space.intersection(set(action_space))

        action = None
        return action
Example No. 28
model = Sequential()
model.add(Flatten(input_shape=(1, ) + state_size))
model.add(Dense(params.SIZE_HIDDEN_LAYER, activation='relu'))
model.add(Dense(params.SIZE_HIDDEN_LAYER, activation='relu'))
model.add(Dense(params.SIZE_HIDDEN_LAYER, activation='relu'))
model.add(Dense(action_size))
model.add(Activation(params.ACTIVATION_OUTPUT))

## Set up the agent for training ##
memory = SequentialMemory(limit=params.REPLAY_BUFFER_SIZE, window_length=1)
agent = DQNAgent(model=model,
                 policy=BoltzmannQPolicy(),
                 memory=memory,
                 nb_actions=action_size)

agent.compile(Adam(lr=params.LR_MODEL), metrics=[params.METRICS])

## Train ##
if args.train:
    check_overwrite('DQN', params.ENV, args.model)
    history = agent.fit(env,
                        nb_steps=params.N_STEPS_TRAIN,
                        visualize=args.visualize,
                        verbose=1,
                        nb_max_episode_steps=env._max_episode_steps,
                        log_interval=params.LOG_INTERVAL)
    agent.save_weights(WEIGHTS_FILES, overwrite=True)
    save_plot_reward('DQN', params.ENV, history, args.model, params.PARAMS)

## Test ##
if not args.train:
Example No. 29
    def deep_q_learning():
        """Implementation of kreras-rl deep q learing."""
        env_name = 'neuron_poker-v0'
        stack = 100
        env = gym.make(env_name, num_of_players=5, initial_stacks=stack)

        np.random.seed(123)
        env.seed(123)

        env.add_player(
            EquityPlayer(name='equity/50/50',
                         min_call_equity=.5,
                         min_bet_equity=-.5))
        env.add_player(
            EquityPlayer(name='equity/50/80',
                         min_call_equity=.8,
                         min_bet_equity=-.8))
        env.add_player(
            EquityPlayer(name='equity/70/70',
                         min_call_equity=.7,
                         min_bet_equity=-.7))
        env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=-.3))
        env.add_player(RandomPlayer())
        env.add_player(PlayerShell(
            name='keras-rl',
            stack_size=stack))  # shell is used for callback to keras rl

        env.reset()

        nb_actions = len(env.action_space)

        # Next, we build a very simple model.
        from keras import Sequential
        from keras.optimizers import Adam
        from keras.layers import Dense, Dropout
        from rl.memory import SequentialMemory
        from rl.agents import DQNAgent
        from rl.policy import BoltzmannQPolicy

        model = Sequential()
        model.add(
            Dense(64, activation='relu', input_shape=env.observation_space))
        model.add(Dropout(0.2))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(nb_actions, activation='linear'))
        print(model.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=10,
                       target_model_update=1e-2,
                       policy=policy)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

        # After training is done, we save the final weights.
        dqn.save_weights('dqn_{}_weights.h5f'.format(env_name), overwrite=True)

        # Finally, evaluate our algorithm for 5 episodes.
        dqn.test(env, nb_episodes=5, visualize=True)
Example No. 30
    # print(model.summary())
    print(model.output._keras_shape)

    return model


if __name__ == '__main__':
    env = myTGym(episode_type='0', percent_goal_profit=2, percent_stop_loss=5)
    # s1, s2, s3 = env.reset()
    # state = aggregate_state(s1, s2, s3)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    model  = build_network()

    dqn = DQNAgent(model=model, nb_actions=2, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)

    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format('trading'), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example No. 31
class Player:
    """Mandatory class with the player methods"""
    def __init__(self, name='DQN', load_model=None, env=None):
        """Initiaization of an agent"""
        self.equity_alive = 0
        self.actions = []
        self.last_action_in_stage = ''
        self.temp_stack = []
        self.name = name
        self.autoplay = True

        self.dqn = None
        self.model = None
        self.env = env

        if load_model:
            self.load(load_model)

    def initiate_agent(self, env):
        """initiate a deep Q agent"""
        tf.compat.v1.disable_eager_execution()

        self.env = env

        nb_actions = self.env.action_space.n

        self.model = Sequential()
        self.model.add(
            Dense(512, activation='relu', input_shape=env.observation_space))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(nb_actions, activation='linear'))

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=memory_limit,
                                  window_length=window_length)
        policy = TrumpPolicy()

        nb_actions = env.action_space.n

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2,
                            policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size,
                            train_interval=train_interval,
                            enable_double_dqn=enable_double_dqn)
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    def start_step_policy(self, observation):
        """Custom policy for random decisions for warm up."""
        log.info("Random action")
        _ = observation
        action = self.env.action_space.sample()
        return action

    def train(self, env_name):
        """Train a model"""
        # initiate training loop
        timestr = time.strftime("%Y%m%d-%H%M%S") + "_" + str(env_name)
        tensorboard = TensorBoard(log_dir='./Graph/{}'.format(timestr),
                                  histogram_freq=0,
                                  write_graph=True,
                                  write_images=False)

        self.dqn.fit(self.env,
                     nb_max_start_steps=nb_max_start_steps,
                     nb_steps=nb_steps,
                     visualize=False,
                     verbose=2,
                     start_step_policy=self.start_step_policy,
                     callbacks=[tensorboard])

        # Save the architecture
        dqn_json = self.model.to_json()
        Path("dqn_results").mkdir(parents=True, exist_ok=True)
        with open("dqn_results/dqn_{}_json.json".format(env_name),
                  "w") as json_file:
            json.dump(dqn_json, json_file)

        # After training is done, we save the final weights.
        self.dqn.save_weights('dqn_results/dqn_{}_weights.h5'.format(env_name),
                              overwrite=True)

        # Finally, evaluate our algorithm for 5 episodes.
        self.dqn.test(self.env, nb_episodes=5, visualize=False)

    def load(self, env_name):
        """Load a model"""

        # Load the architecture
        with open('dqn_results/dqn_{}_json.json'.format(env_name),
                  'r') as architecture_json:
            dqn_json = json.load(architecture_json)

        self.model = model_from_json(dqn_json)
        self.model.load_weights(
            'dqn_results/dqn_{}_weights.h5'.format(env_name))

    def play(self, nb_episodes=5, render=False):
        """Let the agent play"""
        memory = SequentialMemory(limit=memory_limit,
                                  window_length=window_length)
        policy = TrumpPolicy()

        class CustomProcessor(Processor):  # pylint: disable=redefined-outer-name
            """The agent and the environment"""
            def process_state_batch(self, batch):
                """
                Given a state batch, I want to remove the second dimension, because it's
                useless and prevents me from feeding the tensor into my CNN
                """
                return np.squeeze(batch, axis=1)

            def process_info(self, info):
                processed_info = info['player_data']
                if 'stack' in processed_info:
                    processed_info = {'x': 1}
                return processed_info

        nb_actions = self.env.action_space.n

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2,
                            policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size,
                            train_interval=train_interval,
                            enable_double_dqn=enable_double_dqn)
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])  # pylint: disable=no-member

        self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)

    def action(self, action_space, observation, info):  # pylint: disable=no-self-use
        """Mandatory method that calculates the move based on the observation array and the action space."""
        _ = observation  # not using the observation for random decision
        _ = info

        this_player_action_space = {
            Action.FOLD, Action.CHECK, Action.CALL, Action.RAISE_POT,
            Action.RAISE_HALF_POT, Action.RAISE_2POT
        }
        _ = this_player_action_space.intersection(set(action_space))

        action = None
        return action
Example No. 32
class DeepAgent:
    """
    This algorithm is trying to use a DQN agent that learns himself just given a gym.
    After quite some trouble with various error messages, this now at least runs and trains.
    It does not yet achieve good results.

    Best result: ???
    """
    def __init__(self, shape, initial_randomness: float, action_count: int):
        super().__init__()

        model = Sequential()
        model.add(Input(shape=shape))
        model.add(Conv2D(8, (3, 3), activation='relu', input_shape=shape))
        model.add(Conv2D(16, (3, 3), activation='relu', input_shape=shape))
        model.add(Conv2D(32, (3, 3), activation='relu', input_shape=shape))
        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
        model.add(Dense(512, activation='relu'))
        model.add(Dense(action_count, activation='softmax'))

        print(model.summary())

        self.model = model

        self.callbacks = self.build_callbacks("msnake")

        self.processor = RemoveDimensionProcessor()

        self.memory = SequentialMemory(limit=50000, window_length=1)

        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=10,
                            target_model_update=1e-2,
                            policy=self.policy,
                            batch_size=1,
                            processor=self.processor)

        # https://github.com/keras-rl/keras-rl/issues/345
        Adam._name = "fix_bug"

        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-3), metrics=['mse'])

        self.initial_randomness = initial_randomness

    def build_callbacks(self, env_name):
        callbacks = []

        checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
        callbacks += [
            ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
        ]

        log_filename = 'dqn_{}_log.json'.format(env_name)
        callbacks += [FileLogger(log_filename, interval=100)]
        return callbacks