Example #1
def make_dqn_rl_agent(processor: Processor_56x5,
                      nbr_layers=2,
                      enable_dueling_network: bool = False,
                      enable_double_dqn: bool = True):
    """
    
    :param processor: 
    :param nbr_layers: 
    :param enable_dueling_network:
    :param enable_double_dqn:
    :return: 
    """

    model = processor.create_model(nbr_layers=nbr_layers)
    test_policy = GreedyQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)

    dqn_agent = DQNAgent(model=model,
                         nb_actions=NBR_TICHU_ACTIONS,
                         memory=memory,
                         nb_steps_warmup=100,
                         target_model_update=1e-2,
                         test_policy=test_policy,
                         processor=processor,
                         enable_dueling_network=enable_dueling_network,
                         enable_double_dqn=enable_double_dqn)
    dqn_agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn_agent
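A minimal usage sketch for the factory above, not part of the original example: it assumes a Tichu gym environment named env, that Processor_56x5 takes no constructor arguments, and arbitrary training and evaluation budgets.

# Hypothetical usage; env, the processor construction and the budgets are assumptions.
processor = Processor_56x5()
agent = make_dqn_rl_agent(processor, nbr_layers=2, enable_double_dqn=True)
agent.fit(env, nb_steps=100000, visualize=False, verbose=1)   # trains with the default EpsGreedyQPolicy
agent.test(env, nb_episodes=10, visualize=False)              # evaluates with the GreedyQPolicy set above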
Example #2
    def __init__(self, env: gym.Env, logger=Logger()):
        nb_actions = env.action_space.shape[0]

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))

        policy = BoltzmannQPolicy()
        memory = SequentialMemory(limit=100000, window_length=1)
        agent = DQNAgent(model=model,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         policy=policy)
        agent.compile(Adam(lr=1e-3), metrics=['mae'])
        self.agent = agent
        self.env = env
        super().__init__(env, logger)
Example #3
def build_model(env, num_actions):
    input = Input(shape=(1, env.observation_space.shape[0]))
    x = Flatten()(input)
    x = Dense(128, activation='relu')(x)  #128
    x = Dense(64, activation='relu')(x)  #64
    x = Dense(32, activation='relu')(x)  #32
    output = Dense(num_actions, activation='linear')(x)
    model = Model(inputs=input, outputs=output)
    print(model.summary())

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=10000)
    # policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=num_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    return dqn
def build_agent(model, actions):
    '''Build Agent'''
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000
    )
    memory = SequentialMemory(
        limit=1000000,
        window_length=3
    )
    DQN_agent = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000
    )
    DQN_agent.compile(optimizer=Adam(lr=0.00025), metrics=['mae', 'accuracy'])
    return DQN_agent
def init_dqn(env, nb_actions):
    """ Initialize the DQN agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: DQN Agent
    """
    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # compile agent
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.model_name = "DQN"
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
    def __init__(self, shape, action_count: int):
        super().__init__()

        inp = Input(shape=shape)
        flat = Flatten()(inp)

        # Activation: relu, sigmoid, ...
        hidden1 = Dense(256, activation='relu')(flat)
        hidden2 = Dense(64, activation='relu')(hidden1)
        hidden3 = Dense(16, activation='relu')(hidden2)
        output = Dense(action_count, activation='softmax')(hidden3)

        self.model = Model(inputs=inp, outputs=output)
        print(self.model.summary())

        self.memory = SequentialMemory(limit=50000,
                                       window_length=WINDOW_LENGTH)
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=50,
                            target_model_update=1e-2,
                            policy=self.policy)

        Adam._name = "fix_bug"  # https://github.com/keras-rl/keras-rl/issues/345
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])
Example #7
    def _build_dqn(nb_actions, nb_states):
        # build network
        model = Sequential()
        model.add(Flatten(input_shape=(1, nb_states)))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions, activation='linear'))

        # build agent
        memory = SequentialMemory(limit=10240, window_length=1)
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=10,
                       enable_dueling_network=True,
                       dueling_type='avg',
                       target_model_update=1e-2,
                       policy=policy)
        dqn.compile(Adam(), metrics=['mae'])

        return dqn
def build_agent(model, nb_actions):
    """
    build an agent
    """
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=MAX_EPSILON,
        value_min=MIN_EPSILON,
        value_test=TEST_EPSILON,
        nb_steps=MAX_STEPS
    )
    memory = SequentialMemory(
        limit=MAX_STEPS,
        window_length=WINDOW_WIDTH
    )
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=nb_actions,
        nb_steps_warmup=WARMUP_STEPS
    )
    dqn.compile(Adam(learning_rate=LEARNING_RATE), metrics=['mae'])
    return dqn
Example #9
    def __init__(self, state_dim, action_space, epsilon, lr):
        self._model = self._get_model(state_dim, action_space)
        self.agent = DQNAgent(self._model,
                              policy=EpsGreedyQPolicy(epsilon),
                              test_policy=EpsGreedyQPolicy(eps=0.01))

        self.agent.compile(Adam(lr))
Example #10
    def initiate_agent(self, env):
        """initiate a deep Q agent"""
        tf.compat.v1.disable_eager_execution()
        self.env = env

        nb_actions = self.env.action_space.n

        self.model = Sequential()
        self.model.add(Dense(512, activation='relu', input_shape=env.observation_space))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(nb_actions, activation='linear'))

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=memory_limit, window_length=window_length)
        policy = TrumpPolicy()

        nb_actions = env.action_space.n

        self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2, policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size, train_interval=train_interval, enable_double_dqn=enable_double_dqn)
        self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])
Example #11
    def play(self, nb_episodes=5, render=False):
        """Let the agent play"""
        memory = SequentialMemory(limit=memory_limit, window_length=window_length)
        policy = TrumpPolicy()

        class CustomProcessor(Processor):
            """The agent and the environment"""

            def process_state_batch(self, batch):
                """
                Given a state batch, I want to remove the second dimension, because it's
                useless and prevents me from feeding the tensor into my CNN
                """
                return np.squeeze(batch, axis=1)

            def process_info(self, info):
                processed_info = info['player_data']
                if 'stack' in processed_info:
                    processed_info = {'x': 1}
                return processed_info

        nb_actions = self.env.action_space.n

        self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2, policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size, train_interval=train_interval, enable_double_dqn=enable_double_dqn)
        self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])  # pylint: disable=no-member

        self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)
class DeepAgent:
    """
    This approach uses a DQN agent that learns on its own, given only a gym environment.
    After quite some trouble with various error messages, it now at least runs and trains,
    but it does not yet achieve good results.

    Best result: ???
    """
    def __init__(self, shape, action_count: int):
        super().__init__()

        inp = Input(shape=shape)
        flat = Flatten()(inp)

        # Activation: relu, sigmoid, ...
        hidden1 = Dense(256, activation='relu')(flat)
        hidden2 = Dense(64, activation='relu')(hidden1)
        hidden3 = Dense(16, activation='relu')(hidden2)
        output = Dense(action_count, activation='softmax')(hidden3)

        self.model = Model(inputs=inp, outputs=output)
        print(self.model.summary())

        self.memory = SequentialMemory(limit=50000,
                                       window_length=WINDOW_LENGTH)
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=50,
                            target_model_update=1e-2,
                            policy=self.policy)

        Adam._name = "fix_bug"  # https://github.com/keras-rl/keras-rl/issues/345
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])

    def build_callbacks(self, env_name):
        callbacks = []

        checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
        callbacks += [
            ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
        ]

        log_filename = 'dqn_{}_log.json'.format(env_name)
        callbacks += [FileLogger(log_filename, interval=100)]
        return callbacks
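A hedged training sketch for the DeepAgent above, not part of the original snippet: the environment env, the step budget and the weights file name are assumptions; WINDOW_LENGTH is the constant the class already relies on.

# Hypothetical wiring; env and the numbers below are assumptions.
agent = DeepAgent(shape=(WINDOW_LENGTH,) + env.observation_space.shape,  # keras-rl prepends the window axis
                  action_count=env.action_space.n)
agent.dqn.fit(env, nb_steps=50000, callbacks=agent.callbacks, visualize=False, verbose=2)
agent.dqn.save_weights('dqn_msnake_weights.h5f', overwrite=True)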
Example #13
class QLearningAgent(Agent):
    def __init__(self, state_dim, action_space, epsilon, lr):
        self._model = self._get_model(state_dim, action_space)
        self.agent = DQNAgent(self._model,
                              policy=EpsGreedyQPolicy(epsilon),
                              test_policy=EpsGreedyQPolicy(eps=0.01))

        self.agent.compile(Adam(lr))

    def model_summary(self):
        print(self._model.summary())
Example #14
class DqnAgent(Agent):
    def __init__(self,
                 env: gym.Env,
                 memory=SequentialMemory(limit=50000, window_length=1),
                 logger=Logger(),
                 boxes_resolution=10,
                 nb_steps_warmup=20,
                 hidden_layers=[16, 16, 16],
                 policy=BoltzmannQPolicy(),
                 target_model_update=1e-2,
                 optimizer=Adam(lr=1e-3)):

        self.env = env

        if isinstance(boxes_resolution, int):
            boxes_resolution = (boxes_resolution, ) * len(
                env.action_space.shape)

        self.boxes_resolution = boxes_resolution
        self.nb_actions = np.zeros(boxes_resolution).size

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) +
                          env.observation_space.shape))  # TODO check this
        for layer_size in hidden_layers:
            model.add(Dense(layer_size, activation='relu'))
        model.add(Dense(self.nb_actions,
                        activation='linear'))  # TODO move this to util file?

        self.model = model
        print("dqn model summary :{0}".format(model.summary()))

        self.dqn = DQNAgent(model=model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=target_model_update,
                            policy=policy,
                            processor=DqnProcessor(self.boxes_resolution,
                                                   env.action_space.low,
                                                   env.action_space.high))
        self.dqn.compile(optimizer=optimizer, metrics=['mae'])
        super().__init__(env, logger)

    def act(self, state, explore):
        action = self.dqn.processor.process_action(self.dqn.forward(state))
        return action

    def train(self, nb_steps=1000, verbose=2, visualize=True):
        # keras-rl's fit() counts environment steps, not episodes
        self.dqn.fit(env=self.env,
                     nb_steps=nb_steps,
                     verbose=verbose,
                     visualize=visualize)
Example #15
    def set_num_states(self, state_dimension: int, num_actions: int) -> None:
        model = self._build_model(state_dimension, num_actions)
        memory = SequentialMemory(limit=10000, window_length=1)
        self._internal_agent = DQNAgent(model=model,
                                        nb_actions=num_actions,
                                        memory=memory,
                                        nb_steps_warmup=1000,
                                        target_model_update=1000,
                                        gamma=0.99,
                                        delta_clip=1)

        self._internal_agent.compile(Adam(lr=0.0001), metrics=['mae'])
Example #16
def setupDQN(cfg, nb_actions, processor):
    image_in = Input(shape=cfg.input_shape, name='main_input')
    input_perm = Permute((2, 3, 1), input_shape=cfg.input_shape)(image_in)
    conv1 = Conv2D(32, (8, 8), activation="relu", strides=(4, 4),
                   name='conv1')(input_perm)
    conv2 = Conv2D(64, (4, 4), activation="relu", strides=(2, 2),
                   name='conv2')(conv1)
    conv3 = Conv2D(64, (3, 3), activation="relu", strides=(1, 1),
                   name='conv3')(conv2)
    conv_out = Flatten(name='flat_feat')(conv3)
    dense_out = Dense(512, activation='relu')(conv_out)
    q_out = Dense(nb_actions, activation='linear')(dense_out)
    model = Model(inputs=[image_in], outputs=[q_out])
    print(model.summary())
    # hstate_size = int(np.prod(conv3.shape[1:]))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=cfg.memory_limit,
                              window_length=cfg.WINDOW_LENGTH)

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
    # so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=cfg.nb_steps_annealed_policy)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=cfg.nb_steps_warmup_dqn_agent,
                   gamma=.99,
                   target_model_update=cfg.target_model_update_dqn_agent,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    return dqn
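A hedged call-site sketch for setupDQN, not in the original: env, the remaining cfg fields, the processor instance, the step counts and the file name are assumptions.

# Hypothetical call site; every name below that is not defined above is an assumption.
dqn = setupDQN(cfg, nb_actions=env.action_space.n, processor=processor)
dqn.fit(env, nb_steps=1750000, log_interval=10000, visualize=False, verbose=2)
dqn.save_weights('dqn_weights.h5f', overwrite=True)
dqn.test(env, nb_episodes=10, visualize=False)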
Example #17
    def initiate_agent(self, env):
        """initiate a deep Q agent"""
        tf.compat.v1.disable_eager_execution()
        self.env = env

        nb_actions = self.env.action_space.n

        self.model = Sequential()
        self.model.add(
            Dense(512, activation='relu', input_shape=env.observation_space))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(nb_actions, activation='linear'))

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=memory_limit,
                                  window_length=window_length)
        policy = TrumpPolicy()

        class CustomProcessor(Processor):
            """The agent and the environment"""
            def process_state_batch(self, batch):
                """
                Given a state batch, I want to remove the second dimension, because it's
                useless and prevents me from feeding the tensor into my CNN
                """
                return np.squeeze(batch, axis=1)

            def process_info(self, info):
                processed_info = info['player_data']
                if 'stack' in processed_info:
                    processed_info = {'x': 1}
                return processed_info

        nb_actions = env.action_space.n

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2,
                            policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size,
                            train_interval=train_interval,
                            enable_double_dqn=enable_double_dqn)
        self.dqn.compile(tf.optimizers.Adam(lr=1e-3), metrics=['mae'])
Example #18
def run():
    env = game_env.MeleeEnv()
    nb_actions = env.action_space.shape[0]
    actor = build_network(env, nb_actions)
    critic, action_input = build_critic(env, nb_actions)
    memory = SequentialMemory(limit=25000, window_length=1)  # window_length is required by keras-rl; 1 is an assumption
    #random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3,  size=nb_actions)
    agent = DQNAgent(
        batch_size=1000,
        nb_actions=nb_actions,
        model=actor,  # processor=Process(), window_length=4, critic_action_input=action_input,
        memory=memory,
        nb_steps_warmup=100
    )  # nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
    #random_process=random_process, gamma=.95, target_model_update=1e-1)#,
    ##delta_range=(-10., 10.))
    agent.compile(RMSprop(lr=.0005), metrics=['mae'])

    agent.fit(env,
              nb_steps=100000,
              visualize=True,
              verbose=1,
              nb_max_start_steps=100,
              start_step_policy=lambda x: np.random.randint(nb_actions))
    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_weights.h5f'.format(
        str(random.randrange(0, 100000))),
                       overwrite=True)
class DeepAgentConvolution:
    """
    This approach uses a DQN agent that learns on its own, given only a gym environment.
    At the moment it cannot successfully work with convolution:
    "Error when checking input: expected input_1 to have 4 dimensions, but got array with shape (1, 1, 20, 10, 3)"
    (keras-rl prepends a window_length axis to every state batch; a processor that squeezes
    that axis is sketched after this class).

    Best result: ???
    """

    def __init__(self, shape, action_count: int):
        super().__init__()

        inp = Input(shape=shape)

        # Convolution part (image recognition / feature extraction)
        conv = Conv2D(16, kernel_size=2, padding="same")(inp)
        conv = Conv2D(8, kernel_size=2)(conv)

        # Classification (decision making)
        flat = Flatten()(conv)
        # Activation: relu, sigmoid, ...
        hidden = Dense(256, activation='relu')(flat)
        hidden = Dense(64, activation='relu')(hidden)
        hidden = Dense(16, activation='relu')(hidden)
        output = Dense(action_count, activation='softmax')(hidden)

        self.model = Model(inputs=inp, outputs=output)
        print(self.model.summary())

        self.memory = SequentialMemory(limit=50000, window_length=WINDOW_LENGTH)
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                                           nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model, nb_actions=action_count, memory=self.memory,  nb_steps_warmup=20,
                            target_model_update=1e-2, policy=self.policy)

        Adam._name = "fix_bug"  # https://github.com/keras-rl/keras-rl/issues/345
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])

    def build_callbacks(self, env_name):
        callbacks = []

        checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
        callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)]

        log_filename = 'dqn_{}_log.json'.format(env_name)
        callbacks += [FileLogger(log_filename, interval=100)]
        return callbacks
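The shape error quoted in the class docstring comes from keras-rl prepending a window_length axis to every state batch. A later snippet on this page passes a RemoveDimensionProcessor() to its agent; a minimal sketch of such a processor (an assumption, not the original implementation) mirrors the CustomProcessor.process_state_batch shown elsewhere on this page:

import numpy as np
from rl.core import Processor


class RemoveDimensionProcessor(Processor):
    """Sketch: squeeze the window_length axis so a (batch, 1, H, W, C) state batch
    becomes (batch, H, W, C) before it reaches the Conv2D input."""

    def process_state_batch(self, batch):
        return np.squeeze(batch, axis=1)

Passing it to the DQNAgent as processor=RemoveDimensionProcessor(), as the later snippet does, makes the convolutional input shape line up when window_length is 1.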
Example #20
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.05,
                                  value_test=.05,
                                  nb_steps=150_000)
    #policy = EpsGreedyQPolicy(eps=.1)
    #policy = GreedyQPolicy()

    #policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=30000, window_length=1)
    dqn = DQNAgent(model=model,
                   memory=memory,
                   policy=policy,
                   processor=processor,
                   nb_actions=actions,
                   nb_steps_warmup=100,
                   target_model_update=1e-3,
                   enable_double_dqn=True,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   batch_size=8,
                   gamma=.95)
    return dqn
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., 
                                  value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                   enable_dueling_network=True, dueling_type='avg', nb_actions=actions, nb_steps_warmup=1000)
    return dqn
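Both build_agent variants above return the agent uncompiled, so the caller still has to compile and train it. A minimal sketch of that call site; the model, the environment, the optimizer settings and the step count are assumptions.

# Hypothetical call site for the uncompiled agent returned above.
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-4), metrics=['mae'])
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)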
    def __init__(self, shape, initial_randomness: float, action_count: int):
        super().__init__()

        model = Sequential()
        model.add(Input(shape=shape))
        model.add(Conv2D(8, (3, 3), activation='relu', input_shape=shape))
        model.add(Conv2D(16, (3, 3), activation='relu', input_shape=shape))
        model.add(Conv2D(32, (3, 3), activation='relu', input_shape=shape))
        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
        model.add(Dense(512, activation='relu'))
        model.add(Dense(action_count, activation='softmax'))

        print(model.summary())

        self.model = model

        self.callbacks = self.build_callbacks("msnake")

        self.processor = RemoveDimensionProcessor()

        self.memory = SequentialMemory(limit=50000, window_length=1)

        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=10,
                            target_model_update=1e-2,
                            policy=self.policy,
                            batch_size=1,
                            processor=self.processor)

        # https://github.com/keras-rl/keras-rl/issues/345
        Adam._name = "fix_bug"

        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-3), metrics=['mse'])

        self.initial_randomness = initial_randomness
Example #23
    def build_agent(self, mem_file=None, w_file=None):
        #Create a dummy env to get size of input/output.
        #Makes it simpler if we ever choose to update env shapes.
        env = TradingEnv([], "", [])
        np.random.seed(314)
        env.seed(314)

        nb_actions = env.action_space.n
        obs_dim = env.observation_space.shape[0]
        model = Sequential()
        model.add(
            LSTM(5, input_shape=(7, 4),
                 return_sequences=True))  # 4 features + 1 bias term. 5 neurons
        model.add(Activation('tanh'))
        model.add(LSTM(4))
        model.add(Activation('tanh'))
        model.add(Dropout(0.2))
        model.add(Dense(4))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))  # linear output works best with BoltzmannQPolicy

        # policy = EpsGreedyQPolicy(eps=EPS_VAL)  # alternative exploration policy
        policy = BoltzmannQPolicy()  # behaviour policy used during training
        test_policy = MaxBoltzmannQPolicy()  # policy used during testing
        memory = None
        if mem_file is None:
            memory = SequentialMemory(
                limit=50000,
                window_length=7)  ## returns observations of len (7,)
        else:
            (memory, memory.actions, memory.rewards, memory.terminals,
             memory.observations) = pickle.load(open(mem_file, "rb"))

        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       gamma=GAMMA_VAL,
                       nb_steps_warmup=100,
                       policy=policy,
                       test_policy=test_policy)
        dqn.compile("adam", metrics=['mse'])

        if w_file is not None:
            model.load_weights(w_file)

        return dqn, env, memory
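The mem_file branch above unpickles a five-element tuple; a matching save sketch (the file name and handling are assumptions, not part of the original) would produce a compatible file from a populated memory:

# Hypothetical counterpart to the mem_file loading branch above.
with open("memory.pkl", "wb") as f:
    pickle.dump((memory, memory.actions, memory.rewards,
                 memory.terminals, memory.observations), f)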
Example #24
def test_double_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1, )))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50,
                   target_model_update=1e-1,
                   policy=policy,
                   enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #25
    def get_agent(self):
        agent = DQNAgent(model=self.model,
                         policy=self.policy,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         nb_actions=self.action,
                         memory=self.memory,
                         enable_double_dqn=False)

        return agent
Example #26
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model,
                   memory=memory, 
                   policy=policy, 
                   nb_actions=actions, 
                   nb_steps_warmup=10, 
                   target_model_update=1e-2)
    return dqn
Example #27
def train(learn_rate, model_update_interval, steps):
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50000,
                   target_model_update=model_update_interval,
                   policy=policy,
                   gamma=.99,
                   train_interval=4)
    dqn.compile(Adam(lr=learn_rate), metrics=['mae'])
    dqn.fit(env, nb_steps=steps, verbose=2, visualize=VISUALIZE)
    dqn.save_weights(SAVEFILE_FOLDER + "/dqn_pong_params.h5f", overwrite=True)
def build_agent(model, actions):
    """
    Builds an Epsilon Greedy Deep Q Learning Agent
    """
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1, value_min=0.1, value_test=0.2,
                                  nb_steps=10000)
    memory = SequentialMemory(limit=2000, window_length=3)

    dqn = DQNAgent(model, policy, memory=memory, enable_dueling_network=True, dueling_type='avg', nb_actions=actions,
                   nb_steps_warmup=1000)
    return dqn
    def agent(self):
        nb_actions = self.env.action_space.n
        obs_dim = self.env.observation_space.shape
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + obs_dim))  # obs_dim is a shape tuple
        model.add(Dense(nb_actions, activation='linear'))
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=256,
                       enable_dueling_network=True,
                       target_model_update=1e-2,
                       policy=InformedBoltzmannGumbelQPolicy(self.env),
                       test_policy=InformedGreedyQPolicy(self.env),
                       batch_size=128,
                       train_interval=128)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if self.initial_weights_file is not None:
            dqn.load_weights(self.initial_weights_file)
            self.train_episodes = 0

        return dqn
    def agent(self):
        nb_actions = self.env.action_space.n
        model = self.build()
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=32,
                       enable_dueling_network=True,
                       target_model_update=1e-2,
                       policy=InformedBoltzmannGumbelQPolicy(self.env),
                       test_policy=InformedGreedyQPolicy(self.env),
                       batch_size=32,
                       train_interval=32)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if self.initial_weights_file is not None:
            try:
                dqn.load_weights(self.initial_weights_file)
            except Exception:
                # just skip loading if the weights are missing or incompatible
                pass

        return dqn
Example #31
def test_duel_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=False, enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #32
    # print(model.summary())
    print(model.output._keras_shape)

    return model


if __name__ == '__main__':
    env = myTGym(episode_type='0', percent_goal_profit=2, percent_stop_loss=5)
    # s1, s2, s3 = env.reset()
    # state = aggregate_state(s1, s2, s3)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    model  = build_network()

    dqn = DQNAgent(model=model, nb_actions=2, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)

    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format('trading'), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
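
    # Hedged follow-up, not in the original script: reload the weights saved above for a
    # later evaluation-only run; the file name simply mirrors the save_weights call.
    dqn.load_weights('dqn_{}_weights.h5f'.format('trading'))
    dqn.test(env, nb_episodes=5, visualize=False)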