Example #1
    def __init__(self, map_env):
        self.num_action = 6  # (turn left, turn right, keep heading, speed up, slow down, keep speed); each turn is 10 degrees
        self.map_env = map_env
        self.game = self.map_env.parent

        # Building the AI
        self.cnn = CNN(self.num_action)
        self.softmaxBody = SoftmaxBody(T=1.0)
        self.ai = AI(brain=self.cnn, body=self.softmaxBody)

        # Setting up Experience Replay
        self.n_steps = experience_replay.NStepProgress(env=self.game,
                                                       ai=self.ai,
                                                       n_step=10)
        self.mem = experience_replay.ReplayMemory(n_steps=self.n_steps,
                                                  capacity=10000)

        # Moving average recorder over the last 100 rewards
        self.ma = MA(100)

        # Training AI
        self.epoch = 1
        self.loss = nn.MSELoss()

        self.ai.brain.load()
        self.pause = True
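
A hedged sketch of how these pieces could be wired into a training step follows; the method name, the optimizer argument, and the ReplayMemory run_steps()/sample_batch() calls are assumptions modeled on the course-style experience_replay module, and an eligibility_trace() helper like the one in Example #3 is also assumed.

    # Hypothetical training step (a sketch, not part of the original snippet).
    # Assumes a course-style ReplayMemory API (run_steps / sample_batch), an
    # Adam optimizer created elsewhere for self.cnn, and an eligibility_trace()
    # helper like the one shown in Example #3 below.
    def train_step(self, optimizer, eligibility_trace):
        self.mem.run_steps(200)                   # gather 200 new 10-step series
        for batch in self.mem.sample_batch(128):  # mini-batches of series
            inputs, targets = eligibility_trace(batch)
            predictions = self.cnn(inputs)
            loss_error = self.loss(predictions, targets)
            optimizer.zero_grad()
            loss_error.backward()
            optimizer.step()
        rewards_steps = self.n_steps.rewards_steps()  # rewards of completed series
        self.ma.add(rewards_steps)                    # update the 100-entry moving average
        self.epoch += 1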
Example #2
    def init_model(self,
                   config: Config,
                   env,
                   checkpoint: AlienGymCheckpoint = None) -> AlienGymAI:
        image_size: ImageSize = ImageSize.from_str(config.image_size)
        cnn = CNN(env.action_space.n,
                  image_w=image_size.w,
                  image_h=image_size.h)
        cnn.to(self.device)
        if checkpoint is not None:
            cnn.load_state_dict(checkpoint.model_state_dict)
            cnn.eval()
        body = SoftmaxBody(config.softmax_temp)
        body.to(self.device)
        optimizer = optim.Adam(cnn.parameters(), lr=config.optimizer_lr)
        if checkpoint is not None:
            optimizer.load_state_dict(checkpoint.optimizer_state_dict)
        ai = AI(brain=cnn, body=body, device=self.device)
        n_steps = experience_replay.NStepProgress(env=env,
                                                  ai=ai,
                                                  n_step=config.n_step)
        memory = experience_replay.ReplayMemory(
            n_steps=n_steps, capacity=config.memory_capacity)
        return AlienGymAI(cnn=cnn,
                          ai=ai,
                          loss=nn.MSELoss(),
                          optimizer=optimizer,
                          n_step=n_steps,
                          replay_memory=memory)
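
A possible way to call init_model is sketched below; everything except init_model itself is a hedged guess: `trainer` stands for whatever object owns the method, the image-size string format and the Gym environment id are illustrative, and SimpleNamespace merely mimics the Config fields read above.

import gym
from types import SimpleNamespace

# Hypothetical usage; the field values, the "64x64" format, and the env id are
# illustrative placeholders, not taken from the original code.
config = SimpleNamespace(image_size="64x64", softmax_temp=1.0,
                         optimizer_lr=0.001, n_step=10, memory_capacity=10000)
alien_ai = trainer.init_model(config=config, env=gym.make("Alien-v0"))
# alien_ai bundles the CNN, the softmax policy, the loss, the optimizer and
# the n-step replay memory returned above.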
Example #3
#Getting the number of actions from the Doom environment
number_actions = doom_env.action_space.n

#Building an AI
#Creating an object of our CNN class
cnn = CNN(number_actions)
#Creating an object of our SoftmaxBody class and setting the temperature
softmax_body = SoftmaxBody(T=1.0)
#Creating an object of our AI class and passing in the brain and the body
ai = AI(cnn, softmax_body)

#Setting up Experience Replay
#10 step learning with a capacity of 10,000
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
#Replay memory that samples mini-batches of 10-step series from a capacity of 10,000
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)


#Implementing Eligibility Trace
#n-step Q-learning (not asynchronous, because we only have one agent)
#(it resembles n-step SARSA, but n-step Q-learning bootstraps with a greedy max
# rather than the action actually taken)
#Training on batches
def eligibility_trace(batch):
    gamma = 0.99
    #Prediction
    inputs = []
    #Target
    targets = []

    #Going through the batch
    for series in batch:
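        # --- The snippet is truncated here. What follows is a hedged sketch of
        # how the loop body typically continues in this n-step pattern; it
        # assumes each `series` is a sequence of steps with .state, .action,
        # .reward and .done fields (course-style experience_replay module) and
        # that numpy (np) and torch are imported at the top of the file.
        input = torch.from_numpy(np.array([series[0].state, series[-1].state],
                                          dtype=np.float32))
        output = cnn(input)  # Q-values for the first and last state of the series
        # Bootstrap from the last state unless the series ended the episode
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        # Accumulate discounted rewards backwards through the series
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        state = series[0].state
        target = output[0].data
        target[series[0].action] = cumul_reward  # n-step target for the action taken
        inputs.append(state)
        targets.append(target)
    return (torch.from_numpy(np.array(inputs, dtype=np.float32)),
            torch.stack(targets))
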
Example #4
    "LEARN_FREQ" : 5,
    "TRAIN_EPISODE" : 2000 # 训练的总episode数
}

if __name__ == "__main__":
    writer = SummaryWriter()


    env_name = "CartPole-v0"
    env = env.ContinuousCartPoleEnv()

    # env_name = "Pendulum-v0"
    # env = NormalizedActions(gym.make("Pendulum-v0"))

    # env_name = "Quadrotor"
    # env = make_env(env_name, task="hovering_control")

    logging.basicConfig(filename="{}.log".format(env_name))
    # print("DQN trained on {}".format(env_name))
    # logging.warning("DQN trained on {}".format(env_name))
    # print(opt)
    # logging.warning(opt)
    act_dim = env.action_space.shape[0]
    obs_dim = env.observation_space.shape[0]
    rpm = experience_replay.ReplayMemory(opt["MEMORY_SIZE"])
    agent = agent.DDPG_agent(obs_dim=obs_dim,
                             act_dim=act_dim,
                             actor_lr=opt["ACTOR_LR"],
                             critic_lr=opt["CRITIC_LR"],
                             tau=opt["TAU"],
                             gamma=opt["GAMMA"])
    # train(env, env_name, agent, opt["TRAIN_EPISODE"], rpm)
    agent.load("CartPole-v0.pth")
    evaluate(10, env, agent, render=True)

Example #5
    def __init__(self,
                 available_actions_count,
                 learning_rate=0.00025,
                 discount_factor=0.99,
                 epochs=20,
                 hidden_nodes=4608,
                 conv1_filters=32,
                 conv2_filters=64,
                 learning_steps_per_epoch=2000,
                 replay_memory_size=10000,
                 batch_size=64,
                 test_episodes_per_epoch=2,
                 frame_repeat=12,
                 update_every=4,
                 p_decay=0.95,
                 e_start=1,
                 reward_exploration=False,
                 reward_shooting=False,
                 resolution=(30, 45),
                 sequence_length=10,
                 observation_history=4,
                 death_match=False,
                 model_loadfile="/tmp/model.ckpt",
                 model_savefile="/tmp/model.ckpt",
                 start_from=0,
                 save_model=True,
                 load_model=False):

        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epochs = epochs
        self.learning_steps_per_epoch = learning_steps_per_epoch
        self.replay_memory_size = replay_memory_size
        self.batch_size = batch_size
        self.test_episodes_per_epoch = test_episodes_per_epoch
        self.frame_repeat = frame_repeat
        self.p_decay = p_decay
        self.e_start = e_start
        self.resolution = resolution
        self.available_actions_count = available_actions_count
        self.model_savefile = model_savefile
        self.save_model = save_model
        self.load_model = load_model
        self.death_match = death_match
        self.reward_exploration = reward_exploration
        self.sequence_length = sequence_length
        self.observation_history = observation_history
        self.update_every = update_every
        self.start_from = start_from
        self.model_loadfile = model_loadfile
        self.reward_shooting = reward_shooting

        # Positions traversed during an episode
        self.positions = []

        # Create replay memory which will store the transitions
        print("Creating replay memory")
        self.memory = er.ReplayMemory(capacity=replay_memory_size,
                                      resolution=resolution)

        # Start TF session
        print("Starting session")
        self.session = tf.Session()

        print("Creating model")
        # Create the input variables
        s1_ = tf.placeholder(tf.float32, [None] + list(self.resolution) + [1],
                             name="State")
        a_ = tf.placeholder(tf.int32, [None], name="Action")
        target_q_ = tf.placeholder(tf.float32, [None, available_actions_count],
                                   name="TargetQ")

        # Add 2 convolutional layers with ReLU activation
        conv1 = tf.contrib.layers.convolution2d(
            s1_,
            num_outputs=conv1_filters,
            kernel_size=[6, 6],
            stride=[3, 3],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
            biases_initializer=tf.constant_initializer(0.1))
        conv2 = tf.contrib.layers.convolution2d(
            conv1,
            num_outputs=conv2_filters,
            kernel_size=[3, 3],
            stride=[2, 2],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
            biases_initializer=tf.constant_initializer(0.1))
        conv2_flat = tf.contrib.layers.flatten(conv2)

        #conv2_flat = tf.contrib.layers.DropoutLayer(conv2_flat, keep=0.5, name='dropout')

        fc1 = tf.contrib.layers.fully_connected(
            conv2_flat,
            num_outputs=hidden_nodes,
            activation_fn=tf.nn.relu,
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            biases_initializer=tf.constant_initializer(0.1))

        #fc1 = tf.contrib.layers.DropoutLayer(fc1, keep=0.5, name='dropout')

        #gru = tf.tensorlayer.RNNLayer(fc1, cell_fn=tf.nn.rnn_cell.GRUCell, n_hidden=128, n_steps=1, return_seq_2d=False)

        #gru = tf.contrib.layers.DropoutLayer(gru, keep=0.5, name='dropout')

        q = tf.contrib.layers.fully_connected(
            fc1,
            num_outputs=self.available_actions_count,
            activation_fn=None,
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            biases_initializer=tf.constant_initializer(0.1))
        best_a = tf.argmax(q, 1)

        loss = tf.contrib.losses.mean_squared_error(q, target_q_)

        optimizer = tf.train.RMSPropOptimizer(self.learning_rate)
        # Update the parameters according to the computed gradient using RMSProp.
        train_step = optimizer.minimize(loss)

        def function_learn(s1, target_q):
            feed_dict = {s1_: s1, target_q_: target_q}
            l, _ = self.session.run([loss, train_step], feed_dict=feed_dict)
            return l

        def function_get_q_values(state):
            return self.session.run(q, feed_dict={s1_: state})

        def function_get_best_action(state):
            return self.session.run(best_a, feed_dict={s1_: state})

        def function_simple_get_best_action(state):
            return function_get_best_action(
                state.reshape([1, self.resolution[0], self.resolution[1],
                               1]))[0]

        self.fn_learn = function_learn
        self.fn_get_q_values = function_get_q_values
        self.fn_get_best_action = function_simple_get_best_action

        print("Model created")