Example #1
    def __init__(self, map_env):
        self.num_action = 6  # (left, right, stay_turn, speed up, speed down, stay_speed); turns are 10 degrees
        self.map_env = map_env
        self.game = self.map_env.parent

        # Building the AI
        self.cnn = CNN(self.num_action)
        self.softmaxBody = SoftmaxBody(T=1.0)
        self.ai = AI(brain=self.cnn, body=self.softmaxBody)

        # Setting up Experience Replay
        self.n_steps = experience_replay.NStepProgress(env=self.game,
                                                       ai=self.ai,
                                                       n_step=10)
        self.mem = experience_replay.ReplayMemory(n_steps=self.n_steps,
                                                  capacity=10000)

        # moving-average recorder with a window of 100
        self.ma = MA(100)

        # Training AI
        self.epoch = 1
        self.loss = nn.MSELoss()

        self.ai.brain.load()
        self.pause = True
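
The SoftmaxBody used throughout these examples converts the CNN's Q-values into a probability distribution and samples an action from it. A minimal sketch of how it is typically written in this family of tutorials (the exact class lives in each project's ai module):

import torch.nn as nn
import torch.nn.functional as F

# Sketch of the softmax action selector assumed by the examples on this page.
class SoftmaxBody(nn.Module):
    def __init__(self, T):
        super(SoftmaxBody, self).__init__()
        self.T = T  # temperature: scales the Q-values before the softmax

    def forward(self, outputs):
        # A larger T sharpens the distribution (more exploitation),
        # a smaller T flattens it (more exploration).
        probs = F.softmax(outputs * self.T, dim=1)
        actions = probs.multinomial(num_samples=1)
        return actions
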
Example #2
def init_model(self,
               config: Config,
               env,
               checkpoint: AlienGymCheckpoint = None) -> AlienGymAI:
    image_size: ImageSize = ImageSize.from_str(config.image_size)
    cnn = CNN(env.action_space.n,
              image_w=image_size.w,
              image_h=image_size.h)
    cnn.to(self.device)
    if checkpoint is not None:
        cnn.load_state_dict(checkpoint.model_state_dict)
        cnn.eval()
    body = SoftmaxBody(config.softmax_temp)
    body.to(self.device)
    optimizer = optim.Adam(cnn.parameters(), lr=config.optimizer_lr)
    if checkpoint is not None:
        optimizer.load_state_dict(checkpoint.optimizer_state_dict)
    ai = AI(brain=cnn, body=body, device=self.device)
    n_steps = experience_replay.NStepProgress(env=env,
                                              ai=ai,
                                              n_step=config.n_step)
    memory = experience_replay.ReplayMemory(
        n_steps=n_steps, capacity=config.memory_capacity)
    return AlienGymAI(cnn=cnn,
                      ai=ai,
                      loss=nn.MSELoss(),
                      optimizer=optimizer,
                      n_step=n_steps,
                      replay_memory=memory)
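
The AlienGymAI returned above is presumably just a container for the trained pieces. A minimal sketch, assuming a dataclass whose fields match the keyword arguments used in init_model (the field types here are guesses):

from dataclasses import dataclass
from typing import Any

import torch.nn as nn

# Hypothetical container; field names follow the keyword arguments above.
@dataclass
class AlienGymAI:
    cnn: nn.Module        # the convolutional Q-network
    ai: Any               # brain + body wrapper used to pick actions
    loss: nn.Module       # MSE loss for the n-step Q-targets
    optimizer: Any        # Adam optimizer over the CNN parameters
    n_step: Any           # NStepProgress iterator
    replay_memory: Any    # ReplayMemory buffer
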
Example #3
doom_env = gym.wrappers.Monitor(doom_env, 'videos', force=True)

# Getting the number of actions from the Doom environment
number_actions = doom_env.action_space.n

# Building an AI
# Creating an object of our CNN class
cnn = CNN(number_actions)
# Creating an object of our SoftmaxBody class and passing in the temperature
softmax_body = SoftmaxBody(T=1.0)
# Creating an object of our AI class and passing in the brain and body
ai = AI(cnn, softmax_body)

# Setting up Experience Replay
# 10-step learning with a capacity of 10,000
n_steps = experience_replay.NStepProgress(env=doom_env, ai=ai, n_step=10)
# Replay memory: sample mini-batches of 10-step series from a 10,000-capacity buffer
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)


# Implementing Eligibility Trace
# n-step Q-learning (not asynchronous, because we only have one agent)
# (Q-learning rather than SARSA: the target bootstraps from the max Q-value)
# Training on batches
def eligibility_trace(batch):
    gamma = 0.99
    # Predictions
    inputs = []
    # Targets
    targets = []
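
This snippet cuts off before the loop over the batch; Examples #4 through #8 show successively more of it. For reference, the complete function in this family of tutorials looks roughly like the following sketch. It assumes the cnn defined above and n-step series whose steps expose state, action, reward, and done fields:

import numpy as np
import torch
from torch.autograd import Variable

def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        # Run the CNN on the first and last states of the n-step series
        input = Variable(torch.from_numpy(
            np.array([series[0].state, series[-1].state], dtype=np.float32)))
        output = cnn(input)
        # Bootstrap from the best Q-value of the last state, unless terminal
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        # Accumulate the discounted rewards backwards through the series
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        state = series[0].state
        target = output[0].data
        # Only the action actually taken receives the n-step return
        target[series[0].action] = cumul_reward
        inputs.append(state)
        targets.append(target)
    return torch.from_numpy(np.array(inputs, dtype=np.float32)), torch.stack(targets)
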
Example #4

# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Defining the number of actions (hard-coded rather than read from a Gym environment)
number_actions = 2925

# Building an AI
#cnn = CNN(2925).to(device)
cnn = torch.load("D:\\envPython\\OsuIA\\training\\brain-30.ty").to(device)
cnn.eval()
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=20000)


# Implementing Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state],
                         dtype=np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
Example #5
# Training with Deep Convolutional Q-Learning
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/DoomDefendLine-v0"))),
                                               height=80,
                                               width=80,
                                               grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n

cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(doom_env, ai, 10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)

# Eligibility trace: n-step Q-learning


def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state],
                         dtype=np.float32)))
        output = cnn(input)
Example #6
File: ai.py  Project: imdiptanu/doom-bot
        actions = self.body(output)
        return actions.data.numpy()
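
These two lines are the tail of the AI wrapper's __call__ method. The full class, as it appears throughout this family of projects, is roughly the following sketch: it feeds raw images through the brain (the CNN) and lets the body (the softmax selector) pick actions:

class AI:
    def __init__(self, brain, body):
        self.brain = brain  # the CNN producing Q-values
        self.body = body    # the softmax action selector

    def __call__(self, inputs):
        # Convert raw images to a Variable, score them, then sample actions
        input = Variable(torch.from_numpy(np.array(inputs, dtype=np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()
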


# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))), width = 80, height = 80, grayscale = True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force = True)
number_actions = doom_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T = 1.0)
ai = AI(brain = cnn, body = softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env = doom_env, ai = ai, n_step = 10)
memory = experience_replay.ReplayMemory(n_steps = n_steps, capacity = 10000)
    
# Implementing Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype = np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        state = series[0].state
        target = output[0].data
Example #7
# Getting the Warrior environment
warrior_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/WarriorCorridor-v0"))),
                                                  width=80,
                                                  height=80,
                                                  grayscale=True)
warrior_env = gym.wrappers.Monitor(warrior_env, "videos", force=True)
number_actions = warrior_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env=warrior_env, ai=ai, n_step=10)
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)


# Implementing Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state],
                         dtype=np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
Example #8
# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(
    ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))),
                                               width=80,
                                               height=80,
                                               grayscale=True)
doom_env = gym.wrappers.Monitor(doom_env, "videos", force=True)
number_actions = doom_env.action_space.n  # the number of actions available in this environment

# building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T=1.0)
ai = AI(brain=cnn, body=softmax_body)

# setting up the Experience Replay
n_steps = experience_replay.NStepProgress(
    env=doom_env, ai=ai, n_step=10)  # learning happens every 10 steps
memory = experience_replay.ReplayMemory(n_steps=n_steps, capacity=10000)


# n-step Q-learning is similar to A3C, but with a single agent rather than asynchronous ones
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(
            torch.from_numpy(
                np.array([series[0].state, series[-1].state],
                         dtype=np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
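
Every snippet on this page stops just before the training loop that ties these pieces together. In the course these examples derive from, it looks roughly like the following sketch (the MA moving-average recorder comes from Example #1; the epoch count, learning rate, and batch sizes shown here are the tutorial's usual defaults, and the ReplayMemory/NStepProgress methods are those of its experience_replay module):

ma = MA(100)                                   # moving average of the last 100 records
loss = nn.MSELoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.001)
nb_epochs = 100
for epoch in range(1, nb_epochs + 1):
    memory.run_steps(200)                      # gather 200 new n-step series
    for batch in memory.sample_batch(128):     # mini-batches of 128 series
        inputs, targets = eligibility_trace(batch)
        inputs, targets = Variable(inputs), Variable(targets)
        predictions = cnn(inputs)
        loss_error = loss(predictions, targets)
        optimizer.zero_grad()
        loss_error.backward()
        optimizer.step()
    rewards_steps = n_steps.rewards_steps()    # rewards of the finished series
    ma.add(rewards_steps)
    avg_reward = ma.average()
    print("Epoch: %s, Average Reward: %s" % (str(epoch), str(avg_reward)))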