Example #1
    def __init__(self):
        super().__init__()

        self.util = Util()

        self.net = FullyConnected(128, 10)
        self.net.float()

        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            self.net.cuda()
            #self.net.load_state_dict(torch.load('../res/models/transfer.pth'))
        else:
            self.device = torch.device('cpu')
            #self.net.load_state_dict(torch.load('../res/models/transfer.pth', map_location=('cpu')))

        print('device: ' + str(self.device) + '\n')

        # Create a gym environment (game environment)
        self.env = gym.make('Breakout-ram-v0')
        self.env.frameskip = 0

        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.net.parameters())

        self.replayMemory = []
        self.explorationRate = 1.0
        self.decayRate = 0.001
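
The commented-out load_state_dict calls above follow the usual device-aware checkpoint loading pattern. A compact sketch of that pattern (the checkpoint path comes from the example; the rest is an assumption and not part of the original code):

import torch

# Resolve the device first, then map the saved tensors onto it when loading.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
state_dict = torch.load('../res/models/transfer.pth', map_location=device)
# net.load_state_dict(state_dict)  # enable once the checkpoint file exists
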
Example #2
    def __init__(self, load=True, save=True, saveFile=''):

        self.load = load
        self.save = save

        # object for utility functions
        self.util = Util()

        # Select which network weights file to load/save
        if saveFile == '':
            self.saveFile = '../res/models/supervised.pth'
        else:
            self.saveFile = '../res/models/' + saveFile + '.pth'
        
        # Create network and load weights
        self.net = FullyConnected(128, 10)
        self.net = self.net.float()
        if self.load:
            self.net.load_state_dict(torch.load(self.saveFile))

        # Load training and testing data
        self.trainingData = self.util.loadTrainingData('../res/training data/evolvedObservation.txt')
        self.testingData = self.util.loadTestingData('../res/training data/evolvedAction.txt')

        # Training parameters
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.net.parameters())
Example #3
    def __init__(self,
                 agent='supervised',
                 render=False,
                 debug=True,
                 record=False):

        self.util = Util()

        # Create a gym environment (game environment)
        self.env = gym.make('Breakout-ram-v0')
        self.env.frameskip = 0

        # Create an agent for the simulation
        if agent == 'supervised':
            self.agent = SupervisedAgent()
        elif agent == 'evolvedreinforced':
            self.agent = EvolvedReinforcedAgent()
        else:
            self.agent = TrivialAgent()

        # Initialise other variables
        self.record = record
        self.render = render
        self.debug = debug
        self.agentType = type(self.agent).__name__
Example #4
    def __init__(self, save=False, elite=False, selfLearn=False):
        super().__init__()

        self.util = Util()

        self.net = FullyConnected(128, 10)
        self.net.float()

        self.save = save

        if elite:
            print('elite')
            self.ramPath = '../res/training data/ram100.txt'
            self.actionPath = '../res/training data/action100.txt'
        else:
            self.ramPath = '../res/training data/ram.txt'
            self.actionPath = '../res/training data/action.txt'

        if selfLearn:
            self.weightPath = '../res/models/transferSelfLearn.pth'
        else:
            self.weightPath = '../res/models/transfer.pth'

        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            self.net.cuda()
            self.net.load_state_dict(torch.load(self.weightPath))
        else:
            self.device = torch.device('cpu')
            self.net.load_state_dict(
                torch.load(self.weightPath, map_location=('cpu')))

        print('device: ' + str(self.device) + '\n')

        # Create a gym environment (game environment)
        self.env = gym.make('Breakout-ram-v0')
        self.env.frameskip = 0

        self.trainingData = self.util.loadTrainingData(self.ramPath)
        self.testingData = self.util.loadTestingData(self.actionPath)

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.net.parameters())

        # Create supervised agent
        self.supervisedAgent = SupervisedAgent()
Example #5
class DQNAgent():
    def __init__(self):
        super().__init__()

        self.util = Util()

        self.net = FullyConnected(128, 10)
        self.net.float()

        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            self.net.cuda()
            #self.net.load_state_dict(torch.load('../res/models/transfer.pth'))
        else:
            self.device = torch.device('cpu')
            #self.net.load_state_dict(torch.load('../res/models/transfer.pth', map_location=('cpu')))

        print('device: ' + str(self.device) + '\n')

        # Create a gym environment (game environment)
        self.env = gym.make('Breakout-ram-v0')
        self.env.frameskip = 0

        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.net.parameters())

        self.replayMemory = []
        self.explorationRate = 1.0
        self.decayRate = 0.001

    def getReplayMemory(self, numGames):

        for i in range(0, numGames):

            observation = self.env.reset()
            t = 0

            while True:

                #self.env.render()

                ram = self.util.observationToTensor(observation)

                ballY = int(observation[101])

                if random.uniform(0, 1) < self.explorationRate:
                    action = random.randrange(4)
                elif t == 0 or ballY > 200 or ballY == 0:
                    action = 1  # FIRE (launch the ball)
                else:
                    action = self.util.tensorAction(self.net, ram)

                newObservation, reward, done, info = self.env.step(action)

                self.replayMemory.append(
                    (self.util.observationToTensor(observation),
                     action, reward,
                     self.util.observationToTensor(newObservation), done))

                observation = newObservation

                t += 1
                self.explorationRate -= self.explorationRate * self.decayRate

                if done:
                    break

    def train(self):

        self.getReplayMemory(5)

        for observation, action, reward, nextObservation, done in self.replayMemory:

            output = self.util.getOutput(self.net, observation)
            q_value = torch.max(output)

            target = reward + 0.5 * self.util.tensorAction(
                self.net, nextObservation)

            self.optimizer.zero_grad()

            loss = self.criterion(q_value, torch.tensor(target))
            loss.backward()
            self.optimizer.step()
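
A minimal usage sketch for the DQNAgent above, assuming the class and its helpers (Util, FullyConnected) are importable; the module name dqn_agent is hypothetical:

from dqn_agent import DQNAgent  # hypothetical module name

agent = DQNAgent()
agent.train()  # fills replayMemory with 5 games, then runs one pass of updates
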
Example #6
class BasicReinforcedAgent():
    def __init__(self):
        super().__init__()

        self.util = Util()

        self.net = FullyConnected(128, 10)
        self.net.float()

        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            self.net.cuda()
            self.net.load_state_dict(torch.load('../res/models/transfer.pth'))
        else:
            self.device = torch.device('cpu')
            self.net.load_state_dict(
                torch.load('../res/models/transfer.pth', map_location=('cpu')))

        print('device: ' + str(self.device) + '\n')

        # Create a gym environment (game environment)
        self.env = gym.make('Breakout-ram-v0')
        self.env.frameskip = 0

        self.explorationRate = 0.05

        self.optimizer = optim.Adam(self.net.parameters())

    def calculateLoss(self, sampleReward, sampleLength):

        if sampleReward < 0:
            denominator = (0.6 * sampleReward) + (sampleLength + 1)
        else:
            denominator = (sampleReward *
                           (sampleLength * 2) + sampleLength) * 50

        if denominator == 0:
            return 0.0

        return (sampleLength / denominator)

    # One game played by the agent
    def gameCycle(self, observation, t, iteration_reward):

        sampleLength = 100
        sampleCounter = 0
        sampleReward = 0

        deathPenalty = 50
        lives = 5

        while True:
            #self.env.render()

            paddleMid = int(observation[72]) + 8
            ballMid = int(observation[99]) + 1
            ballY = int(observation[101])

            ram = self.util.observationToTensor(observation)
            output = self.util.getScaledOutput(self.net, ram)

            if t == 0 or ballY > 200 or ballY <= 0:
                action = 1  #config.ACTION_FIRE
            else:
                action = self.util.tensorAction(self.net, ram)

            observation, reward, done, info = self.env.step(action)

            # If the agent dies, incur a large penalty
            if lives > info['ale.lives']:
                sampleReward -= deathPenalty
                lives -= 1

            #if ballY >= 175:
            #    self.processLoss(paddleMid, ballMid, output)

            iteration_reward += reward
            sampleReward += reward

            t += 1
            sampleCounter += 1

            if sampleCounter == sampleLength:
                targetLoss = self.calculateLoss(sampleReward, sampleLength)
                #print('reward: ' + str(sampleReward) + ' loss: ' + str(targetLoss))
                self.processLoss(output, targetLoss)
                sampleCounter = 0
                sampleReward = 0

            if done:
                return iteration_reward, t

    # Calculate and apply loss
    def processLoss(self, output, targetLoss):
        self.optimizer.zero_grad()

        loss = self.util.applyLoss(output, targetLoss)
        loss.backward()

        self.optimizer.step()

    # At the end of training (or after fixed number of cycles) print statistics and save model
    def processTraining(self, totalReward, iterations, maxReward,
                        elapsed_time):

        minutes = math.floor(elapsed_time / 60)
        seconds = math.floor(elapsed_time - minutes * 60)

        average_reward = totalReward / iterations

        print('average reward: ' + str(average_reward))
        print('max reward: ' + str(maxReward))
        print('Training took: ' + str(minutes) + ':' + str(seconds))
        print()

        #file = open('../res/learning_progress2.txt', 'a+')
        #file.write(str(average_reward) + ' ' + str(maxReward) + '\n')

        torch.save(self.net.state_dict(), '../res/models/evolved2.pth')

    # Train the agent
    def train(self, iterations):

        maxReward = 0
        totalReward = 0
        startTime = time.time()

        # How many iterations of the game should be played
        for i_episode in range(iterations):

            observation = self.env.reset()

            iteration_reward = 0
            t = 0

            # One complete game
            iteration_reward, t = self.gameCycle(observation, t,
                                                 iteration_reward)
            print(iteration_reward)

            totalReward += iteration_reward
            if iteration_reward > maxReward:
                maxReward = iteration_reward

            if i_episode != 0 and i_episode % 1000 == 0:
                # Calculate how long this training took
                elapsed_time = time.time() - startTime
                self.processTraining(totalReward, iterations, maxReward,
                                     elapsed_time)
                startTime = time.time()

        # Once all games have finished, process the training statistics one final time
        elapsed_time = time.time() - startTime
        self.processTraining(totalReward, iterations, maxReward, elapsed_time)
        startTime = time.time()
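
The calculateLoss heuristic above produces a larger target loss when deaths drive the 100-step sample reward negative and a smaller one as the sample earns points. A quick standalone check of the formula as written (re-implemented here only to show the values):

def calculate_loss(sample_reward, sample_length=100):
    # Same arithmetic as BasicReinforcedAgent.calculateLoss above
    if sample_reward < 0:
        denominator = (0.6 * sample_reward) + (sample_length + 1)
    else:
        denominator = (sample_reward * (sample_length * 2) + sample_length) * 50
    return 0.0 if denominator == 0 else sample_length / denominator

print(calculate_loss(0))    # 0.02   -> no reward in the sample
print(calculate_loss(4))    # ~0.002 -> scoring shrinks the target loss
print(calculate_loss(-50))  # ~1.41  -> one death (-50) inflates it sharply
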
Example #7
class TransferAgent():
    def __init__(self, save=False, elite=False, selfLearn=False):
        super().__init__()

        self.util = Util()

        self.net = FullyConnected(128, 10)
        self.net.float()

        self.save = save

        if elite:
            print('elite')
            self.ramPath = '../res/training data/ram100.txt'
            self.actionPath = '../res/training data/action100.txt'
        else:
            self.ramPath = '../res/training data/ram.txt'
            self.actionPath = '../res/training data/action.txt'

        if selfLearn:
            self.weightPath = '../res/models/transferSelfLearn.pth'
        else:
            self.weightPath = '../res/models/transfer.pth'

        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            self.net.cuda()
            self.net.load_state_dict(torch.load(self.weightPath))
        else:
            self.device = torch.device('cpu')
            self.net.load_state_dict(
                torch.load(self.weightPath, map_location=('cpu')))

        print('device: ' + str(self.device) + '\n')

        # Create a gym environment (game environment)
        self.env = gym.make('Breakout-ram-v0')
        self.env.frameskip = 0

        self.trainingData = self.util.loadTrainingData(self.ramPath)
        self.testingData = self.util.loadTestingData(self.actionPath)

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.net.parameters())

        # Create supervised agent
        self.supervisedAgent = SupervisedAgent()

    # Get the action from a trained supervised agent
    def getSupervisedAgentAction(self, observation):
        action = self.supervisedAgent.action(observation)
        return action

    # Get the action from raw observation
    def observationAction(self, observation):
        scaled_RAM = self.util.scaleRAM(observation)

        output = self.net(scaled_RAM)

        maxVal = torch.max(output, 0)
        return int(maxVal[1])

    # Transform the tensor to have values in [0, 1]
    def normaliseTensor(self, tensor):
        minValue = torch.min(tensor)
        print('min: ' + str(minValue))
        maxValue = torch.max(tensor)
        print('max: ' + str(maxValue))
        normalised = []

        for value in tensor:
            normalised.append(
                (value.item() - minValue) / (maxValue - minValue))

        return torch.tensor(normalised, device=self.device)

    # Based on where the ball landed in relation to the paddle, find the target
    def getTarget(self, paddleMid, ballMid):
        arr = []

        # If the ball hits the paddle, remain in the same place
        if abs(paddleMid - ballMid) < 7:
            arr = [1.0, 0.0, 0.0, 0.0]
        # paddle left of ball so move right
        elif paddleMid < ballMid:
            arr = [0.0, 0.0, 1.0, 0.0]
        # paddle right of ball so move left
        elif paddleMid > ballMid:
            arr = [0.0, 0.0, 0.0, 1.0]

        return torch.tensor(arr, device=self.device)

    def supervisedLearn(self, iterations):
        for iteration in range(0, iterations):

            running_loss = 0.0
            for i in range(0, len(self.trainingData)):
                # zero the parameter gradients
                self.optimizer.zero_grad()

                # Scale the RAM values to be between 0-1
                scaled_RAM = self.util.scaleRAM(self.trainingData[i])

                # Get the outputs and target
                outputs = self.net(scaled_RAM)
                target = self.testingData[i].clone()

                # Unsqueeze outputs for loss function
                outputs = outputs.unsqueeze(dim=0)

                loss = self.criterion(outputs, target)
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()

                # Every 2000 mini-batches print the loss and save the model
                if i % 2000 == 1999:
                    print('[%d, %5d] loss: %.3f' %
                          (iteration + 1, i + 1, running_loss / 2000))

                    with open('../res/transferProgress.txt', 'a+') as file:
                        file.write('[%d, %5d] loss: %.3f\n' %
                                   (iteration + 1, i + 1, running_loss / 2000))

                    running_loss = 0.0

                    if self.save:
                        torch.save(self.net.state_dict(), self.weightPath)

    def selfLearn(self, iterations):

        # How many iterations of the game should be played
        for i_episode in range(iterations):

            observation = self.env.reset()

            iteration_reward = 0
            t = 0

            # One game iteration
            while True:

                # self.env.render()

                paddleMid = int(observation[72]) + 8
                ballMid = int(observation[99]) + 1
                ballY = int(observation[101])

                # Get scaled ram tensor for input to network
                scaled_RAM = self.util.scaleRAM(observation)

                if t == 0 or ballY > 200 or ballY <= 0:
                    action = 1  # config.ACTION_FIRE
                else:
                    action = self.util.tensorAction(self.net, scaled_RAM)

                observation, reward, done, info = self.env.step(action)

                if ballY <= 25:

                    self.optimizer.zero_grad()

                    output = self.util.getOutput(self.net, scaled_RAM)
                    output = F.softmax(output, dim=0)
                    target = self.getTarget(paddleMid, ballMid)

                    criterion = nn.MSELoss()
                    loss = criterion(output, target)

                    loss.backward()
                    self.optimizer.step()

                iteration_reward += reward
                t += 1

                if done:

                    print(iteration_reward)

                    with open('../res/transferProgressSelfLearn.txt', 'a+') as file:
                        file.write(str(iteration_reward) + '\n')

                    if self.save:
                        torch.save(self.net.state_dict(),
                                   '../res/models/transferSelfLearn.pth')

                    break
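
A minimal sketch of how the two training modes of TransferAgent fit together, assuming the class is importable and the data/checkpoint files referenced above exist; the module name transfer_agent is hypothetical:

from transfer_agent import TransferAgent  # hypothetical module name

agent = TransferAgent(save=False, elite=False, selfLearn=False)
agent.supervisedLearn(5)   # first fit on the recorded RAM/action pairs
agent.selfLearn(100)       # then refine by playing Breakout-ram-v0 directly
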
Example #8
class SupervisedAgent():
    """ Supervised Learning agent

    Trained using training and testing data generated from an expert system
    """

    def __init__(self, load=True, save=True, saveFile=''):

        self.load = load
        self.save = save

        # object for utility functions
        self.util = Util()

        # Select which network weights file to load/save
        if saveFile == '':
            self.saveFile = '../res/models/supervised.pth'
        else:
            self.saveFile = '../res/models/' + saveFile + '.pth'
        
        # Create network and load weights
        self.net = FullyConnected(128, 10)
        self.net = self.net.float()
        if self.load:
            self.net.load_state_dict(torch.load(self.saveFile))

        # Load training and testing data
        self.trainingData = self.util.loadTrainingData('../res/training data/evolvedObservation.txt')
        self.testingData = self.util.loadTestingData('../res/training data/evolvedAction.txt')

        # Training parameters
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.net.parameters())


    def train(self, epochs):
        """ Train the agent for a given number of epochs

        epochs : int = number of training epochs

        """

        for epoch in range(0, epochs):

            running_loss = 0.0

            for i in range(0, len(self.trainingData)):

                self.optimizer.zero_grad()

                # Get network output and target
                tensorInput = self.trainingData[i]
                outputs = self.net(tensorInput.float())
                outputs = outputs.unsqueeze(dim=0)
                target = self.testingData[i].clone()

                # Calculate the loss between the network output and the target from the testing dataset
                loss = self.criterion(outputs, target)
                loss.backward()
                self.optimizer.step()

                # Calculate running loss and print every 2000 mini-batches
                running_loss += loss.item()
                if i % 2000 == 1999:    # print every 2000 mini-batches
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / 2000))
                    running_loss = 0.0

                    if self.save:
                        torch.save(self.net.state_dict(), self.saveFile)
    
    def observationAction(self, observation):
        """ Get the agents action 

        observation : list = observation representing game state

        """

        observationTensor = torch.tensor(observation)

        outputs = self.net(observationTensor.float())

        maxVal = torch.max(outputs, 0)
        return int(maxVal[1])
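
For reference, nn.CrossEntropyLoss expects a batch of logits and class-index targets, which is why train() unsqueezes the single 10-way output to shape (1, 10) before computing the loss. A minimal usage sketch, assuming the class is importable and the training files exist; the module name supervised_agent is hypothetical:

from supervised_agent import SupervisedAgent  # hypothetical module name

agent = SupervisedAgent(load=False, save=False)
agent.train(10)                              # 10 epochs over the recorded data
action = agent.observationAction([0] * 128)  # act on a 128-byte RAM observation
print(action)                                # index of the highest-scoring output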