Example #1
def run():
    # Initialize supervisor object
    # Whenever we want to access attributes, etc., from the supervisor controller we use
    # supervisorPre.
    supervisorPre = PitEscapeSupervisor()
    # Wrap the Pit Escape supervisor in the custom keyboard printer
    supervisorEnv = KeyboardControllerPitEscape(supervisorPre)

    # The agent used here is trained with the PPO algorithm (https://arxiv.org/abs/1707.06347).
    agent = PPOAgent(supervisorPre.observationSpace, supervisorPre.actionSpace)

    episodeCount = 0
    episodeLimit = 10000
    solved = False  # Whether the solved requirement is met
    repeatActionSteps = 1  # Amount of steps for which to repeat a certain action
    averageEpisodeActionProbs = []  # Save the average action probability per episode, to plot later

    # Run outer loop until the episodes limit is reached or the task is solved
    while not solved and episodeCount < episodeLimit:
        state = supervisorEnv.reset()  # Reset robot and get starting observation
        supervisorPre.episodeScore = 0
        actionProbs = []  # This list holds the probability of each chosen action

        # Inner loop is the episode loop
        step = 0
        # Episode is terminated based on time elapsed and not on number of steps
        while True:
            # In training mode the agent samples from the probability distribution, naturally implementing exploration
            actionValues, actionProb = agent.work(state, type_="selectAction")
            # Save the current selectedAction's probability
            actionProbs.append(actionProb)

            # Step the supervisor to get the current action's reward, the new state and whether we reached the done
            # condition
            newState, reward, done, info = supervisorEnv.step(
                [actionValues], repeatActionSteps)

            # Save the current state transition in agent's memory
            trans = Transition(state, actionValues, actionProb, reward,
                               newState)
            agent.storeTransition(trans)

            supervisorPre.episodeScore += reward  # Accumulate episode reward
            if done:
                # Save the episode's score
                supervisorPre.episodeScoreList.append(
                    supervisorPre.episodeScore)
                agent.trainStep(batchSize=step + 1)
                solved = supervisorPre.solved()  # Check whether the task is solved
                break

            state = newState  # state for next step is current step's newState
            step += 1

        if supervisorPre.test:  # If test flag is externally set to True, agent is deployed
            break

        print("Episode #", episodeCount, "score:", supervisorPre.episodeScore)
        # The average action probability tells us how confident the agent was of its actions.
        # By looking at this we can check whether the agent is converging to a certain policy.
        avgActionProb = mean(actionProbs)
        averageEpisodeActionProbs.append(avgActionProb)
        print("Avg action prob:", avgActionProb)

        episodeCount += 1  # Increment episode counter

    # np.convolve is used as a moving average, see https://stackoverflow.com/a/22621523
    movingAvgN = 10
    plotData(
        convolve(supervisorPre.episodeScoreList,
                 ones((movingAvgN, )) / movingAvgN,
                 mode='valid'), "episode", "episode score",
        "Episode scores over episodes")
    plotData(
        convolve(averageEpisodeActionProbs,
                 ones((movingAvgN, )) / movingAvgN,
                 mode='valid'), "episode",
        "average episode action probability",
        "Average episode action probability over episodes")

    if not solved and not supervisorPre.test:
        print("Reached episode limit and task was not solved.")
    elif not solved:
        print("Task is not solved, deploying agent for testing...")
    else:
        print("Task is solved, deploying agent for testing...")
    print("Press R to reset.")
    state = supervisorEnv.reset()
    supervisorPre.test = True
    supervisorPre.episodeScore = 0
    while True:
        actionValues, _ = agent.work(state, type_="selectActionMax")
        state, reward, done, _ = supervisorEnv.step([actionValues],
                                                    repeatActionSteps)
        supervisorPre.episodeScore += reward  # Accumulate episode reward

        if done:
            print("Reward accumulated =", supervisorPre.episodeScore)
            supervisorPre.episodeScore = 0
            state = supervisorEnv.reset()
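
Both plots above are smoothed with np.convolve used as a simple moving average, as the comment and the linked Stack Overflow answer note. A minimal, self-contained sketch of that trick, using made-up scores instead of the supervisor's episodeScoreList:

import numpy as np

scores = np.array([10, 12, 9, 14, 20, 18, 25, 23, 30, 28, 35, 33], dtype=float)

movingAvgN = 3
# Convolving with a length-N kernel of 1/N averages each window of N scores;
# mode='valid' keeps only full windows, so the result has len(scores) - N + 1 points.
smoothed = np.convolve(scores, np.ones(movingAvgN) / movingAvgN, mode='valid')
print(smoothed)

This is the same call that plotData receives in the snippet above, just with N = 10 and the real per-episode data.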
Example #2
def run():
    # Initialize supervisor object
    # Whenever we want to access attributes, etc., from the supervisor controller we use
    # supervisorPre.
    supervisorPre = CartPoleSupervisor()
    # Wrap the CartPole supervisor in the custom keyboard controller
    supervisorEnv = KeyboardControllerCartPole(supervisorPre)

    # The agent used here is trained with the DDPG algorithm (https://arxiv.org/abs/1509.02971).
    agent = DDPGAgent(supervisorPre.observationSpace,
                      supervisorPre.actionSpace,
                      lr_actor=0.000025,
                      lr_critic=0.00025,
                      layer1_size=30,
                      layer2_size=50,
                      layer3_size=30,
                      batch_size=64)

    episodeCount = 0
    episodeLimit = 10000
    solved = False  # Whether the solved requirement is met

    # Run outer loop until the episodes limit is reached or the task is solved
    while not solved and episodeCount < episodeLimit:
        state = supervisorEnv.reset()  # Reset robot and get starting observation
        supervisorPre.episodeScore = 0

        # Inner loop is the episode loop
        for step in range(supervisorPre.stepsPerEpisode):
            # In training mode the agent returns the action plus OU noise for exploration
            selectedAction = agent.choose_action_train(state)

            # Step the supervisor to get the current selectedAction reward, the new state and whether we reached
            # the done condition
            newState, reward, done, info = supervisorEnv.step(selectedAction)

            # Save the current state transition in agent's memory
            agent.remember(state, selectedAction, reward, newState, int(done))

            supervisorPre.episodeScore += reward  # Accumulate episode reward
            # Perform a learning step
            agent.learn()
            if done or step == supervisorPre.stepsPerEpisode - 1:
                # Save the episode's score
                supervisorPre.episodeScoreList.append(
                    supervisorPre.episodeScore)
                solved = supervisorPre.solved()  # Check whether the task is solved
                break

            state = newState  # state for next step is current step's newState

        if supervisorPre.test:  # If test flag is externally set to True, agent is deployed
            break

        print("Episode #", episodeCount, "score:", supervisorPre.episodeScore)
        episodeCount += 1  # Increment episode counter

    # np.convolve is used as a moving average, see https://stackoverflow.com/a/22621523
    # this is done to smooth out the plots
    movingAvgN = 10
    plotData(
        convolve(supervisorPre.episodeScoreList,
                 ones((movingAvgN, )) / movingAvgN,
                 mode='valid'), "episode", "episode score",
        "Episode scores over episodes")

    if not solved and not supervisorPre.test:
        print("Reached episode limit and task was not solved.")
    elif not solved:
        print("Task is not solved, deploying agent for testing...")
    else:
        print("Task is solved, deploying agent for testing...")
    print("Press R to reset.")
    state = supervisorEnv.reset()
    supervisorPre.test = True
    supervisorPre.episodeScore = 0
    while True:
        selectedAction = agent.choose_action_test(state)
        state, reward, done, _ = supervisorEnv.step(selectedAction)
        supervisorPre.episodeScore += reward  # Accumulate episode reward

        if done:
            print("Reward accumulated =", supervisorPre.episodeScore)
            supervisorPre.episodeScore = 0
            state = supervisorEnv.reset()
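
The comment in the episode loop says choose_action_train returns the action plus OU (Ornstein-Uhlenbeck) noise. The DDPGAgent internals are not shown here, so the following is only a sketch of how such exploration noise is typically generated; the class name and parameters are illustrative and not taken from the example.

import numpy as np

class OUNoise:
    # Ornstein-Uhlenbeck process: temporally correlated noise that drifts back
    # towards mu, commonly added to the actor's output in DDPG during training.
    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.state = np.copy(self.mu)

    def sample(self):
        x = self.state
        dx = self.theta * (self.mu - x) * self.dt \
            + self.sigma * np.sqrt(self.dt) * np.random.randn(len(x))
        self.state = x + dx
        return self.state

# Hypothetical usage: perturb a deterministic action, then clip to the action range.
noise = OUNoise(size=1)
deterministicAction = np.array([0.3])
noisyAction = np.clip(deterministicAction + noise.sample(), -1.0, 1.0)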
Example #3
test1 = open('inputs/test01.txt')
test1Data = np.loadtxt(test1, delimiter=',')
test1Data = util.normalizeData(test1Data)
errors = []
actuals = []       # accumulated true positions
predictions = []   # accumulated predicted positions
NUM_INPUTS = len(test1Data) - offset
for i in range(1750, NUM_INPUTS):
    current = test1Data[i]
    features = util.createFeatureRow(test1Data, i, offset, current)
    td = np.array(features)
    predX = neigh.predict(td)
    predY = neighY.predict(td)

    actual = test1Data[i + offset]
    prediction = [predX[0], predY[0]]
    actuals.append(actual)
    predictions.append(prediction)
    errors.append(util.error([actual], [prediction]))

if not onlyTrainingAndCV:
    #    util.plotLines(actuals, predictions, 'Actual position', 'Predicted position')
    #    util.plotGraph(actuals, predictions, 'Actual position', 'Predicted position')
    util.plotData(actuals, 'Actual position')
    util.plotData(predictions, 'Predicted position')
    util.plotLine(errors, 'Error graph')
    print(len(actuals), len(predictions))
    print(np.sum(errors))
else:
    util.plotLine(cvScoresX, 'CV label x')
    util.plotLine(cvScoresY, 'CV label y')
#    util.plotLines(cvScoresX, cvScoresY, 'CV label X', 'CV label y')
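
neigh and neighY are not defined in this snippet; from the predict calls they look like two regressors, one per output coordinate, fitted earlier in the script. A sketch of how they could be set up with scikit-learn's KNeighborsRegressor follows; the estimator choice and the synthetic data are assumptions, not taken from the original.

import numpy as np
from sklearn.neighbors import KNeighborsRegressor

rng = np.random.default_rng(0)
X_train = rng.random((200, 4))   # stand-in feature rows in place of util.createFeatureRow output
y_x = rng.random(200)            # target x coordinate
y_y = rng.random(200)            # target y coordinate

neigh = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_x)
neighY = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_y)

# predict() expects a 2-D array: one row per sample
features = rng.random(4)
predX = neigh.predict([features])
predY = neighY.predict([features])
print(predX[0], predY[0])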
Example #4
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 27 20:00:14 2016

@author: badarim
"""

import numpy as np
import utilities as util

t = open('inputs/test01.txt')
f = open('training_data.txt')
data = np.loadtxt(f, delimiter=',')

util.plotData(data, 'Training data')
data = np.loadtxt(t, delimiter=',')
util.plotData(data, 'Test1 data')
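
util.plotData itself is not part of either snippet. Assuming each row of the loaded files is an (x, y) position, a helper along these lines would behave like the calls above; the implementation is a guess, not the original utilities module.

import numpy as np
import matplotlib.pyplot as plt

def plotData(data, title):
    # Hypothetical stand-in for util.plotData: scatter-plot the (x, y) rows.
    data = np.asarray(data)
    plt.figure()
    plt.scatter(data[:, 0], data[:, 1], s=5)
    plt.title(title)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

plotData(np.random.rand(100, 2), 'Synthetic data')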