예제 #1
0
 def __init__(self, question, testDict):
     """Configure the approximate Q-learning test from its parsed test file.

     Required ``testDict`` keys: 'discount', 'grid', 'epsilon',
     'learningRate', 'numExperiences' and 'test_out_file'. Optional keys:
     'noise', 'livingReward' and 'extractor' (default 'IdentityExtractor').
     """
     super(ApproximateQLearningTest, self).__init__(question, testDict)
     self.discount = float(testDict['discount'])
     # Build the grid exactly once: re-creating it after the setters below
     # would replace the configured instance and drop both settings.
     self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
     if 'noise' in testDict:
         self.grid.setNoise(float(testDict['noise']))
     if 'livingReward' in testDict:
         self.grid.setLivingReward(float(testDict['livingReward']))
     self.env = gridworld.GridworldEnvironment(self.grid)
     self.epsilon = float(testDict['epsilon'])
     self.learningRate = float(testDict['learningRate'])
     self.extractor = 'IdentityExtractor'
     if 'extractor' in testDict:
         self.extractor = testDict['extractor']
     # Keyword arguments later passed to the agent constructor.
     self.opts = {
         'actionFn': self.env.getPossibleActions,
         'epsilon': self.epsilon,
         'gamma': self.discount,
         'alpha': self.learningRate
     }
     numExperiences = int(testDict['numExperiences'])
     maxPreExperiences = 10
     # Experience counts to inspect: range(min(numExperiences, cap)), plus
     # numExperiences itself when it exceeds the cap.
     self.numsExperiencesForDisplay = list(
         range(min(numExperiences, maxPreExperiences)))
     if maxPreExperiences < numExperiences:
         self.numsExperiencesForDisplay.append(numExperiences)
     self.testOutFile = testDict['test_out_file']
     # Take the last path component whatever the directory depth; the
     # separator still follows the platform check used by this file.
     separator = '\\' if sys.platform == 'win32' else '/'
     test_name = self.testOutFile.split(separator)[-1]
     # Experiences receives the file name up to its first '.'.
     self.experiences = Experiences(test_name.split('.')[0])
예제 #2
0
    def __init__(self, question, testDict):
        """Configure the epsilon-greedy test from its parsed test file.

        Required ``testDict`` keys: 'discount', 'grid', 'epsilon',
        'learningRate', 'numExperiences', 'iterations' and 'test_out_file'.
        Optional keys: 'noise' and 'livingReward'.
        """
        super(EpsilonGreedyTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        # Build the grid exactly once: re-creating it after the setters below
        # would replace the configured instance and drop both settings.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict:
            self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict:
            self.grid.setLivingReward(float(testDict['livingReward']))

        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.numExperiences = int(testDict['numExperiences'])
        self.numIterations = int(testDict['iterations'])
        # Keyword arguments later passed to the agent constructor.
        self.opts = {
            'actionFn': self.env.getPossibleActions,
            'epsilon': self.epsilon,
            'gamma': self.discount,
            'alpha': self.learningRate
        }
        # Take the last path component whatever the directory depth; the
        # separator still follows the platform check used by this file.
        separator = '\\' if sys.platform == 'win32' else '/'
        test_name = testDict['test_out_file'].split(separator)[-1]
        # Experiences receives the file name up to its first '.'.
        self.experiences = Experiences(test_name.split('.')[0])
예제 #3
0
    def __init__(self, question, testDict):
        """Configure the epsilon-greedy test from its parsed test file.

        Required ``testDict`` keys: "discount", "grid", "epsilon",
        "learningRate", "numExperiences", "iterations" and "test_out_file".
        Optional keys: "noise" and "livingReward".
        """
        super(EpsilonGreedyTest, self).__init__(question, testDict)
        self.discount = float(testDict["discount"])
        # Build the grid exactly once: re-creating it after the setters below
        # would replace the configured instance and drop both settings.
        self.grid = gridworld.Gridworld(parseGrid(testDict["grid"]))
        if "noise" in testDict:
            self.grid.setNoise(float(testDict["noise"]))
        if "livingReward" in testDict:
            self.grid.setLivingReward(float(testDict["livingReward"]))

        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict["epsilon"])
        self.learningRate = float(testDict["learningRate"])
        self.numExperiences = int(testDict["numExperiences"])
        self.numIterations = int(testDict["iterations"])
        # Keyword arguments later passed to the agent constructor.
        self.opts = {
            "actionFn": self.env.getPossibleActions,
            "epsilon": self.epsilon,
            "gamma": self.discount,
            "alpha": self.learningRate,
        }
        # Take the last path component whatever the directory depth; the
        # separator still follows the platform check used by this file.
        separator = "\\" if sys.platform == "win32" else "/"
        test_name = testDict["test_out_file"].split(separator)[-1]
        # Experiences receives the file name up to its first ".".
        self.experiences = Experiences(test_name.split(".")[0])
예제 #4
0
 def __init__(self, question, testDict):
     """Configure the Q-learning test from its parsed test file.

     Required ``testDict`` keys: "discount", "grid", "epsilon",
     "learningRate", "numExperiences" and "test_out_file". Optional keys:
     "noise" and "livingReward".
     """
     super(QLearningTest, self).__init__(question, testDict)
     self.discount = float(testDict["discount"])
     # Build the grid exactly once: re-creating it after the setters below
     # would replace the configured instance and drop both settings.
     self.grid = gridworld.Gridworld(parseGrid(testDict["grid"]))
     if "noise" in testDict:
         self.grid.setNoise(float(testDict["noise"]))
     if "livingReward" in testDict:
         self.grid.setLivingReward(float(testDict["livingReward"]))
     self.env = gridworld.GridworldEnvironment(self.grid)
     self.epsilon = float(testDict["epsilon"])
     self.learningRate = float(testDict["learningRate"])
     # Keyword arguments later passed to the agent constructor.
     self.opts = {
         "actionFn": self.env.getPossibleActions,
         "epsilon": self.epsilon,
         "gamma": self.discount,
         "alpha": self.learningRate,
     }
     numExperiences = int(testDict["numExperiences"])
     maxPreExperiences = 10
     # Experience counts to inspect: range(min(numExperiences, cap)), plus
     # numExperiences itself when it exceeds the cap.
     self.numsExperiencesForDisplay = list(
         range(min(numExperiences, maxPreExperiences)))
     if maxPreExperiences < numExperiences:
         self.numsExperiencesForDisplay.append(numExperiences)
     self.testOutFile = testDict["test_out_file"]
     # Take the last path component whatever the directory depth; the
     # separator still follows the platform check used by this file.
     separator = "\\" if sys.platform == "win32" else "/"
     test_name = self.testOutFile.split(separator)[-1]
     # Experiences receives the file name up to its first ".".
     self.experiences = Experiences(test_name.split(".")[0])
예제 #5
0
class EpsilonGreedyTest(testClasses.TestCase):
    """Test that a QLearningAgent's action selection matches its epsilon.

    The agent is first trained on recorded experiences, then asked for an
    action many times per state; the share of non-greedy picks is converted
    into an empirical epsilon and compared with the configured one.
    """

    def __init__(self, question, testDict):
        """Configure the test from its parsed test file.

        Required ``testDict`` keys: 'discount', 'grid', 'epsilon',
        'learningRate', 'numExperiences', 'iterations' and 'test_out_file'.
        Optional keys: 'noise' and 'livingReward'.
        """
        super(EpsilonGreedyTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        # Build the grid exactly once: re-creating it after the setters below
        # would replace the configured instance and drop both settings.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict:
            self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict:
            self.grid.setLivingReward(float(testDict['livingReward']))

        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.numExperiences = int(testDict['numExperiences'])
        self.numIterations = int(testDict['iterations'])
        # Keyword arguments passed to the agent constructor in runAgent.
        self.opts = {
            'actionFn': self.env.getPossibleActions,
            'epsilon': self.epsilon,
            'gamma': self.discount,
            'alpha': self.learningRate
        }
        # Take the last path component whatever the directory depth; the
        # separator still follows the platform check used by this file.
        separator = '\\' if sys.platform == 'win32' else '/'
        test_name = testDict['test_out_file'].split(separator)[-1]
        # Experiences receives the file name up to its first '.'.
        self.experiences = Experiences(test_name.split('.')[0])

    def execute(self, grades, moduleDict, solutionDict):
        """Return testPass(grades) if the epsilon check succeeds, else testFail(grades)."""
        if self.testEpsilonGreedy(moduleDict):
            return self.testPass(grades)
        return self.testFail(grades)

    def writeSolution(self, moduleDict, filePath):
        """Write a placeholder solution file to ``filePath`` and return True."""
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True

    def runAgent(self, moduleDict):
        """Create a QLearningAgent and feed it ``self.numExperiences`` experiences.

        Returns the trained agent.
        """
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
        for _ in range(self.numExperiences):
            lastExperience = self.experiences.get_experience()
            agent.update(*lastExperience)
        return agent

    def testEpsilonGreedy(self, moduleDict, tolerance=0.025):
        """Estimate the agent's epsilon per state and compare with the target.

        States with at most one legal action are skipped. Returns False (after
        adding explanatory messages) as soon as one state's empirical epsilon
        differs from ``self.epsilon`` by more than ``tolerance``; True otherwise.
        """
        agent = self.runAgent(moduleDict)
        for state in self.grid.getStates():
            numLegalActions = len(agent.getLegalActions(state))
            if numLegalActions <= 1:
                continue
            # assume that their computeActionFromQValues implementation is
            # correct (q4 tests this)
            optimalAction = agent.computeActionFromQValues(state)
            numGreedyChoices = sum(
                1 for _ in range(self.numIterations)
                if agent.getAction(state) == optimalAction)
            # e = epsilon, g = # greedy actions, n = numIterations, k = numLegalActions
            # g = n * [(1-e) + e/k] -> e = (n - g) / (n - n/k)
            empiricalEpsilonNumerator = self.numIterations - numGreedyChoices
            empiricalEpsilonDenominator = (
                self.numIterations -
                self.numIterations / float(numLegalActions))
            empiricalEpsilon = (empiricalEpsilonNumerator /
                                empiricalEpsilonDenominator)
            error = abs(empiricalEpsilon - self.epsilon)
            if error > tolerance:
                self.addMessage(
                    "Epsilon-greedy action selection is not correct.")
                self.addMessage(
                    "Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f"
                    % (self.epsilon, empiricalEpsilon, error, tolerance))
                return False
        return True
예제 #6
0
class ApproximateQLearningTest(testClasses.TestCase):
    """Test an ApproximateQAgent against stored weights and Q-values.

    The agent is replayed over recorded experiences; after each inspected
    experience count its Q-values are compared with the solution file.
    """

    def __init__(self, question, testDict):
        """Configure the test from its parsed test file.

        Required ``testDict`` keys: 'discount', 'grid', 'epsilon',
        'learningRate', 'numExperiences' and 'test_out_file'. Optional keys:
        'noise', 'livingReward' and 'extractor' (default 'IdentityExtractor').
        """
        super(ApproximateQLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        # Build the grid exactly once: re-creating it after the setters below
        # would replace the configured instance and drop both settings.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict:
            self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict:
            self.grid.setLivingReward(float(testDict['livingReward']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.extractor = 'IdentityExtractor'
        if 'extractor' in testDict:
            self.extractor = testDict['extractor']
        # Keyword arguments passed to the agent constructor in runAgent.
        self.opts = {
            'actionFn': self.env.getPossibleActions,
            'epsilon': self.epsilon,
            'gamma': self.discount,
            'alpha': self.learningRate
        }
        numExperiences = int(testDict['numExperiences'])
        maxPreExperiences = 10
        # Experience counts to inspect: range(min(numExperiences, cap)), plus
        # numExperiences itself when it exceeds the cap.
        self.numsExperiencesForDisplay = list(
            range(min(numExperiences, maxPreExperiences)))
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)
        self.testOutFile = testDict['test_out_file']
        # Take the last path component whatever the directory depth; the
        # separator still follows the platform check used by this file.
        separator = '\\' if sys.platform == 'win32' else '/'
        test_name = self.testOutFile.split(separator)[-1]
        # Experiences receives the file name up to its first '.'.
        self.experiences = Experiences(test_name.split('.')[0])

    def writeFailureFile(self, string):
        """Overwrite the test output file with ``string``."""
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete the test output file if it is present."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run every inspected experience count, stopping at the first failure.

        On failure the accumulated report is added as a message, the detailed
        report is written to the test output file and testFail(grades) is
        returned. On success any old output file is removed and
        testPass(grades) is returned.
        """
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsExperiencesForDisplay:
            testPass, stdOutString, fileOutString = self.executeNExperiences(
                grades, moduleDict, solutionDict, n)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage(
                    'For more details to help you debug, see test output file %s\n\n'
                    % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n):
        """Compare the agent's state after ``n`` experiences with the solution.

        Returns ``(testPass, stdOutString, fileOutString)``. Only Q-value
        mismatches make ``testPass`` False; the weights comparison is
        recorded in ``fileOutString`` for debugging.
        """
        testPass = True
        qValuesPretty, weights, actions, lastExperience = self.runAgent(
            moduleDict, n)
        stdOutString = ''
        fileOutString = "==================== Iteration %d ====================\n" % n
        if lastExperience is not None:
            fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n" % lastExperience
        weightsKey = 'weights_k_%d' % n
        # NOTE(review): eval() executes the text stored in the solution file;
        # only run this against solution files from a trusted source.
        if weights == eval(solutionDict[weightsKey]):
            fileOutString += "Weights at iteration %d are correct." % n
            fileOutString += "   Student/correct solution:\n\n%s\n\n" % pp.pformat(
                weights)
        else:
            # The comparison above is exact, so a mismatch is only recorded
            # here; pass/fail is decided by the Q-value checks below.
            fileOutString += "Weights at iteration %d do NOT exactly match the stored solution." % n
            fileOutString += "   Student solution:\n\n%s\n\n" % pp.pformat(
                weights)
            fileOutString += "   Correct solution:\n\n%s\n\n" % solutionDict[
                weightsKey]
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (
                    n, action)
                fileOutString += "   Student/correct solution:\n\t%s" % self.prettyValueSolutionString(
                    qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (
                    n, action)
                outString += "   Student solution:\n\t%s" % self.prettyValueSolutionString(
                    qValuesKey, qValues)
                outString += "   Correct solution:\n\t%s" % self.prettyValueSolutionString(
                    qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Write weights and per-action Q-values for every inspected count.

        Returns True.
        """
        with open(filePath, 'w') as handle:
            for n in self.numsExperiencesForDisplay:
                qValuesPretty, weights, actions, _ = self.runAgent(
                    moduleDict, n)
                handle.write(
                    self.prettyValueSolutionString('weights_k_%d' % n,
                                                   pp.pformat(weights)))
                for action in actions:
                    handle.write(
                        self.prettyValueSolutionString(
                            'q_values_k_%d_action_%s' % (n, action),
                            qValuesPretty[action]))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Train a fresh ApproximateQAgent on ``numExperiences`` experiences.

        Returns ``(qValuesPretty, weights, actions, lastExperience)`` where
        ``qValuesPretty`` maps each action to its formatted Q-value grid and
        ``lastExperience`` is None when no experience was replayed.
        """
        agent = moduleDict['qlearningAgents'].ApproximateQAgent(
            extractor=self.extractor, **self.opts)
        # Only states that offer at least one action are reported.
        states = [
            state for state in self.grid.getStates()
            if len(self.grid.getPossibleActions(state)) > 0
        ]
        states.sort()
        lastExperience = None
        for _ in range(numExperiences):
            lastExperience = self.experiences.get_experience()
            agent.update(*lastExperience)
        # Union of the actions available in any reported state.
        actions = list(
            reduce(lambda a, b: set(a).union(b),
                   [self.grid.getPossibleActions(state) for state in states]))
        weights = agent.getWeights()
        qValues = {action: {} for action in actions}
        for state in states:
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                # None marks an action that is not available in this state;
                # prettyPrint renders it as 'illegal'.
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    qValues[action][state] = None
        qValuesPretty = {
            action: self.prettyValues(qValues[action])
            for action in actions
        }
        return (qValuesPretty, weights, actions, lastExperience)

    def prettyPrint(self, elements, formatString):
        """Render ``elements`` (keyed by (x, y)) as a text grid, top row first.

        Cells that are not grid states print as underscores and None values
        print as 'illegal'; other values go through ``formatString``.
        """
        pretty = ''
        states = self.grid.getStates()
        height = self.grid.grid.height
        for ybar in range(height):
            # Rows are emitted from the highest y down to y = 0.
            y = height - 1 - ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append('   illegal')
                    else:
                        row.append(formatString.format(value))
                else:
                    row.append('_' * 10)
            pretty += '        %s\n' % ("   ".join(row), )
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        """Format a grid of numbers with four decimal places."""
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        """Format a grid of strings in ten-character columns."""
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        """Return ``pretty`` wrapped as a named triple-quoted solution entry."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Return True when two formatted grids match token by token.

        Numeric tokens may differ by at most ``tolerance``; all other tokens
        must be identical. A NaN never matches a different token.
        """
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            if a == b:
                continue
            try:
                aNum = float(a)
                bNum = float(b)
            except ValueError:
                if a.strip() != b.strip():
                    return False
                continue
            # Written as "not <=" so that a NaN difference is rejected: every
            # ordering comparison with NaN is False, so "error > tolerance"
            # would let a NaN value through.
            if aNum != bNum and not abs(aNum - bNum) <= tolerance:
                return False
        return True

    def parsePrettyValues(self, pretty):
        """Split a formatted grid into its whitespace-separated tokens."""
        values = pretty.split()
        return values
예제 #7
0
class QLearningTest(testClasses.TestCase):
    """Test a QLearningAgent against stored Q-values, values and policy.

    The agent is replayed over recorded experiences; after each inspected
    experience count its Q-values are compared with the solution file, and
    after the last one its values and policy are compared as well.
    """

    def __init__(self, question, testDict):
        """Configure the test from its parsed test file.

        Required ``testDict`` keys: "discount", "grid", "epsilon",
        "learningRate", "numExperiences" and "test_out_file". Optional keys:
        "noise" and "livingReward".
        """
        super(QLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict["discount"])
        # Build the grid exactly once: re-creating it after the setters below
        # would replace the configured instance and drop both settings.
        self.grid = gridworld.Gridworld(parseGrid(testDict["grid"]))
        if "noise" in testDict:
            self.grid.setNoise(float(testDict["noise"]))
        if "livingReward" in testDict:
            self.grid.setLivingReward(float(testDict["livingReward"]))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict["epsilon"])
        self.learningRate = float(testDict["learningRate"])
        # Keyword arguments passed to the agent constructor in runAgent.
        self.opts = {
            "actionFn": self.env.getPossibleActions,
            "epsilon": self.epsilon,
            "gamma": self.discount,
            "alpha": self.learningRate,
        }
        numExperiences = int(testDict["numExperiences"])
        maxPreExperiences = 10
        # Experience counts to inspect: range(min(numExperiences, cap)), plus
        # numExperiences itself when it exceeds the cap.
        self.numsExperiencesForDisplay = list(
            range(min(numExperiences, maxPreExperiences)))
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)
        self.testOutFile = testDict["test_out_file"]
        # Take the last path component whatever the directory depth; the
        # separator still follows the platform check used by this file.
        separator = "\\" if sys.platform == "win32" else "/"
        test_name = self.testOutFile.split(separator)[-1]
        # Experiences receives the file name up to its first ".".
        self.experiences = Experiences(test_name.split(".")[0])

    def writeFailureFile(self, string):
        """Overwrite the test output file with ``string``."""
        with open(self.testOutFile, "w") as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        """Delete the test output file if it is present."""
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run every inspected experience count, stopping at the first failure.

        On failure the accumulated report is added as a message, the detailed
        report is written to the test output file and testFail(grades) is
        returned. On success any old output file is removed and
        testPass(grades) is returned.
        """
        failureOutputFileString = ""
        failureOutputStdString = ""
        for n in self.numsExperiencesForDisplay:
            # Values and policy are only checked for the final entry.
            checkValuesAndPolicy = n == self.numsExperiencesForDisplay[-1]
            testPass, stdOutString, fileOutString = self.executeNExperiences(
                grades, moduleDict, solutionDict, n, checkValuesAndPolicy)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage(
                    "For more details to help you debug, see test output file %s\n\n"
                    % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def _mismatchReport(self, header, key, studentPretty, correctPretty):
        """Return ``header`` followed by the student and correct entries for ``key``."""
        report = header
        report += ("   Student solution:\n\t%s" %
                   self.prettyValueSolutionString(key, studentPretty))
        report += ("   Correct solution:\n\t%s" %
                   self.prettyValueSolutionString(key, correctPretty))
        return report

    def executeNExperiences(self, grades, moduleDict, solutionDict, n,
                            checkValuesAndPolicy):
        """Compare the agent's state after ``n`` experiences with the solution.

        Q-values are always compared; values and policy only when
        ``checkValuesAndPolicy`` is true. Returns
        ``(testPass, stdOutString, fileOutString)``; both strings contain
        only the mismatches that were found.
        """
        testPass = True
        (
            valuesPretty,
            qValuesPretty,
            actions,
            policyPretty,
            _,
        ) = self.runAgent(moduleDict, n)
        mismatches = []
        for action in actions:
            qValuesKey = "q_values_k_%d_action_%s" % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                continue
            testPass = False
            mismatches.append(
                self._mismatchReport(
                    "Q-Values at iteration %d for action '%s' are NOT correct."
                    % (n, action),
                    qValuesKey,
                    qValues,
                    solutionDict[qValuesKey],
                ))
        if checkValuesAndPolicy:
            if not self.comparePrettyValues(valuesPretty,
                                            solutionDict["values"]):
                testPass = False
                mismatches.append(
                    self._mismatchReport("Values are NOT correct.", "values",
                                         valuesPretty,
                                         solutionDict["values"]))
            if not self.comparePrettyValues(policyPretty,
                                            solutionDict["policy"]):
                testPass = False
                mismatches.append(
                    self._mismatchReport("Policy is NOT correct.", "policy",
                                         policyPretty,
                                         solutionDict["policy"]))
        # The console report and the file report carry the same text.
        stdOutString = "".join(mismatches)
        fileOutString = stdOutString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Write per-action Q-values for every inspected count, then values and policy.

        The values and policy written are those of the last inspected count
        (empty when nothing is inspected). Returns True.
        """
        with open(filePath, "w") as handle:
            valuesPretty = ""
            policyPretty = ""
            for n in self.numsExperiencesForDisplay:
                (
                    valuesPretty,
                    qValuesPretty,
                    actions,
                    policyPretty,
                    _,
                ) = self.runAgent(moduleDict, n)
                for action in actions:
                    handle.write(
                        self.prettyValueSolutionString(
                            "q_values_k_%d_action_%s" % (n, action),
                            qValuesPretty[action],
                        ))
            handle.write(self.prettyValueSolutionString(
                "values", valuesPretty))
            handle.write(self.prettyValueSolutionString(
                "policy", policyPretty))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Train a fresh QLearningAgent on ``numExperiences`` experiences.

        Returns ``(valuesPretty, qValuesPretty, actions, policyPretty,
        lastExperience)`` where ``qValuesPretty`` maps each action to its
        formatted Q-value grid and ``lastExperience`` is None when no
        experience was replayed.
        """
        agent = moduleDict["qlearningAgents"].QLearningAgent(**self.opts)
        # Only states that offer at least one action are reported.
        states = [
            state for state in self.grid.getStates()
            if len(self.grid.getPossibleActions(state)) > 0
        ]
        states.sort()
        lastExperience = None
        for _ in range(numExperiences):
            lastExperience = self.experiences.get_experience()
            agent.update(*lastExperience)
        # Union of the actions available in any reported state.
        actions = list(
            reduce(
                lambda a, b: set(a).union(b),
                [self.grid.getPossibleActions(state) for state in states],
            ))
        values = {}
        policy = {}
        qValues = {action: {} for action in actions}
        for state in states:
            values[state] = agent.computeValueFromQValues(state)
            policy[state] = agent.computeActionFromQValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                # None marks an action that is not available in this state;
                # prettyPrint renders it as "illegal".
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    qValues[action][state] = None
        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {
            action: self.prettyValues(qValues[action])
            for action in actions
        }
        return (
            valuesPretty,
            qValuesPretty,
            actions,
            policyPretty,
            lastExperience,
        )

    def prettyPrint(self, elements, formatString):
        """Render ``elements`` (keyed by (x, y)) as a text grid, top row first.

        Cells that are not grid states print as underscores and None values
        print as "illegal"; other values go through ``formatString``.
        """
        pretty = ""
        states = self.grid.getStates()
        height = self.grid.grid.height
        for ybar in range(height):
            # Rows are emitted from the highest y down to y = 0.
            y = height - 1 - ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append("   illegal")
                    else:
                        row.append(formatString.format(value))
                else:
                    row.append("_" * 10)
            pretty += "        %s\n" % ("   ".join(row), )
        pretty += "\n"
        return pretty

    def prettyValues(self, values):
        """Format a grid of numbers with four decimal places."""
        return self.prettyPrint(values, "{0:10.4f}")

    def prettyPolicy(self, policy):
        """Format a grid of strings in ten-character columns."""
        return self.prettyPrint(policy, "{0:10s}")

    def prettyValueSolutionString(self, name, pretty):
        """Return ``pretty`` wrapped as a named triple-quoted solution entry."""
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Return True when two formatted grids match token by token.

        Numeric tokens may differ by at most ``tolerance``; all other tokens
        must be identical. A NaN never matches a different token.
        """
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            if a == b:
                continue
            try:
                aNum = float(a)
                bNum = float(b)
            except ValueError:
                if a.strip() != b.strip():
                    return False
                continue
            # Written as "not <=" so that a NaN difference is rejected: every
            # ordering comparison with NaN is False, so "error > tolerance"
            # would let a NaN value through.
            if aNum != bNum and not abs(aNum - bNum) <= tolerance:
                return False
        return True

    def parsePrettyValues(self, pretty):
        """Split a formatted grid into its whitespace-separated tokens."""
        values = pretty.split()
        return values