def __init__(self, question, testDict):
    """Configure an ApproximateQLearningTest from the parsed test dictionary.

    Reads discount/epsilon/alpha, builds the gridworld environment, selects
    the feature extractor, and loads the recorded experiences used to replay
    agent updates deterministically.
    """
    super(ApproximateQLearningTest, self).__init__(question, testDict)
    self.discount = float(testDict['discount'])
    # Build the grid exactly once.  The old code rebuilt it from
    # parseGrid() AFTER applying setNoise/setLivingReward, which silently
    # discarded those settings.
    self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
    if 'noise' in testDict:
        self.grid.setNoise(float(testDict['noise']))
    if 'livingReward' in testDict:
        self.grid.setLivingReward(float(testDict['livingReward']))
    self.env = gridworld.GridworldEnvironment(self.grid)
    self.epsilon = float(testDict['epsilon'])
    self.learningRate = float(testDict['learningRate'])
    # Feature-extractor class name; defaults to identity features.
    self.extractor = 'IdentityExtractor'
    if 'extractor' in testDict:
        self.extractor = testDict['extractor']
    self.opts = {
        'actionFn': self.env.getPossibleActions,
        'epsilon': self.epsilon,
        'gamma': self.discount,
        'alpha': self.learningRate
    }
    numExperiences = int(testDict['numExperiences'])
    maxPreExperiences = 10
    # Snapshot the agent after 0..min(numExperiences, 10)-1 updates, plus
    # once at the final experience count if it exceeds the preview window.
    self.numsExperiencesForDisplay = list(
        range(min(numExperiences, maxPreExperiences)))
    self.testOutFile = testDict['test_out_file']
    # NOTE(review): assumes test_out_file has exactly three path components
    # (root/question/test) -- a deeper path would raise ValueError; confirm
    # against the test harness.
    if sys.platform == 'win32':
        _, question_name, test_name = testDict['test_out_file'].split('\\')
    else:
        _, question_name, test_name = testDict['test_out_file'].split('/')
    self.experiences = Experiences(test_name.split('.')[0])
    if maxPreExperiences < numExperiences:
        self.numsExperiencesForDisplay.append(numExperiences)
def __init__(self, question, testDict):
    """Configure an EpsilonGreedyTest from the parsed test dictionary.

    Stores the learning hyper-parameters, builds the gridworld environment,
    and loads the recorded experiences used to train the student agent.
    """
    super(EpsilonGreedyTest, self).__init__(question, testDict)
    self.discount = float(testDict['discount'])
    # Build the grid exactly once.  The old code rebuilt it from
    # parseGrid() AFTER applying setNoise/setLivingReward, which silently
    # discarded those settings.
    self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
    if 'noise' in testDict:
        self.grid.setNoise(float(testDict['noise']))
    if 'livingReward' in testDict:
        self.grid.setLivingReward(float(testDict['livingReward']))
    self.env = gridworld.GridworldEnvironment(self.grid)
    self.epsilon = float(testDict['epsilon'])
    self.learningRate = float(testDict['learningRate'])
    self.numExperiences = int(testDict['numExperiences'])
    self.numIterations = int(testDict['iterations'])
    self.opts = {
        'actionFn': self.env.getPossibleActions,
        'epsilon': self.epsilon,
        'gamma': self.discount,
        'alpha': self.learningRate
    }
    # NOTE(review): assumes test_out_file has exactly three path components
    # (root/question/test) -- a deeper path would raise ValueError.
    if sys.platform == 'win32':
        _, question_name, test_name = testDict['test_out_file'].split('\\')
    else:
        _, question_name, test_name = testDict['test_out_file'].split('/')
    self.experiences = Experiences(test_name.split('.')[0])
def __init__(self, question, testDict):
    """Configure an EpsilonGreedyTest from the parsed test dictionary.

    Stores the learning hyper-parameters, builds the gridworld environment,
    and loads the recorded experiences used to train the student agent.
    """
    super(EpsilonGreedyTest, self).__init__(question, testDict)
    self.discount = float(testDict["discount"])
    # Build the grid exactly once.  The old code rebuilt it from
    # parseGrid() AFTER applying setNoise/setLivingReward, which silently
    # discarded those settings.
    self.grid = gridworld.Gridworld(parseGrid(testDict["grid"]))
    if "noise" in testDict:
        self.grid.setNoise(float(testDict["noise"]))
    if "livingReward" in testDict:
        self.grid.setLivingReward(float(testDict["livingReward"]))
    self.env = gridworld.GridworldEnvironment(self.grid)
    self.epsilon = float(testDict["epsilon"])
    self.learningRate = float(testDict["learningRate"])
    self.numExperiences = int(testDict["numExperiences"])
    self.numIterations = int(testDict["iterations"])
    self.opts = {
        "actionFn": self.env.getPossibleActions,
        "epsilon": self.epsilon,
        "gamma": self.discount,
        "alpha": self.learningRate,
    }
    # NOTE(review): assumes test_out_file has exactly three path components
    # (root/question/test) -- a deeper path would raise ValueError.
    if sys.platform == "win32":
        _, question_name, test_name = testDict["test_out_file"].split("\\")
    else:
        _, question_name, test_name = testDict["test_out_file"].split("/")
    self.experiences = Experiences(test_name.split(".")[0])
def __init__(self, question, testDict):
    """Configure a QLearningTest from the parsed test dictionary.

    Stores the learning hyper-parameters, builds the gridworld environment,
    and loads the recorded experiences replayed into the student agent.
    """
    super(QLearningTest, self).__init__(question, testDict)
    self.discount = float(testDict["discount"])
    # Build the grid exactly once.  The old code rebuilt it from
    # parseGrid() AFTER applying setNoise/setLivingReward, which silently
    # discarded those settings.
    self.grid = gridworld.Gridworld(parseGrid(testDict["grid"]))
    if "noise" in testDict:
        self.grid.setNoise(float(testDict["noise"]))
    if "livingReward" in testDict:
        self.grid.setLivingReward(float(testDict["livingReward"]))
    self.env = gridworld.GridworldEnvironment(self.grid)
    self.epsilon = float(testDict["epsilon"])
    self.learningRate = float(testDict["learningRate"])
    self.opts = {
        "actionFn": self.env.getPossibleActions,
        "epsilon": self.epsilon,
        "gamma": self.discount,
        "alpha": self.learningRate,
    }
    numExperiences = int(testDict["numExperiences"])
    maxPreExperiences = 10
    # Snapshot the agent after 0..min(numExperiences, 10)-1 updates, plus
    # once at the final experience count if it exceeds the preview window.
    self.numsExperiencesForDisplay = list(
        range(min(numExperiences, maxPreExperiences)))
    self.testOutFile = testDict["test_out_file"]
    # NOTE(review): assumes test_out_file has exactly three path components
    # (root/question/test) -- a deeper path would raise ValueError.
    if sys.platform == "win32":
        _, question_name, test_name = testDict["test_out_file"].split("\\")
    else:
        _, question_name, test_name = testDict["test_out_file"].split("/")
    self.experiences = Experiences(test_name.split(".")[0])
    if maxPreExperiences < numExperiences:
        self.numsExperiencesForDisplay.append(numExperiences)
class EpsilonGreedyTest(testClasses.TestCase):
    """Grades epsilon-greedy action selection.

    Trains the student's QLearningAgent on recorded experiences, then
    repeatedly samples actions per state and checks that the empirical
    exploration rate matches the configured epsilon within a tolerance.
    """

    def __init__(self, question, testDict):
        super(EpsilonGreedyTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        # Build the grid exactly once.  The old code rebuilt it from
        # parseGrid() AFTER applying setNoise/setLivingReward, which
        # silently discarded those settings.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict:
            self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict:
            self.grid.setLivingReward(float(testDict['livingReward']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.numExperiences = int(testDict['numExperiences'])
        self.numIterations = int(testDict['iterations'])
        self.opts = {
            'actionFn': self.env.getPossibleActions,
            'epsilon': self.epsilon,
            'gamma': self.discount,
            'alpha': self.learningRate
        }
        # NOTE(review): assumes test_out_file has exactly three path
        # components (root/question/test) -- deeper paths would raise.
        if sys.platform == 'win32':
            _, question_name, test_name = testDict['test_out_file'].split('\\')
        else:
            _, question_name, test_name = testDict['test_out_file'].split('/')
        self.experiences = Experiences(test_name.split('.')[0])

    def execute(self, grades, moduleDict, solutionDict):
        """Pass or fail the question based on the statistical epsilon check."""
        if self.testEpsilonGreedy(moduleDict):
            return self.testPass(grades)
        return self.testFail(grades)

    def writeSolution(self, moduleDict, filePath):
        # No solution data is needed: the check is purely statistical.
        with open(filePath, 'w') as handle:
            handle.write('# This is the solution file for %s.\n' % self.path)
            handle.write('# File intentionally blank.\n')
        return True

    def runAgent(self, moduleDict):
        """Train a fresh student QLearningAgent on the recorded experiences."""
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
        states = [
            state for state in self.grid.getStates()
            if len(self.grid.getPossibleActions(state)) > 0
        ]
        states.sort()
        for i in range(self.numExperiences):
            lastExperience = self.experiences.get_experience()
            agent.update(*lastExperience)
        return agent

    def testEpsilonGreedy(self, moduleDict, tolerance=0.025):
        """Return True iff the observed exploration rate matches epsilon.

        For each state with more than one legal action, sample getAction()
        numIterations times and invert the expected greedy-choice count to
        recover an empirical epsilon.
        """
        agent = self.runAgent(moduleDict)
        for state in self.grid.getStates():
            numLegalActions = len(agent.getLegalActions(state))
            if numLegalActions <= 1:
                # With <= 1 action there is no exploration to measure.
                continue
            numGreedyChoices = 0
            # assume that their computeActionFromQValues implementation is
            # correct (q4 tests this)
            optimalAction = agent.computeActionFromQValues(state)
            for iteration in range(self.numIterations):
                if agent.getAction(state) == optimalAction:
                    numGreedyChoices += 1
            # e = epsilon, g = # greedy actions, n = numIterations,
            # k = numLegalActions
            # g = n * [(1-e) + e/k]  ->  e = (n - g) / (n - n/k)
            empiricalEpsilonNumerator = self.numIterations - numGreedyChoices
            empiricalEpsilonDenominator = self.numIterations - self.numIterations / float(
                numLegalActions)
            empiricalEpsilon = empiricalEpsilonNumerator / empiricalEpsilonDenominator
            error = abs(empiricalEpsilon - self.epsilon)
            if error > tolerance:
                self.addMessage(
                    "Epsilon-greedy action selection is not correct.")
                self.addMessage(
                    "Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f"
                    % (self.epsilon, empiricalEpsilon, error, tolerance))
                return False
        return True
class ApproximateQLearningTest(testClasses.TestCase):
    """Grades an ApproximateQAgent by replaying recorded experiences and
    comparing the resulting feature weights and Q-values against a stored
    solution file at several experience counts.
    """

    def __init__(self, question, testDict):
        super(ApproximateQLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        # Build the grid exactly once.  The old code rebuilt it from
        # parseGrid() AFTER applying setNoise/setLivingReward, which
        # silently discarded those settings.
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        if 'noise' in testDict:
            self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict:
            self.grid.setLivingReward(float(testDict['livingReward']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        # Feature-extractor class name; defaults to identity features.
        self.extractor = 'IdentityExtractor'
        if 'extractor' in testDict:
            self.extractor = testDict['extractor']
        self.opts = {
            'actionFn': self.env.getPossibleActions,
            'epsilon': self.epsilon,
            'gamma': self.discount,
            'alpha': self.learningRate
        }
        numExperiences = int(testDict['numExperiences'])
        maxPreExperiences = 10
        # Snapshot after 0..min(numExperiences, 10)-1 updates, plus once at
        # the final count when it exceeds the preview window.
        self.numsExperiencesForDisplay = list(
            range(min(numExperiences, maxPreExperiences)))
        self.testOutFile = testDict['test_out_file']
        # NOTE(review): assumes test_out_file has exactly three path
        # components (root/question/test) -- deeper paths would raise.
        if sys.platform == 'win32':
            _, question_name, test_name = testDict['test_out_file'].split('\\')
        else:
            _, question_name, test_name = testDict['test_out_file'].split('/')
        self.experiences = Experiences(test_name.split('.')[0])
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)

    def writeFailureFile(self, string):
        # Persist the detailed diagnostics for the student to inspect.
        with open(self.testOutFile, 'w') as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        # Clean up stale diagnostics from a previous failing run.
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run the agent at every snapshot count; fail on the first mismatch."""
        failureOutputFileString = ''
        failureOutputStdString = ''
        for n in self.numsExperiencesForDisplay:
            testPass, stdOutString, fileOutString = self.executeNExperiences(
                grades, moduleDict, solutionDict, n)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage(
                    'For more details to help you debug, see test output file %s\n\n'
                    % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n):
        """Check weights and per-action Q-values after n experiences.

        Returns (testPass, stdOutString, fileOutString).
        """
        testPass = True
        qValuesPretty, weights, actions, lastExperience = self.runAgent(
            moduleDict, n)
        stdOutString = ''
        fileOutString = "==================== Iteration %d ====================\n" % n
        if lastExperience is not None:
            fileOutString += "Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)\n\n" % lastExperience
        weightsKey = 'weights_k_%d' % n
        # NOTE(review): eval() executes the solution file's text; acceptable
        # only because solution files are trusted, grader-generated input.
        # NOTE(review): there is deliberately(?) no failure branch for a
        # weights mismatch -- wrong weights surface through the Q-value
        # comparison below.  Confirm this leniency is intended.
        if weights == eval(solutionDict[weightsKey]):
            fileOutString += "Weights at iteration %d are correct." % n
            fileOutString += " Student/correct solution:\n\n%s\n\n" % pp.pformat(
                weights)
        for action in actions:
            qValuesKey = 'q_values_k_%d_action_%s' % (n, action)
            qValues = qValuesPretty[action]
            if self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                fileOutString += "Q-Values at iteration %d for action '%s' are correct." % (
                    n, action)
                fileOutString += " Student/correct solution:\n\t%s" % self.prettyValueSolutionString(
                    qValuesKey, qValues)
            else:
                testPass = False
                outString = "Q-Values at iteration %d for action '%s' are NOT correct." % (
                    n, action)
                outString += " Student solution:\n\t%s" % self.prettyValueSolutionString(
                    qValuesKey, qValues)
                outString += " Correct solution:\n\t%s" % self.prettyValueSolutionString(
                    qValuesKey, solutionDict[qValuesKey])
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Regenerate the solution file from the (reference) agent's output."""
        with open(filePath, 'w') as handle:
            for n in self.numsExperiencesForDisplay:
                qValuesPretty, weights, actions, _ = self.runAgent(
                    moduleDict, n)
                handle.write(
                    self.prettyValueSolutionString('weights_k_%d' % n,
                                                   pp.pformat(weights)))
                for action in actions:
                    handle.write(
                        self.prettyValueSolutionString(
                            'q_values_k_%d_action_%s' % (n, action),
                            qValuesPretty[action]))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Train a fresh ApproximateQAgent on numExperiences recorded updates.

        Returns (qValuesPretty, weights, actions, lastExperience).
        """
        agent = moduleDict['qlearningAgents'].ApproximateQAgent(
            extractor=self.extractor, **self.opts)
        states = [
            state for state in self.grid.getStates()
            if len(self.grid.getPossibleActions(state)) > 0
        ]
        states.sort()
        lastExperience = None
        for i in range(numExperiences):
            lastExperience = self.experiences.get_experience()
            agent.update(*lastExperience)
        # Union of all legal actions across non-terminal states.
        actions = list(
            reduce(lambda a, b: set(a).union(b),
                   [self.grid.getPossibleActions(state) for state in states]))
        qValues = {}
        weights = agent.getWeights()
        for state in states:
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                if action not in qValues:
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    # Marked None so prettyPrint renders "illegal".
                    qValues[action][state] = None
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (qValuesPretty, weights, actions, lastExperience)

    def prettyPrint(self, elements, formatString):
        """Render a {(x, y): value} map as a grid, top row first."""
        pretty = ''
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append('   illegal')
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    # Cell is not a state (e.g. a wall).
                    row.append('_' * 10)
            pretty += '        %s\n' % ("   ".join(row), )
        pretty += '\n'
        return pretty

    def prettyValues(self, values):
        return self.prettyPrint(values, '{0:10.4f}')

    def prettyPolicy(self, policy):
        return self.prettyPrint(policy, '{0:10s}')

    def prettyValueSolutionString(self, name, pretty):
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Whitespace-insensitive comparison; numeric tokens compared with
        absolute tolerance, other tokens compared exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric tokens (e.g. "illegal", "____") must match.
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        values = pretty.split()
        return values
class QLearningTest(testClasses.TestCase):
    """Grades a tabular QLearningAgent by replaying recorded experiences and
    comparing its Q-values (and, at the final snapshot, its derived values
    and policy) against a stored solution file.
    """

    def __init__(self, question, testDict):
        super(QLearningTest, self).__init__(question, testDict)
        self.discount = float(testDict["discount"])
        # Build the grid exactly once.  The old code rebuilt it from
        # parseGrid() AFTER applying setNoise/setLivingReward, which
        # silently discarded those settings.
        self.grid = gridworld.Gridworld(parseGrid(testDict["grid"]))
        if "noise" in testDict:
            self.grid.setNoise(float(testDict["noise"]))
        if "livingReward" in testDict:
            self.grid.setLivingReward(float(testDict["livingReward"]))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict["epsilon"])
        self.learningRate = float(testDict["learningRate"])
        self.opts = {
            "actionFn": self.env.getPossibleActions,
            "epsilon": self.epsilon,
            "gamma": self.discount,
            "alpha": self.learningRate,
        }
        numExperiences = int(testDict["numExperiences"])
        maxPreExperiences = 10
        # Snapshot after 0..min(numExperiences, 10)-1 updates, plus once at
        # the final count when it exceeds the preview window.
        self.numsExperiencesForDisplay = list(
            range(min(numExperiences, maxPreExperiences)))
        self.testOutFile = testDict["test_out_file"]
        # NOTE(review): assumes test_out_file has exactly three path
        # components (root/question/test) -- deeper paths would raise.
        if sys.platform == "win32":
            _, question_name, test_name = testDict["test_out_file"].split("\\")
        else:
            _, question_name, test_name = testDict["test_out_file"].split("/")
        self.experiences = Experiences(test_name.split(".")[0])
        if maxPreExperiences < numExperiences:
            self.numsExperiencesForDisplay.append(numExperiences)

    def writeFailureFile(self, string):
        # Persist the detailed diagnostics for the student to inspect.
        with open(self.testOutFile, "w") as handle:
            handle.write(string)

    def removeFailureFileIfExists(self):
        # Clean up stale diagnostics from a previous failing run.
        if os.path.exists(self.testOutFile):
            os.remove(self.testOutFile)

    def execute(self, grades, moduleDict, solutionDict):
        """Run the agent at every snapshot count; values/policy are only
        checked at the final snapshot."""
        failureOutputFileString = ""
        failureOutputStdString = ""
        for n in self.numsExperiencesForDisplay:
            checkValuesAndPolicy = n == self.numsExperiencesForDisplay[-1]
            testPass, stdOutString, fileOutString = self.executeNExperiences(
                grades, moduleDict, solutionDict, n, checkValuesAndPolicy)
            failureOutputStdString += stdOutString
            failureOutputFileString += fileOutString
            if not testPass:
                self.addMessage(failureOutputStdString)
                self.addMessage(
                    "For more details to help you debug, see test output file %s\n\n"
                    % self.testOutFile)
                self.writeFailureFile(failureOutputFileString)
                return self.testFail(grades)
        self.removeFailureFileIfExists()
        return self.testPass(grades)

    def executeNExperiences(self, grades, moduleDict, solutionDict, n,
                            checkValuesAndPolicy):
        """Check per-action Q-values after n experiences; when
        checkValuesAndPolicy is set, also check values and policy.

        Returns (testPass, stdOutString, fileOutString).
        """
        testPass = True
        (
            valuesPretty,
            qValuesPretty,
            actions,
            policyPretty,
            lastExperience,
        ) = self.runAgent(moduleDict, n)
        stdOutString = ""
        # Per-iteration banners are intentionally suppressed for this test;
        # only mismatches are reported.
        fileOutString = ""
        for action in actions:
            qValuesKey = "q_values_k_%d_action_%s" % (n, action)
            qValues = qValuesPretty[action]
            if not self.comparePrettyValues(qValues, solutionDict[qValuesKey]):
                testPass = False
                outString = (
                    "Q-Values at iteration %d for action '%s' are NOT correct."
                    % (n, action))
                outString += (
                    " Student solution:\n\t%s"
                    % self.prettyValueSolutionString(qValuesKey, qValues))
                outString += (" Correct solution:\n\t%s"
                              % self.prettyValueSolutionString(
                                  qValuesKey, solutionDict[qValuesKey]))
                stdOutString += outString
                fileOutString += outString
        if checkValuesAndPolicy:
            if not self.comparePrettyValues(valuesPretty,
                                            solutionDict["values"]):
                testPass = False
                outString = "Values are NOT correct."
                outString += (
                    " Student solution:\n\t%s"
                    % self.prettyValueSolutionString("values", valuesPretty))
                outString += (" Correct solution:\n\t%s"
                              % self.prettyValueSolutionString(
                                  "values", solutionDict["values"]))
                stdOutString += outString
                fileOutString += outString
            if not self.comparePrettyValues(policyPretty,
                                            solutionDict["policy"]):
                testPass = False
                outString = "Policy is NOT correct."
                outString += (
                    " Student solution:\n\t%s"
                    % self.prettyValueSolutionString("policy", policyPretty))
                outString += (" Correct solution:\n\t%s"
                              % self.prettyValueSolutionString(
                                  "policy", solutionDict["policy"]))
                stdOutString += outString
                fileOutString += outString
        return testPass, stdOutString, fileOutString

    def writeSolution(self, moduleDict, filePath):
        """Regenerate the solution file from the (reference) agent's output.

        Values and policy are written once, from the final snapshot.
        """
        with open(filePath, "w") as handle:
            valuesPretty = ""
            policyPretty = ""
            for n in self.numsExperiencesForDisplay:
                (
                    valuesPretty,
                    qValuesPretty,
                    actions,
                    policyPretty,
                    _,
                ) = self.runAgent(moduleDict, n)
                for action in actions:
                    handle.write(
                        self.prettyValueSolutionString(
                            "q_values_k_%d_action_%s" % (n, action),
                            qValuesPretty[action],
                        ))
            handle.write(self.prettyValueSolutionString(
                "values", valuesPretty))
            handle.write(self.prettyValueSolutionString(
                "policy", policyPretty))
        return True

    def runAgent(self, moduleDict, numExperiences):
        """Train a fresh QLearningAgent on numExperiences recorded updates.

        Returns (valuesPretty, qValuesPretty, actions, policyPretty,
        lastExperience).
        """
        agent = moduleDict["qlearningAgents"].QLearningAgent(**self.opts)
        states = [
            state for state in self.grid.getStates()
            if len(self.grid.getPossibleActions(state)) > 0
        ]
        states.sort()
        lastExperience = None
        for i in range(numExperiences):
            lastExperience = self.experiences.get_experience()
            agent.update(*lastExperience)
        # Union of all legal actions across non-terminal states.
        actions = list(
            reduce(
                lambda a, b: set(a).union(b),
                [self.grid.getPossibleActions(state) for state in states],
            ))
        values = {}
        qValues = {}
        policy = {}
        for state in states:
            values[state] = agent.computeValueFromQValues(state)
            policy[state] = agent.computeActionFromQValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                if action not in qValues:
                    qValues[action] = {}
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    # Marked None so prettyPrint renders "illegal".
                    qValues[action][state] = None
        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {}
        for action in actions:
            qValuesPretty[action] = self.prettyValues(qValues[action])
        return (
            valuesPretty,
            qValuesPretty,
            actions,
            policyPretty,
            lastExperience,
        )

    def prettyPrint(self, elements, formatString):
        """Render a {(x, y): value} map as a grid, top row first."""
        pretty = ""
        states = self.grid.getStates()
        for ybar in range(self.grid.grid.height):
            y = self.grid.grid.height - 1 - ybar
            row = []
            for x in range(self.grid.grid.width):
                if (x, y) in states:
                    value = elements[(x, y)]
                    if value is None:
                        row.append("   illegal")
                    else:
                        row.append(formatString.format(elements[(x, y)]))
                else:
                    # Cell is not a state (e.g. a wall).
                    row.append("_" * 10)
            pretty += "        %s\n" % ("   ".join(row), )
        pretty += "\n"
        return pretty

    def prettyValues(self, values):
        return self.prettyPrint(values, "{0:10.4f}")

    def prettyPolicy(self, policy):
        return self.prettyPrint(policy, "{0:10s}")

    def prettyValueSolutionString(self, name, pretty):
        return '%s: """\n%s\n"""\n\n' % (name, pretty.rstrip())

    def comparePrettyValues(self, aPretty, bPretty, tolerance=0.01):
        """Whitespace-insensitive comparison; numeric tokens compared with
        absolute tolerance, other tokens compared exactly."""
        aList = self.parsePrettyValues(aPretty)
        bList = self.parsePrettyValues(bPretty)
        if len(aList) != len(bList):
            return False
        for a, b in zip(aList, bList):
            try:
                aNum = float(a)
                bNum = float(b)
                error = abs(aNum - bNum)
                if error > tolerance:
                    return False
            except ValueError:
                # Non-numeric tokens (e.g. "illegal", "____") must match.
                if a.strip() != b.strip():
                    return False
        return True

    def parsePrettyValues(self, pretty):
        values = pretty.split()
        return values