def main():
    """Entry point: build a Game wired to a hand-picked AI policy and run it.

    ``aiType`` selects the learning algorithm:
      1 = value iteration, 2 = policy iteration,
      3 = tabular Q-learning, 4 = approximate Q-learning,
      anything else = value iteration with the default configuration.
    """
    aiType = 3
    worldSize = 6
    game = Game(aiType, worldSize)
    agent = Agent()

    # All four hand-tuned configs use the same fully deterministic action
    # model: each action lands on its intended outcome 100% of the time.
    deterministic = [[100, 0, 0], [0, 100, 0], [0, 0, 100]]

    if aiType == 1:
        policy = ValueIteration()
        pc = PolicyConfiguration(
            inpRewards=[1, -1, 0, 10, -1],
            inpDiscounts=[1, .1, .1],
            inpStochastic=deterministic,
        )
    elif aiType == 2:
        policy = PolicyIteration()
        pc = PolicyConfiguration(
            inpRewards=[1, -1, 0, 10, -1],
            inpDiscounts=[1, .1, .1],
            inpStochastic=deterministic,
        )
    elif aiType == 3:
        policy = qLearningAgent()
        pc = PolicyConfiguration(
            inpRewards=[0, -1, 0, 10, -1],
            inpDiscounts=[1, .1, .1],
            inpStochastic=deterministic,
            inpFile="QLValues.p",
            inpTrainingLimit=1000,
        )
    elif aiType == 4:
        policy = approximateQLearning()
        pc = PolicyConfiguration(
            inpRewards=[2, -1, 0, 0, -1],
            inpDiscounts=[0.9, .2, .1],
            inpStochastic=deterministic,
            inpFile="AQLWeights.json",
            inpTrainingLimit=500,
        )
    else:
        # Unknown type: fall back to value iteration with defaults.
        policy = ValueIteration()
        pc = PolicyConfiguration()

    policy.config = pc
    agent.policy = policy
    game.agent = agent
    game.mainLoop()
def __init__(self, can, direction, inpAIType):
    """Store widget/game state and attach an Agent whose policy matches
    the requested AI type.

    PolicyConfiguration parameters (per the original author's notes):
      inpRewards    -- [food, hazard, living, good-location, bad-location];
                       good/bad location only used by Q-learning (was an
                       attempt at graph searching, rarely used, and can
                       give wonky results)
      inpDiscounts  -- [gamma discount, alpha discount, epsilon explore chance]
      inpStochastic -- per-action [forward%, left%, right%] outcome chances
                       for the forward / left / right actions
      inpFile       -- file for weights or Q-values
    """
    self.flag = 1
    self.can = can
    self.direction = direction
    self.aiType = inpAIType
    self.agent = Agent()

    # Deterministic action model shared by every configured AI type:
    # each action yields its intended outcome 100% of the time.
    deterministic = [[100, 0, 0], [0, 100, 0], [0, 0, 100]]

    if self.aiType == 1:
        policy = ValueIteration()
        pc = PolicyConfiguration(
            inpRewards=[1, -1, 0, 10, -1],
            inpDiscounts=[1, .1, .1],
            inpStochastic=deterministic,
        )
    elif self.aiType == 2:
        policy = PolicyIteration()
        pc = PolicyConfiguration(
            inpRewards=[1, -1, 0, 10, -1],
            inpDiscounts=[1, .1, .1],
            inpStochastic=deterministic,
        )
    elif self.aiType == 3:
        # Risk aversion (rarely straying off the best path) seemed to
        # work best for tabular Q-learning.
        policy = qLearningAgent()
        pc = PolicyConfiguration(
            inpRewards=[2, -1, 0, 0, 0],
            inpDiscounts=[0.9, .2, .1],
            inpStochastic=deterministic,
            inpFile=None,
            inpTrainingLimit=20000,
        )
    elif self.aiType == 4:
        policy = approximateQLearning()
        pc = PolicyConfiguration(
            inpRewards=[2, -1, 0, 0, -1],
            inpDiscounts=[0.9, .2, .1],
            inpStochastic=deterministic,
            inpFile=None,
            inpTrainingLimit=5000,
        )
    else:
        # Unknown type: fall back to value iteration with defaults.
        policy = ValueIteration()
        pc = PolicyConfiguration()

    policy.config = pc
    self.agent.policy = policy