def DoEpisodes(episodes, boardSize, maxRemovePegs, boardType, epsilon=0.5, learningRate=0.9, policyTable=None, valueTable=None):
    """Run training episodes with an Actor and a Critic, then write their tables.

    Each episode builds a board with ``GetRandomizedBoard`` and plays it until
    the actor returns ``None`` as its chosen action. After every step the
    critic's TD error is refreshed and every entry of the world's game log is
    used to update the critic's values and the actor's policy.

    Args:
        episodes: Number of episodes to run.
        boardSize: Forwarded to ``GetRandomizedBoard`` and ``WriteTables``;
            also appended to ``boardType`` to form the solvable-states key.
        maxRemovePegs: Forwarded to ``GetRandomizedBoard``.
        boardType: Forwarded to ``GetRandomizedBoard`` and ``WriteTables``;
            must be a string because it is concatenated with ``str(boardSize)``.
        epsilon: Third constructor argument of ``Actor``.
        learningRate: Second constructor argument of ``Actor`` and ``Critic``.
        policyTable: Initial table handed to ``Actor``. A new empty dict is
            created when omitted.
        valueTable: Initial table handed to ``Critic``. A new empty dict is
            created when omitted.

    Returns:
        None. Progress is printed once per episode and the resulting tables
        are passed to ``WriteTables``.
    """
    # Build the default tables per call: a `{}` default in the signature is
    # created once and would be shared by every call that relies on it.
    if policyTable is None:
        policyTable = {}
    if valueTable is None:
        valueTable = {}

    # Both counters accumulate across all episodes, so the printed value is a
    # running mean. stepsTaken starts at 1, which keeps the division defined.
    totalError = 0
    stepsTaken = 1

    # NOTE(review): the leading 0.9 is presumably a discount/decay factor --
    # confirm against the Actor and Critic constructors.
    actor = Actor(0.9, learningRate, epsilon, policyTable)
    critic = Critic(0.9, learningRate, valueTable)

    for i in range(episodes):
        world = GetRandomizedBoard(boardSize, maxRemovePegs, boardType)
        actor.resetEligibility()
        critic.resetEligibility()
        critic.tdError = 0

        state = world.stateToHash()
        chosenAction = actor.ChooseActionByPolicy(world)

        while True:
            # makeAction is also called with a None action on the last pass;
            # the reward it returns then still feeds the final update below.
            reward = world.makeAction(chosenAction)
            nextAction = actor.ChooseActionByPolicy(world)
            nextState = world.stateToHash()

            # Mark the state-action pair and the state just visited as fully
            # eligible before propagating the new TD error.
            actor.eligibility[state + str(chosenAction)] = 1
            critic.updateTDError(reward, state, nextState)
            critic.eligibility[state] = 1
            totalError += abs(critic.tdError)

            for SAP in world.getGameLog():
                critic.updateValue(SAP)
                critic.decayEligibility(SAP)
                actor.updatePolicy(SAP, critic.tdError)
                actor.decayEligibility(SAP)

            # NOTE(review): 10 is presumably the reward for a solved board --
            # confirm against world.makeAction.
            if reward == 10:
                updateSolvableStates(boardType + str(boardSize), world.startRemoveLocations)

            # The episode ends once the action that was just applied was None.
            if chosenAction is None:
                break

            chosenAction = nextAction
            state = nextState
            stepsTaken += 1

        print('Episode:', i, 'MeanError', totalError / stepsTaken)

    # NOTE(review): assumed to run once after all episodes rather than after
    # each one -- confirm.
    WriteTables(critic.getValueTable(), actor.getPolicyTable(), boardType, boardSize)
def TestModel(boardSize, maxRemovePegs, boardType, index):
    """Play one board with the stored policy table, rendering every step.

    The second table returned by ``ReadTables`` is given to an ``Actor`` whose
    third constructor argument (the position ``DoEpisodes`` uses for epsilon)
    is 0. The board comes from ``GetSolvableBoard`` and is played until the
    actor returns ``None`` as its chosen action.

    Args:
        boardSize: Forwarded to ``ReadTables`` and ``GetSolvableBoard``.
        maxRemovePegs: Not used by this function; kept so existing callers
            keep working.
        boardType: Forwarded to ``ReadTables`` and ``GetSolvableBoard``.
        index: Forwarded to ``GetSolvableBoard`` and
            ``visualizer.GenerateVideo``.

    Returns:
        The value returned by the final ``world.makeAction(None)`` call.
    """
    _, actorTable = ReadTables(boardType, boardSize)
    actor = Actor(0.9, 0.1, 0, actorTable)

    stepNumber = 0
    world = GetSolvableBoard(boardSize, boardType, index)
    chosenAction = actor.ChooseActionByPolicy(world)

    # Render the starting position before any move is made.
    visualizer.VisualizePegs(world.getState(), stepNumber)

    reward = 0
    while True:
        world.makeAction(chosenAction)
        # NOTE(review): the first move is rendered with stepNumber 0, the same
        # index as the starting position above -- confirm VisualizePegs does
        # not overwrite that frame.
        visualizer.VisualizePegs(world.getState(), stepNumber, chosenAction)

        chosenAction = actor.ChooseActionByPolicy(world)
        if chosenAction is None:
            # Capture the board before the closing call, then apply the None
            # action to obtain the final reward.
            endstate = str(world._boardState.state)
            reward = world.makeAction(chosenAction)
            print("EndState:", endstate, 'reward:', reward)
            break

        stepNumber += 1

    visualizer.GenerateVideo(stepNumber, index)
    return reward