def loadQTable(fileName, color):
    """Load a Q-table saved as alternating lines of <state> / <Q-values>.

    The values line holds space-separated floats, one per action column.
    Returns an rl agent constructed with the given color, a fresh board,
    and the loaded table.

    BUGFIX: the original char-by-char parser only emitted a number when it
    saw a trailing space, so the last Q-value of each line was dropped
    unless the file happened to end every line with ' '.  It also sliced
    off the last character of each line, which destroys a digit when the
    final line has no newline.  str.split()/rstrip handle both correctly.
    """
    print('loading qtable, this might take a while...')
    with open(fileName) as f_obj:
        lines = f_obj.readlines()
    qtable = defaultdict(lambda: np.zeros(7))
    # Step by 2: state line, then its values line.  The len(lines) - 1
    # bound also guards against a dangling state line with no values.
    for index in range(0, len(lines) - 1, 2):
        state = lines[index].rstrip('\n')
        fvalues = [float(v) for v in lines[index + 1].split()]
        for i, v in enumerate(fvalues):
            qtable[state][i] += v
    return rl(color, cFour.cFour(), qtable)
def testHuman(self, eta=0.2, gamma=0.9, epsilon=0.1):
    """Play one interactive game: the trained agent (black) vs a human (red).

    Resets self.board to a fresh game, alternates agent and human moves,
    and returns the terminal marker reported by the board ('black', 'red',
    or 'full' for a draw).
    """
    q = self.q
    self.board = cFour.cFour()
    c4 = self.board
    state = c4.state()
    for t in itertools.count():
        c4.display()
        whosturn = 'black'
        print(whosturn + '\'s turn')
        nextState, reward, action, done = self.stateReward(
            q, state, self.color, c4, eta, gamma, epsilon)
        if done != 'not full':
            c4.display()
            winner = done
            break
        c4.display()
        whosturn = 'red'
        # BUGFIX: previously printed "red' turn" (missing the 's').
        print(whosturn + '\'s turn')
        p2a = int(
            input('Enter the column you wish to place a piece in (1-7): '))
        while p2a < 1 or p2a > 7:
            print('Invalid input, try again')
            p2a = int(
                input(
                    'Enter the column you wish to place a piece in (1-7): '
                ))
        # Board state after the agent's move, used to detect a no-op step.
        afterAI = nextState
        nextState, p2r, p2d = c4.step(whosturn, p2a - 1)
        # BUGFIX: a full column used to silently discard the human's turn.
        # Assumes c4.step returns the unchanged state for a full column,
        # as humanvsrl relies on — TODO confirm against cFour.step.
        while nextState == afterAI:
            print('Sorry, that column is full, try again')
            p2a = int(
                input('Enter the column you wish to place a piece in (1-7): '))
            while p2a < 1 or p2a > 7:
                print('Invalid input, try again')
                p2a = int(
                    input(
                        'Enter the column you wish to place a piece in (1-7): '
                    ))
            nextState, p2r, p2d = c4.step(whosturn, p2a - 1)
        if p2d != 'not full':
            c4.display()
            winner = p2d
            break
        state = nextState
    return winner
def main():
    """Console entry point: run a two-human Connect Four match.

    Accepts either zero or exactly two command-line arguments; anything
    else prints the usage line and exits.
    """
    argc = len(sys.argv)
    # Reject more than two args, or exactly one (argc == 2 means one arg).
    if argc > 3 or argc == 2:
        print(
            'usage: python3 connectFour.py [String] [String] || python3 connectFour.py'
        )
        return
    board = cFour.cFour()
    turn = 1
    while True:
        board.display()
        player = 'black' if turn % 2 == 1 else 'red'
        print(player + '\'s turn')
        col = int(input("Select column to place piece (0-6): "))
        flag, row = board.put(player, col)
        # -5 is the board's sentinel for an illegal column choice.
        if flag == -5:
            print('Invalid column, try again.')
            continue
        winner, row = board.win(row, col)
        if winner in ('black', 'red'):
            board.display()
            print(winner + ' wins!')
            return
        turn += 1
def rlvsrl(bot):
    """Let the agent play 10 games against itself (black and red).

    Returns a list of outcomes, one per game: 'black', 'red', or 'draw'.
    """
    winners = []
    c4 = bot.board
    for game in range(10):
        c4 = cFour.cFour()
        state = c4.state()
        print('Game ' + str(game))
        c4.display()
        game_over = False
        while not game_over:
            # Both colors take one move each per pass, in fixed order.
            for color in ('black', 'red'):
                print(color + '\'s turn')
                nextState, reward, action, done = bot.stateReward(
                    bot.q, state, color, c4, 0.2, 0.9, 0.1)
                state = nextState
                c4.display()
                if done != 'not full':
                    if done == 'full':
                        result = 'draw'
                        print('draw')
                    else:
                        result = color
                        print(color + ' wins')
                    winners.append(result)
                    game_over = True
                    break
    return winners
import cFour

# Smoke test: drop one piece for each player, show the board, then
# inspect the encoded state string and its length.
board = cFour.cFour()
board.put('black', 0)
board.put('red', 6)
board.display()
encoded = board.state()
print(encoded)
print(len(encoded))
def _humanMove(c4, color, state):
    """Prompt the human for a column until a legal piece is placed.

    Re-prompts on out-of-bounds input and on full columns, then returns
    (nextState, done) from the successful board step.

    BUGFIX: the original inlined version never reset the column choice
    after a full-column attempt, so the input prompt was skipped and
    c4.step was retried forever with the same full column (infinite loop).
    Resetting i each pass restores the prompt.
    """
    nextState = state
    done = 'not full'
    while nextState == state:
        i = 0
        while i < 1 or i > 7:
            i = int(
                input('Enter a column number (1-7) to place a piece: '))
            if i < 1 or i > 7:
                print('Out of bounds column! Try again')
        nextState, r, done = c4.step(color, i - 1)
        if nextState == state:
            print('Sorry, that column is full, try again')
    return nextState, done


def humanvsrl(bot):
    """Play 10 games of human vs the trained agent.

    First five games the agent moves first as 'black'; last five it moves
    second as 'red' (bot.color is switched accordingly).  Returns a list
    of outcomes, one per game: 'rl', 'player', or 'draw'.
    """
    winners = []
    c4 = bot.board
    # BUGFIX: message typo "witht eh" corrected to "with the".
    print(
        'For this test, the 10 games will be split in two; one set with the AI as player one and one with the AI as player two'
    )
    print('First set: AI = player one')
    for game in range(5):
        c4 = cFour.cFour()
        state = c4.state()
        print('Game ' + str(game))
        c4.display()
        while True:
            print('AI turn')
            nextState, reward, action, done = bot.stateReward(
                bot.q, state, 'black', c4, 0.2, 0.9, 0.1)
            state = nextState
            c4.display()
            if done != 'not full':
                if done == 'full':
                    print('draw')
                    winners.append('draw')
                    break
                print('AI wins')
                winners.append('rl')
                break
            print('Your turn')
            state, done = _humanMove(c4, 'red', state)
            # Show the board after the human move (set 2 did; set 1 didn't).
            c4.display()
            if done != 'not full':
                if done == 'full':
                    print('draw')
                    winners.append('draw')
                    break
                print('You win')
                winners.append('player')
                break
    print('Second set: AI = player two')
    bot.color = 'red'
    for game in range(5):
        c4 = cFour.cFour()
        state = c4.state()
        print('Game ' + str(game))
        c4.display()
        while True:
            print('Your turn')
            state, done = _humanMove(c4, 'black', state)
            c4.display()
            if done != 'not full':
                if done == 'full':
                    print('draw')
                    winners.append('draw')
                    break
                print('You win')
                winners.append('player')
                break
            print('AI turn')
            nextState, reward, action, done = bot.stateReward(
                bot.q, state, 'red', c4, 0.2, 0.9, 0.1)
            state = nextState
            c4.display()
            if done != 'not full':
                if done == 'full':
                    print('draw')
                    winners.append('draw')
                    break
                print('AI wins')
                winners.append('rl')
                break
    return winners
def setUp(self):
    """Create a fresh Connect Four board before each test."""
    self.cfour = cFour.cFour()
def qLearningInit(self, episodes=100000, eta=0.5, gamma=0.9, epsilon=0.1):
    """Bootstrap Q-learning with one episode against a random opponent.

    Two games are trained in the same loop: g1 with the agent playing
    self.color on self.board, and g2 with the agent playing 'red' on a
    fresh board, both versus uniformly random opponent moves.  Rewards
    for this bootstrap episode accumulate in index 0 of the reward
    arrays; the remaining episodes - 1 episodes are delegated to
    self.qLearning.

    Returns the (q, g1rewards, g2rewards) tuple from self.qLearning.
    """
    q = defaultdict(lambda: np.zeros(self.actionSpace))
    # The random opponent takes whichever color the agent does not.
    if self.color == 'black':
        player2 = 'red'
    else:
        player2 = 'black'
    g1rewards = np.zeros(episodes)
    g2rewards = np.zeros(episodes)
    g1 = self.board
    g2 = cFour.cFour()
    g1state = g1.state()
    g2state = g2.state()
    g1done = 'not full'
    g1rDone = 'not full'
    g2done = 'not full'
    g2rDone = 'not full'
    for t in itertools.count():
        if g1done == 'not full' and g1rDone == 'not full':
            g1tempnextState, g1reward, g1action, g1done = self.stateReward(
                q, g1state, self.color, g1, eta, gamma, epsilon)
            # Random opponent reply; retry until the board actually changes
            # (a full column leaves the state unchanged).
            g1nextState, g1r, g1rDone = g1.step(player2,
                                                random.randint(0, 6))
            while g1nextState == g1tempnextState:
                g1nextState, g1r, g1rDone = g1.step(
                    player2, random.randint(0, 6))
            if g1done == self.color:
                g1reward += 50
            # BUGFIX: the draw bonus tested g2rDone (the other game's
            # flag); it must test this game's opponent flag g1rDone.
            if g1done == 'full' or g1rDone == 'full':
                g1reward += 10
            if g1rDone == player2:
                g1reward -= 50
            g1rewards[0] += g1reward
            # One-step Q-learning update toward the greedy successor value.
            g1nextAction = np.argmax(q[g1nextState])
            value = eta * (g1reward +
                           (gamma * q[g1nextState][g1nextAction]) -
                           q[g1state][g1action])
            q[g1state][g1action] += value
            g1state = g1nextState
            continue
        # BUGFIX: this branch was gated on g1rDone, so game 1 ending
        # stopped game 2's training; it must check g2's own flag.
        if g2done == 'not full' and g2rDone == 'not full':
            # Random 'black' opponent moves first in game 2.
            g2nextState, g2r, g2rDone = g2.step('black',
                                                random.randint(0, 6))
            while g2nextState == g2state:  # prevent same state assignment
                g2nextState, g2r, g2rDone = g2.step(
                    'black', random.randint(0, 6))
            g2nextState, g2reward, g2action, g2done = self.stateReward(
                q, g2state, 'red', g2, eta, gamma, epsilon)
            if g2done == 'red':
                g2reward += 50
            if g2done == 'full' or g2rDone == 'full':
                g2reward += 10
            if g2rDone == 'black':
                g2reward -= 50
            g2rewards[0] += g2reward
            g2nextAction = np.argmax(q[g2nextState])
            value = eta * (g2reward +
                           (gamma * q[g2nextState][g2nextAction]) -
                           q[g2state][g2action])
            q[g2state][g2action] += value
            g2state = g2nextState
            continue
        # Both games finished: publish the bootstrap table and stop.
        self.q = q
        break
    return self.qLearning(player2, episodes - 1, g1rewards, g2rewards, eta,
                          gamma, epsilon)
def qLearning(self, player2, episodes, g1episodeRewards, g2episodeRewards,
              eta, gamma, epsilon):
    """Run self-play Q-learning episodes after the bootstrap episode.

    Each episode plays two games in lockstep: g1 with the agent (p1q) as
    self.color against a second table (p2q), and g2 with the tables'
    colors reversed.  After every episode p2q is refreshed as a copy of
    p1q, and epsilon decays by .001 every 50 episodes.

    Returns (self.q, g1episodeRewards, g2episodeRewards).
    """
    p1q = copyQ(self.q, self.actionSpace)
    p2q = defaultdict(lambda: np.zeros(self.actionSpace))
    for e in range(1, episodes):
        g1 = cFour.cFour()
        g2 = cFour.cFour()
        self.board = g1
        g1state = g1.state()
        g2state = g2.state()
        g1done = 'not full'
        g1p2done = 'not full'
        g2done = 'not full'
        g2p2done = 'not full'
        for t in itertools.count():
            if g1done == 'not full' and g1p2done == 'not full':
                g1nextState, g1reward, g1action, g1done = self.stateReward(
                    p1q, g1state, self.color, g1, eta, gamma, epsilon)
                # Opponent replies only if the agent's move didn't end it.
                if g1done == 'not full':
                    g1nextState, g1p2reward, g1p2action, g1p2done = self.stateReward(
                        p2q, g1nextState, player2, g1, eta, gamma, epsilon)
                if g1done == self.color:
                    g1reward += 50
                if g1done == 'full' or g1p2done == 'full':
                    g1reward += 10
                if g1p2done == player2:
                    g1reward -= 50
                g1episodeRewards[e] += g1reward
                # One-step Q-learning update toward the greedy successor.
                g1nextAction = np.argmax(p1q[g1nextState])
                value = eta * (g1reward +
                               (gamma * p1q[g1nextState][g1nextAction]) -
                               p1q[g1state][g1action])
                p1q[g1state][g1action] += value
                g1state = g1nextState
                continue
            if g2done == 'not full' and g2p2done == 'not full':
                g2nextState, g2p2reward, g2p2action, g2p2done = self.stateReward(
                    p2q, g2state, 'black', g2, eta, gamma, epsilon)
                # NOTE(review): this guard checks g2done, but the move just
                # made was p2's ('black'); g2p2done may be the intended
                # flag — confirm before changing, as g2reward/g2action
                # from a prior iteration would then feed the update.
                if g2done == 'not full':
                    g2nextState, g2reward, g2action, g2done = self.stateReward(
                        p1q, g2nextState, 'red', g2, eta, gamma, epsilon)
                if g2done == 'red':
                    g2reward += 50
                if g2done == 'full' or g2p2done == 'full':
                    g2reward += 10
                # BUGFIX: the loss penalty tested g1p2done == player2
                # (the other game's flag/color); in this game the agent is
                # 'red' and loses when 'black' (its opponent here) wins.
                if g2p2done == 'black':
                    g2reward -= 50
                g2episodeRewards[e] += g2reward
                g2nextAction = np.argmax(p1q[g2nextState])
                value = eta * (g2reward +
                               (gamma * p1q[g2nextState][g2nextAction]) -
                               p1q[g2state][g2action])
                p1q[g2state][g2action] += value
                g2state = g2nextState
                continue
            # Both games finished: publish p1q and refresh the opponent.
            self.q = p1q
            p2q = copyQ(p1q, self.actionSpace)
            break
        # decrease epsilon
        if (e + 1) % 50 == 0:
            if epsilon > 0:
                epsilon -= .001
    return self.q, g1episodeRewards, g2episodeRewards