示例#1
0
def loadQTable(fileName, color):
    """Load a saved Q-table from a text file and wrap it in an ``rl`` agent.

    The file is read as consecutive pairs of lines: a state line followed by
    a line of whitespace-separated action values.  The values are added onto
    the seven-entry, zero-initialised row stored for that state.

    Args:
        fileName: Path of the Q-table text file.
        color: Passed through unchanged as the first argument of ``rl``.

    Returns:
        Whatever ``rl(color, cFour.cFour(), qtable)`` returns.

    Raises:
        ValueError: If a token on a values line is not a valid float.
        IndexError: If a values line holds fewer numbers than the row has
            slots.
    """
    print('loading qtable, this might take a while...')
    with open(fileName) as f_obj:
        lines = f_obj.readlines()

    qtable = defaultdict(lambda: np.zeros(7))
    # Step through the file two lines at a time.  Stopping at len - 1 means a
    # dangling final line (a state without a values line, or a stray blank
    # line) is ignored instead of raising IndexError.
    for index in range(0, len(lines) - 1, 2):
        # Strip only the line terminator: the last line of a file may not
        # have one, and blindly dropping the final character would then
        # corrupt the state key.
        state = lines[index].rstrip('\n')
        # split() accepts the final number whether or not it is followed by
        # a trailing space, and tolerates repeated separators.
        fvalues = [float(token) for token in lines[index + 1].split()]

        row = qtable[state]
        for i in range(len(row)):
            row[i] += fvalues[i]
    return rl(color, cFour.cFour(), qtable)
示例#2
0
 def testHuman(self, eta=0.2, gamma=0.9, epsilon=0.1):
     """Play one interactive game: the agent moves first, then a human.

     A fresh board is created and stored on ``self.board``.  The agent moves
     through ``self.stateReward`` using ``self.color``; the human is given
     the other colour and enters a column number from 1 to 7.

     Args:
         eta: Forwarded to ``self.stateReward``.
         gamma: Forwarded to ``self.stateReward``.
         epsilon: Forwarded to ``self.stateReward``.

     Returns:
         The first "done" value that differs from ``'not full'``, as reported
         by ``self.stateReward`` or ``c4.step``.
     """
     q = self.q
     self.board = cFour.cFour()
     c4 = self.board
     state = c4.state()
     # The agent plays as self.color, so the human must take the other
     # colour; hard-coding black/red made both sides play red whenever
     # self.color was 'red'.
     ai_color = self.color
     human_color = 'red' if ai_color == 'black' else 'black'
     prompt = 'Enter the column you wish to place a piece in (1-7): '
     while True:
         c4.display()
         print(ai_color + '\'s turn')
         nextState, _reward, _action, done = self.stateReward(
             q, state, ai_color, c4, eta, gamma, epsilon)
         if done != 'not full':
             c4.display()
             return done
         c4.display()
         print(human_color + '\'s turn')
         after_ai = nextState
         while True:
             try:
                 p2a = int(input(prompt))
             except ValueError:
                 # Non-numeric input is handled like an out-of-range column.
                 p2a = 0
             if p2a < 1 or p2a > 7:
                 print('Invalid input, try again')
                 continue
             nextState, _p2r, p2d = c4.step(human_color, p2a - 1)
             if nextState != after_ai:
                 break
             # An unchanged state is treated as "column full", so the human
             # is asked again rather than silently losing the turn.
             print('Sorry, that column is full, try again')
         if p2d != 'not full':
             c4.display()
             return p2d
         state = nextState
示例#3
0
def main():
    """Run a two-player Connect Four game on the console.

    Black moves on odd turns and red on even turns.  Each player types a
    column index from 0 to 6.  The function returns once ``c.win`` reports
    'black' or 'red' as the winner, or immediately after printing the usage
    line when the argument count is not accepted.
    """
    # Accepted invocations have either no extra arguments or exactly two.
    argc = len(sys.argv)
    if argc == 2 or argc > 3:
        print(
            'usage: python3 connectFour.py [String] [String] || python3 connectFour.py'
        )
        return
    c = cFour.cFour()
    turn = 1
    # NOTE(review): no draw handling is visible here; once the board is full
    # every move is presumably rejected and the loop never ends - confirm
    # against cFour.
    while True:
        c.display()
        player = 'black' if turn % 2 == 1 else 'red'
        print(player + '\'s turn')
        try:
            i = int(input("Select column to place piece (0-6): "))
        except ValueError:
            # Non-numeric input: ask again instead of crashing.
            print('Invalid column, try again.')
            continue
        if i < 0 or i > 6:
            # Reject out-of-range columns up front so that a negative index
            # can never reach the board.
            print('Invalid column, try again.')
            continue
        flag, r = c.put(player, i)
        if flag == -5:
            print('Invalid column, try again.')
            continue
        winner, r = c.win(r, i)
        if winner == 'black' or winner == 'red':
            c.display()
            print(winner + ' wins!')
            return
        turn += 1
示例#4
0
def rlvsrl(bot):
    """Let ``bot`` play ten games against itself and collect the results.

    Black and red alternate, both moving through ``bot.stateReward`` with
    ``bot.q``.  A game ends as soon as a move reports anything other than
    ``'not full'``.

    Args:
        bot: Object providing ``q`` and ``stateReward``.

    Returns:
        A list with one entry per game: 'black', 'red' or 'draw'.
    """
    winners = []
    for game in range(10):
        c4 = cFour.cFour()
        state = c4.state()
        print('Game ' + str(game))
        c4.display()
        done = 'not full'
        while done == 'not full':
            for color in ('black', 'red'):
                print(color + '\'s turn')
                state, _reward, _action, done = bot.stateReward(
                    bot.q, state, color, c4, 0.2, 0.9, 0.1)
                c4.display()
                if done != 'not full':
                    break
        if done == 'full':
            print('draw')
            winners.append('draw')
        else:
            # Any other terminal value is credited to the side that just
            # moved.
            print(color + ' wins')
            winners.append(color)
    return winners
示例#5
0
import cFour

# Quick manual check of the board: place one piece per colour, show the
# board, then print the state it reports together with that state's length.
c = cFour.cFour()
# The second argument is presumably a column index - confirm against
# cFour.put.
c.put('black', 0)
c.put('red', 6)
c.display()
state = c.state()
print(state)
print(len(state))
示例#6
0
def _human_turn(c4, state, color):
    """Prompt until the human makes a move that changes the board.

    Returns ``(next_state, done)`` as reported by ``c4.step``.
    """
    while True:
        try:
            column = int(
                input('Enter a column number (1-7) to place a piece: '))
        except ValueError:
            print('Invalid input, try again')
            continue
        if column < 1 or column > 7:
            print('Out of bounds column! Try again')
            continue
        next_state, _reward, done = c4.step(color, column - 1)
        if next_state == state:
            # An unchanged state is treated as "column full".  A new column
            # is requested on every retry; reusing the rejected one would
            # loop forever.
            print('Sorry, that column is full, try again')
            continue
        return next_state, done


def _play_human_game(bot, ai_color, human_color, ai_first):
    """Play one game on a fresh board; return 'rl', 'player' or 'draw'."""
    c4 = cFour.cFour()
    state = c4.state()
    c4.display()
    ai_to_move = ai_first
    while True:
        if ai_to_move:
            print('AI turn')
            state, _reward, _action, done = bot.stateReward(
                bot.q, state, ai_color, c4, 0.2, 0.9, 0.1)
            win_message, win_label = 'AI wins', 'rl'
        else:
            print('Your turn')
            state, done = _human_turn(c4, state, human_color)
            win_message, win_label = 'You win', 'player'
        # Show the board after every move, so the final position is visible
        # whoever made the last move.
        c4.display()
        if done == 'full':
            print('draw')
            return 'draw'
        if done != 'not full':
            print(win_message)
            return win_label
        ai_to_move = not ai_to_move


def humanvsrl(bot):
    """Play ten interactive games between a human and ``bot``.

    In the first five games the bot moves first as black; in the last five
    the human moves first as black and the bot plays red.  ``bot.color`` is
    set to 'red' before the second set and is left that way.

    Args:
        bot: Object providing ``q``, ``stateReward`` and a writable ``color``.

    Returns:
        A list with one entry per game: 'rl', 'player' or 'draw'.
    """
    winners = []
    print(
        'For this test, the 10 games will be split in two; one set with the AI as player one and one with the AI as player two'
    )
    print('First set: AI = player one')
    for game in range(5):
        print('Game ' + str(game))
        winners.append(_play_human_game(bot, 'black', 'red', True))
    print('Second set: AI = player two')
    bot.color = 'red'
    for game in range(5):
        print('Game ' + str(game))
        winners.append(_play_human_game(bot, 'red', 'black', False))
    return winners
示例#7
0
 def setUp(self):
     """Create a fresh ``cFour.cFour`` board and store it on ``self.cfour``."""
     fresh_board = cFour.cFour()
     self.cfour = fresh_board
示例#8
0
 def qLearningInit(self, episodes=100000, eta=0.5, gamma=0.9, epsilon=0.1):
     """Run the first training episode against random moves, then continue.

     Two games are played one after the other with a fresh Q-table: game 1
     on ``self.board`` with the agent moving first as ``self.color``, and
     game 2 on a new board where a random 'black' player moves first and the
     agent answers as 'red'.  The table is stored on ``self.q`` and the
     remaining episodes are delegated to ``self.qLearning``.

     Args:
         episodes: Length of the two reward arrays; ``episodes - 1`` is
             passed on to ``self.qLearning``.
         eta: Step size of the Q update; also forwarded to ``stateReward``.
         gamma: Discount factor of the Q update; also forwarded.
         epsilon: Forwarded to ``stateReward``.

     Returns:
         Whatever ``self.qLearning`` returns.
     """
     q = defaultdict(lambda: np.zeros(self.actionSpace))
     player2 = 'red' if self.color == 'black' else 'black'
     g1rewards = np.zeros(episodes)
     g2rewards = np.zeros(episodes)
     g1 = self.board
     g2 = cFour.cFour()
     # Initial states of both games.
     g1state = g1.state()
     g2state = g2.state()
     g1done = 'not full'
     g1rDone = 'not full'
     g2done = 'not full'
     g2rDone = 'not full'
     while True:
         if g1done == 'not full' and g1rDone == 'not full':
             g1tempnextState, g1reward, g1action, g1done = self.stateReward(
                 q, g1state, self.color, g1, eta, gamma, epsilon)
             g1nextState = g1tempnextState
             # The random opponent answers only while the game is still
             # running.  Once the agent has ended it, a full board would
             # never change state and the retry loop could not terminate.
             if g1done == 'not full':
                 # Retry until the opponent's move actually changes the
                 # state.
                 while g1nextState == g1tempnextState:
                     g1nextState, g1r, g1rDone = g1.step(
                         player2, random.randint(0, 6))
             if g1done == self.color:
                 g1reward += 50
             # The draw bonus must look at game 1's own opponent status (the
             # original tested game 2's, which is still 'not full' here).
             if g1done == 'full' or g1rDone == 'full':
                 g1reward += 10
             if g1rDone == player2:
                 g1reward -= 50
             g1rewards[0] += g1reward
             g1nextAction = np.argmax(q[g1nextState])
             value = eta * (g1reward +
                            (gamma * q[g1nextState][g1nextAction]) -
                            q[g1state][g1action])
             q[g1state][g1action] += value
             g1state = g1nextState
             continue
         # Game 2 runs until either side of game 2 ends it (the original
         # tested game 1's opponent status in this guard).
         if g2done == 'not full' and g2rDone == 'not full':
             g2nextState, g2r, g2rDone = g2.step('black',
                                                 random.randint(0, 6))
             while g2nextState == g2state:  # prevent same state assignment
                 g2nextState, g2r, g2rDone = g2.step(
                     'black', random.randint(0, 6))
             # NOTE(review): the agent is handed g2state (the state before
             # black's move) and still moves when black has just ended the
             # game; qLearning passes the post-opponent state instead.
             # Left unchanged - confirm the intended behaviour.
             g2nextState, g2reward, g2action, g2done = self.stateReward(
                 q, g2state, 'red', g2, eta, gamma, epsilon)
             if g2done == 'red':
                 g2reward += 50
             if g2done == 'full' or g2rDone == 'full':
                 g2reward += 10
             if g2rDone == 'black':
                 g2reward -= 50
             g2rewards[0] += g2reward
             g2nextAction = np.argmax(q[g2nextState])
             value = eta * (g2reward +
                            (gamma * q[g2nextState][g2nextAction]) -
                            q[g2state][g2action])
             q[g2state][g2action] += value
             g2state = g2nextState
             continue
         self.q = q
         break
     return self.qLearning(player2, episodes - 1, g1rewards, g2rewards, eta,
                           gamma, epsilon)
示例#9
0
 def qLearning(self, player2, episodes, g1episodeRewards, g2episodeRewards,
               eta, gamma, epsilon):
     """Train by self-play for episodes ``1 .. episodes - 1``.

     Each episode plays two fresh games.  In game 1 the learner (table
     ``p1q``) moves first as ``self.color`` and ``player2`` answers using
     ``p2q``.  In game 2 'black' moves first using ``p2q`` and the learner
     answers as 'red'.  Only ``p1q`` receives the explicit Q update here.
     After each episode ``self.q`` is set to ``p1q`` and ``p2q`` is replaced
     by ``copyQ(p1q, self.actionSpace)``.

     Args:
         player2: Colour of the opponent in game 1.
         episodes: Exclusive upper bound of the episode index.
         g1episodeRewards: Array accumulating game 1 rewards per episode.
         g2episodeRewards: Array accumulating game 2 rewards per episode.
         eta: Step size of the Q update; also forwarded to ``stateReward``.
         gamma: Discount factor of the Q update; also forwarded.
         epsilon: Forwarded to ``stateReward``; reduced by 0.001 every 50
             episodes and never allowed below zero.

     Returns:
         ``(self.q, g1episodeRewards, g2episodeRewards)``.
     """
     p1q = copyQ(self.q, self.actionSpace)
     p2q = defaultdict(lambda: np.zeros(self.actionSpace))
     for e in range(1, episodes):
         g1 = cFour.cFour()
         g2 = cFour.cFour()
         self.board = g1
         g1state = g1.state()
         g2state = g2.state()
         g1done = 'not full'
         g1p2done = 'not full'
         g2done = 'not full'
         g2p2done = 'not full'
         while True:
             if g1done == 'not full' and g1p2done == 'not full':
                 g1nextState, g1reward, g1action, g1done = self.stateReward(
                     p1q, g1state, self.color, g1, eta, gamma, epsilon)
                 # The opponent answers only if the learner's move did not
                 # end the game.
                 if g1done == 'not full':
                     g1nextState, g1p2reward, g1p2action, g1p2done = self.stateReward(
                         p2q, g1nextState, player2, g1, eta, gamma, epsilon)
                 if g1done == self.color:
                     g1reward += 50
                 if g1done == 'full' or g1p2done == 'full':
                     g1reward += 10
                 if g1p2done == player2:
                     g1reward -= 50
                 g1episodeRewards[e] += g1reward
                 g1nextAction = np.argmax(p1q[g1nextState])
                 value = eta * (g1reward +
                                (gamma * p1q[g1nextState][g1nextAction]) -
                                p1q[g1state][g1action])
                 p1q[g1state][g1action] += value
                 g1state = g1nextState
                 continue
             if g2done == 'not full' and g2p2done == 'not full':
                 g2nextState, g2p2reward, g2p2action, g2p2done = self.stateReward(
                     p2q, g2state, 'black', g2, eta, gamma, epsilon)
                 # NOTE(review): g2done is always 'not full' at this point,
                 # so red also moves after black has ended the game; the
                 # intended test is presumably g2p2done.  Left unchanged -
                 # confirm before altering the training dynamics.
                 if g2done == 'not full':
                     g2nextState, g2reward, g2action, g2done = self.stateReward(
                         p1q, g2nextState, 'red', g2, eta, gamma, epsilon)
                 if g2done == 'red':
                     g2reward += 50
                 if g2done == 'full' or g2p2done == 'full':
                     g2reward += 10
                 # The loss penalty depends on game 2's own opponent
                 # ('black'); the original tested game 1's opponent status,
                 # which no longer changes once game 1 is over.
                 if g2p2done == 'black':
                     g2reward -= 50
                 g2episodeRewards[e] += g2reward
                 g2nextAction = np.argmax(p1q[g2nextState])
                 value = eta * (g2reward +
                                (gamma * p1q[g2nextState][g2nextAction]) -
                                p1q[g2state][g2action])
                 p1q[g2state][g2action] += value
                 g2state = g2nextState
                 continue
             self.q = p1q
             p2q = copyQ(p1q, self.actionSpace)
             break
         # Decrease epsilon every 50 episodes.  It is clamped at zero because
         # repeated float subtraction can otherwise overshoot and leave it
         # slightly negative.
         if (e + 1) % 50 == 0 and epsilon > 0:
             epsilon = max(0.0, epsilon - .001)
     return self.q, g1episodeRewards, g2episodeRewards