def humanvmodel(sess, X, Y, humanfirst=False):
    """Play one interactive console game between a person and the network.

    The network's reply is the highest-scoring entry of ``Y`` whose column
    still has a free cell in row 0 of ``game.spots``.

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)``
            (presumably a TensorFlow session -- confirm with the caller).
        X: Input handle fed with the stacked piece arrays.
        Y: Output handle evaluated to get one score per candidate move.
        humanfirst: When true, the person is shown the board and moves
            before the network does.

    Returns:
        None. The function returns once ``game.move`` reports a non-zero
        result.
    """
    game = Game()

    if humanfirst:
        game.printState()
        # The result of the opening move is not inspected.
        game.move(int(input("You go first...")))

    while True:
        # The piece arrays are stacked in the opposite order when the
        # person has taken the first move.
        if humanfirst:
            planes = (game.noughts, game.crosses)
        else:
            planes = (game.crosses, game.noughts)
        scores = sess.run(Y, feed_dict={X: np.concatenate(planes)})

        # Pick the best-scoring move among columns whose top cell is empty.
        top_score = -10000.0
        choice = 0
        for column, score in enumerate(scores):
            if score > top_score and game.spots[0, column] == 0.0:
                top_score, choice = score, column
        print(top_score)

        result = game.move(choice)
        game.printState()
        if result != 0.0:
            print("Game over!")
            break

        result = game.move(int(input("Your turn...")))
        if result == 0.0:
            continue

        # Any non-zero result ends the game; 1000.0 is the draw code.
        game.printState()
        print("It's a draw!" if result == 1000.0 else "You win!")
        break
def train(sess, X, Y, optimizer, cost, boards, outcomes, ends, wons, moves,
          explore_rate, memsize, batchsize, saver, directory, chckptrate,
          Ytest):
    """Run self-play forever, training the network from a replay memory.

    Crosses and noughts alternate moves chosen by ``findbestmove`` from the
    network output ``Y``.  Each side sees the board with its own pieces
    first (crosses: ``(crosses, noughts)``, noughts: ``(noughts, crosses)``).
    Every transition is recorded through ``addtomemory`` as
    ``(board, next board, end flag, won value, move)``:

    * terminal transitions use an all-zero next board and ``end = 1.0``,
      with ``won`` equal to 1.0 for the winner, -1.0 for the loser and 0.0
      for a draw;
    * a non-terminal transition is recorded only after the opponent has
      replied, so the next board is the position the same side faces on its
      following turn.

    Once the last slot of the move memory has been written, every step also
    runs one optimisation step on a random batch.  After each
    ``chckptrate`` steps the function prints a progress line (when training
    has started) and saves a checkpoint.

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)``
            (presumably a TensorFlow session -- confirm with the caller).
        X: Input handle fed with the current board when choosing a move.
        Y: Output handle evaluated to score the candidate moves.
        optimizer: Training op run on each sampled batch.
        cost: Cost tensor fetched together with ``optimizer``.
        boards, outcomes, ends, wons, moves: Input handles fed with the
            transposed batch columns of the replay memory.
        explore_rate: Passed through to ``findbestmove``.
        memsize: Number of transitions held in the replay memory.
        batchsize: Number of transitions sampled per optimisation step;
            must not exceed ``memsize`` (sampling is without replacement).
        saver: Object exposing ``save(sess, directory)``.
        directory: Checkpoint destination handed to ``saver.save``.
        chckptrate: Number of loop steps between checkpoints.
        Ytest: Output handle evaluated on a fixed sample of boards (fed
            through ``outcomes``) for the progress report.

    Returns:
        Never returns; the loop only ends through an exception or
        interruption.
    """
    state_width = 84          # width of one stored board row
    test_sample_size = 1000   # upper bound on the fixed evaluation sample

    # Replay memory, one row per stored transition.
    boardm = np.zeros((memsize, state_width))
    outcomem = np.zeros((memsize, state_width))
    endm = np.zeros((memsize, 1))
    wonm = np.zeros((memsize, 1))
    # -1 marks a slot that has never been written.
    movem = np.full((memsize, 1), -1, dtype=int)

    def store(board, nextboard, end, won, move, count):
        """Record one transition and return the updated counter."""
        return addtomemory(boardm, outcomem, endm, wonm, movem, board,
                           nextboard, end, won, move, count)

    def terminal():
        """Return a fresh all-zero next board for a finished game."""
        return np.zeros((1, state_width))

    test = None               # fixed evaluation sample, drawn once
    game = Game()
    firsttrain = True
    counter = 0
    noughtmoveip = False      # True while a nought move awaits recording
    boardtemp2 = np.zeros((1, state_width))
    movetemp2 = 0
    avcost = 0                # running average of the batch cost (not reported)

    print("Beginning training...")
    while True:
        for _ in range(chckptrate):
            # ---- crosses to move ----
            boardtemp = np.concatenate((game.crosses, game.noughts)).T
            out = sess.run(Y, feed_dict={X: boardtemp.T})
            movetemp = findbestmove(out, game, explore_rate)
            check = game.move(movetemp)

            if check == 1.0:
                # Crosses won: reward crosses, penalise the pending nought move.
                counter = store(boardtemp, terminal(), 1.0, 1.0, movetemp,
                                counter)
                if noughtmoveip:
                    counter = store(boardtemp2, terminal(), 1.0, -1.0,
                                    movetemp2, counter)
                game.reset()
                noughtmoveip = False
            elif check == 1000.0:
                # Draw on a cross move: both sides get a terminal 0.0.
                counter = store(boardtemp, terminal(), 1.0, 0.0, movetemp,
                                counter)
                if noughtmoveip:
                    counter = store(boardtemp2, terminal(), 1.0, 0.0,
                                    movetemp2, counter)
                game.reset()
                noughtmoveip = False
            elif check == 0.0:
                # Game continues: the previous nought move can now be
                # recorded with the position noughts face next.
                if noughtmoveip:
                    counter = store(
                        boardtemp2,
                        np.concatenate((game.noughts, game.crosses)).T,
                        0.0, 0.0, movetemp2, counter)
                noughtmoveip = True

                # ---- noughts to move ----
                boardtemp2 = np.concatenate((game.noughts, game.crosses)).T
                out = sess.run(Y, feed_dict={X: boardtemp2.T})
                movetemp2 = findbestmove(out, game, explore_rate)
                check2 = game.move(movetemp2)

                if check2 == -1.0:
                    # Noughts won: penalise crosses, reward noughts.
                    counter = store(boardtemp, terminal(), 1.0, -1.0,
                                    movetemp, counter)
                    counter = store(boardtemp2, terminal(), 1.0, 1.0,
                                    movetemp2, counter)
                    game.reset()
                    noughtmoveip = False
                elif check2 == 1000.0:
                    # Draw on a nought move.
                    counter = store(boardtemp, terminal(), 1.0, 0.0,
                                    movetemp, counter)
                    counter = store(boardtemp2, terminal(), 1.0, 0.0,
                                    movetemp2, counter)
                    game.reset()
                    noughtmoveip = False
                elif check2 == 0.0:
                    # Game continues: record the cross move with the
                    # position crosses face next.
                    counter = store(
                        boardtemp,
                        np.concatenate((game.crosses, game.noughts)).T,
                        0.0, 0.0, movetemp, counter)

            # Train only once the final memory slot has been written.
            if movem[memsize - 1] != -1:
                if firsttrain:
                    # Clamp the sample size: choice(..., replace=False)
                    # raises ValueError when size exceeds the population.
                    picks = np.random.choice(
                        np.arange(memsize),
                        size=min(test_sample_size, memsize),
                        replace=False)
                    # Integer-array indexing returns a copy, so later
                    # writes to boardm do not alter the evaluation sample.
                    test = boardm[picks]
                    firsttrain = False

                sample = np.random.choice(np.arange(memsize), size=batchsize,
                                          replace=False)
                _, batchcost = sess.run(
                    [optimizer, cost],
                    feed_dict={
                        boards: boardm[sample].T,
                        outcomes: outcomem[sample].T,
                        ends: endm[sample].T,
                        wons: wonm[sample].T,
                        moves: movem[sample].T,
                    })
                avcost = 0.9 * avcost + 0.1 * batchcost

        # ---- checkpoint ----
        if not firsttrain:
            testvalue = np.average(
                sess.run(Ytest, feed_dict={outcomes: test.T}))
            print("Counter is " + str(counter) +
                  " and average Qmax on test set is " + str(testvalue))
        saver.save(sess, directory)
# -*- coding: utf-8 -*-
"""
Created on Wed May 24 21:22:30 2017

@author: Toby
"""
from connectfour import Game

# Console game driven entirely by keyboard input: show the board, read a
# move, and repeat until game.move() returns a non-zero result.
turncounter = 1
game = Game()
while True:
    game.printState()
    check = game.move(int(input("No winner yet...")))
    if check != 0.0:
        game.printState()
        if check == 1000.0:
            # 1000.0 is the draw code, not a player id, so it must not be
            # announced as a winner.
            print("It's a draw!")
        else:
            print("End of Game! Player " + str(check) + " has won!")
        break
    turncounter = turncounter + 1