from tensorflow.keras import callbacks # import xdot EPOCHS = 20 EPISODES = 50 BOARDSIZE = 9 LEARNINGRATE = 0.0002 MAXTURNS = (BOARDSIZE * BOARDSIZE) - BOARDSIZE # logdir= "testlog" # tensorboard_callback = callbacks.TensorBoard(logdir, histogram_freq=1) # callbacks = [tensorboard_callback] callbacks = None env = GoGame(BOARDSIZE, maxTurns=MAXTURNS) model = Model(num_actions=env.action_space.n, callbacks=callbacks) model.action_value(env.reset()[None, :]) #build the model model.summary() agent = A2CAgent(model, lr=LEARNINGRATE) for epoch in range(EPOCHS): # gc.collect() backend.clear_session() # objgraph.show_growth() # print("--------------------------------") time_start = time.time() rewards_history, episode_wins, losses = agent.train(env, max_steps=MAXTURNS, episodes=EPISODES, info=False, info_step=50) print( "epoch = {:2} | won {:3}/{:3} matches | mean rewards = {:7.2f} | mean losses = {:7.2f} | epoch time = {:6.2f} sec"
@app.route('/background_process_test')
def background_process_test():
    """Handle one poll of the board page, playing a model move on alternate calls.

    The module-level ``AI_TURN`` flag is flipped on every request. When it
    was set, the current board is read with ``get_flat_board()``, the model
    picks an action, and the matching board cell is clicked through the
    Selenium ``driver``. If the chosen cell is not empty, the pass button
    is clicked instead.

    Returns:
        The literal string ``"Nothing"`` on every path.
    """
    global AI_TURN

    # Not the AI's turn: arm the flag for the next request and bail out.
    if not AI_TURN:
        AI_TURN = True
        return "Nothing"

    AI_TURN = False

    board = get_flat_board()
    # A leading axis is added before the board is handed to the model.
    action, _ = model.action_value(np.asarray(board)[None, :])
    move = int(action)
    coord = env.flatMoveToCoord(move)

    # NOTE(review): find_element_by_css_selector is the legacy Selenium API;
    # confirm it still exists in the installed Selenium version.
    if board[move] == 0:
        # nth-child is 1-based, hence the +1 on both coordinates.
        cell_selector = "#board5 > div.row > div > div:nth-child({}) > div:nth-child({})".format(
            coord[0] + 1, coord[1] + 1
        )
        driver.find_element_by_css_selector(cell_selector).click()
        print("put stone on ({},{})".format(coord[0], coord[1]))
    else:
        print("invalid move ({},{}) passing instead".format(coord[0], coord[1]))
        pass_selector = "#board5 > div.goPlayers > div.stats.white > div.pass"
        driver.find_element_by_css_selector(pass_selector).click()

    return "Nothing"


if __name__ == "__main__":
    # Module-level globals used by the route handler above.
    env = GoGame(5, maxTurns=50)
    model = Model(num_actions=env.action_space.n)

    # Run the model once on the reset observation before loading weights.
    # Presumably this builds the model's variables — TODO confirm.
    model.action_value(env.reset()[None, :])
    model.load_weights(MODEL_NAME)

    # Open a Chrome window on the locally served board page, then start
    # the Flask server.
    driver = webdriver.Chrome(
        "C:\\Users\\flori\\Desktop\\q_learning\\go_game\\chromedriver.exe"
    )
    driver.get("http://127.0.0.1:5000")
    app.run()