def main(_):
    FLAGS.agent = model(params=FLAGS)
    FLAGS.environment = get_env(FLAGS)
    FLAGS.act = action()
    FLAGS.step_max = FLAGS.environment.data_len()
    FLAGS.train_freq = 40
    FLAGS.update_q_freq = 50
    FLAGS.gamma = 0.97
    FLAGS.show_log_freq = 5
    FLAGS.memory = []  # Experience(FLAGS.memory_size)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # create the directory used to save the model
    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)

    start = time.time()
    with tf.Session() as sess:
        sess.run(init)
        eval = evaluation(FLAGS, sess)

        # restore the latest checkpoint if one exists
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt:
            print('Loading Model...')
            saver.restore(sess, ckpt.model_checkpoint_path)

        total_step = 1
        print('\t'.join(map(str, [
            "epoch", "epsilon", "total_step", "rewardPerEpoch", "profits",
            "lossPerBatch", "elapsed_time"
        ])))

        for epoch in range(FLAGS.epoch_num):
            avg_loss_per_batch, total_reward, total_step, profits = run_epch(
                FLAGS, sess, total_step)

            # periodically log progress and save a checkpoint
            if (epoch + 1) % FLAGS.show_log_freq == 0:
                elapsed_time = time.time() - start
                print('\t'.join(map(str, [
                    epoch + 1, FLAGS.act.epsilon, total_step, total_reward,
                    profits, avg_loss_per_batch, elapsed_time
                ])))
                start = time.time()
                saver.save(sess, os.path.join(
                    FLAGS.model_dir, 'model-' + str(epoch + 1) + '.ckpt'))

        eval.eval()
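# A minimal entry-point sketch (an assumption, not part of the original script):
# it presumes FLAGS comes from TF1-style tf.app.flags defined elsewhere in this
# file (e.g. epoch_num, model_dir, memory_size) and lets tf.app.run() parse the
# flags and invoke main(_) above.
if __name__ == '__main__':
    tf.app.run()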
def play_a_game(commentary=False):
    board = init_board()                   # initialize the board
    player = np.random.randint(2) * 2 - 1  # which player begins?

    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary:
            print("let's go player", player)

        # roll dice
        dice = roll_dice()
        if commentary:
            print("rolled dice:", dice)

        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            # the learning agent plays as player 1, the random agent as player -1
            # (for agent vs. agent, call agent.action for both players instead)
            if player == 1:
                move = agent.action(board_copy, dice, player, i)
            elif player == -1:
                move = random_agent(board_copy, dice, player, i)

            # update the board
            if len(move) != 0:
                for m in move:
                    board = update_board(board, m, player)

            # give status after every move
            if commentary:
                print("move from player", player, ":")
                pretty_print(board)

        # players take turns
        player = -player

    # return the winner
    return -1 * player
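# A hedged usage sketch (not in the original file): estimate the learning agent's
# win rate against the random opponent by repeatedly calling the play_a_game
# defined above. The game count n_games is an arbitrary choice.
def estimate_win_rate(n_games=100):
    wins = 0
    for _ in range(n_games):
        # player 1 is the learning agent; play_a_game returns the winner (1 or -1)
        if play_a_game(commentary=False) == 1:
            wins += 1
    return wins / n_games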
def play_a_game(commentary=False, net=None):
    board = init_board()                   # initialize the board
    player = np.random.randint(2) * 2 - 1  # which player begins?
    net.i = 1

    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary:
            print("let's go player", player)

        # roll dice
        dice = roll_dice()
        if commentary:
            print("rolled dice:", dice)

        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            # neural-net agent plays as player 1, random agent as player -1
            if player == 1:
                move = agent.action(board_copy, dice, player, i, net)
            elif player == -1:
                move = random_agent(board_copy, dice, player, i)

            # update the board
            if len(move) != 0:
                for m in move:
                    board = update_board(board, m, player)

            # give status after every move
            if commentary:
                print("move from player", player, ":")
                pretty_print(board)

        # players take turns
        player = -player

    # terminal update: the winner is -player, so the error is the difference
    # between the final outcome and the network's value estimate
    delta = -1 * player - net.torch_nn.forward(agent.getFeatures(board_copy, player))
    net.torch_nn.backward(net.gamma, delta)
    net.torch_nn_policy.theta = (net.torch_nn_policy.theta
                                 + net.torch_nn_policy.alpha_theta * net.i * delta
                                 * agent.getFeatures(board_copy, player))

    # return the winner
    return -1 * player
def play_a_game(commentary=True):
    board = init_board()  # initialize the board
    player = 1            # player 1 starts

    # play on
    while not game_over(board):  # and not check_for_error(board)
        if commentary:
            print("let's go player", player)

        dice = roll_dice()  # roll dice
        if commentary:
            print("rolled dice:", dice)

        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            # make the move
            if player == 1:
                move = agent.action(board_copy, dice, 1, i)
            elif player == -1:
                move = random_agent(board_copy, dice, i)

            # update the board
            if len(move) != 0:
                for m in move:
                    board = update_board(board, m)

            if commentary:
                print("move from player", player, ":")
                print("board:")
                if player == 1:
                    pretty_print(board)
                else:
                    pretty_print(flip_board(board))

        # players take turns; the board is always kept from the
        # perspective of the player to move
        player = -player
        board = flip_board(board)

    # return the winner
    return -1 * player
"human":{"type":"BB8Agent", "spec":{"init_x":np.vstack([ 50.,20.0, 0., 0.])}} } reaching_world_spec = { "friction": 0, "reaching_eps": 0.1, "agent_goal_lists":{ "robot": [[70.0,20.0], [10, 40]], "human": [[10.0,20.0], [40, 70]], } } env_spec = { "world": {"type":"FlatReachingWorld", "spec":reaching_world_spec}, "dt": 0.02, "agent_env_spec": agent_env_spec } evaluator = evaluator.Evaluator(agent_specs, env_spec) env = env.FlatEnv(env_spec, agents) dt, env_info, measurement_groups = env.reset() record = [] print("Simulation progress:") for it in progressbar.progressbar(range(100)): actions = {} for agent in agents: # an action is dictionary which must contain a key "control" actions[agent.name] = agent.action(dt, measurement_groups[agent.name]) #sensor data is grouped by agent dt, env_info, measurement_groups = env.step(actions) record.append((env_info,measurement_groups)) evaluator.evaluate(record)
def play_a_game(modelPlayer, modelPlayerOne, modelPlayerOther,
                commentary=False, randomAgent=False):
    board = init_board()                   # initialize the board
    player = np.random.randint(2) * 2 - 1  # which player begins?

    # reset both models' Z trace tensors to zero at the start of each game
    modelPlayerOne.Z_w1 = torch.zeros(modelPlayerOne.w1.size(), device=modelPlayerOne.device, dtype=torch.float)
    modelPlayerOne.Z_b1 = torch.zeros(modelPlayerOne.b1.size(), device=modelPlayerOne.device, dtype=torch.float)
    modelPlayerOne.Z_w2 = torch.zeros(modelPlayerOne.w2.size(), device=modelPlayerOne.device, dtype=torch.float)
    modelPlayerOne.Z_b2 = torch.zeros(modelPlayerOne.b2.size(), device=modelPlayerOne.device, dtype=torch.float)
    modelPlayerOther.Z_w1 = torch.zeros(modelPlayerOther.w1.size(), device=modelPlayerOther.device, dtype=torch.float)
    modelPlayerOther.Z_b1 = torch.zeros(modelPlayerOther.b1.size(), device=modelPlayerOther.device, dtype=torch.float)
    modelPlayerOther.Z_w2 = torch.zeros(modelPlayerOther.w2.size(), device=modelPlayerOther.device, dtype=torch.float)
    modelPlayerOther.Z_b2 = torch.zeros(modelPlayerOther.b2.size(), device=modelPlayerOther.device, dtype=torch.float)

    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary:
            print("let's go player", player)

        # roll dice
        dice = roll_dice()
        if commentary:
            print("rolled dice:", dice)

        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            if randomAgent:
                # model vs. random agent: modelPlayer selects which model drives player 1
                if player == 1:
                    if modelPlayer == 1:
                        move = agent.action(board_copy, dice, player, i, modelPlayerOne)
                    else:
                        move = agent.action(board_copy, dice, player, i, modelPlayerOther)
                elif player == -1:
                    move = random_agent(board_copy, dice, player, i)
            else:
                # model vs. model: player -1 plays through the flipped-board agent
                if player == 1:
                    move = agent.action(board_copy, dice, player, i, modelPlayerOne)
                elif player == -1:
                    move = flipped_agent.action(board_copy, dice, player, i, modelPlayerOther)

            # update the board
            if len(move) != 0:
                for m in move:
                    board = update_board(board, m, player)

            # give status after every move
            if commentary:
                print("move from player", player, ":")
                pretty_print(board)

        # players take turns
        player = -player

    # game is over: let both models learn from the final outcome
    modelPlayerOne.gameFinishedUpdate(-1 * player)
    modelPlayerOne.dynaUpdate()
    modelPlayerOther.gameFinishedUpdate(-1 * player)
    modelPlayerOther.dynaUpdate()

    # return the winner
    return -1 * player
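# A hedged self-play training-loop sketch (assumption, not part of the original):
# it repeatedly calls the play_a_game above with two trace-carrying models so both
# learn from complete games. modelA, modelB and n_games are placeholders.
def train_self_play(modelA, modelB, n_games=1000):
    winners = []
    for _ in range(n_games):
        winner = play_a_game(modelPlayer=1,
                             modelPlayerOne=modelA,
                             modelPlayerOther=modelB,
                             commentary=False,
                             randomAgent=False)
        winners.append(winner)
    return winners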
def play_a_game(opponent, commentary=False):
    board = init_board()                   # initialize the board
    player = np.random.randint(2) * 2 - 1  # which player begins?
    y_old = 0
    y_old_p2 = 0
    firstMove = True
    firstMove_p2 = True

    # load the model that is being trained
    pickle_in = open("randommodel.pickle", "rb")
    model = pickle.load(pickle_in)
    model = model.cuda()

    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary:
            print("let's go player", player)

        # roll dice
        dice = roll_dice()
        if commentary:
            print("rolled dice:", dice)

        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            if opponent == "agent":
                # self-play: the same model plays both sides
                if player == 1:
                    move, y_old = agent.action(board_copy, dice, player, i,
                                               y_old, model, firstMove, True)
                    # update the board
                    if len(move) != 0:
                        for m in move:
                            board = update_board(board, m, player)
                    if firstMove:
                        firstMove = False
                elif player == -1:
                    # player -1 sees a flipped board so the model always acts as player 1
                    flipped_board = flipped_agent.flip_board(board_copy)
                    move, y_old_p2 = agent.action(flipped_board, dice, 1, i,
                                                  y_old_p2, model, firstMove_p2, True)
                    if len(move) != 0:
                        for m in move:
                            flipped_board = update_board(flipped_board, m, 1)
                    board = flipped_agent.flip_board(flipped_board)
                    if firstMove_p2:
                        firstMove_p2 = False

            elif opponent == "human":
                pretty_print(board)
                if player == 1:
                    print("Computer's turn")
                    move, y_old = agent.action(board_copy, dice, player, i,
                                               y_old, model, firstMove, False)
                    print("Computer's move", move)
                elif player == -1:
                    print("Human's turn")
                    possible_moves, possible_boards = legal_moves(board_copy, dice, player)
                    print("dice:", dice)
                    printing.moves_to_string(possible_moves)
                    text = input("prompt")
                    move = possible_moves[int(text)]
                # update the board
                if len(move) != 0:
                    for m in move:
                        board = update_board(board, m, player)

            elif opponent == "random":
                # playing vs. a random agent
                if player == 1:
                    move, y_old = agent.action(board_copy, dice, player, i,
                                               y_old, model, firstMove, False)
                elif player == -1:
                    move = random_agent(board_copy, dice, player, i)
                # update the board
                if len(move) != 0:
                    for m in move:
                        board = update_board(board, m, player)

            # give status after every move
            if commentary:
                print("move from player", player, ":")
                pretty_print(board)

        # players take turns
        player = -player

    # determine the winner and, in self-play, let the model learn from the outcome
    winner = -1 * player
    if opponent == "agent":
        if winner == 1:
            agent.learn(y_old, model, board_copy, "yes")
            agent.learn(y_old_p2, model, board_copy, "no")
        else:
            agent.learn(y_old, model, board_copy, "no")
            agent.learn(y_old_p2, model, board_copy, "yes")

    # save the (possibly updated) model back to disk
    pickle_out = open("randommodel.pickle", "wb")
    pickle.dump(model, pickle_out)
    pickle_out.close()

    return winner
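# A hedged usage sketch (assumption): alternate self-play training games with
# evaluation games against the random opponent using the play_a_game above.
# The game counts are arbitrary; note that each call reloads and re-pickles
# "randommodel.pickle", so this is slow but self-contained.
def train_and_evaluate(train_games=500, eval_games=50):
    for _ in range(train_games):
        play_a_game("agent")
    wins = sum(1 for _ in range(eval_games) if play_a_game("random") == 1)
    print("win rate vs random:", wins / eval_games)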