Example #1
0
    # NOTE(review): fragment begins mid-function — `args`, `num_train_ep`,
    # `agent_id`, `num_op`, `num_team`, and `train_mode` are bound earlier,
    # outside this view.
    num_test_ep = args.num_test_ep
    # Total run length = training episodes followed by test episodes.
    num_episodes = num_train_ep + num_test_ep

    print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
          "num_episodes={};".format(agent_id, num_op, num_team, num_episodes))
    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(agent_id=agent_id,
                                       num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()

    # Agent set-up
    # NOTE(review): reward_function is assigned but never read within this
    # fragment — presumably consumed further down; verify against full file.
    reward_function = simple_reward
    features_manager = DiscreteHighLevelFeatures(num_team, num_op)
    # Restrict the agent to three high-level HFO actions.
    actions_manager = ActionManager([SHOOT, MOVE, DRIBBLE])
    # Tabular Q-learning agent; epsilon starts at 1.0 (fully exploratory)
    # and is decayed multiplicatively by epsilon_dec (per step or per
    # episode — cannot tell from this fragment).
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.99,
                           epsilon=1.0,
                           epsilon_dec=0.9992)

    # Run training using Q-Learning
    if train_mode == "train_only":
        print('\n=== Train Mode for {}:'.format(num_train_ep))
        train(num_episodes=num_train_ep,
              game_interface=hfo_interface,
              features=features_manager,
              agent=agent,
              actions=actions_manager)
    elif train_mode == "alternate":
Example #2
0
 # NOTE(review): fragment begins mid-script — `parser` (argparse) is created
 # above this view; earlier add_argument calls are not visible.
 parser.add_argument('--saveFile', type=str, default="q_agent.model")
 args = parser.parse_args()
 num_teammates = args.numTeammates
 num_opponents = args.numOpponents
 n_games = args.numEpisodes
 save_file = args.saveFile
 # Q-learning hyper-parameters, hard-coded rather than exposed as CLI flags.
 lr = 0.01
 epsilon = 1
 discount_factor = 0.9
 # Useful Instances:
 hfo = HFOEnvironment()
 # Connect to a locally running HFO server on the default port 6000,
 # requesting the high-level feature set.
 hfo.connectToServer(feature_set=HIGH_LEVEL_FEATURE_SET, server_port=6000)
 # Discretises the continuous HFO state into a finite state index.
 env = DiscreteHighLevelFeatures(hfo.getState(), num_teammates,
                                 num_opponents)
 actions = Action()
 # Tabular Q-learning agent; persists its Q-table to `save_file`
 # (presumably — confirm against QLearner's implementation).
 agent = QLearner(num_states=env.get_num_states(),
                  num_actions=actions.get_num_actions(),
                  epsilon=epsilon,
                  learning_rate=lr,
                  discount_factor=discount_factor,
                  save_file=save_file)
 # Saving lists
 scores = []
 eps_history = []
 for i in range(n_games):
     print("\n<< {}/{} Game >> eps={}".format(i, n_games, agent.epsilon))
     game_status = IN_GAME
     score = 0
     # Play one full game: keep choosing actions until the server reports a
     # terminal status. The loop body continues beyond this fragment.
     while game_status == IN_GAME:
         action_idx = agent.choose_action(env.get_state_index())
         hfo_action = actions.map_action(action_idx)
Example #3
0
                                                  num_episodes, load_file))
    # NOTE(review): fragment begins mid-function — the line above is the tail
    # of a print/format call whose head, plus `agent_id`, `args`, `num_team`,
    # `num_op`, `num_episodes`, and `load_file`, are bound outside this view.
    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(agent_id=agent_id,
                                       num_opponents=args.num_opponents,
                                       num_teammates=args.num_teammates)
    hfo_interface.connect_to_server()

    # Reward Function
    # NOTE(review): assigned but never read within this fragment —
    # presumably consumed later in the function; verify against full file.
    reward_function = simple_reward
    
    # Get number of features and actions
    features_manager = DiscreteHighLevelFeatures(num_team, num_op)
    actions_manager = ActionManager([SHOOT, MOVE, DRIBBLE])
    
    # Initialize a Q-Learning Agent
    # NOTE(review): `load_file` suggests a pre-trained Q-table is restored —
    # confirm against QPlayerAgent's constructor.
    agent = QPlayerAgent(num_states=features_manager.get_num_states(),
                         num_actions=actions_manager.get_num_actions(),
                         num_games=num_episodes,
                         load_file=load_file)
    
    for i in range(num_episodes):
        # Per-episode reset; produce_graph presumably triggers plot/metric
        # output — TODO confirm.
        agent.reset(i, produce_graph=True)
        observation = hfo_interface.reset()
        # Update environment features:
        curr_state_id = features_manager.get_state_index(observation)
        has_ball = features_manager.has_ball(observation)
        
        # Act until the episode ends; loop body continues beyond this fragment.
        while hfo_interface.in_game():
            action_idx = agent.act(curr_state_id)
            hfo_action = actions_manager.map_action(action_idx)