# Example 1: actor-critic training loops for the ICRA battlefield environment.
# First experiment: train the single red agent (R1) against one blue
# opponent (B1) that is either hand-crafted ("hand") or actor-critic ("AC").
env = ICRABattleField()
env.seed(args.seed)
losses = []
rewards = []
for episode in range(1, args.epoch + 1):
    print("Epoch: [{}/{}]".format(episode, args.epoch))

    # Reset the battlefield; the hand-crafted enemy also needs the spawn pose.
    action = Action()
    pos = env.reset()
    if args.enemy == "hand":
        agent2.reset(pos)
    state, reward, done, info = env.step(action)

    # Fixed-length episode: 2 minutes at 30 simulation ticks per second.
    # NOTE(review): `done` is never checked inside the loop — confirm that
    # episodes are intentionally run to the full tick budget.
    for tick in range(2 * 60 * 30):
        # Let the blue opponent choose its move for this tick first.
        if args.enemy == "hand":
            enemy_action = agent2.select_action(state[ID_B1])
            env.set_robot_action(ID_B1, enemy_action)
        elif args.enemy == "AC":
            enemy_action = agent2.select_action(
                state[ID_B1], mode="max_probability")
            env.set_robot_action(ID_B1, enemy_action)

        # Pick the red agent's action from its actor-critic network.
        state_map = agent.preprocess(state[ID_R1])
        a_m, a_t = agent.run_AC(state_map)
        action = agent.decode_action(a_m, a_t, state[ID_R1], "max_probability")

        # Advance the simulation by one tick.
        next_state, reward, done, info = env.step(action)
        tensor_next_state = agent.preprocess(next_state[ID_R1])

        # Record the transition in the agent's replay memory.
        agent.push(state_map, tensor_next_state, [a_m, a_t], [reward])
# Second experiment: two-vs-two setup — red robots R1/R2 (agent/aux_agent)
# against blue robots B1/B2 (agent2/aux_agent2).
# NOTE(review): this loop body continues beyond the visible chunk; only the
# enemy dispatch and the start of the red-side action selection are shown here.
losses = []
rewards = []
for i_episode in range(1, args.epoch + 1):
    print("Epoch: [{}/{}]".format(i_episode, args.epoch))
    # Initialize the environment and state
    action = Action()
    pos = env.reset()
    if args.enemy == "hand":
        # Hand-crafted enemy reset is disabled in this variant.
        pass
        # agent2.reset([7.5, 0.5])
    # Unlike the first experiment, step() takes a pair of actions here —
    # presumably one per red robot; verify against the env API.
    state, reward, done, info = env.step([action, action])

    for t in (range(2 * 60 * 30)):
        # Choose the blue-side actions for this tick.
        # Note: the 'person' check is a plain `if`, so it is evaluated
        # independently of the "hand"/"AC" if/elif chain below.
        if args.enemy == 'person':
            env.set_robot_action(ID_B1, env.action[0])
        if args.enemy == "hand":
            env.set_robot_action(ID_B1, agent2.select_action(state[ID_B1]))
            env.set_robot_action(ID_B2, aux_agent2.select_action(state[ID_B2]))
        elif args.enemy == "AC":
            env.set_robot_action(
                ID_B1,
                agent2.select_action(state[ID_B1], mode="max_probability"))
            env.set_robot_action(
                ID_B2,
                aux_agent2.select_action(state[ID_B2], mode="max_probability"))

        # Select and perform an action
        # Newly added red-side robot (R2), driven by the auxiliary agent.
        state_map = aux_agent.preprocess(state[ID_R2])
        a_m, a_t = aux_agent.run_AC(state_map)