Exemplo n.º 1
0
                    return None

                action[n][0] = obs[i][0]
                action[n][1] = 1
                action[n][4] = target_id
                n = n + 1

        return action


if __name__ == '__main__':
    # Script entry point: parse the server address, create the environment
    # and run the agent episode after episode until the process is stopped.
    parser = argparse.ArgumentParser()
    parser.add_argument('--ip', help='server ip')
    parser.add_argument('--port', help='server port', default="11111")
    args = parser.parse_args()

    env = sc.MultiAgentEnv(args.ip, args.port, speed=30)
    try:
        # Fixed seed so repeated runs start from the same seed value.
        env.seed(123)
        agent = ClosetAgent(env.action_space)

        episodes = 0
        # There is no exit condition: the loop ends only when an exception
        # (for example KeyboardInterrupt) propagates out of it.
        while True:
            obs = env.reset()
            done = False
            # Step the environment with the agent's action until the
            # episode reports completion.
            while not done:
                action = agent.act(obs)
                obs, reward, done, info = env.step(action)
            episodes += 1
    finally:
        # Close the environment on every exit path; a close() placed after
        # the infinite loop would never be reached.
        env.close()
Exemplo n.º 2
0
    optimizer.zero_grad()
    for prob in model.saved_probs:
        loss += criterion(prob, label)
    loss.backward()
    optimizer.step()
    del model.rewards[:]
    del model.saved_actions[:]
    del model.saved_probs[:]


if __name__ == '__main__':

    max_episode_steps = 500

    env = sc.MultiAgentEnv(args.ip,
                           args.port,
                           speed=30,
                           max_episode_steps=max_episode_steps)
    env.seed(123)
    torch.manual_seed(123)

    agent = IndependentAgents(env.action_space)
    #vis = visdom.Visdom(env='Independent')

    episodes = args.eposides
    start = time.time()
    log_reward_sum = []
    log_reward = []
    while True:
        obs = env.reset()
        '''
		done = False