def AC_main():
    # Per-episode reward history, used for logging and for saving to disk.
    all_rw = []

    # Build the multi-agent controller for the selected training mode.
    if mode == "social":
        multiPG = socialAgents([
            PGagent(env_dim["cleanup"][0], env_dim["cleanup"][1], add_para(i))
            for i in range(n_agents)
        ], agentParam)
    elif mode == "AC":
        multiPG = AC_Agents([IAC(8, 400) for i in range(n_agents)])
    else:
        multiPG = independentAgent([
            PGagent(env_dim["cleanup"][0], env_dim["cleanup"][1], add_para(i))
            for i in range(n_agents)
        ])

    running_reward = 0
    for i_episode in range(n_episode):
        n_state, ep_reward = env.reset(), 0
        for t in range(n_steps):
            # Each agent selects an action from the current joint observation.
            if mode == "social":
                actions = multiPG.select_mask_actions(n_state)
            else:
                actions = multiPG.select_actions(n_state)
            n_state_, n_reward, _, _ = env.step(actions)
            if render and i_episode == 1:
                env.render(impath, t)
            ep_reward += sum(n_reward)
            multiPG.update(n_state, n_reward, n_state_, actions)
            n_state = n_state_  # advance to the next observation
        # Exponential moving average of the episode reward, reported as "Average reward".
        running_reward = 0.05 * ep_reward + 0.95 * running_reward

        all_rw.append(ep_reward)
        if i_episode % (args.log_interval * 2) == 0 and ifsave_data:
            np.save("data/" + model_name + ".npy", all_rw)
        if i_episode % args.log_interval == 0:
            print('Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}'.
                  format(i_episode, ep_reward, running_reward))

        if i_episode % save_eps == 0 and i_episode > 10 and ifsave_model:
            multiPG.save(file_name)
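
# Hedged sketch (not part of the original script): a small helper to reload the
# reward curve that AC_main saves under "data/<model_name>.npy" and print a short
# summary. The helper name and the statistics it reports are illustrative
# assumptions; only the save path and numpy (imported as np, as in the code above)
# come from the source.
def summarize_saved_rewards(model_name):
    rewards = np.load("data/" + model_name + ".npy")
    print("episodes: {}\tmean reward: {:.2f}\tbest reward: {:.2f}".format(
        len(rewards), rewards.mean(), rewards.max()))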

# Example 2

def AC_main():
    # Per-episode reward history, used for logging and for saving to disk.
    all_rw = []

    # Build the multi-agent controller for the selected training mode.
    if mode == "social":
        multiPG = socialAgents([
            PGagent(env_dim["cleanup"][0], env_dim["cleanup"][1], add_para(i))
            for i in range(n_agents)
        ], agentParam)
    elif mode == "AC":
        multiPG = Agents([Centralised_AC(4, 100) for i in range(n_agents)], 50)
    else:
        multiPG = independentAgent([
            PGagent(env_dim["cleanup"][0], env_dim["cleanup"][1], add_para(i))
            for i in range(n_agents)
        ])

    running_reward = 0
    for i_episode in range(1000):
        n_state, ep_reward = env.reset(), 0
        n_state = n_state[0]
        test_reward_sum = 0
        for t in range(1000):

            # Each agent selects an action from the processed joint observation.
            if mode == "social":
                actions = multiPG.select_mask_actions(n_state)
            else:
                actions = multiPG.choose_action(process_state(n_state))
            # The environment's step() expects each agent's action index wrapped in a list.
            a = [[action[0]] for action in actions]
            n_state_, n_reward, _, _, test_reward = env.step(a)
            test_reward_sum += test_reward
            if render and i_episode != 1:
                env.render()
            ep_reward += sum(n_reward)
            multiPG.update(process_state(n_state), process_state(n_reward),
                           process_state(n_state_), actions)
            n_state = n_state_  # advance to the next observation
        # Exponential moving average of the episode reward, reported as "Average reward".
        running_reward = 0.05 * ep_reward + 0.95 * running_reward

        all_rw.append(ep_reward)
        if i_episode % (args.log_interval * 2) == 0 and ifsave_data:
            np.save("data/" + model_name + ".npy", all_rw)
        if i_episode % args.log_interval == 0:
            print(
                'Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}\ttest_reward: {:.2f}'
                .format(i_episode, ep_reward[0], running_reward[0],
                        test_reward_sum))
            logger.scalar_summary("ep_reward", ep_reward, i_episode)
            logger.scalar_summary("coin_eaten", test_reward_sum, i_episode)

        if i_episode % save_eps == 0 and i_episode > 10 and ifsave_model:
            multiPG.save(file_name)
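
# Hedged sketch (an assumption, not shown in the source): if this module is run as a
# script, a standard entry-point guard like the one below would start training.
if __name__ == "__main__":
    AC_main()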