gamma=args.gamma, exploration_strategy=EpsilonGreedy( initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay)) for ts in env.ts_ids } done = {'__all__': False} infos = [] if args.fixed: while not done['__all__']: _, _, done, _ = env.step({}) else: while not done['__all__']: actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()} s, r, done, _ = env.step(actions=actions) if args.v: print('s=', env.radix_decode(ql_agents['t'].state), 'a=', actions['t'], 's\'=', env.radix_encode(s['t']), 'r=', r['t']) for agent_id in ql_agents.keys(): ql_agents[agent_id].learn(new_state=env.encode( s[agent_id]), reward=r[agent_id]) env.save_csv(out_csv, run) env.close()
# Run one episode on `env`: either step the environment with no agent actions
# (args.fixed) or let one QLAgent per id in env.ts_ids act and learn online.
# The environment is always closed, even if the episode fails part-way.
initial_states = env.reset()
try:
    # One agent per id in env.ts_ids, each seeded with the encoded initial
    # observation for that id and its own epsilon-greedy exploration object.
    ql_agents = {
        ts: QLAgent(
            starting_state=env.encode(initial_states[ts]),
            state_space=env.observation_space,
            action_space=env.action_space,
            alpha=args.alpha,
            gamma=args.gamma,
            exploration_strategy=EpsilonGreedy(
                initial_epsilon=args.epsilon,
                min_epsilon=args.min_epsilon,
                decay=args.decay,
            ),
        )
        for ts in env.ts_ids
    }

    # env.step() returns a dict whose '__all__' entry ends the episode loop.
    done = {'__all__': False}
    infos = []  # not used in this section; kept in case later code reads it

    if args.fixed:
        # Step with an empty action dict; the agents are neither queried nor
        # trained. Presumably the environment then follows its own
        # pre-defined signal plan -- TODO confirm against the env class.
        while not done['__all__']:
            _, _, done, _ = env.step({})
    else:
        while not done['__all__']:
            actions = {ts: agent.act() for ts, agent in ql_agents.items()}
            s, r, done, _ = env.step(actions=actions)

            if args.v:
                # Verbose trace for the agent with id 't' only; this raises
                # KeyError if env.ts_ids has no 't'.
                # NOTE(review): `s` is printed via radix_decode but `s'` via
                # radix_encode -- confirm the asymmetry is intended.
                print('s=', env.radix_decode(ql_agents['t'].state), 'a=', actions['t'], 's\'=', env.radix_encode(s['t']), 'r=', r['t'])

            # Feed every agent its own encoded next observation and reward.
            for agent_id, agent in ql_agents.items():
                agent.learn(new_state=env.encode(s[agent_id]), reward=r[agent_id])

    # Only reached when the episode completed without raising.
    env.save_csv()
finally:
    # Close the environment on both the success and the failure path.
    env.close()