def main(): """ Demo how to run an agent """ if( len(sys.argv) > 1 ): configName = str(sys.argv[1]) filename = configName else: print("Default config ") configName = "./configs/new_traffic_loop_ppo.yaml" dirname = os.path.dirname(__file__) filename = os.path.join(dirname, configName) print( "Config name " + configName ) logging.info("Starting example PPO agent") logoutput = io.StringIO("episode output log") parameters = getParameters(filename) env = SumoGymAdapter(parameters['all']) # here we initialize all agents (in that case one) PPOAgents = [] env.reset() for intersectionId in env.action_space.spaces.keys(): PPOAgents.append(PPOAgent(agentId=intersectionId, environment=env, parameters=parameters['all'])) complexAgent = BasicComplexAgent(PPOAgents) experiment = Experiment(complexAgent, env, parameters['all']['max_steps'], parameters['all']['seedlist'], render=False) experiment.addListener(JsonLogger(logoutput)) experiment.run()
def main(): """ Demo how to run an agent """ # if( len(sys.argv) > 1 ): # configName = str(sys.argv[1]) # filename = configName # else: # print("Default config ") # configName = "./configs/new_traffic_loop_ppo.yaml" # dirname = os.path.dirname(__file__) # filename = os.path.join(dirname, configName) # print( "Config name " + configName ) logging.info("Starting example random agent") logoutput = io.StringIO("episode output log") # parameters = getParameters(filename) env = Warehouse() # here we initialize all agents (in that case one) randomAgents = [] env.reset() for agent_id in env.action_space.spaces.keys(): randomAgents.append(WarehouseNaiveAgent(robot_id=agent_id, env=env)) complexAgent = BasicComplexAgent(randomAgents) experiment = Experiment(complexAgent, env, maxSteps=100, render=True, renderDelay=0.5) experiment.addListener(JsonLogger(logoutput)) experiment.run()
def main(): """ Demo how to run an agent """ dirname = os.path.dirname(__file__) if (len(sys.argv) > 1): env_configName = str(sys.argv[1]) agent_configName = str(sys.argv[2]) else: print("Default config ") env_configName = "./configs/factory_floor_experiment.yaml" env_filename = os.path.join(dirname, env_configName) agent_configName = "./configs/agent_combined_config.yaml" agent_filename = os.path.join(dirname, agent_configName) env_parameters = getParameters(env_filename) agent_parameters = getParameters(agent_filename) # whao, you need to know exact contents of all files here.. recursive_update( agent_parameters['subAgentList'][0]['parameters']['simulator'], env_parameters['environment']) print(env_parameters) print(agent_parameters) random.seed(env_parameters['seed']) maxSteps = env_parameters['max_steps'] baseEnv = FactoryFloor(env_parameters['environment']) packedActionSpace = PackedSpace(baseEnv.action_space, {"robots": ["robot1", "robot2", "robot3"]}) env = ModifiedGymEnv(baseEnv, packedActionSpace) logging.info("Starting example MCTS agent") logoutput = io.StringIO("episode output log") try: logoutputpickle = open('./' + os.environ["SLURM_JOB_ID"] + '.pickle', 'wb') except KeyError: print("No SLURM_JOB_ID found") logoutputpickle = io.BytesIO() obs = env.reset() complexAgent = createAgent(env.action_space, env.observation_space, agent_parameters) experiment = Experiment(complexAgent, env, maxSteps, render=True) experiment.addListener(JsonLogger(logoutput)) experiment.addListener(PickleLogger(logoutputpickle)) stats, confidence_ints = experiment.run() logoutputpickle.close() print("json output:", logoutput.getvalue()) print("\n\nREWARD STATS: " + str(stats) + " \nCONFIDENCE INTERVALS " + str(confidence_ints))
def testRun1step(self):
    agent = Mock()
    env = Mock()
    # each episode succeeds in step 1
    env.step = Mock(return_value=('observation', 3.0, True, {}))
    firstAction = Mock()
    exp = Experiment(agent, env, 100, None, False, 0)
    result = np.mean(exp.run())
    self.assertEqual(3.0, result)
def testListener(self):
    agent = Mock()
    env = Mock()
    env.step = Mock(return_value=('observation', 3.0, True, {}))
    firstAction = Mock()
    exp = Experiment(agent, env, 30, None, False, 0)
    listener = Mock()
    exp.addListener(listener)
    result = exp.run()
    # check we got 30 callbacks
    # NOTE: this fails because Jinke added an entry to the notification
    # dictionary in Experiment.py
    self.assertEqual(30, len(listener.notifyChange.mock_calls))
def main():
    if len(sys.argv) > 1:
        configName = str(sys.argv[1])
        filename = configName
    else:
        print("Default config")
        configName = "../configs/PPO.yaml"
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, configName)

    logging.info("Starting example warehouse PPO agent")
    logoutput = io.StringIO("episode output log")
    parameters = getParameters(filename)
    env = Warehouse()

    # initialize a naive agent for every robot, then replace robot 20
    # with a learning PPO agent
    agents = []
    env.reset()
    for agent_id, action_space in env.action_space.spaces.items():
        agents.append(WarehouseNaiveAgent(robot_id=agent_id,
                                          actionspace=action_space,
                                          observationspace=env.observation_space))
    agents[20] = PPOAgent(agentId=20,
                          actionspace=env.action_space[20],
                          observationspace=env.observation_space,
                          parameters=parameters['all'])

    multi_agent = MultiAgent(agents)
    # randrange requires integer bounds in Python 3
    experiment = Experiment(multi_agent, env, maxSteps=int(1e6), render=True,
                            seedlist=[random.randrange(1, int(1e3)) for i in range(int(1e5))],
                            renderDelay=0.5)
    experiment.addListener(JsonLogger(logoutput))
    experiment.run()
def testRun3steps(self):
    agent = Mock()
    env = Mock()
    env.step = Mock()
    # each episode succeeds after 3 steps; the pattern repeats for 10 episodes
    env.step.side_effect = [('observation1', 3.0, False, {}),
                            ('observation2', 4.0, False, {}),
                            ('observation3', 5.0, True, {})] * 10
    firstAction = Mock()
    exp = Experiment(agent, env, 30, None, False, 0)
    result = np.mean(exp.run())
    # each episode has reward 3 + 4 + 5 = 12
    self.assertEqual(12.0, result)
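# The tests above stub env.step with unittest.mock. A return_value repeats
# the same tuple on every call, while a side_effect list yields successive
# tuples; standard library only, runnable as-is.
from unittest.mock import Mock

step = Mock()
step.side_effect = [('obs1', 3.0, False, {}),
                    ('obs2', 4.0, False, {}),
                    ('obs3', 5.0, True, {})]
print(step())  # ('obs1', 3.0, False, {})
print(step())  # ('obs2', 4.0, False, {})
print(step())  # ('obs3', 5.0, True, {}); a fourth call raises StopIteration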
def test_PPO_agent(self):
    logging.info("Starting test_PPO_agent")
    dirname = os.path.dirname(__file__)
    filename = os.path.join(dirname, "configs/new_traffic_loop_ppo.yaml")
    with open(filename, 'r') as stream:
        try:
            parameters = yaml.safe_load(stream)['parameters']
        except yaml.YAMLError as exc:
            logging.error(exc)

    env = SumoGymAdapter(parameters)
    env.reset()
    PPOAgents = []
    for intersectionId in env.action_space.spaces.keys():
        PPOAgents.append(PPOAgent(intersectionId, env.action_space,
                                  env.observation_space, parameters))
    complexAgent = BasicComplexAgent(PPOAgents, env.action_space, env.observation_space)
    experiment = Experiment(complexAgent, env, parameters['max_steps'])
    experiment.run()
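# The test above expects a YAML file with a top-level 'parameters' key holding
# at least 'max_steps'. An illustrative minimal document follows; the real
# new_traffic_loop_ppo.yaml contains many more settings.
import yaml

doc = """
parameters:
  max_steps: 1000
"""
parameters = yaml.safe_load(doc)['parameters']
print(parameters['max_steps'])  # -> 1000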
def main(): """ MCTS Factory Floor experiment """ dirname = os.path.dirname(__file__) parser = configargparse.ArgParser() parser.add('-e', '--env-config', dest="env_filename", default=os.path.join( dirname, "./debug_configs/factory_floor_experiment.yaml")) parser.add('-a', '--agent-config', dest="agent_filename", default=os.path.join(dirname, "./debug_configs/agent_config.yaml")) parser.add('-d', '--data-dirname', dest="data_dirname", default="data") args = parser.parse_args() try: data_outputdir = os.path.join( dirname, "./" + args.data_dirname + "/" + os.environ["SLURM_JOB_ID"]) os.makedirs(data_outputdir) logoutputpickle = open('./' + data_outputdir + '/output.pickle', 'wb') rewardsFile = open('./' + data_outputdir + '/rewards.yaml', 'w+') except KeyError: print("No SLURM_JOB_ID found") logoutputpickle = io.BytesIO() rewardsFile = io.StringIO() env_parameters = getParameters(args.env_filename) agent_parameters = getParameters(args.agent_filename) print(env_parameters) print(agent_parameters) for subAgent in agent_parameters["subAgentList"]: subAgent["parameters"]["simulator"] = env_parameters["environment"] subAgent["parameters"]["simulator"][ "fullname"] = "aienvs.FactoryFloor.FactoryFloor.FactoryFloor" random.seed(env_parameters['seed']) maxSteps = env_parameters['max_steps'] env = FactoryFloor(env_parameters['environment']) logging.info("Starting example MCTS agent") logoutput = io.StringIO("episode output log") obs = env.reset() complexAgent = createAgent(env.action_space, env.observation_space, agent_parameters) experiment = Experiment(complexAgent, env, maxSteps, render=False) experiment.addListener(JsonLogger(logoutput)) experiment.addListener(PickleLogger(logoutputpickle)) rewards = experiment.run() # statistics, confidence_ints = stats.describe(rewards), stats.bayes_mvs(rewards) logoutputpickle.close() yaml.dump(rewards, rewardsFile) rewardsFile.close() print("json output:", logoutput.getvalue()) print("\n\nREWARD STATS: " + str(statistics) + " \nCONFIDENCE INTERVALS " + str(confidence_ints))