def test_MLEGreedyAgent_works(self):
    """Run an experiment with MLEGreedyAgent and parse its output as JSON."""
    experiment = _setup_experiment()
    experiment.agent_class = allocation_agents.MLEGreedyAgent
    experiment.agent_params = allocation_agents.MLEGreedyAgentParams(
        burn_steps=5,
        window=10,
        alpha=5.0,
    )

    raw_result = attention_allocation_experiment.run(experiment)

    # The test passes as long as the run output can be decoded as JSON;
    # json.loads raises otherwise.
    parsed_result = json.loads(raw_result)
def test_allocate_beliefs_greedy(self):
    """Check `_allocate(5, [5, 2, 1, 1])` gives [4, 1, 0, 0] with epsilon=0.0."""
    env = attention_allocation.LocationAllocationEnv(
        params=attention_allocation.Params(
            n_locations=4,
            prior_incident_counts=(10, 10, 10, 10),
            n_attention_units=5,
            incident_rates=[0, 0, 0, 0],
        )
    )
    greedy_params = allocation_agents.MLEGreedyAgentParams(epsilon=0.0)
    agent = allocation_agents.MLEGreedyAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward("incidents_seen"),
        params=greedy_params,
    )

    expected_allocation = [4, 1, 0, 0]
    actual_allocation = agent._allocate(5, [5, 2, 1, 1])

    # Element-wise comparison; every position has to match.
    elementwise_match = np.equal(actual_allocation, expected_allocation)
    self.assertTrue(np.all(elementwise_match))
def test_allocate_beliefs_fair_unsatisfiable(self):
    """Check `_allocate(5, [5, 2, 1, 1])` raises InvalidAction with alpha=0.25."""
    env = attention_allocation.LocationAllocationEnv(
        params=attention_allocation.Params(
            n_locations=4,
            prior_incident_counts=(10, 10, 10, 10),
            n_attention_units=5,
            incident_rates=[0, 0, 0, 0],
        )
    )
    fair_params = allocation_agents.MLEGreedyAgentParams(
        epsilon=0.0,
        alpha=0.25,
    )
    agent = allocation_agents.MLEGreedyAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward("incidents_seen"),
        params=fair_params,
    )

    # The allocation call itself is expected to fail for this configuration.
    with self.assertRaises(gym.error.InvalidAction):
        agent._allocate(5, [5, 2, 1, 1])
def mle_greedy_alpha5_agent_resource_all_dynamics():
    """Run MLEGreedyAgent experiments (alpha=0.75) across several dynamic rates.

    The agent is configured with burn_steps=25, window=100 and alpha=0.75.
    For each dynamic rate in the tested list, the experiment is run, a
    discovered/missed incidents report is printed, and the parsed report is
    written as a JSON file under FLAGS.output_dir.

    NOTE(review): the function name says "alpha5" while the configured value
    and the output filenames use alpha=0.75; the name is kept for callers.

    Returns:
        A dict mapping each tested dynamic rate to the JSON string returned by
        attention_allocation_experiment.run for that rate (the raw string, not
        the parsed report).
    """
    dynamic_values_to_test = [0.0, 0.01, 0.05, 0.1, 0.15]

    experiment = _setup_experiment()
    experiment.agent_class = allocation_agents.MLEGreedyAgent
    experiment.agent_params = allocation_agents.MLEGreedyAgentParams(
        burn_steps=25, window=100, alpha=0.75)

    reports_dict = {}
    for value in dynamic_values_to_test:
        print("Running an experiment...")
        # The same experiment object is reused; only the dynamic rate changes.
        experiment.env_params.dynamic_rate = value
        json_report = attention_allocation_experiment.run(experiment)
        report = json.loads(json_report)

        print("\n\nMLE Greedy Fair Agent, 6 attention units, alpha=0.75")
        _print_discovered_missed_incidents_report(value, report)

        output_filename = "mle_greedy_fair_alpha75_6units_%f.json" % value
        with open(os.path.join(FLAGS.output_dir, output_filename), "w") as f:
            json.dump(report, f)

        # Callers receive the raw JSON string, not the parsed dict.
        reports_dict[value] = json_report
    return reports_dict