def test_should_traverse(self, env):
    """Explore for 300 trials without GA and check the final population."""
    # given
    settings = dict(
        epsilon=1.0,
        biased_exploration=0.5,
        do_ga=False,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=self._maze_metrics,
    )
    acs2 = ACS2(Configuration(8, 8, **settings))

    # when
    population, metrics = acs2.explore(env, 300)

    # then
    macro = count_macroclassifiers(population)
    assert 90 < macro < 200
    assert self._get_knowledge(metrics) == 100
    # every classifier is expected to be reliable and to have numerosity 1
    assert macro == count_reliable(population)
    assert macro == count_microclassifiers(population)
    assert self._get_total_steps(metrics) > 5000
def test_should_traverse_with_ga(self, env):
    """Explore for 300 trials with GA enabled and check the final population."""
    # given
    settings = dict(
        epsilon=0.8,
        biased_exploration=0.5,
        mu=0.3,
        chi=0.0,
        do_ga=True,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=self._maze_metrics,
    )
    acs2 = ACS2(Configuration(8, 8, **settings))

    # when
    population, metrics = acs2.explore(env, 300)

    # then
    macro = count_macroclassifiers(population)
    assert abs(380 - macro) < 55
    assert abs(100 - self._get_knowledge(metrics)) < 5
    # with GA on, not every classifier is reliable and some are duplicated
    assert macro > count_reliable(population)
    assert macro < count_microclassifiers(population)
    assert self._get_total_steps(metrics) > 2500
def start_single_experiment(env, explore_trials, exploit_trials, **kwargs):
    """Run one explore-then-exploit experiment and collect its results.

    Args:
        env: environment object; it is reset before the experiment starts.
        explore_trials: number of trials passed to ``ACS2.explore``.
        exploit_trials: number of trials passed to ``ACS2.exploit``.
        **kwargs: forwarded unchanged to ``Configuration``.

    Returns:
        A ``(population_exploit, df)`` tuple: the population returned by the
        exploit phase and the result of ``parse_experiments_results`` applied
        to both metric collections.
    """
    # Prepare the environment
    env.reset()

    cfg = Configuration(**kwargs)

    # Exploration phase
    explorer = ACS2(cfg)
    population_explore, metrics_explore = explorer.explore(
        env, explore_trials)

    # Exploitation phase: use the agent seeded with the explored population.
    # (Previously this agent was created but `explorer` was used instead.)
    exploiter = ACS2(cfg, population_explore)
    population_exploit, metrics_exploit = exploiter.exploit(
        env, exploit_trials)

    # Parse results into DataFrame
    df = parse_experiments_results(metrics_explore, metrics_exploit,
                                   cfg.metrics_trial_frequency)

    return population_exploit, df
def get_actors():
    """Create the 6-bit multiplexer environment and a GA-enabled ACS2 agent.

    Returns:
        An ``(agent, environment)`` tuple.
    """
    multiplexer = gym.make('boolean-multiplexer-6bit-v0')
    config = Configuration(
        multiplexer.env.observation_space.n,
        2,
        perception_mapper_fcn=_map_perception,
        do_ga=True,
    )
    agent = ACS2(config)
    return agent, multiplexer
def get_actors():
    """Create the 6-bit multiplexer environment and a GA-enabled ACS2 agent.

    The agent is configured with a ``MultiplexerAdapter`` instance as its
    environment adapter.

    Returns:
        An ``(agent, environment)`` tuple.
    """
    multiplexer = gym.make('boolean-multiplexer-6bit-v0')
    config = Configuration(
        multiplexer.env.observation_space.n,
        2,
        environment_adapter=MultiplexerAdapter(),
        do_ga=True,
    )
    agent = ACS2(config)
    return agent, multiplexer
def get_actors():
    """Create the 37-bit multiplexer environment and a GA-enabled ACS2 agent.

    The performance function is ``evaluate_performance`` and receives
    ``{'ctrl_bits': 5}`` as its parameters.

    Returns:
        An ``(agent, environment)`` tuple.
    """
    multiplexer = gym.make('boolean-multiplexer-37bit-v0')
    config = Configuration(
        multiplexer.env.observation_space.n,
        2,
        performance_fcn=evaluate_performance,
        performance_fcn_params={'ctrl_bits': 5},
        do_ga=True,
    )
    agent = ACS2(config)
    return agent, multiplexer
def start_single_experiment(env, trials, **kwargs):
    """Run a single ``explore_exploit`` experiment.

    Args:
        env: environment object; it is reset before the run.
        trials: number of trials passed to ``ACS2.explore_exploit``.
        **kwargs: forwarded unchanged to ``Configuration``.

    Returns:
        A ``(population, metrics_df)`` tuple, where ``metrics_df`` is the
        result of ``parse_metrics`` applied to the collected metrics.
    """
    env.reset()

    acs2 = ACS2(Configuration(**kwargs))
    population, raw_metrics = acs2.explore_exploit(env, trials)

    return population, parse_metrics(raw_metrics)
def test_should_be_no_duplicated_classifiers_without_ga(self, mp):
    """With GA disabled, the population should hold only distinct classifiers."""
    # given
    config = Configuration(
        mp.env.observation_space.n,
        2,
        environment_adapter=MultiplexerAdapter(),
        do_ga=False,
    )
    acs2 = ACS2(config)

    # when
    population, metrics = acs2.explore(mp, 10)

    # then
    macro = count_macroclassifiers(population)
    distinct = set(population)
    assert macro == len(distinct)
def test_should_be_no_duplicated_classifiers_without_ga(self, mp):
    """With GA disabled, the population should hold only distinct classifiers."""
    # given
    config = Configuration(
        mp.env.observation_space.n,
        2,
        perception_mapper_fcn=_map_perception,
        do_ga=False,
    )
    acs2 = ACS2(config)

    # when
    population, metrics = acs2.explore(mp, 10)

    # then
    macro = count_macroclassifiers(population)
    distinct = set(population)
    assert macro == len(distinct)
def test_should_evaluate_knowledge(self, mp):
    """Each collected metric must report a reward of either 0 or 1000."""
    # given
    config = Configuration(
        mp.env.observation_space.n,
        2,
        do_ga=False,
        environment_adapter=MultiplexerAdapter(),
    )
    acs2 = ACS2(config)

    # when
    population, metrics = acs2.explore(mp, 10)

    # then
    allowed_rewards = {0, 1000}
    for entry in metrics:
        assert entry['reward'] in allowed_rewards
def test_should_evaluate_knowledge(self, mp):
    """Each collected metric must flag ``was_correct`` as either 0 or 1."""
    # given
    config = Configuration(
        mp.env.observation_space.n,
        2,
        do_ga=False,
        perception_mapper_fcn=_map_perception,
        performance_fcn=calculate_performance,
        performance_fcn_params={'ctrl_bits': 2},
    )
    acs2 = ACS2(config)

    # when
    population, metrics = acs2.explore(mp, 10)

    # then
    allowed_flags = {0, 1}
    for entry in metrics:
        assert entry['performance']['was_correct'] in allowed_flags
def test_should_gain_knowledge(self, env):
    """After 20 explore trials the last metric must show positive knowledge."""
    # given
    config = Configuration(
        env.observation_space.n,
        env.action_space.n,
        epsilon=1.0,
        do_ga=False,
        do_action_planning=True,
        action_planning_frequency=50,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=handeye_metrics,
    )
    acs2 = ACS2(config)

    # when
    population, metrics = acs2.explore(env, 20)

    # then
    final = metrics[-1]
    for key in ('knowledge', 'with_block', 'no_block'):
        assert final[key] > 0.0
def test_should_evaluate_knowledge(self, env):
    """Every collected knowledge figure must lie within [0, 100]."""
    # given
    config = Configuration(
        env.observation_space.n,
        env.action_space.n,
        epsilon=1.0,
        do_ga=False,
        do_action_planning=True,
        action_planning_frequency=50,
        user_metrics_collector_fcn=handeye_metrics,
    )
    acs2 = ACS2(config)

    # when
    population, metrics = acs2.explore(env, 10)

    # then
    for entry in metrics:
        for key in ('knowledge', 'with_block', 'no_block'):
            assert 0.0 <= entry[key] <= 100.0
def test_should_traverse(self, env):
    """Explore for 300 trials without GA and check the final population."""
    # given
    config = Configuration(
        8,
        8,
        epsilon=1.0,
        do_ga=False,
        performance_fcn=calculate_performance,
    )
    acs2 = ACS2(config)

    # when
    population, metrics = acs2.explore(env, 300)

    # then
    macro = count_macroclassifiers(population)
    assert 90 < macro < 200
    assert self._get_knowledge(metrics) == 100
    # every classifier is expected to be reliable and to have numerosity 1
    assert macro == count_reliable(population)
    assert macro == count_microclassifiers(population)
    assert self._get_total_steps(metrics) > 5000
class MultiplexerAdapter(EnvironmentAdapter):
    """Environment adapter that stringifies multiplexer observations."""

    @staticmethod
    def to_genotype(env_state):
        """Return a list with ``str`` applied to each element of ``env_state``."""
        return list(map(str, env_state))


if __name__ == '__main__':
    # Load desired environment
    mp = gym.make('boolean-multiplexer-6bit-v0')

    # Create agent
    cfg = Configuration(
        mp.env.observation_space.n,
        2,
        do_ga=False,
        environment_adapter=MultiplexerAdapter(),
        metrics_trial_frequency=50,
        user_metrics_collector_fcn=mpx_metrics,
    )
    agent = ACS2(cfg)

    # Explore the environment
    population, explore_metrics = agent.explore(mp, 1500)

    # Exploit the environment with a fresh agent seeded with that population
    agent = ACS2(cfg, population)
    population, exploit_metrics = agent.exploit(mp, 50)

    # See how it went (only the exploration metrics are printed)
    for metric in explore_metrics:
        print(metric)
number_of_possible_actions=4, epsilon=0.9, beta=0.2, gamma=0.95, theta_i=0.1, theta_as=50, theta_exp=50, theta_ga=50, do_ga=False, mu=0.04, u_max=2, metrics_trial_frequency=5, user_metrics_collector_fcn=grid_metrics) # Explore the environment agent1 = ACS2(cfg) population, explore_metrics = agent1.explore(grid, 500, decay=False) for cl in sorted(population, key=lambda c: -c.fitness): if cl.does_anticipate_change(): print_cl(cl) # Exploit agent2 = ACS2(cfg, population) pop_exploit, metric_exploit = agent2.exploit(grid, 30) # Print classifiers for cl in sorted(pop_exploit, key=lambda c: -c.fitness): if cl.does_anticipate_change(): print_cl(cl)
if __name__ == '__main__':
    # Command-line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--environment", default="Maze4-v0")
    parser.add_argument("--epsilon", default=1.0, type=float)
    parser.add_argument("--ga", action="store_true")
    # NOTE(review): the two trial-count options are parsed but not read below;
    # the loop uses a fixed 200 trials per phase.
    parser.add_argument("--explore-trials", default=50, type=int)
    parser.add_argument("--exploit-trials", default=10, type=int)
    args = parser.parse_args()

    maze = gym.make(args.environment)

    # The same frequency is handed to the plotter and to the agent's
    # metrics collection.
    freq = 10
    plot = pl.Plots('conf.txt', freq)

    cfg = Configuration(
        8,
        8,
        epsilon=args.epsilon,
        do_ga=args.ga,
        user_metrics_collector_fcn=plot.get_logger(),
        metrics_trial_frequency=freq,
    )
    agent = ACS2(cfg)

    # Three rounds of explore followed by exploit on the same agent
    for i in range(1, 4):
        population, m = agent.explore(maze, 200, decay=False)
        plot.add_data("mix", f'explore {i}', m)

        population, m = agent.exploit(maze, 200)
        plot.add_data("mix", f'exploit {i}', m)

    plot.add_data("mix_no_div", '')
    plot.draw()
    plot.save_datasets('test.csv')
def run(environment, explore_trials, exploit_trials, position_bins,
        velocity_bins, biased_exploration_prob, decay, gamma):
    """Run an explore/exploit ACS2 experiment inside a single MLflow run.

    The exploration budget is split in half: the first half always runs with
    ``decay=False``, the second half with the caller-supplied ``decay``.
    Exploitation then runs on a dedicated agent holding a deep copy of the
    explored population.

    Args:
        environment: environment id passed to ``gym.make``.
        explore_trials: total exploration trials (halved per phase).
        exploit_trials: number of exploitation trials.
        position_bins: number of bins for the first observation dimension.
        velocity_bins: number of bins for the second observation dimension.
        biased_exploration_prob: value for ``biased_exploration``.
        decay: ``decay`` argument for the second exploration phase.
        gamma: value for ``gamma`` in the agent configuration.
    """
    bins = [position_bins, velocity_bins]

    with mlflow.start_run():
        logging.info("Initializing environment...")
        env = gym.make(environment)
        env._max_episode_steps = 1000

        logging.info("Creating real-value discretizer...")
        _range, _low = (env.observation_space.high - env.observation_space.low,
                        env.observation_space.low)

        class MountainCarAdapter(EnvironmentAdapter):
            """Discretize a real-valued observation into per-dimension bins."""

            @classmethod
            def to_genotype(cls, obs):
                # NOTE(review): adding |low| equals subtracting low only when
                # the lower bounds are non-positive - confirm for other envs.
                r = (obs + np.abs(_low)) / _range
                b = (r * bins).astype(int)
                return b.astype(str).tolist()

        logging.info("Creating custom metrics")

        def mc_metrics(pop, env):
            """Add mean fitness of reliable classifiers to population metrics."""
            metrics = {
                'avg_fitness': np.mean(
                    [cl.fitness for cl in pop if cl.is_reliable()])
            }
            metrics.update(population_metrics(pop, env))
            return metrics

        logging.info("Building agent configuration...")
        cfg = Configuration(classifier_length=2,
                            number_of_possible_actions=3,
                            epsilon=1.0,
                            biased_exploration=biased_exploration_prob,
                            beta=0.2,
                            gamma=gamma,
                            theta_as=50,
                            theta_exp=100,
                            theta_ga=50,
                            do_ga=True,
                            mu=0.03,
                            chi=0.0,
                            metrics_trial_frequency=5,
                            user_metrics_collector_fcn=mc_metrics,
                            environment_adapter=MountainCarAdapter)

        trials = int(explore_trials / 2)

        logging.info(
            f"Running {trials} experiments with pure exploration "
            f"(decay = False)"
        )
        agent = ACS2(cfg)
        population, metrics_1 = agent.explore(env, trials, decay=False)

        logging.info(
            f"Running {trials} experiments with optional decay "
            f"(decay = {decay})"
        )
        agent = ACS2(cfg, population)
        population, metrics_2 = agent.explore(env, trials, decay=decay)

        logging.info("Generating metrics...")
        explore_metrics_df = merge_metrics(metrics_1, metrics_2, cfg)
        log_metrics("explore", explore_metrics_df)

        logging.info("Logging population artifact...")
        log_population_artifact("explore-population", population)
        log_metrics_artifact("explore-metrics", explore_metrics_df)

        logging.info("Generating plots...")
        avg_window = int(trials / 100)
        plot_steps_in_trial("explore-steps.png",
                            explore_metrics_df, window=avg_window)
        plot_avg_fitness("explore-fitness.png",
                         explore_metrics_df, window=avg_window)
        plot_reward("explore-reward.png",
                    explore_metrics_df, window=avg_window)
        plot_classifiers("explore-classifiers.png",
                         explore_metrics_df, window=avg_window)

        logging.info(f"Running {exploit_trials} exploit trials")
        # Exploit on the dedicated agent holding a deep copy of the
        # population. (Previously `exploiter` was built but `agent.exploit`
        # was called, so the copy was never used.)
        exploiter = ACS2(cfg, deepcopy(population))
        population_exploit, metrics_3 = exploiter.exploit(env, exploit_trials)

        logging.info("Generating metrics")
        exploit_metrics_df = metrics_to_df(metrics_3)
        log_metrics_artifact("exploit-metrics", exploit_metrics_df)
        log_metrics("exploit", exploit_metrics_df)