import argparse
import sys
from functools import reduce

import gym

# ConfigureProgram, ConfigureTrainer, Trainer, configureTrainer,
# configureProgram, loadTrainer, and set_env are assumed to be imported
# from this project's own modules.


def _str_to_bool(value):
    # argparse's type=bool treats any non-empty string (including "False")
    # as True, so parse common truthy strings explicitly instead.
    return str(value).lower() in ("true", "1", "yes")


def run(arguments):
    parser = argparse.ArgumentParser(
        description='Perform linear GP evolution for a given environment.')

    # Program configuration
    parser.add_argument('--minps', dest='min_prog_size', type=int,
                        help='Minimum number of instructions per Program',
                        default=32)
    parser.add_argument('--maxps', dest='max_prog_size', type=int,
                        help='Maximum number of instructions per Program',
                        default=1024)
    parser.add_argument('--padd', dest='padd', type=float,
                        help='Instruction addition strength', default=0.7)
    parser.add_argument('--pdel', dest='pdel', type=float,
                        help='Instruction deletion strength', default=0.7)
    parser.add_argument('--pmut', dest='pmut', type=float,
                        help='Instruction mutation strength', default=0.7)

    # Trainer configuration
    parser.add_argument('--generations', dest='num_generations', type=int,
                        help='Number of generations over which evolution is '
                             'performed',
                        default=50)
    parser.add_argument('--pop', dest='population_size', type=int,
                        help='Learner population size', default=200)
    parser.add_argument('--keep', dest='percent_keep', type=float,
                        help='Percentage of surviving Learners', default=0.3)
    parser.add_argument('--fast', dest='fast_mode', type=_str_to_bool,
                        help='Skip some re-evaluations', default=True)
    parser.add_argument('--skips', dest='num_skips', type=int,
                        help='Number of generations over which to skip '
                             're-evaluation',
                        default=3)
    parser.add_argument('--episodes', dest='num_eps_per_gen', type=int,
                        help='Number of episodes over which an agent is '
                             'evaluated each generation',
                        default=3)
    parser.add_argument('--verbose', dest='verbose', type=_str_to_bool,
                        help='Print info to the console during evolution',
                        default=True)
    parser.add_argument('--agent', dest='agent_save_name', type=str,
                        help='Name under which to save the evolved agent',
                        default="")

    # Environment configuration
    parser.add_argument('--env', dest='env', type=str,
                        help='OpenAI environment', default="CartPole-v1")
    parser.add_argument('--statespace', dest='statespace', type=int,
                        help='Length of flattened state space', default=4)

    args = parser.parse_args(arguments)

    # This runner currently supports only the CartPole environments.
    if args.env not in ("CartPole-v0", "CartPole-v1"):
        print("Whoops! So far this module only works in the CartPole "
              "environment!")
        return
    if args.statespace != 4:
        print("Whoops! So far this module only works in the CartPole "
              "environment with a statespace size of 4!")
        return

    ConfigureProgram(num_inputs=args.statespace,
                     min_prog_size=args.min_prog_size,
                     max_prog_size=args.max_prog_size,
                     p_add=args.padd,
                     p_del=args.pdel,
                     p_mut=args.pmut)

    ConfigureTrainer(num_generations=args.num_generations,
                     population_size=args.population_size,
                     percent_keep=args.percent_keep,
                     fast_mode=args.fast_mode,
                     max_num_skips=args.num_skips,
                     num_eps_per_gen=args.num_eps_per_gen,
                     verbose=args.verbose,
                     agent_save_name=args.agent_save_name)

    env = gym.make(args.env)
    trainer = Trainer(env)
    trainer.evolve()
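

# A minimal entry-point sketch for the linear GP runner above, assuming each
# run() variant in this listing lives in its own module and is invoked as a
# script; the example flag values are illustrative, not project defaults.
if __name__ == "__main__":
    # e.g. python <module>.py --generations 100 --pop 300 --fast true
    run(sys.argv[1:])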


def run(arguments):
    if len(arguments) == 0:
        print("ERROR - No arguments given to main")
        sys.exit(1)  # exit with a nonzero status to signal the error

    # Set up the command-line parsing to read the environment title
    parser = argparse.ArgumentParser(
        description='Perform TPG evolution for a given environment.')
    parser.add_argument('--env', dest='env', type=str,
                        help='OpenAI environment', default="CartPole-v1")
    parser.add_argument('--trainer', dest='trainer_fname', type=str,
                        help='Previously saved Trainer object', default="")
    parser.add_argument('--trainer_savename', dest='trainer_savename',
                        type=str,
                        help='Name under which to save the Trainer object',
                        default="")
    parser.add_argument('--agent_savename', dest='agent_savename', type=str,
                        help='Name under which to save the top Agent object',
                        default="")
    parser.add_argument('--training_results_savename',
                        dest='training_results_savename', type=str,
                        help='Name under which to save the training results',
                        default="")
    parser.add_argument('--generations', dest='num_generations', type=int,
                        help='Number of generations', default=500)
    parser.add_argument('--episodes', dest='num_episodes', type=int,
                        help='Number of episodes per agent at each generation',
                        default=1)
    parser.add_argument('--pop', dest='r_size', type=int,
                        help='Number of agents (root teams) per generation',
                        default=200)
    parser.add_argument('--frames', dest='num_frames', type=int,
                        help='Max number of frames per episode', default=18000)
    parser.add_argument('--seed', dest='seed', type=int,
                        help='Seed for environment', default=-1)
    parser.add_argument('--verbose', dest='verbose', type=_str_to_bool,
                        help='Print results to the console as we evolve',
                        default=False)
    parser.add_argument('--fast', dest='fast', type=_str_to_bool,
                        help='Set to True to skip re-evaluating agents',
                        default=False)
    parser.add_argument('--skips', dest='skips', type=int,
                        help='Maximum number of times an agent can skip '
                             're-evaluation',
                        default=0)
    parser.add_argument('--action_type', dest='action_type', type=int,
                        help='0 = Standard, 1 = Real-valued', default=0)
    args = parser.parse_args(arguments)

    # Get environment details
    env = gym.make(args.env)
    if args.action_type == 0:
        num_actions = env.action_space.n
    elif args.action_type == 1:
        # This number is meaningless when choosing real-valued actions, but
        # Team initialization doesn't accept anything less than 2. In fact,
        # the bigger the number, the faster Team initialization is.
        num_actions = 100
    else:
        # Bail out instead of falling through with num_actions undefined.
        print("Invalid action_type argument {}".format(args.action_type))
        return

    input_size = reduce(lambda x, y: x * y, env.observation_space.shape)

    configureTrainer(
        atomic_action_range=num_actions,
        p_del_learner=0.7,
        p_add_learner=0.7,
        p_mut_learner=0.3,
        p_mut_learner_action=0.6,
        p_atomic=0.5,
        r_size=args.r_size,
        percent_keep=0.40,
        env_name=args.env,
        trainer_name=args.trainer_savename,
        agent_name=args.agent_savename,
        verbose=args.verbose,
        env_seed=args.seed,
        do_fast_track=args.fast,
        max_eval_skips=args.skips,
        action_selection=args.action_type,
        training_results_nparray_name=args.training_results_savename)

    configureProgram(p_add=0.6,
                     p_del=0.6,
                     p_mut=0.6,
                     p_swap=0.6,
                     max_prog_size=128,
                     min_prog_size=8,
                     num_registers=8,
                     input_size=input_size)

    # If a trainer filename was passed in, re-create the Trainer.
    # Otherwise, create a new one from scratch.
    if args.trainer_fname == "":
        # Create Trainer and perform TPG initialization
        # (i.e. create the initial Team and Learner populations)
        trainer = Trainer()
        trainer.initialize()
    else:
        # Load Trainer
        trainer = loadTrainer(args.trainer_fname)

    # Try to generate an agent
    trainer.evolve(env,
                   num_generations=args.num_generations,
                   num_episodes=args.num_episodes,
                   num_frames=args.num_frames)
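

# An entry-point sketch for the TPG runner above (an assumption about how the
# module is driven; the saved-trainer filename below is hypothetical):
if __name__ == "__main__":
    # Fresh run:   python <module>.py --env CartPole-v1 --generations 100
    # Resumed run: python <module>.py --trainer saved_trainer --generations 50
    run(sys.argv[1:])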


def run(arguments):
    parser = argparse.ArgumentParser(
        description='Perform linear GP evolution for a given environment.')

    # Program configuration
    parser.add_argument('--minps', dest='min_prog_size', type=int,
                        help='Minimum number of instructions per Program',
                        default=32)
    parser.add_argument('--maxps', dest='max_prog_size', type=int,
                        help='Maximum number of instructions per Program',
                        default=1024)
    parser.add_argument('--padd', dest='padd', type=float,
                        help='Instruction addition strength', default=0.7)
    parser.add_argument('--pdel', dest='pdel', type=float,
                        help='Instruction deletion strength', default=0.7)
    parser.add_argument('--pmut', dest='pmut', type=float,
                        help='Instruction mutation strength', default=0.7)

    # Trainer configuration
    parser.add_argument('--generations', dest='num_generations', type=int,
                        help='Number of generations over which evolution is '
                             'performed',
                        default=50)
    parser.add_argument('--pop', dest='population_size', type=int,
                        help='Learner population size', default=200)
    parser.add_argument('--keep', dest='percent_keep', type=float,
                        help='Percentage of surviving Learners', default=0.3)
    parser.add_argument('--fast', dest='fast_mode', type=_str_to_bool,
                        help='Skip some re-evaluations', default=True)
    parser.add_argument('--skips', dest='num_skips', type=int,
                        help='Number of generations over which to skip '
                             're-evaluation',
                        default=3)
    parser.add_argument('--episodes', dest='num_eps_per_gen', type=int,
                        help='Number of episodes over which an agent is '
                             'evaluated each generation',
                        default=3)
    parser.add_argument('--verbose', dest='verbose', type=_str_to_bool,
                        help='Print info to the console during evolution',
                        default=True)
    parser.add_argument('--agent', dest='agent_save_name', type=str,
                        help='Name under which to save the evolved agent',
                        default="")
    parser.add_argument('--fitness_sharing', dest='fitness_sharing',
                        type=_str_to_bool,
                        help='Use fitness sharing to evaluate learners',
                        default=False)

    # Environment configuration
    parser.add_argument('--env', dest='env', type=str,
                        help='OpenAI environment', default="CartPole-v1")
    parser.add_argument('--statespace', dest='statespace', type=int,
                        help='Length of flattened state space', default=4)

    args = parser.parse_args(arguments)

    # Set up the environment via the project's set_env helper, which returns
    # the env and the (possibly updated) args
    env, args = set_env(args)

    ConfigureProgram(num_inputs=args.statespace,
                     min_prog_size=args.min_prog_size,
                     max_prog_size=args.max_prog_size,
                     p_add=args.padd,
                     p_del=args.pdel,
                     p_mut=args.pmut)

    ConfigureTrainer(num_generations=args.num_generations,
                     population_size=args.population_size,
                     percent_keep=args.percent_keep,
                     fast_mode=args.fast_mode,
                     max_num_skips=args.num_skips,
                     num_eps_per_gen=args.num_eps_per_gen,
                     verbose=args.verbose,
                     agent_save_name=args.agent_save_name,
                     output_folder="../lgp-outputs/",
                     env_name=args.env,
                     fitness_sharing=args.fitness_sharing)

    trainer = Trainer(env)
    trainer.evolve()
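

# An entry-point sketch for the fitness-sharing-capable runner above (the
# __main__ guard and the example flag values are assumptions):
if __name__ == "__main__":
    # e.g. python <module>.py --env CartPole-v1 --fitness_sharing true
    run(sys.argv[1:])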