示例#1
0
def run(arguments):
    """Parse command-line options and run linear GP evolution on CartPole.

    Args:
        arguments: List of command-line argument strings (without the
            program name), handed to ``ArgumentParser.parse_args``.

    Returns:
        None. A message is printed and the function returns early when the
        requested environment is not CartPole-v0/v1 or the state-space
        length is not 4.

    Raises:
        SystemExit: Raised by argparse for malformed arguments (including
            unrecognised boolean values) or when ``--help`` is requested.
    """

    def parse_bool(text):
        """Convert a command-line token to a bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because any non-empty string is truthy. The empty string is
        still accepted as False, which was the only way to disable a flag
        before this converter existed.
        """
        if isinstance(text, bool):
            return text
        token = text.strip().lower()
        if token in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value (true/false), got {!r}'.format(text))

    parser = argparse.ArgumentParser(description='Perform linear GP evolution for a given environment.')

    # Program configuration
    parser.add_argument('--minps', dest='min_prog_size', type=int, help='Minimum number of instructions per Program', default=32)
    parser.add_argument('--maxps', dest='max_prog_size', type=int, help='Maximum number of instructions per Program', default=1024)
    parser.add_argument('--padd', dest='padd', type=float, help='Instruction addition strength', default=0.7)
    parser.add_argument('--pdel', dest='pdel', type=float, help='Instruction deletion strength', default=0.7)
    parser.add_argument('--pmut', dest='pmut', type=float, help='Instruction mutation strength', default=0.7)

    # Trainer configuration
    parser.add_argument('--generations', dest='num_generations', type=int, help='Number of generations over which evolution is performed', default=50)
    parser.add_argument('--pop', dest='population_size', type=int, help='Learner population size', default=200)
    parser.add_argument('--keep', dest='percent_keep', type=float, help='Percentage of surviving Learners', default=0.3)
    parser.add_argument('--fast', dest='fast_mode', type=parse_bool, help='Skip some re-evaluations', default=True)
    parser.add_argument('--skips', dest='num_skips', type=int, help='Number of generations over which to skip re-evaluation', default=3)
    parser.add_argument('--episodes', dest='num_eps_per_gen', type=int, help='Number of episodes over which an agent is evaluated each generation', default=3)
    parser.add_argument('--verbose', dest='verbose', type=parse_bool, help='Do print out info to the console during evolution', default=True)
    parser.add_argument('--agent', dest='agent_save_name', type=str, help='Name under which to save the evolved agent', default="")

    # Environment configuration
    parser.add_argument('--env', dest='env', type=str, help='OpenAI environment', default="CartPole-v1")
    parser.add_argument('--statespace', dest='statespace', type=int, help='Length of flattened state space', default=4)
    args = parser.parse_args(arguments)

    # Bail out before touching gym or the trainer when the request is
    # outside what this module supports.
    if args.env not in ("CartPole-v0", "CartPole-v1"):
        print("Woops! So far this module only works in the CartPole environment!")
        return

    if args.statespace != 4:
        print("Woops! So far this module only works in the CartPole environment with a statespace size of 4!")
        return

    ConfigureProgram(
        num_inputs      = args.statespace,
        min_prog_size   = args.min_prog_size,
        max_prog_size   = args.max_prog_size,
        p_add           = args.padd,
        p_del           = args.pdel,
        p_mut           = args.pmut)

    ConfigureTrainer(
        num_generations     = args.num_generations,
        population_size     = args.population_size,
        percent_keep        = args.percent_keep,
        fast_mode           = args.fast_mode,
        max_num_skips       = args.num_skips,
        num_eps_per_gen     = args.num_eps_per_gen,
        verbose             = args.verbose,
        agent_save_name     = args.agent_save_name)

    env = gym.make(args.env)
    trainer = Trainer(env)
    trainer.evolve()
示例#2
0
def run(arguments):
    """Parse command-line options and run TPG evolution for an environment.

    Args:
        arguments: List of command-line argument strings (without the
            program name), handed to ``ArgumentParser.parse_args``. Must be
            non-empty.

    Raises:
        SystemExit: With status 1 when ``arguments`` is empty; raised by
            argparse (status 2) for malformed arguments, unrecognised
            boolean values, or an ``--action_type`` other than 0 or 1.
    """

    def parse_bool(text):
        """Convert a command-line token to a bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because any non-empty string is truthy. The empty string is
        still accepted as False for backward compatibility.
        """
        if isinstance(text, bool):
            return text
        token = text.strip().lower()
        if token in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value (true/false), got {!r}'.format(text))

    if len(arguments) == 0:
        print("ERROR - No arguments given to main")
        # This is an error path, so report a non-zero exit status.
        sys.exit(1)

    # Setup the command line parsing to read the environment title
    parser = argparse.ArgumentParser(
        description='Perform TPG evolution for a given environment.')
    parser.add_argument('--env',
                        dest='env',
                        type=str,
                        help='OpenAI environment',
                        default="CartPole-v1")
    parser.add_argument('--trainer',
                        dest='trainer_fname',
                        type=str,
                        help='Previously saved Trainer object',
                        default="")
    parser.add_argument('--trainer_savename',
                        dest='trainer_savename',
                        type=str,
                        help='Name under which to save the Trainer object',
                        default="")
    parser.add_argument('--agent_savename',
                        dest='agent_savename',
                        type=str,
                        help='Name under which to save the top Agent object',
                        default="")
    parser.add_argument('--training_results_savename',
                        dest='training_results_savename',
                        type=str,
                        help='Name under which to save the training results',
                        default="")
    parser.add_argument('--generations',
                        dest='num_generations',
                        type=int,
                        help='Number of generations',
                        default=500)
    parser.add_argument('--episodes',
                        dest='num_episodes',
                        type=int,
                        help='Number of episodes per agent at each generation',
                        default=1)
    parser.add_argument('--pop',
                        dest='r_size',
                        type=int,
                        help='Number of agents (root teams) per generation',
                        default=200)
    parser.add_argument('--frames',
                        dest='num_frames',
                        type=int,
                        help='Max number of frames per episode',
                        default=18000)
    parser.add_argument('--seed',
                        dest='seed',
                        type=int,
                        help='Seed for environment',
                        default=-1)
    parser.add_argument('--verbose',
                        dest='verbose',
                        type=parse_bool,
                        help="Print results to the console as we evolve",
                        default=False)
    parser.add_argument('--fast',
                        dest='fast',
                        type=parse_bool,
                        help="Set to True to skip re-evaluating agents",
                        default=False)
    parser.add_argument(
        '--skips',
        dest='skips',
        type=int,
        help='Maximum number of times an agent can skip re-evaluation',
        default=0)
    parser.add_argument('--action_type',
                        dest='action_type',
                        type=int,
                        help='0 = Standard, 1 = Real-valued',
                        default=0)
    args = parser.parse_args(arguments)

    # Reject unsupported action types up front. Previously the message was
    # only printed and execution continued with num_actions unbound, which
    # crashed later with a NameError.
    if args.action_type not in (0, 1):
        parser.error("Invalid action_type argument {}".format(
            args.action_type))

    # Get environment details
    env = gym.make(args.env)

    if args.action_type == 0:
        num_actions = env.action_space.n
    else:
        num_actions = 100  # This number is meaningless when choosing real-valued actions
        # but the Team initialization doesn't like anything less than 2.
        # In fact, the bigger the number, the faster Team initialization.

    # Flattened observation length; the initial value 1 keeps the product
    # defined for an empty shape tuple.
    input_size = reduce(lambda x, y: x * y, env.observation_space.shape, 1)

    configureTrainer(
        atomic_action_range=num_actions,
        p_del_learner=0.7,
        p_add_learner=0.7,
        p_mut_learner=0.3,
        p_mut_learner_action=0.6,
        p_atomic=0.5,
        r_size=args.r_size,
        percent_keep=0.40,
        env_name=args.env,
        trainer_name=args.trainer_savename,
        agent_name=args.agent_savename,
        verbose=args.verbose,
        env_seed=args.seed,
        do_fast_track=args.fast,
        max_eval_skips=args.skips,
        action_selection=args.action_type,
        training_results_nparray_name=args.training_results_savename)

    configureProgram(p_add=0.6,
                     p_del=0.6,
                     p_mut=0.6,
                     p_swap=0.6,
                     max_prog_size=128,
                     min_prog_size=8,
                     num_registers=8,
                     input_size=input_size)

    # If trainer filename was passed in, re-create Trainer. Otherwise, create
    # a new one from scratch
    if args.trainer_fname == "":
        # Create Trainer
        trainer = Trainer()

        # Perform TPG initialization (ie. create initial Team and Learner populations)
        trainer.initialize()
    else:
        # Load Trainer
        trainer = loadTrainer(args.trainer_fname)

    # Try to generate an agent
    trainer.evolve(env,
                   num_generations=args.num_generations,
                   num_episodes=args.num_episodes,
                   num_frames=args.num_frames)
示例#3
0
def run(arguments):
    """Parse command-line options and run linear GP evolution.

    The parsed arguments are passed through ``set_env`` (which returns the
    environment together with a possibly updated argument namespace), then
    used to configure the Program and Trainer before evolving.

    Args:
        arguments: List of command-line argument strings (without the
            program name), handed to ``ArgumentParser.parse_args``.

    Raises:
        SystemExit: Raised by argparse for malformed arguments (including
            unrecognised boolean values) or when ``--help`` is requested.
    """

    def parse_bool(text):
        """Convert a command-line token to a bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because any non-empty string is truthy. The empty string is
        still accepted as False, which was the only way to disable a flag
        before this converter existed.
        """
        if isinstance(text, bool):
            return text
        token = text.strip().lower()
        if token in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value (true/false), got {!r}'.format(text))

    parser = argparse.ArgumentParser(
        description='Perform linear GP evolution for a given environment.')

    # Program configuration
    parser.add_argument('--minps',
                        dest='min_prog_size',
                        type=int,
                        help='Minimum number of instructions per Program',
                        default=32)
    parser.add_argument('--maxps',
                        dest='max_prog_size',
                        type=int,
                        help='Maximum number of instructions per Program',
                        default=1024)
    parser.add_argument('--padd',
                        dest='padd',
                        type=float,
                        help='Instruction addition strength',
                        default=0.7)
    parser.add_argument('--pdel',
                        dest='pdel',
                        type=float,
                        help='Instruction deletion strength',
                        default=0.7)
    parser.add_argument('--pmut',
                        dest='pmut',
                        type=float,
                        help='Instruction mutation strength',
                        default=0.7)

    # Trainer configuration
    parser.add_argument(
        '--generations',
        dest='num_generations',
        type=int,
        help='Number of generations over which evolution is performed',
        default=50)
    parser.add_argument('--pop',
                        dest='population_size',
                        type=int,
                        help='Learner population size',
                        default=200)
    parser.add_argument('--keep',
                        dest='percent_keep',
                        type=float,
                        help='Percentage of surviving Learners',
                        default=0.3)
    parser.add_argument('--fast',
                        dest='fast_mode',
                        type=parse_bool,
                        help='Skip some re-evaluations',
                        default=True)
    parser.add_argument(
        '--skips',
        dest='num_skips',
        type=int,
        help='Number of generations over which to skip re-evaluation',
        default=3)
    parser.add_argument(
        '--episodes',
        dest='num_eps_per_gen',
        type=int,
        help=
        'Number of episodes over which an agent is evaluated each generation',
        default=3)
    parser.add_argument(
        '--verbose',
        dest='verbose',
        type=parse_bool,
        help='Do print out info to the console during evolution',
        default=True)
    parser.add_argument('--agent',
                        dest='agent_save_name',
                        type=str,
                        help='Name under which to save the evolved agent',
                        default="")
    parser.add_argument('--fitness_sharing',
                        dest='fitness_sharing',
                        type=parse_bool,
                        help='Use fitness sharing to evaluate learners',
                        default=False)

    # Environment configuration
    parser.add_argument('--env',
                        dest='env',
                        type=str,
                        help='OpenAI environment',
                        default="CartPole-v1")
    parser.add_argument('--statespace',
                        dest='statespace',
                        type=int,
                        help='Length of flattened state space',
                        default=4)
    args = parser.parse_args(arguments)

    # Set env; set_env also hands back the argument namespace, which is the
    # one used for all configuration below.
    env, args = set_env(args)

    ConfigureProgram(num_inputs=args.statespace,
                     min_prog_size=args.min_prog_size,
                     max_prog_size=args.max_prog_size,
                     p_add=args.padd,
                     p_del=args.pdel,
                     p_mut=args.pmut)

    ConfigureTrainer(num_generations=args.num_generations,
                     population_size=args.population_size,
                     percent_keep=args.percent_keep,
                     fast_mode=args.fast_mode,
                     max_num_skips=args.num_skips,
                     num_eps_per_gen=args.num_eps_per_gen,
                     verbose=args.verbose,
                     agent_save_name=args.agent_save_name,
                     output_folder="../lgp-outputs/",
                     env_name=args.env,
                     fitness_sharing=args.fitness_sharing)

    trainer = Trainer(env)
    trainer.evolve()