def learn_evolutionary():
    base_agent = MiniSoccerAgent(rl.FeatureSet([]))

    sample_state = base_agent.environment.generate_start_state()
    state_vars = sample_state.state_variables

    featurizer_retile = rl.FeaturizerRetile(state_vars)
    featurizer_interaction = rl.FeaturizerInteraction(state_vars)
    featurizer_angle = rl.FeaturizerAngle(state_vars)
    featurizer_dist = rl.FeaturizerDist(state_vars)
    featurizer_dist_x = rl.FeaturizerDistX(state_vars)
    featurizer_dist_y = rl.FeaturizerDistY(state_vars)
    featurizer_flag = rl.FeaturizerFlag(state_vars)
    featurizer_point_xy = rl.FeaturizerPointXY(state_vars)
    featurizer_point_x = rl.FeaturizerPointX(state_vars)
    featurizer_point_y = rl.FeaturizerPointY(state_vars)

    # (selection probability, featurizer) pairs; probabilities sum to 1.0.
    featurizers_map = [(0.12, featurizer_retile),
                       (0.15, featurizer_interaction),
                       (0.10, featurizer_flag),
                       (0.16, featurizer_angle),
                       (0.12, featurizer_dist),
                       (0.09, featurizer_dist_x),
                       (0.09, featurizer_dist_y),
                       (0.07, featurizer_point_xy),
                       (0.05, featurizer_point_x),
                       (0.05, featurizer_point_y)]

    # Alternative weighting, kept for reference:
    # featurizers_map = [(0.15, featurizer_retile),
    #                    (0.10, featurizer_interaction),
    #                    (0.10, featurizer_flag),
    #                    (0.20, featurizer_angle),
    #                    (0.15, featurizer_dist),
    #                    (0.10, featurizer_dist_x),
    #                    (0.10, featurizer_dist_y),
    #                    (0.10, featurizer_point_xy),
    #                    (0.0, featurizer_point_x),
    #                    (0.0, featurizer_point_y)]

    arbitrator = rl.ArbitratorEvolutionary(base_agent, featurizers_map,
                                           NUM_GENERATIONS, POPULATION_SIZE,
                                           NUM_GENERATION_EPISODES,
                                           NUM_CHAMPION_TRIALS,
                                           NUM_BEST_CHAMPION_EPISODES,
                                           NUM_BEST_CHAMPION_TRIALS,
                                           rl.DEFAULT_ETA)
    arbitrator.run(MAX_STEPS)
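
# The constructor arguments above suggest the shape of the evolutionary
# loop: NUM_GENERATIONS generations of POPULATION_SIZE candidate agents,
# each trained for NUM_GENERATION_EPISODES episodes; each generation's
# champion is scored over NUM_CHAMPION_TRIALS, and the overall best
# champion gets NUM_BEST_CHAMPION_EPISODES of extra training and
# NUM_BEST_CHAMPION_TRIALS of final evaluation. The sketch below is a
# hypothetical illustration of that control flow with placeholder
# spawn/train/evaluate helpers; it is NOT the actual
# rl.ArbitratorEvolutionary implementation.
import random

def _spawn_agent(base_agent, featurizers_map):
    # Placeholder: a real arbitrator would build an agent whose feature
    # set is drawn from featurizers_map.
    return {'featurizers': [f for _, f in featurizers_map],
            'skill': random.random()}

def _train(agent, num_episodes, max_steps):
    agent['skill'] += 0.001 * num_episodes  # placeholder "learning"

def _evaluate(agent, num_trials, max_steps):
    return agent['skill']  # placeholder fitness

def _evolutionary_loop(base_agent, featurizers_map, num_generations,
                       population_size, num_generation_episodes,
                       num_champion_trials, num_best_champion_episodes,
                       num_best_champion_trials, max_steps):
    best_champion, best_score = None, float('-inf')
    for _ in range(num_generations):
        population = [_spawn_agent(base_agent, featurizers_map)
                      for _ in range(population_size)]
        for agent in population:
            _train(agent, num_generation_episodes, max_steps)
        champion = max(population,
                       key=lambda a: _evaluate(a, num_champion_trials,
                                               max_steps))
        score = _evaluate(champion, num_champion_trials, max_steps)
        if score > best_score:
            best_champion, best_score = champion, score
    # Give the overall best champion extra training, then a final score.
    _train(best_champion, num_best_champion_episodes, max_steps)
    return _evaluate(best_champion, num_best_champion_trials, max_steps)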

def learn_evolutionary():
    base_agent = MiniSoccerAgent(rl.FeatureSet([]))

    sample_state = base_agent.environment.generate_start_state()
    state_vars = sample_state.state_variables

    featurizer_retile = rl.FeaturizerRetile(state_vars)
    featurizer_interaction = rl.FeaturizerInteraction(state_vars)
    featurizer_angle = rl.FeaturizerAngle(state_vars)
    featurizer_dist = rl.FeaturizerDist(state_vars)
    featurizer_dist_x = rl.FeaturizerDistX(state_vars)
    featurizer_dist_y = rl.FeaturizerDistY(state_vars)
    featurizer_flag = rl.FeaturizerFlag(state_vars)
    featurizer_point_xy = rl.FeaturizerPointXY(state_vars)
    featurizer_point_x = rl.FeaturizerPointX(state_vars)
    featurizer_point_y = rl.FeaturizerPointY(state_vars)

    # Earlier weighting, commented out because it was immediately
    # shadowed by the assignment below:
    # featurizers_map = [(0.12, featurizer_retile),
    #                    (0.15, featurizer_interaction),
    #                    (0.05, featurizer_flag),
    #                    (0.20, featurizer_angle),
    #                    (0.08, featurizer_dist),
    #                    (0.08, featurizer_dist_x),
    #                    (0.08, featurizer_dist_y),
    #                    (0.08, featurizer_point_xy),
    #                    (0.08, featurizer_point_x),
    #                    (0.08, featurizer_point_y)]

    featurizers_map = [(0.15, featurizer_retile),
                       (0.10, featurizer_interaction),
                       (0.10, featurizer_flag),
                       (0.20, featurizer_angle),
                       (0.15, featurizer_dist),
                       (0.10, featurizer_dist_x),
                       (0.10, featurizer_dist_y),
                       (0.10, featurizer_point_xy),
                       (0.0, featurizer_point_x),
                       (0.0, featurizer_point_y)]

    # Sanity check: the selection probabilities should sum to 1.0.
    sum_prob = sum(prob for (prob, featurizer) in featurizers_map)
    print("Initialized %d featurizers, sum of selection probabilities: %f"
          % (len(featurizers_map), sum_prob))

    arbitrator = rl.ArbitratorEvolutionary(base_agent, featurizers_map,
                                           NUM_GENERATIONS, POPULATION_SIZE,
                                           GENERATION_EPISODES)
    arbitrator.execute(MAX_STEPS)
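
# The sanity check above treats the first element of each pair as a
# per-featurizer selection probability, with the whole map summing to
# 1.0. A minimal sketch of how a featurizer might be drawn under that
# convention; the helper below is hypothetical, not part of the rl
# module.
import random

def _draw_featurizer(featurizers_map):
    # Accumulate probability mass until it exceeds a uniform draw.
    r = random.random()
    acc = 0.0
    for prob, featurizer in featurizers_map:
        acc += prob
        if r < acc:
            return featurizer
    return featurizers_map[-1][1]  # guard against floating-point round-off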

def learn_evolutionary():
    base_agent = MiniSoccerAgent(rl.FeatureSet([]))

    sample_state = base_agent.environment.generate_start_state()
    state_vars = sample_state.state_variables

    featurizer_retile = rl.FeaturizerRetile(state_vars)
    featurizer_angle = rl.FeaturizerAngle(state_vars)
    featurizer_dist = rl.FeaturizerDist(state_vars)
    featurizer_dist_x = rl.FeaturizerDistX(state_vars)
    featurizer_dist_y = rl.FeaturizerDistY(state_vars)
    featurizer_flag = rl.FeaturizerFlag(state_vars)
    featurizer_point2d = rl.FeaturizerPoint2D(state_vars)

    featurizers_map = [(0.20, featurizer_retile),
                       (0.35, featurizer_dist),
                       (0.50, featurizer_dist_x),
                       (0.65, featurizer_dist_y),
                       (0.80, featurizer_angle),
                       (0.90, featurizer_point2d),
                       (1.00, featurizer_flag)]

    arbitrator = rl.ArbitratorEvolutionary(base_agent, featurizers_map,
                                           NUM_GENERATIONS, POPULATION_SIZE,
                                           GENERATION_EPISODES)
    arbitrator.execute(MAX_STEPS)
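
# Unlike the other variants, this map's weights look like cumulative
# thresholds (0.20, 0.35, ..., 1.00) rather than individual
# probabilities that sum to 1.0. Under that reading, a draw returns the
# first featurizer whose threshold exceeds a uniform sample; the helper
# below is a hypothetical sketch, not part of the rl module.
import random

def _draw_featurizer_cumulative(featurizers_map):
    # featurizers_map holds (cumulative threshold, featurizer) pairs,
    # with the final threshold equal to 1.00.
    r = random.random()
    for threshold, featurizer in featurizers_map:
        if r < threshold:
            return featurizer
    return featurizers_map[-1][1]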

def learn_evolutionary():
    base_agent = KeepAwayAgent(rl.FeatureSet([]))

    sample_state = base_agent.environment.generate_start_state()
    state_vars = sample_state.state_variables

    # Earlier three-featurizer setup, kept for reference:
    # retile_featurizer = rl.FeaturizerRetile(state_vars)
    # angle_featurizer = rl.FeaturizerAngle(state_vars)
    # dist_featurizer = rl.FeaturizerDist(state_vars)
    #
    # featurizers_map = [(0.2, retile_featurizer),
    #                    (0.4, angle_featurizer),
    #                    (0.4, dist_featurizer)]

    featurizer_retile = rl.FeaturizerRetile(state_vars)
    featurizer_interaction = rl.FeaturizerInteraction(state_vars)
    featurizer_angle = rl.FeaturizerAngle(state_vars)
    featurizer_dist = rl.FeaturizerDist(state_vars)
    featurizer_dist_x = rl.FeaturizerDistX(state_vars)
    featurizer_dist_y = rl.FeaturizerDistY(state_vars)
    # featurizer_flag = rl.FeaturizerFlag(state_vars)
    featurizer_point_xy = rl.FeaturizerPointXY(state_vars)
    featurizer_point_x = rl.FeaturizerPointX(state_vars)
    featurizer_point_y = rl.FeaturizerPointY(state_vars)

    # (selection probability, featurizer) pairs; probabilities sum to 1.0.
    featurizers_map = [(0.16, featurizer_retile),
                       (0.15, featurizer_interaction),
                       (0.16, featurizer_angle),
                       (0.13, featurizer_dist),
                       (0.11, featurizer_dist_x),
                       (0.11, featurizer_dist_y),
                       (0.08, featurizer_point_xy),
                       (0.05, featurizer_point_x),
                       (0.05, featurizer_point_y)]

    arbitrator = rl.ArbitratorEvolutionary(
        base_agent, featurizers_map, NUM_GENERATIONS, POPULATION_SIZE,
        NUM_GENERATION_EPISODES, NUM_CHAMPION_TRIALS,
        NUM_BEST_CHAMPION_EPISODES, NUM_BEST_CHAMPION_TRIALS,
        rl.DEFAULT_ETA)
    arbitrator.run()
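
# The functions above reference module-level constants defined elsewhere
# in the source. The values below are hypothetical placeholders chosen
# only to make the snippets self-contained; the real values may differ.
NUM_GENERATIONS = 10               # evolutionary generations
POPULATION_SIZE = 20               # candidate agents per generation
NUM_GENERATION_EPISODES = 100      # training episodes per candidate
GENERATION_EPISODES = 100          # older name used by some variants
NUM_CHAMPION_TRIALS = 10           # evaluation trials per generation champion
NUM_BEST_CHAMPION_EPISODES = 500   # extra training for the overall best champion
NUM_BEST_CHAMPION_TRIALS = 100     # final evaluation trials
MAX_STEPS = 1000                   # per-episode step cap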