def testOutput(self):
    """Checks the list returned by a generator built with sample_generator.

    Two single-sprite generators are passed in. The result must be a list
    holding one sprite whose factors lie in exactly one of the two source
    distributions.
    """
    gen_a = sprite_generators.generate_sprites(_distrib_0, num_sprites=1)
    gen_b = sprite_generators.generate_sprites(_distrib_1, num_sprites=1)
    sampled_gen = sprite_generators.sample_generator((gen_a, gen_b))

    sprites = sampled_gen()
    self.assertIsInstance(sprites, list)
    self.assertLen(sprites, 1)

    # Membership in the two distributions must differ: one True, one False.
    in_first = _distrib_0.contains(sprites[0].factors)
    in_second = _distrib_1.contains(sprites[0].factors)
    self.assertNotEqual(in_first, in_second)
def testOutput(self):
    """Checks the list returned by a generator built with chain_generators.

    A one-sprite generator and a two-sprite generator are chained. The result
    must be a three-element list whose first sprite lies in _distrib_0 and
    whose remaining two lie in _distrib_1.
    """
    first_gen = sprite_generators.generate_sprites(_distrib_0, num_sprites=1)
    second_gen = sprite_generators.generate_sprites(_distrib_1, num_sprites=2)
    chained_gen = sprite_generators.chain_generators(first_gen, second_gen)

    sprites = chained_gen()
    self.assertIsInstance(sprites, list)
    self.assertLen(sprites, 3)

    # Expected source distribution for each position in the chained output.
    expected_sources = (_distrib_0, _distrib_1, _distrib_1)
    for index, source in enumerate(expected_sources):
        self.assertTrue(source.contains(sprites[index].factors))
def get_config(mode='train'):
    """Build the config for a goal-finding task with targets and distractors.

    Args:
        mode: 'train' or 'test'. Used as the key into MODES_TARGET_POSITIONS
            to pick the target position distribution, and recorded in the
            returned metadata.

    Returns:
        config: Dictionary defining task/environment configuration. Can be
            fed as kwargs to environment.Environment.
    """
    # Hue ('c0') is what separates targets from distractors.
    target_hue = distribs.Continuous('c0', 0., 0.4)
    distractor_hue = distribs.Continuous('c0', 0.5, 0.9)

    # Factors that targets and distractors have in common.
    shared_factors = distribs.Product([
        distribs.Discrete('shape', ['square', 'triangle', 'circle']),
        distribs.Discrete('scale', [0.13]),
        distribs.Continuous('c1', 0.3, 1.),
        distribs.Continuous('c2', 0.9, 1.),
    ])

    target_factors = distribs.Product(
        [MODES_TARGET_POSITIONS[mode], target_hue, shared_factors])
    distractor_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distractor_hue,
        shared_factors,
    ])

    targets_gen = sprite_generators.generate_sprites(
        target_factors, num_sprites=NUM_TARGETS)
    distractors_gen = sprite_generators.generate_sprites(
        distractor_factors, num_sprites=NUM_DISTRACTORS)

    # Randomize sprite ordering to eliminate any task information from
    # occlusions.
    sprite_gen = sprite_generators.shuffle(
        sprite_generators.chain_generators(targets_gen, distractors_gen))

    goal_task = tasks.FindGoalPosition(
        filter_distrib=target_hue, terminate_distance=TERMINATE_DISTANCE)

    return {
        'task': goal_task,
        'action_space': common.action_space(),
        'renderers': common.renderers(),
        'init_sprites': sprite_gen,
        'max_episode_length': 20,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
def testGenerateSpritesLengthType(self, num_sprites):
    """Checks the type and length of the list produced by generate_sprites.

    Args:
        num_sprites: Number of sprites requested from the generator; the
            returned list must have exactly this length.
    """
    generator = sprite_generators.generate_sprites(
        _distrib_0, num_sprites=num_sprites)
    sprites = generator()

    self.assertIsInstance(sprites, list)
    self.assertLen(sprites, num_sprites)
    # Only the first element's type is inspected.
    first_sprite = sprites[0]
    self.assertIsInstance(first_sprite, sprite.Sprite)
def get_config(mode='train'):
    """Build the config for a clustering task over hue ('c0') clusters.

    Args:
        mode: 'train' or 'test'. Used as the key into MODES to select which
            clusters are used, and recorded in the returned metadata.

    Returns:
        config: Dictionary defining task/environment configuration. Can be
            fed as kwargs to environment.Environment.
    """
    # Look up the clusters for this mode and their c0 factor distributions.
    cluster_names = MODES[mode]
    c0_clusters = [CLUSTERS_DISTS[name] for name in cluster_names]
    print('Clustering task: {}, #sprites: {}'.format(
        cluster_names, NUM_SPRITES_PER_CLUSTER))

    # Every factor except hue is drawn from the same distribution for all
    # clusters.
    other_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('shape', ['square', 'triangle', 'circle']),
        distribs.Discrete('scale', [0.13]),
        distribs.Continuous('c1', 0.3, 1.),
        distribs.Continuous('c2', 0.9, 1.),
    ])

    # One generator per cluster: combine the cluster's hue with the other
    # factors and request the configured number of sprites.
    per_cluster_gens = []
    for c0 in c0_clusters:
        cluster_factors = distribs.Product((other_factors, c0))
        per_cluster_gens.append(
            sprite_generators.generate_sprites(
                cluster_factors, num_sprites=NUM_SPRITES_PER_CLUSTER))

    # Concatenate the clusters into one scene, then randomize sprite ordering
    # to eliminate any task information from occlusions.
    sprite_gen = sprite_generators.shuffle(
        sprite_generators.chain_generators(*per_cluster_gens))

    # The clustering task defines the rewards.
    clustering_task = tasks.Clustering(
        c0_clusters, terminate_bonus=0., reward_range=10., sparse_reward=True)

    return {
        'task': clustering_task,
        'action_space': common.noisy_action_space(),
        'renderers': common.renderers(),
        'init_sprites': sprite_gen,
        'max_episode_length': MAX_EPISODE_LENGTH,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
def testGenerateSpritesCallableNum(self):
    """Checks generate_sprites when num_sprites is a callable.

    The callable draws an integer in [minval, maxval) each time it is
    invoked, so the length of the generated list must fall in that range.
    """
    minval = 3
    maxval = 6
    # Pass a callable, not a pre-drawn integer: evaluating randint here would
    # hand generate_sprites a plain int and leave the callable path untested.
    num_sprites = lambda: np.random.randint(minval, maxval)
    g = sprite_generators.generate_sprites(_distrib_0, num_sprites=num_sprites)
    sprite_list = g()
    self.assertGreaterEqual(len(sprite_list), minval)
    self.assertLess(len(sprite_list), maxval)
def get_config(mode='train'):
    """Build the config for a sparse-reward goal-finding task with noisy actions.

    Args:
        mode: 'train' or 'test'. Used as the key into MODES_SHAPES to pick
            the shape distribution, and recorded in the returned metadata.

    Returns:
        config: Dictionary defining task/environment configuration. Can be
            fed as kwargs to environment.Environment.
    """
    sprite_factors = distribs.Product([
        MODES_SHAPES[mode],
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('scale', [0.2]),
        distribs.Discrete(
            'c0', [0.9, 0.55, 0.27], probs=[0.333, 0.334, 0.333]),
        distribs.Discrete('c1', [0.6]),
        distribs.Continuous('c2', 0.9, 1.),
    ])

    targets_gen = sprite_generators.generate_sprites(
        sprite_factors, num_sprites=NUM_TARGETS, fix_colors=True)
    # Randomize sprite ordering to eliminate any task information from
    # occlusions.
    shuffled_gen = sprite_generators.shuffle(targets_gen)

    goal_task = tasks.FindGoalPosition(
        terminate_distance=TERMINATE_DISTANCE, sparse_reward=True)

    return {
        'task': goal_task,
        'action_space': common.noisy_action_space(
            MOTION_STD_DEV, PROPORTIONAL_MOTION_NOISE, None),
        'renderers': common.renderers(),
        'init_sprites': shuffled_gen,
        'max_episode_length': 60,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
def get_config(mode=None):
    """Generate the environment config for a reward-free exploration task.

    Args:
        mode: Not used to build the scene (there is no train/test split for
            pure exploration); only recorded in the returned metadata.

    Returns:
        config: Dictionary defining task/environment configuration. Can be
            fed as kwargs to environment.Environment.
    """
    # NOTE: `mode` must stay bound because it is stored in the metadata below.
    # Deleting it here made every call fail with UnboundLocalError.
    factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('shape', ['square', 'triangle', 'circle']),
        distribs.Discrete('scale', [0.13]),
        distribs.Continuous('c0', 0., 1.),
        distribs.Continuous('c1', 0.3, 1.),
        distribs.Continuous('c2', 0.9, 1.),
    ])

    # The sprite count is re-drawn on each call of the callable;
    # randint(1, 7) yields an integer from 1 to 6 inclusive.
    num_sprites = lambda: np.random.randint(1, 7)
    sprite_gen = sprite_generators.generate_sprites(factors,
                                                    num_sprites=num_sprites)

    task = tasks.NoReward()

    config = {
        'task': task,
        'action_space': common.action_space(),
        'renderers': common.renderers(),
        'init_sprites': sprite_gen,
        'max_episode_length': 10,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
    return config
def get_config(mode=None):
    """Generate the environment config for an embodied goal-finding task.

    The scene holds target sprites, distractor sprites and one agent-body
    sprite. The agent body is chained after the shuffled targets and
    distractors, so it is always the last sprite in the generated list.

    Args:
        mode: Not used to build the scene; only recorded in the returned
            metadata.

    Returns:
        config: Dictionary defining task/environment configuration. Can be
            fed as kwargs to environment.Environment.
    """
    # NOTE: `mode` must stay bound because it is stored in the metadata below.
    # Deleting it here made every call fail with UnboundLocalError.
    shared_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('shape', ['square', 'triangle', 'circle']),
        distribs.Discrete('scale', [0.13]),
        distribs.Continuous('c1', 0.3, 1.),
        distribs.Continuous('c2', 0.9, 1.),
    ])

    # Hue ('c0') is what separates targets from distractors.
    target_hue = distribs.Continuous('c0', 0., 0.4)
    distractor_hue = distribs.Continuous('c0', 0.5, 0.9)
    target_factors = distribs.Product([
        target_hue,
        shared_factors,
    ])
    distractor_factors = distribs.Product([
        distractor_hue,
        shared_factors,
    ])

    target_sprite_gen = sprite_generators.generate_sprites(
        target_factors, num_sprites=NUM_TARGETS)
    distractor_sprite_gen = sprite_generators.generate_sprites(
        distractor_factors, num_sprites=NUM_DISTRACTORS)
    sprite_gen = sprite_generators.chain_generators(target_sprite_gen,
                                                    distractor_sprite_gen)
    # Randomize sprite ordering to eliminate any task information from
    # occlusions.
    sprite_gen = sprite_generators.shuffle(sprite_gen)

    # Create the agent body.
    agent_body_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('shape', ['circle']),
        distribs.Discrete('scale', [0.07]),
        distribs.Discrete('c0', [1.]),
        distribs.Discrete('c1', [0.]),
        distribs.Discrete('c2', [1.]),
    ])
    agent_body_gen = sprite_generators.generate_sprites(agent_body_factors,
                                                        num_sprites=1)
    # Chained after the shuffle, so the agent body stays last.
    sprite_gen = sprite_generators.chain_generators(sprite_gen, agent_body_gen)

    task = tasks.FindGoalPosition(filter_distrib=target_hue,
                                  terminate_distance=TERMINATE_DISTANCE)

    renderers = {
        'image':
            spriteworld_renderers.PILRenderer(
                image_size=(64, 64),
                anti_aliasing=5,
                color_to_rgb=color_maps.hsv_to_rgb)
    }

    config = {
        'task': task,
        'action_space': action_spaces.Embodied(step_size=0.05),
        'renderers': renderers,
        'init_sprites': sprite_gen,
        'max_episode_length': 50,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
    return config
def get_config(mode='train'):
    """Build the config for a combined clustering and goal-finding task.

    Args:
        mode: 'train' or 'test'. Selects the scale distribution of the
            goal-finding sprites and the 'c0' range of the clustering
            sprites, and is recorded in the returned metadata.

    Returns:
        config: Dictionary defining task/environment configuration. Can be
            fed as kwargs to environment.Environment.

    Raises:
        ValueError: If mode is neither 'train' nor 'test'.
    """
    # Factor distributions common to all objects.
    common_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Continuous('angle', 0, 360, dtype='int32'),
    ])

    # Train/test split: goal-finding sprites differ in scale and clustering
    # sprites differ in their 'c0' range.
    goal_finding_scale_test = distribs.Continuous('scale', 0.08, 0.12)
    green_blue_colors = distribs.Product([
        distribs.Continuous('c1', 64, 256, dtype='int32'),
        distribs.Continuous('c2', 64, 256, dtype='int32'),
    ])
    if mode == 'train':
        # Training scales are the full range minus the held-out test range.
        goal_finding_scale = distribs.SetMinus(
            distribs.Continuous('scale', 0.05, 0.15),
            goal_finding_scale_test,
        )
        c0_low, c0_high = 128, 256
    elif mode == 'test':
        goal_finding_scale = goal_finding_scale_test
        c0_low, c0_high = 0, 128
    else:
        raise ValueError(
            'Invalid mode {}. Mode must be "train" or "test".'.format(mode))
    cluster_colors = distribs.Product([
        distribs.Continuous('c0', c0_low, c0_high, dtype='int32'),
        green_blue_colors,
    ])

    # Clustering sprite generators: two sprites per shape.
    cluster_shapes = [
        distribs.Discrete('shape', [shape_name])
        for shape_name in ['triangle', 'square', 'pentagon']
    ]
    sprite_gen_list = [
        sprite_generators.generate_sprites(
            distribs.Product([
                common_factors,
                cluster_colors,
                shape,
                distribs.Continuous('scale', 0.08, 0.12),
            ]),
            num_sprites=2)
        for shape in cluster_shapes
    ]

    # Goal-finding sprite generators: one color family per goal position.
    goal_finding_colors = [
        distribs.Product([
            distribs.Continuous('c0', 192, 256, dtype='int32'),
            distribs.Continuous('c1', 0, 128, dtype='int32'),
            distribs.Continuous('c2', 64, 128, dtype='int32'),
        ]),
        distribs.Product([
            distribs.Continuous('c0', 0, 128, dtype='int32'),
            distribs.Continuous('c1', 192, 256, dtype='int32'),
            distribs.Continuous('c2', 64, 128, dtype='int32'),
        ]),
    ]
    # Goal positions corresponding to the colors in goal_finding_colors.
    goal_finding_positions = [(0., 0.5), (1., 0.5)]
    goal_finding_shapes = distribs.Discrete('shape', ['spoke_4', 'star_4'])
    sprite_gen_list.extend(
        sprite_generators.generate_sprites(
            distribs.Product([
                common_factors,
                goal_finding_scale,
                goal_finding_shapes,
                colors,
            ]),
            num_sprites=lambda: np.random.randint(1, 3))
        for colors in goal_finding_colors)

    # Distractor sprite generator; randint(0, 3) may yield zero distractors.
    distractor_factors = distribs.Product([
        common_factors,
        distribs.Discrete('shape', ['circle']),
        distribs.Continuous('c0', 64, 256, dtype='uint8'),
        distribs.Continuous('c1', 64, 256, dtype='uint8'),
        distribs.Continuous('c2', 64, 256, dtype='uint8'),
        distribs.Continuous('scale', 0.08, 0.12),
    ])
    sprite_gen_list.append(
        sprite_generators.generate_sprites(
            distractor_factors,
            num_sprites=lambda: np.random.randint(0, 3)))

    # Concatenate everything into a single scene, then randomize sprite
    # ordering to eliminate any task information from occlusions.
    sprite_gen = sprite_generators.shuffle(
        sprite_generators.chain_generators(*sprite_gen_list))

    # Combined task: one clustering task followed by one goal-finding task
    # per (colors, goal position) pair.
    task_list = [
        tasks.Clustering(cluster_shapes, terminate_bonus=0., reward_range=10.)
    ]
    for colors, goal_pos in zip(goal_finding_colors, goal_finding_positions):
        task_list.append(
            tasks.FindGoalPosition(
                distribs.Product([colors, goal_finding_shapes]),
                goal_position=goal_pos,
                weights_dimensions=(1, 0),
                terminate_distance=0.15,
                raw_reward_multiplier=30))
    combined_task = tasks.MetaAggregated(
        task_list, reward_aggregator='sum', termination_criterion='all')

    image_renderer = spriteworld_renderers.PILRenderer(
        image_size=(64, 64), anti_aliasing=5)

    return {
        'task': combined_task,
        'action_space': action_spaces.SelectMove(scale=0.5),
        'renderers': {'image': image_renderer},
        'init_sprites': sprite_gen,
        'max_episode_length': 50,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
def testOutput(self):
    """Checks that a shuffled generator still returns a list of five sprites."""
    base_gen = sprite_generators.generate_sprites(_distrib_0, num_sprites=5)
    shuffled_gen = sprite_generators.shuffle(base_gen)

    sprites = shuffled_gen()
    self.assertIsInstance(sprites, list)
    self.assertLen(sprites, 5)
def get_config(mode='train'):
    """Build the config for a task aggregated from goal-finding subtasks.

    One goal-finding subtask and one single-sprite generator are created per
    entry of SUBTASKS. The first NUM_TARGETS-sized combination of subtasks is
    held out for testing; training samples among the remaining combinations.

    Args:
        mode: 'train' or 'test'. Selects whether sprites come from the
            training combinations or the held-out one, and is recorded in
            the returned metadata.

    Returns:
        config: Dictionary defining task/environment configuration. Can be
            fed as kwargs to environment.Environment.

    Raises:
        ValueError: If mode is neither 'train' nor 'test'.
    """
    # Create the subtasks and their corresponding sprite generators.
    subtasks = []
    sprite_gen_per_subtask = []
    for spec in SUBTASKS:
        goal_task = tasks.FindGoalPosition(
            filter_distrib=spec['distrib'],
            goal_position=spec['goal_position'],
            terminate_distance=TERMINATE_DISTANCE,
            raw_reward_multiplier=RAW_REWARD_MULTIPLIER)
        subtasks.append(goal_task)

        spec_factors = distribs.Product((
            spec['distrib'],
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9),
            distribs.Discrete('shape', ['square', 'triangle', 'circle']),
            distribs.Discrete('scale', [0.13]),
            distribs.Continuous('c1', 0.3, 1.),
            distribs.Continuous('c2', 0.9, 1.),
        ))
        sprite_gen_per_subtask.append(
            sprite_generators.generate_sprites(spec_factors, num_sprites=1))

    def _chain_combo(combo):
        """Chains the sprite generators of the subtasks indexed by combo."""
        return sprite_generators.chain_generators(
            *[sprite_gen_per_subtask[index] for index in combo])

    # Consider all combinations of subtasks.
    subtask_combos = list(
        itertools.combinations(np.arange(len(SUBTASKS)), NUM_TARGETS))

    if mode == 'train':
        # Randomly sample a combination of subtasks, holding the first one
        # out.
        sprite_gen = sprite_generators.sample_generator(
            [_chain_combo(combo) for combo in subtask_combos[1:]])
    elif mode == 'test':
        # Use the held-out subtask combination for testing.
        sprite_gen = _chain_combo(subtask_combos[0])
    else:
        raise ValueError('Invalide mode {}.'.format(mode))

    # Randomize sprite ordering to eliminate any task information from
    # occlusions.
    sprite_gen = sprite_generators.shuffle(sprite_gen)

    aggregated_task = tasks.MetaAggregated(
        subtasks, reward_aggregator='sum', termination_criterion='all')

    return {
        'task': aggregated_task,
        'action_space': common.action_space(),
        'renderers': common.renderers(),
        'init_sprites': sprite_gen,
        'max_episode_length': MAX_EPISODE_LENGTH,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }