Example #1
def get_env():
    """Get simple environment, wrapper in simulation wrapper."""
    def _state_initializer():
        agent = sprite.Sprite(x=0.5, y=0.5, scale=0.1, c0=128)
        target = sprite.Sprite(x=0.75, y=0.5, scale=0.1, c1=128)
        state = collections.OrderedDict([('agent', [agent]),
                                         ('target', [target])])
        return state

    task = tasks.ContactReward(1.,
                               'agent',
                               'target',
                               reset_steps_after_contact=2)

    action_space = action_spaces.Grid(0.1,
                                      action_layers='agent',
                                      control_velocity=True)

    def _modify_meta_state(meta_state):
        meta_state['key'] = meta_state['key'] + 1

    update_meta_state = game_rules.ModifyMetaState(_modify_meta_state)

    env = environment.Environment(
        state_initializer=_state_initializer,
        physics=physics_lib.Physics(),
        task=task,
        action_space=action_space,
        observers={'image': observers.PILRenderer(image_size=(64, 64))},
        meta_state_initializer=lambda: {'key': 0},
        game_rules=(update_meta_state, ),
    )
    sim_env = simulation.SimulationEnvironment(env)
    return sim_env
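A hedged usage sketch (not from the library docs), assuming the wrapped environment exposes dm_env-style reset()/step() and that the Grid action space accepts integer action indices, as the GymWrapper test in Example #2 suggests:

# Hypothetical usage of the simulation-wrapped environment defined above.
sim_env = get_env()
timestep = sim_env.reset()       # assumed dm_env-style interface
for _ in range(10):
    timestep = sim_env.step(0)   # 0 assumed to be a valid Grid action index
    print(timestep.reward)       # reward from the ContactReward task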
Example #2
    def testGridActions(self):
        def _state_initializer():
            agent = sprite.Sprite(x=0.5, y=0.5, scale=0.1, c0=128)
            return collections.OrderedDict([('agent', [agent])])

        max_episode_length = 5
        task = tasks.Reset(lambda _: True,
                           steps_after_condition=max_episode_length - 1)
        env = environment.Environment(
            state_initializer=_state_initializer,
            physics=physics_lib.Physics(),
            task=task,
            action_space=action_spaces.Grid(),
            observers={'image': observers.PILRenderer(image_size=(64, 64))})
        gym_env = gym_wrapper.GymWrapper(env)

        assert (gym_env.observation_space == spaces.Dict({
            'image':
            spaces.Box(-np.inf, np.inf, shape=(64, 64, 3), dtype=np.uint8)
        }))
        assert (gym_env.action_space == spaces.Discrete(5))

        for _ in range(3):
            gym_env.reset()
            for _ in range(max_episode_length - 1):
                action = gym_env.action_space.sample()
                obs, reward, done, _ = gym_env.step(action)
                assert (obs['image'].dtype == np.uint8)
                assert not done
                assert reward == 0.
            action = gym_env.action_space.sample()
            _, _, done, _ = gym_env.step(action)
            assert done
            _, _, done, _ = gym_env.step(action)
            assert not done
Example #3
    def _run_test(self, tether, step_1_state, final_state, plot=False):
        """Run test.
        
        Set plot = True to display videos of the test conditions.
        
        Args:
            tether: Tether force.
            step_1_state: Iterable of lists, one for each sprite. Each element
                is a list of [position, velocity, angle_vel] for the sprite
                after the first physics step.
            final_state: Same as step_1_state, except for the final step.
            plot: Bool. Whether to display video or run test.
        """
        state = get_state()

        collision = physics_lib.Collision(elasticity=0.,
                                          symmetric=False,
                                          update_angle_vel=True)
        physics = physics_lib.Physics(
            (collision, 'sprites', 'walls'),
            corrective_physics=[tether],
            updates_per_env_step=10,
        )

        steps = 45
        if plot:
            MatplotlibUI()._simulate_video(physics, state, steps=steps)
        else:
            physics.step(state)

            for s, pred in zip(state['sprites'], step_1_state):
                assert np.allclose(s.position, pred[0], atol=_ATOL)
                assert np.allclose(s.velocity, pred[1], atol=_ATOL)
                assert np.allclose(s.angle_vel, pred[2], atol=_ATOL)

            for _ in range(steps - 1):
                physics.step(state)

            for s, pred in zip(state['sprites'], final_state):
                assert np.allclose(s.position, pred[0], atol=_ATOL)
                assert np.allclose(s.velocity, pred[1], atol=_ATOL)
                assert np.allclose(s.angle_vel, pred[2], atol=_ATOL)
Example #4
def _get_config(max_vel):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Grid
    grid = shapes.grid_lines(grid_x=_GRID_SIZE,
                             grid_y=_GRID_SIZE,
                             buffer_border=1.,
                             c0=0.,
                             c1=0.,
                             c2=0.5)

    def state_initializer():
        agent = sprite.Sprite(x=0.5,
                              y=0.5,
                              shape='circle',
                              scale=0.04,
                              c0=0.33,
                              c1=1.,
                              c2=0.66)
        annulus_shape = shapes.annulus_vertices(0.15, 2.)
        agent_annulus = sprite.Sprite(x=0.5,
                                      y=0.5,
                                      shape=annulus_shape,
                                      scale=1.,
                                      c0=0.6,
                                      c1=1.,
                                      c2=1.)
        prey = get_prey(
            get_parallelogram(min_axis_ratio=0.5),
            scale=0.4,
            max_vel=max_vel,
            sprite_scale=0.075,
        )
        state = collections.OrderedDict([
            ('grid', grid),
            ('prey', prey),
            ('agent', [agent]),
            ('agent_annulus', [agent_annulus]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    force = (physics_lib.Drag(coeff_friction=0.25), ['agent', 'agent_annulus'])
    corrective_physics = physics_lib.Tether(('prey', ), update_angle_vel=True)
    physics = physics_lib.Physics(
        force,
        updates_per_env_step=10,
        corrective_physics=corrective_physics,
    )

    ############################################################################
    # Task
    ############################################################################

    prey_task = tasks.ContactReward(
        1,
        layers_0='agent',
        layers_1='prey',
        condition=lambda s_agent, s_prey: s_prey.c1 > 0.5,
    )
    reset_trial_task = tasks.Reset(
        condition=lambda state: all([s.c1 < 0.5 for s in state['prey']]),
        steps_after_condition=10,
    )
    task = tasks.CompositeTask(prey_task, reset_trial_task, timeout_steps=500)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(scaling_factor=0.01,
                                          action_layers=('agent',
                                                         'agent_annulus'))

    ############################################################################
    # Observer
    ############################################################################

    _polygon_modifier = observers.polygon_modifiers.FirstPersonAgent(
        agent_layer='agent')
    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
        polygon_modifier=_polygon_modifier,
    )

    ############################################################################
    # Game rules
    ############################################################################

    # Make prey gray upon contact
    def _make_prey_gray(prey):
        prey.c1 = 0.
        prey.c2 = 0.6

    make_prey_gray = game_rules.ModifyOnContact(
        layers_0='agent',
        layers_1='prey',
        modifier_1=_make_prey_gray,
    )

    # Keep agent near center
    keep_near_center = game_rules.KeepNearCenter(
        agent_layer='agent',
        layers_to_center=['agent_annulus', 'prey'],
        grid_x=_GRID_SIZE,
    )

    rules = (make_prey_gray, keep_near_center)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': rules,
    }
    return config
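As a hedged illustration (an assumption, not shown in the source), a config like this could be unpacked into the environment constructor, since its keys mirror the keyword arguments used in Example #1:

# Hypothetical: build the environment from the config dictionary.
config = _get_config(max_vel=0.02)   # max_vel chosen only for illustration
env = environment.Environment(**config)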
Example #5
def _get_config(num_ghosts, maze_size):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = dict(shape='circle', scale=0.05, c0=0.33, c1=1., c2=0.66)

    # Prey
    prey_factors = dict(shape='circle', scale=0.025, c0=0.2, c1=1., c2=1.)

    # Ghosts
    ghost_factors = dict(shape='circle',
                         scale=0.05,
                         mass=np.inf,
                         c0=0.,
                         c1=1.,
                         c2=0.8)

    def state_initializer():
        maze = maze_lib.generate_random_maze_matrix(size=maze_size,
                                                    ambient_size=12)
        maze = maze_lib.Maze(np.flip(maze, axis=0))
        walls = maze.to_sprites(c0=0., c1=0., c2=0.8)

        # Sample positions in maze grid of agent and ghosts
        n_ghosts = num_ghosts()
        points = maze.sample_distinct_open_points(1 + n_ghosts)
        positions = [maze.grid_side * (0.5 + np.array(x)) for x in points]

        # Agent
        agent_position = positions[0]
        agent = [
            sprite.Sprite(x=agent_position[1],
                          y=agent_position[0],
                          **agent_factors)
        ]

        # Ghosts
        ghosts = []
        for i in range(n_ghosts):
            position = positions[i + 1]
            ghosts.append(
                sprite.Sprite(x=position[1], y=position[0], **ghost_factors))

        # Place prey at every open maze location
        prey = []
        open_maze_points = np.argwhere(maze.maze == 0)
        for p in open_maze_points:
            pos = maze.grid_side * (0.5 + np.array(p))
            prey.append(sprite.Sprite(x=pos[1], y=pos[0], **prey_factors))

        state = collections.OrderedDict([
            ('walls', walls),
            ('prey', prey),
            ('ghosts', ghosts),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    maze_physics = physics_lib.MazePhysics(
        maze_layer='walls',
        avatar_layers=('agent', 'prey', 'ghosts'),
        constant_speed=0.015,
    )

    physics = physics_lib.Physics(
        (physics_lib.RandomMazeWalk(speed=0.015), ['ghosts']),
        updates_per_env_step=1,
        corrective_physics=[maze_physics],
    )

    ############################################################################
    # Task
    ############################################################################

    ghost_task = tasks.ContactReward(-5,
                                     layers_0='agent',
                                     layers_1='ghosts',
                                     reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(ghost_task,
                               prey_task,
                               reset_task,
                               timeout_steps=1000)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Grid(
        scaling_factor=0.015,
        action_layers='agent',
        control_velocity=True,
        momentum=0.5,  # Value irrelevant, since maze_physics has constant speed
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(256, 256),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )

    ############################################################################
    # Game rules
    ############################################################################

    def _unglue(s):
        s.mass = 1.

    def _unglue_condition(state):
        return not np.all(state['agent'][0].velocity == 0)

    unglue = game_rules.ConditionalRule(
        condition=_unglue_condition,
        rules=game_rules.ModifySprites(('prey', 'ghosts'), _unglue),
    )

    vanish_on_contact = game_rules.VanishOnContact(vanishing_layer='prey',
                                                   contacting_layer='agent')

    rules = (vanish_on_contact, unglue)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': rules,
    }
    return config
Example #6
def get_config(num_targets):
    """Get environment config.
    
    Args:
        num_targets: Int. Number of targets.
    """
    if not isinstance(num_targets, int) or num_targets < 1:
        raise ValueError(
            f'num_targets is {num_targets}, but must be a positive integer')

    ############################################################################
    # State initialization
    ############################################################################

    screen = sprite.Sprite(x=0.5,
                           y=0.5,
                           shape='square',
                           scale=2.,
                           c0=0.6,
                           c1=0.7,
                           c2=0.7)

    target_factor_distrib = distribs.Product(
        [distribs.Continuous('c0', 0., 1.)],
        shape='circle',
        scale=0.085,
        c1=1.,
        c2=1.,
    )
    cover_factors = dict(mass=0.,
                         shape='circle',
                         scale=0.1,
                         c0=0.,
                         c1=0.,
                         c2=0.5,
                         opacity=0)

    def state_initializer():
        """State initializer method to be fed into environment."""

        # Get targets and covers
        sprite_positions = 0.5 + 0.35 * _get_polygon(num_targets, 0.7)
        target_factors = [
            target_factor_distrib.sample() for _ in range(num_targets)
        ]
        targets = [
            sprite.Sprite(x=pos[0], y=pos[1], **factors)
            for pos, factors in zip(sprite_positions, target_factors)
        ]
        covers = [
            sprite.Sprite(x=pos[0], y=pos[1], **cover_factors)
            for pos in sprite_positions
        ]

        # Tag the cover metadata based on whether they are prey or not
        for i, s in enumerate(covers):
            if i == 0:
                s.metadata = {'prey': True}
            else:
                s.metadata = {'prey': False}

        # Make cue have the same factors as the first target, except slightly
        # smaller
        cue_factors = copy.deepcopy(target_factors[0])
        cue_factors['scale'] = 0.7 * target_factors[0]['scale']
        cue = sprite.Sprite(x=0.5,
                            y=0.501,
                            opacity=0,
                            mass=np.inf,
                            **cue_factors)

        agent = sprite.Sprite(x=0.5,
                              y=0.5,
                              shape='circle',
                              scale=0.1,
                              c0=0.4,
                              c1=0.,
                              c2=1.,
                              mass=np.inf)
        annulus_verts = shapes.annulus_vertices(0.34, 0.36)
        annulus = sprite.Sprite(x=0.5,
                                y=0.5,
                                shape=annulus_verts,
                                scale=1.,
                                c0=0.,
                                c1=0.,
                                c2=0.3)

        state = collections.OrderedDict([
            ('annulus', [annulus]),
            ('targets', targets),
            ('covers', covers),
            ('agent', [agent]),
            ('cue', [cue]),
            ('screen', [screen]),
        ])
        return state

    ################################################################################
    # Physics
    ################################################################################

    drag = (physics_lib.Drag(coeff_friction=0.25), ['agent', 'cue'])
    tether_covers = physics_lib.TetherZippedLayers(('targets', 'covers'),
                                                   anchor=np.array([0.5, 0.5]))
    physics = physics_lib.Physics(
        drag,
        updates_per_env_step=1,
        corrective_physics=[tether_covers],
    )

    ################################################################################
    # Task
    ################################################################################

    contact_task = tasks.ContactReward(
        reward_fn=lambda _, s: 1 if s.metadata['prey'] else -1,
        layers_0='agent',
        layers_1='covers',
    )

    def _should_reset(state, meta_state):
        should_reset = (state['covers'][0].opacity == 0
                        and meta_state['phase'] == 'response')
        return should_reset

    reset_task = tasks.Reset(
        condition=_should_reset,
        steps_after_condition=15,
    )

    task = tasks.CompositeTask(contact_task, reset_task, timeout_steps=800)

    ################################################################################
    # Action Space
    ################################################################################

    action_space = action_spaces.Joystick(scaling_factor=0.01,
                                          action_layers=['agent', 'cue'])

    ################################################################################
    # Observer
    ################################################################################

    _polygon_modifier = observers.polygon_modifiers.FirstPersonAgent(
        agent_layer='agent')
    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
        polygon_modifier=_polygon_modifier,
    )

    ############################################################################
    # Game rules
    ############################################################################

    def _make_opaque(s):
        s.opacity = 255

    def _make_transparent(s):
        s.opacity = 0

    # Screen Phase

    screen_phase = gr.Phase(duration=1, name='screen')

    # Visible Phase

    disappear_screen = gr.ModifySprites('screen', _make_transparent)
    visible_phase = gr.Phase(one_time_rules=disappear_screen,
                             duration=2,
                             name='visible')

    # Motion Phase

    def _move(s):
        s.velocity = np.random.uniform(-0.25, 0.25, size=(2, ))

    cover_targets = gr.ModifySprites('covers', _make_opaque)
    begin_motion = BeginMotion(angle_vel_range=(0.1, 0.3))
    motion_phase = gr.Phase(
        one_time_rules=[cover_targets, begin_motion],
        duration=100,
        name='motion',
    )

    # Response Phase

    def _stop(s):
        s.angle_vel = 0.
        s.velocity = np.zeros(2)

    def _unglue(s):
        s.mass = 1.

    appear_cue = gr.ModifySprites('cue', _make_opaque)
    stop_targets = gr.ModifySprites(('targets', 'covers'), _stop)
    unglue_agent = gr.ModifySprites(('agent', 'cue'), _unglue)
    make_targets_discoverable = gr.ModifyOnContact(
        layers_0='agent', layers_1='covers', modifier_1=_make_transparent)

    response_phase = gr.Phase(
        one_time_rules=[appear_cue, stop_targets, unglue_agent],
        continual_rules=make_targets_discoverable,
        name='response',
    )

    phase_sequence = gr.PhaseSequence(
        screen_phase,
        visible_phase,
        motion_phase,
        response_phase,
        meta_state_phase_name_key='phase',
    )

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': (phase_sequence, ),
        'meta_state_initializer': lambda: {
            'phase': ''
        }
    }
    return config
Example #7
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle', scale=0.1, c0=0.33, c1=1., c2=0.66,
    )

    # Predators
    shape_0 = 1.8 * np.array(
        [[-0.3, -0.3], [0.1, -0.7], [0.4, 0.6], [-0.1, 0.25]])
    shape_1 = 1.5 * np.array(
        [[-0.5, -0.3], [-0.1, -0.7], [0.7, 0.1], [0., -0.1], [-0.3, 0.25]])
    predator_factors = distribs.Product(
        [distribs.Continuous('x', 0.2, 0.8),
         distribs.Continuous('y', 0.2, 0.8),
         distribs.Discrete(
             'shape', [shape_0, shape_1, 'star_5', 'triangle', 'spoke_5']),
         distribs.Continuous('angle', 0., 2 * np.pi),
         distribs.Continuous('aspect_ratio', 0.75, 1.25),
         distribs.Continuous('scale', 0.1, 0.15),
         distribs.Continuous('x_vel', -0.03, 0.03),
         distribs.Continuous('y_vel', -0.03, 0.03),
         distribs.Continuous('angle_vel', -0.05, 0.05)],
        c0=0., c1=1., c2=0.8,
    )

    # Walls
    walls = shapes.border_walls(visible_thickness=0.05, c0=0., c1=0., c2=0.5)

    # Create callable initializer returning entire state
    agent_generator = sprite_generators.generate_sprites(
        agent_factors, num_sprites=1)
    predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=5)

    def state_initializer():
        predators = predator_generator(
            disjoint=True, without_overlapping=walls)
        agent = agent_generator(without_overlapping=walls + predators)
        state = collections.OrderedDict([
            ('walls', walls),
            ('predators', predators),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    asymmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=False, update_angle_vel=True)
    symmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=True, update_angle_vel=True)
    agent_wall_collision = physics_lib.Collision(
        elasticity=0., symmetric=False, update_angle_vel=False)
    
    forces = (
        (agent_friction_force, 'agent'),
        (symmetric_collision, 'predators', 'predators'),
        (asymmetric_collision, 'predators', 'walls'),
        (agent_wall_collision, 'agent', 'walls'),
    )
    
    physics = physics_lib.Physics(*forces, updates_per_env_step=10)

    ############################################################################
    # Task
    ############################################################################

    predator_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='predators')
    stay_alive_task = tasks.StayAlive(
        reward_period=20,
        reward_value=0.2,
    )
    task = tasks.CompositeTask(
        predator_task, stay_alive_task, timeout_steps=200)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01, action_layers='agent')

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64), anti_aliasing=1, color_to_rgb='hsv_to_rgb')

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
    }
    return config
Example #8
def _get_config(translucent_occluder):
    """Get environment config."""

    ############################################################################
    # Physics
    ############################################################################

    elastic_collision = physics_lib.Collision(elasticity=1.,
                                              symmetric=False,
                                              update_angle_vel=False)
    physics = physics_lib.Physics(
        (elastic_collision, 'targets', 'walls'),
        updates_per_env_step=10,
    )

    def _predict_contact(state):
        """Predict whether targets will contact."""
        while True:
            if state['targets'][0].overlaps_sprite(state['targets'][1]):
                return True
            if all(s.y > 1.1 and s.y_vel > 0 for s in state['targets']):
                # Both targets above screen and moving up
                break
            physics.step(state)
        return False

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Targets
    target_y_speed = 0.02
    target_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.15, 0.85),
            distribs.Continuous('x_vel', -target_y_speed, target_y_speed)
        ],
        y_vel=-target_y_speed,
        scale=0.16,
        shape='circle',
        opacity=192,
        c0=255,
        c1=0,
        c2=0,
    )

    # Occluder
    occluder = sprite.Sprite(x=0.5,
                             y=0.2,
                             shape='square',
                             scale=1.,
                             c0=192,
                             c1=192,
                             c2=128,
                             opacity=128 if translucent_occluder else 255)

    # Walls
    bottom_wall = [[-1, 0.1], [2, 0.1], [2, -1], [-1, -1]]
    left_wall = [[0.05, -1], [0.05, 4], [-1, 4], [-1, -1]]
    right_wall = [[0.95, -1], [0.95, 4], [2, 4], [2, -1]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=128, c1=128, c2=128)
        for v in [bottom_wall, left_wall, right_wall]
    ]

    # Make response boxes and tokens
    response_box_factors = dict(y=0.05,
                                scale=0.12,
                                shape='square',
                                aspect_ratio=0.5,
                                c0=0,
                                c1=0,
                                c2=0)
    response_boxes = [
        sprite.Sprite(x=0.4, **response_box_factors),
        sprite.Sprite(x=0.6, **response_box_factors),
    ]
    response_token_factors = dict(y=0.05,
                                  scale=0.03,
                                  shape='circle',
                                  c0=255,
                                  c1=0,
                                  c2=0,
                                  opacity=192)
    response_tokens = [
        sprite.Sprite(x=x, **response_token_factors)
        for x in [0.37, 0.43, 0.59, 0.61]
    ]

    def state_initializer():
        """Callable returning state ordereddict each episode reset."""
        agent = sprite.Sprite(x=0.5,
                              y=0.05,
                              scale=0.03,
                              shape='spoke_4',
                              c0=255,
                              c1=255,
                              c2=255)
        target_0 = sprite.Sprite(y=1.4, **target_factors.sample())
        target_1 = sprite.Sprite(y=np.random.uniform(1.7, 2.4),
                                 **target_factors.sample())
        screen = sprite.Sprite(x=0.5,
                               y=0.5,
                               shape='square',
                               c0=128,
                               c1=128,
                               c2=128)

        state = collections.OrderedDict([
            ('targets', [target_0, target_1]),
            ('occluders', [occluder]),
            ('walls', walls),
            ('response_boxes', response_boxes),
            ('response_tokens', response_tokens),
            ('agent', [agent]),
            ('screen', [screen]),
        ])

        # Predict whether targets will contact, putting this information in
        # agent metadata
        orig_pos = [np.copy(s.position) for s in state['targets']]
        orig_vel = [np.copy(s.velocity) for s in state['targets']]
        agent.metadata = {'will_contact': _predict_contact(state)}
        for s, pos, vel in zip(state['targets'], orig_pos, orig_vel):
            s.position = pos
            s.velocity = vel

        return state

    ############################################################################
    # Task
    ############################################################################

    def _reward_fn(state):
        agent = state['agent'][0]
        if agent.overlaps_sprite(state['response_boxes'][0]):
            # Collision response
            return -1 if agent.metadata['will_contact'] else 1
        elif agent.overlaps_sprite(state['response_boxes'][1]):
            # No collision response
            return 1 if agent.metadata['will_contact'] else -1
        else:
            return 0

    conditional_task = tasks.Reset(
        condition=lambda state: _reward_fn(state) != 0,
        reward_fn=_reward_fn,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(conditional_task, timeout_steps=1000)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Grid(
        scaling_factor=0.015,
        action_layers='agent',
        control_velocity=True,
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(image_size=(64, 64), anti_aliasing=1)

    ############################################################################
    # Game rules
    ############################################################################

    screen_vanish = game_rules.VanishByFilter('screen')
    screen_vanish = game_rules.TimedRule(step_interval=(15, 16),
                                         rules=(screen_vanish, ))

    rules = (screen_vanish, )

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': rules,
    }
    return config
Example #9
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent = sprite.Sprite(x=0.5,
                          y=0.5,
                          shape='circle',
                          scale=0.04,
                          c0=0.33,
                          c1=1.,
                          c2=0.66)
    annulus_vertices = shapes.annulus_vertices(inner_radius=0.08,
                                               outer_radius=0.3)
    agent_annulus = sprite.Sprite(x=0.5,
                                  y=0.5,
                                  shape=annulus_vertices,
                                  scale=1.,
                                  c0=0.6,
                                  c1=1.,
                                  c2=1.)

    # Predator generator
    max_predator_vel = 0.02
    predator_pos = _get_boundary_pos_distribution(_FIELD_BUFFER)
    predator_vel = _get_vel_distribution(0.5 * max_predator_vel,
                                         max_predator_vel)
    predator_factors = distribs.Product(
        [predator_pos, predator_vel,
         distribs.Continuous('scale', 0.07, 0.13)],
        shape='circle',
        c0=0.,
        c1=1.,
        c2=0.8,
    )

    # Prey generator
    max_prey_vel = 0.01
    prey_pos = _get_boundary_pos_distribution(_FIELD_BUFFER)
    prey_vel = _get_vel_distribution(0.5 * max_prey_vel, max_prey_vel)
    prey_factors = distribs.Product(
        [prey_pos, prey_vel,
         distribs.Continuous('scale', 0.07, 0.13)],
        shape='circle',
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Grid
    grid = shapes.grid_lines(grid_x=_GRID_SIZE,
                             grid_y=_GRID_SIZE,
                             buffer_border=1.,
                             c0=0.,
                             c1=0.,
                             c2=0.5)

    def state_initializer():
        state = collections.OrderedDict([
            ('grid', grid),
            ('prey', []),
            ('agent', [agent]),
            ('predators', []),
            ('agent_annulus', [agent_annulus]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    physics = physics_lib.Physics(
        (agent_friction_force, ['agent', 'agent_annulus']),
        updates_per_env_step=10,
    )

    ############################################################################
    # Task
    ############################################################################

    def _predator_reward_fn(_, predator_sprite):
        return -2. * predator_sprite.scale

    predator_task = tasks.ContactReward(
        reward_fn=_predator_reward_fn,
        layers_0='agent',
        layers_1='predators',
        reset_steps_after_contact=0,
    )

    def _prey_reward_fn(_, prey_sprite):
        return prey_sprite.scale

    prey_task = tasks.ContactReward(
        reward_fn=_prey_reward_fn,
        layers_0='agent',
        layers_1='prey',
    )
    task = tasks.CompositeTask(predator_task, prey_task)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.003,
        action_layers=('agent', 'agent_annulus'),
        constrained_lr=False,
    )

    ############################################################################
    # Observer
    ############################################################################

    _polygon_modifier = observers.polygon_modifiers.FirstPersonAgent(
        agent_layer='agent')
    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
        polygon_modifier=_polygon_modifier,
    )

    ############################################################################
    # Game rules
    ############################################################################

    # Make predators appear randomly
    predator_appear_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=1)
    predator_appear = game_rules.ConditionalRule(
        condition=lambda state: np.random.binomial(1, p=0.5),
        rules=game_rules.CreateSprites('predators', predator_appear_generator),
    )

    # Make prey appear randomly
    prey_appear_generator = sprite_generators.generate_sprites(prey_factors,
                                                               num_sprites=1)
    prey_appear = game_rules.ConditionalRule(
        condition=lambda state: np.random.binomial(1, p=0.2),
        rules=game_rules.CreateSprites('prey', prey_appear_generator),
    )

    # Make predators and prey vanish when they are distant enough and moving
    # away.
    vanish_range = [-1. * _VANISH_DIST, 1. + _VANISH_DIST]

    def _should_vanish(s):
        pos_too_small = (s.position < vanish_range[0]) * (s.velocity < 0.)
        pos_too_large = (s.position > vanish_range[1]) * (s.velocity > 0.)
        return any(pos_too_small) or any(pos_too_large)

    predator_vanish = game_rules.VanishByFilter('predators', _should_vanish)
    prey_vanish = game_rules.VanishByFilter('prey', _should_vanish)

    # Keep agent near center
    keep_near_center = game_rules.KeepNearCenter(
        agent_layer='agent',
        layers_to_center=['agent_annulus', 'predators', 'prey'],
        grid_x=_GRID_SIZE,
    )

    # Make prey vanish when contacted by agent
    prey_caught = game_rules.VanishOnContact(vanishing_layer='prey',
                                             contacting_layer='agent')

    rules = (predator_appear, prey_appear, prey_vanish, predator_vanish,
             keep_near_center, prey_caught)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': rules,
    }
    return config
Example #10
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agents
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0.35, 0.65)],
        shape='circle', scale=0.1, c1=1., c2=0.7,
    )
    agent_0_factors = distribs.Product([agent_factors], c0=0.2)
    agent_1_factors = distribs.Product([agent_factors], c0=0.1)
    agent_2_factors = distribs.Product([agent_factors], c0=0.)

    # Walls
    walls = shapes.border_walls(visible_thickness=0.05, c0=0., c1=0., c2=0.5)

    # Fountains
    fountain_factors = {
        'shape': 'circle', 'scale': 0.05, 'c0': 0.6, 'c1': 1., 'c2': _BAD_VALUE}
    fountains_across = np.linspace(0.1, 0.9, 6)
    fountains_up = np.linspace(0.75, 0.9, 2)
    fountains_grid_x, fountains_grid_y = np.meshgrid(fountains_across,
                                                     fountains_up)
    fountains_positions = zip(np.ravel(fountains_grid_x),
                              np.ravel(fountains_grid_y))
    fountain_sprites = [
        sprite.Sprite(x=x, y=y, **fountain_factors)
        for (x, y) in fountains_positions
    ]

    # Fruits
    fruit_factors = {
        'shape': 'circle', 'scale': 0.05, 'c0': 0.3, 'c1': 1., 'c2': _BAD_VALUE}
    fruits_across = np.linspace(0.1, 0.9, 6)
    fruits_up = np.linspace(0.1, 0.25, 2)
    fruits_grid_x, fruits_grid_y = np.meshgrid(fruits_across, fruits_up)
    fruits_positions = zip(np.ravel(fruits_grid_x), np.ravel(fruits_grid_y))
    fruit_sprites = [
        sprite.Sprite(x=x, y=y, **fruit_factors)
        for (x, y) in fruits_positions
    ]

    # Create callable initializer returning entire state
    agent_0_generator = sprite_generators.generate_sprites(
        agent_0_factors, num_sprites=1)
    agent_1_generator = sprite_generators.generate_sprites(
        agent_1_factors, num_sprites=1)
    agent_2_generator = sprite_generators.generate_sprites(
        agent_2_factors, num_sprites=1)

    def state_initializer():
        agent_0 = agent_0_generator(without_overlapping=walls)
        agent_1 = agent_1_generator(without_overlapping=walls)
        agent_2 = agent_2_generator(without_overlapping=walls)
        state = collections.OrderedDict([
            ('walls', walls),
            ('fountains', fountain_sprites),
            ('fruits', fruit_sprites),
            ('agent_2', agent_2),
            ('agent_1', agent_1),
            ('agent_0', agent_0),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    asymmetric_collision = physics_lib.Collision(
        elasticity=0.25, symmetric=False)
    
    forces = (
        (agent_friction_force, ['agent_0', 'agent_1', 'agent_2']),
        (asymmetric_collision, ['agent_0', 'agent_1', 'agent_2'], 'walls'),
    )
    
    physics = physics_lib.Physics(*forces, updates_per_env_step=5)

    ############################################################################
    # Task
    ############################################################################

    task = tasks.ContactReward(
        1, layers_0='agent_0', layers_1='fruits',
        condition=lambda s_0, s_1: s_1.c2 > _VALUE_THRESHOLD,
    )

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Composite(
        agent_0=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_0'),
        agent_1=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_1'),
        agent_2=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_2'),
    )

    ############################################################################
    # Observer
    ############################################################################

    image_observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )
    raw_state_observer = observers.RawState()  # needed by hand-crafted agents

    ############################################################################
    # Game rules
    ############################################################################

    def _spoil_fruit(sprite):
        sprite.c2 = _BAD_VALUE
    def _ripen_fruit(sprite):
        sprite.c2 = _GOOD_VALUE
    def _poison_fountain(sprite):
        sprite.c2 = _BAD_VALUE
    def _clean_fountain(sprite):
        sprite.c2 = _GOOD_VALUE

    def agents_contacting_layer(state, layer, value):
        n_contact = 0
        for s in state[layer]:
            if s.c2 != value:
                continue
            n_contact += (
                s.overlaps_sprite(state['agent_0'][0]) or 
                s.overlaps_sprite(state['agent_1'][0]) or 
                s.overlaps_sprite(state['agent_2'][0])
            )
        return n_contact
    
    poison_fountains = game_rules.ModifySprites(
        layers='fountains', modifier=_poison_fountain, sample_one=True,
        filter_fn=lambda s: s.c2 > _VALUE_THRESHOLD)
    poison_fountains = game_rules.ConditionalRule(
        condition=lambda s: agents_contacting_layer(s, 'fruits', _GOOD_VALUE),
        rules=poison_fountains,
    )
    ripen_fruits = game_rules.ModifySprites(
        layers='fruits', modifier=_ripen_fruit, sample_one=True,
        filter_fn=lambda s: s.c2 < _VALUE_THRESHOLD)
    ripen_fruits = game_rules.ConditionalRule(
        condition=lambda s: agents_contacting_layer(s, 'fountains', _BAD_VALUE),
        rules=ripen_fruits,
    )

    spoil_fruits = game_rules.ModifyOnContact(
        layers_0='fruits',
        layers_1=('agent_0', 'agent_1', 'agent_2'),
        modifier_0=_spoil_fruit,
        filter_0=lambda s: s.c2 > _VALUE_THRESHOLD)
    clean_fountains = game_rules.ModifyOnContact(
        layers_0='fountains',
        layers_1=('agent_0', 'agent_1', 'agent_2'),
        modifier_0=_clean_fountain,
        filter_0=lambda s: s.c2 < _VALUE_THRESHOLD)
    
    rules = (poison_fountains, spoil_fruits, ripen_fruits, clean_fountains)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': image_observer, 'state': raw_state_observer},
        'game_rules': rules,
    }
    return config
Example #11
def _get_config(num_prey, num_predators):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.)],
        scale=0.08, c0=0, c1=255, c2=0,
    )

    # Predators
    predator_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.),
         distribs.Continuous('x_vel', -0.02, 0.02),
         distribs.Continuous('y_vel', -0.02, 0.02),],
        scale=0.08, shape='circle', opacity=192, c0=255, c1=0, c2=0,
    )

    # Prey
    prey_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.),
         distribs.Continuous('x_vel', -0.02, 0.02),
         distribs.Continuous('y_vel', -0.02, 0.02),],
        scale=0.08, shape='circle', opacity=192, c0=255, c1=255, c2=0,
    )

    # Create callable initializer returning entire state
    predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=num_predators)
    prey_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=num_prey)

    def state_initializer():
        """Callable returning state at every episode reset."""
        agent = sprite.Sprite(**agent_factors.sample())
        predators = predator_generator(without_overlapping=(agent,))
        prey = prey_generator(without_overlapping=(agent,))

        state = collections.OrderedDict([
            ('prey', prey),
            ('predators', predators),
            ('agent', [agent]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    random_force = physics_lib.RandomForce(max_force_magnitude=0.01)
    predator_attraction = physics_lib.DistanceForce(
        physics_lib.linear_force_fn(zero_intercept=-0.001, slope=0.0005))
    prey_avoid = physics_lib.DistanceForce(
        physics_lib.linear_force_fn(zero_intercept=0.001, slope=-0.0005))

    forces = (
        (agent_friction_force, 'agent'),
        (random_force, ['predators', 'prey']),
        (predator_attraction, 'agent', 'predators'),
        (prey_avoid, 'agent', 'prey'),
    )

    constant_speed = physics_lib.ConstantSpeed(
        layer_names=['prey', 'predators'], speed=0.015)

    physics = physics_lib.Physics(
        *forces,
        updates_per_env_step=10,
        corrective_physics=[constant_speed],
    )

    ############################################################################
    # Task
    ############################################################################

    predator_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='predators', reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(
        reset_task, predator_task, prey_task, timeout_steps=300)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.025, action_layers='agent', control_velocity=True)

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        polygon_modifier=polygon_modifiers.TorusGeometry(
            ['agent', 'predators', 'prey']),
    )

    ############################################################################
    # Game rules
    ############################################################################

    prey_vanish = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')
    def _torus_position_wrap(s):
        s.position = np.remainder(s.position, 1)
    torus_position_wrap = game_rules.ModifySprites(
        ('agent', 'predators', 'prey'), _torus_position_wrap)

    rules = (prey_vanish, torus_position_wrap)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
Example #12
def get_config(num_predators):
    """Get config dictionary of kwargs for environment constructor.
    
    Args:
        num_predators: Int. Number of predators.
    """

    ############################################################################
    # Sprite initialization
    ############################################################################

    state_initialization = StateInitialization(
        num_predators=num_predators,
        step_scaling_factor=0.1,
        threshold_trial_len=200,
    )

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    predator_friction_force = physics_lib.Drag(coeff_friction=0.04)
    predator_random_force = physics_lib.RandomForce(max_force_magnitude=0.03)
    predator_attraction = physics_lib.DistanceForce(
        physics_lib.linear_force_fn(zero_intercept=-0.0025, slope=0.0001))
    elastic_asymmetric_collision = physics_lib.Collision(elasticity=1.,
                                                         symmetric=False)
    inelastic_asymmetric_collision = physics_lib.Collision(elasticity=0.,
                                                           symmetric=False)

    forces = (
        (agent_friction_force, 'agent'),
        (predator_friction_force, 'predators'),
        (predator_random_force, 'predators'),
        (predator_attraction, 'agent', 'predators'),
        (elastic_asymmetric_collision, 'predators', 'walls'),
        (inelastic_asymmetric_collision, 'agent', 'walls'),
    )

    physics = physics_lib.Physics(*forces, updates_per_env_step=10)

    ############################################################################
    # Task
    ############################################################################

    task = tasks.ContactReward(-1,
                               layers_0='agent',
                               layers_1='predators',
                               reset_steps_after_contact=0)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(scaling_factor=0.01,
                                          action_layers='agent')

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(image_size=(64, 64),
                                     anti_aliasing=1,
                                     color_to_rgb='hsv_to_rgb')

    ############################################################################
    # Game rules
    ############################################################################

    def _increment_count(meta_state):
        meta_state['count'] += 1

    rules = game_rules.ModifyMetaState(_increment_count)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initialization.state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': (rules, ),
        'meta_state_initializer': state_initialization.meta_state_initializer,
    }
    return config
Example #13
def _get_config(num_obstacles, valid_step_range):
    """Get environment config.
    
    Args:
        num_obstacles: Int. Number of obstacles.
        valid_step_range: 2-iterable of ints. (min_num_steps, max_num_steps).
            All trials must have duration in this step range.
    
    Returns:
        config: Config dictionary to pass to environment constructor.
    """

    ############################################################################
    # Physics
    ############################################################################

    elastic_collision = physics_lib.Collision(elasticity=1.,
                                              symmetric=False,
                                              update_angle_vel=False)
    physics = physics_lib.Physics(
        (elastic_collision, 'ball', 'walls'),
        updates_per_env_step=10,
    )

    def _predict_trial_end(state):
        """Predict whether a trial will end in step range and true response.

        Args:
            state: OrderedDict of sprite layers. Initial state of environment.
        
        Returns:
            valid_trial: Bool. Whether trial will end with number of steps in
                valid_step_range.
            contact_color: Binary. 0 if ball will contact red first, 1 if it
                will contact green first.
        """
        for step in range(valid_step_range[1]):
            red_overlap = state['ball'][0].overlaps_sprite(state['red'][0])
            green_overlap = state['ball'][0].overlaps_sprite(state['green'][0])
            if red_overlap or green_overlap:
                if step < valid_step_range[0]:
                    return False, None
                else:
                    contact_color = 0 if red_overlap else 1
                    return True, contact_color
            physics.step(state)
        return False, None

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Ball generator
    ball_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.15, 0.85),
            distribs.Continuous('y', 0.15, 0.85),
            RadialVelocity(speed=0.03)
        ],
        scale=0.05,
        shape='circle',
        c0=64,
        c1=64,
        c2=255,
    )
    ball_generator = sprite_generators.generate_sprites(
        ball_factors,
        num_sprites=1,
        max_recursion_depth=100,
        fail_gracefully=True)

    # Obstacle generator
    obstacle_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.2, 0.8),
            distribs.Continuous('y', 0.2, 0.8)
        ],
        scale=0.2,
        shape='square',
        c0=128,
        c1=128,
        c2=128,
    )
    obstacle_generator = sprite_generators.generate_sprites(
        obstacle_factors,
        num_sprites=2 + num_obstacles,
        max_recursion_depth=100,
        fail_gracefully=True)

    # Walls
    bottom_wall = [[-1, 0.1], [2, 0.1], [2, -1], [-1, -1]]
    top_wall = [[-1, 0.95], [2, 0.95], [2, 2], [-1, 2]]
    left_wall = [[0.05, -1], [0.05, 4], [-1, 4], [-1, -1]]
    right_wall = [[0.95, -1], [0.95, 4], [2, 4], [2, -1]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=128, c1=128, c2=128)
        for v in [bottom_wall, top_wall, left_wall, right_wall]
    ]

    def state_initializer():
        """Callable returning new state at each episode reset."""
        obstacles = obstacle_generator(disjoint=True)
        ball = ball_generator(without_overlapping=obstacles)
        if len(obstacles) < num_obstacles + 2 or not ball:
            # Sprite generation hit its max recursion depth; resample from scratch.
            return state_initializer()

        red = obstacles[0]
        green = obstacles[1]
        obstacles = obstacles[2:]

        # Set the colors of the red and green boxes
        red.c0 = 255
        red.c1 = 0
        red.c2 = 0
        green.c0 = 0
        green.c1 = 255
        green.c2 = 0

        # Create agent and response tokens at the bottom of the screen
        agent = sprite.Sprite(x=0.5,
                              y=0.06,
                              shape='spoke_4',
                              scale=0.03,
                              c0=255,
                              c1=255,
                              c2=255)
        responses = [
            sprite.Sprite(x=0.6,
                          y=0.06,
                          shape='square',
                          scale=0.03,
                          c0=255,
                          c1=0,
                          c2=0),
            sprite.Sprite(x=0.4,
                          y=0.06,
                          shape='square',
                          scale=0.03,
                          c0=0,
                          c1=255,
                          c2=0),
        ]

        state = collections.OrderedDict([
            ('walls', walls + obstacles),
            ('red', [red]),
            ('green', [green]),
            ('ball', ball),
            ('responses', responses),
            ('agent', [agent]),
        ])

        # Rejection sampling if trial won't finish in valid step range
        original_ball_position = np.copy(ball[0].position)
        original_ball_velocity = np.copy(ball[0].velocity)
        valid_trial, contact_color = _predict_trial_end(state)
        if valid_trial:
            ball[0].position = original_ball_position
            ball[0].velocity = original_ball_velocity
            agent.metadata = {'true_contact_color': contact_color}
        else:
            return state_initializer()

        return state

    ############################################################################
    # Task
    ############################################################################

    def _reward_fn(sprite_agent, sprite_response):
        response_green = sprite_response.c0 < 128
        if sprite_agent.metadata['true_contact_color'] == response_green:
            return 1.
        else:
            return -1.

    contact_reward = tasks.ContactReward(
        reward_fn=_reward_fn,
        layers_0='agent',
        layers_1='responses',
        reset_steps_after_contact=10,
    )
    task = tasks.CompositeTask(contact_reward, timeout_steps=400)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Grid(
        scaling_factor=0.015,
        action_layers='agent',
        control_velocity=True,
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(image_size=(64, 64), anti_aliasing=1)

    ############################################################################
    # Game rules
    ############################################################################

    # Stop ball on contact with red or green box
    def _stop_ball(s):
        s.velocity = np.zeros(2)

    stop_ball = game_rules.ModifyOnContact(layers_0='ball',
                                           layers_1=('red', 'green'),
                                           modifier_0=_stop_ball)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': (stop_ball, ),
    }
    return config
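
A hedged construction sketch for _get_config (argument values are illustrative; the constructor call and TimeStep fields are assumptions, though the docstring above says the config is passed to the environment constructor).

from moog import environment  # assumed package layout for these examples

config = _get_config(num_obstacles=2, valid_step_range=(50, 300))  # illustrative arguments
env = environment.Environment(**config)          # constructor call is an assumption
timestep = env.reset()
frame = timestep.observation['image']            # 64x64x3 frame from PILRenderer (assumed TimeStep interface)
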
Example No. 14
def get_config(_):
    """Get environment config"""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Ball generator
    ball_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.25, 0.75),
            distribs.Continuous('y', 0.5, 0.9),
            distribs.Continuous('x_vel', -0.01, 0.01)
        ],
        scale=0.1,
        shape='circle',
        c0=0,
        c1=0,
        c2=255,
        mass=1.,
    )
    ball_generator = sprite_generators.generate_sprites(ball_factors,
                                                        num_sprites=4)

    # Walls
    bottom_wall = [[-1, 0.1], [2, 0.1], [2, -1], [-1, -1]]
    left_wall = [[0.05, -0.1], [0.05, 1.1], [-1, 1.1], [-1, -0.1]]
    right_wall = [[0.95, -0.1], [0.95, 1.1], [2, 1.1], [2, -0.1]]
    divider = [[0.45, -1], [0.45, 0.3], [0.55, 0.3], [0.55, -1]]
    walls = [
        sprite_lib.Sprite(shape=np.array(v), x=0, y=0, c0=128, c1=128, c2=128)
        for v in [bottom_wall, left_wall, right_wall, divider]
    ]

    def state_initializer():
        """Callable returning new state at each episode reset."""
        state = collections.OrderedDict([
            ('walls', walls),
            ('balls', ball_generator(disjoint=True)),
            ('agent', []),
        ])

        return state

    ############################################################################
    # Physics
    ############################################################################

    # Setting max_recursion_depth > 0 can increase stability
    # Setting update_angle_vel = False is recommended for stability
    collision = physics_lib.Collision(
        elasticity=0.6,
        symmetric=False,
        update_angle_vel=False,
        max_recursion_depth=2,
    )
    physics = physics_lib.Physics(
        (collision, 'balls', ['balls', 'walls']),
        (physics_lib.DownGravity(g=-0.001), 'balls'),
        updates_per_env_step=20,
    )

    ############################################################################
    # Task
    ############################################################################

    task = tasks.CompositeTask(timeout_steps=100)

    ############################################################################
    # Action space
    ############################################################################

    # Need an action space, so let it control an empty agent layer
    action_space = action_spaces.Grid(action_layers='agent')

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(image_size=(64, 64), anti_aliasing=1)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': (),
    }
    return config
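
A hedged usage sketch: the Grid action space here only controls the empty 'agent' layer, so any valid action index leaves the scene untouched; the constructor call and step interface are assumptions.

from moog import environment  # assumed package layout for these examples

config = get_config(None)                        # the argument is unused
env = environment.Environment(**config)          # constructor call is an assumption
env.reset()
for _ in range(100):                             # matches the 100-step timeout above
    env.step(0)                                  # any Grid action index; the controlled layer is empty
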
Example No. 15
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9)
        ],
        shape='circle',
        scale=0.1,
        c0=0.33,
        c1=1.,
        c2=0.7,
    )

    # Predators
    predator_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9)
        ],
        shape='circle',
        scale=0.1,
        c0=0.,
        c1=1.,
        c2=0.8,
    )

    # Prey
    prey_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9)
        ],
        shape='circle',
        scale=0.1,
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Boosters
    booster_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9)
        ],
        shape='triangle',
        scale=0.1,
        c0=0.6,
        c1=1.,
        c2=1.,
    )

    # Portals
    portal_factors = dict(shape='square', scale=0.1, c0=0., c1=0., c2=0.95)
    portal_sprites = [
        sprite.Sprite(x=0.125, y=0.125, **portal_factors),
        sprite.Sprite(x=0.875, y=0.875, **portal_factors),
    ]

    # Walls
    wall_color = dict(c0=0., c1=0., c2=0.5)
    island_wall_shape_0 = np.array([[0.2, 0.2], [0.4, 0.2], [0.4, 0.4],
                                    [0.2, 0.4]])
    island_wall_shapes = [
        island_wall_shape_0,
        island_wall_shape_0 + np.array([[0., 0.4]]),
        island_wall_shape_0 + np.array([[0.4, 0.4]]),
        island_wall_shape_0 + np.array([[0.4, 0.]]),
    ]
    island_walls = [
        sprite.Sprite(shape=shape, x=0., y=0., **wall_color)
        for shape in island_wall_shapes
    ]
    boundary_walls = shapes.border_walls(visible_thickness=0.05, **wall_color)
    walls = boundary_walls + island_walls

    # Callable sprite generators
    agent_generator = sprite_generators.generate_sprites(agent_factors,
                                                         num_sprites=1)
    predator_generator = sprite_generators.generate_sprites(predator_factors,
                                                            num_sprites=1)
    prey_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=lambda: np.random.randint(2, 5))
    booster_generator = sprite_generators.generate_sprites(booster_factors,
                                                           num_sprites=2)

    # Create callable initializer returning entire state
    def state_initializer():
        portals = portal_sprites
        agent = agent_generator(without_overlapping=walls)
        predators = predator_generator(without_overlapping=walls + agent)
        boosters = booster_generator(without_overlapping=walls + agent)
        prey = prey_generator(without_overlapping=walls)
        state = collections.OrderedDict([
            ('walls', walls),
            ('portals', portals),
            ('boosters', boosters),
            ('prey', prey),
            ('predators', predators),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    predator_friction_force = physics_lib.Drag(coeff_friction=0.05)
    predator_random_force = physics_lib.RandomForce(max_force_magnitude=0.02)
    prey_friction_force = physics_lib.Drag(coeff_friction=0.02)
    prey_random_force = physics_lib.RandomForce(max_force_magnitude=0.02)
    predator_attraction = physics_lib.DistanceForce(
        force_fn=physics_lib.linear_force_fn(zero_intercept=-0.002,
                                             slope=0.001))
    asymmetric_collision = physics_lib.Collision(elasticity=0.25,
                                                 symmetric=False,
                                                 update_angle_vel=False)

    forces = (
        (agent_friction_force, 'agent'),
        (predator_friction_force, 'predators'),
        (predator_random_force, 'predators'),
        (prey_friction_force, 'prey'),
        (prey_random_force, 'prey'),
        (predator_attraction, 'agent', 'predators'),
        (asymmetric_collision, ['agent', 'predators', 'prey'], 'walls'),
    )

    physics = physics_lib.Physics(*forces, updates_per_env_step=5)

    ############################################################################
    # Task
    ############################################################################

    predator_task = tasks.ContactReward(-5,
                                        layers_0='agent',
                                        layers_1='predators',
                                        reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(predator_task,
                               prey_task,
                               reset_task,
                               timeout_steps=400)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01,
        action_layers='agent',
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )

    ############################################################################
    # Game rules
    ############################################################################

    disappear_rule = game_rules.VanishOnContact(vanishing_layer='prey',
                                                contacting_layer='agent')
    portal_rule = game_rules.Portal(teleporting_layer='agent',
                                    portal_layer='portals')
    rules = (disappear_rule, portal_rule, Booster())

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': rules,
    }
    return config
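
A hedged rollout sketch accumulating reward over one timeout period (constructor call, TimeStep fields, and the 2-D Joystick action format are assumptions).

import numpy as np
from moog import environment  # assumed package layout for these examples

config = get_config(None)                                    # the argument is unused
env = environment.Environment(**config)                      # constructor call is an assumption
timestep = env.reset()
total_reward = 0.
for _ in range(400):                                         # matches the 400-step timeout above
    timestep = env.step(np.random.uniform(-1, 1, size=2))    # assumed Joystick action format
    total_reward += timestep.reward or 0.                    # reward may be None on the first step
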
Example No. 16
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Occluder
    occluder_shape = np.array([[-0.1, 0.2], [1.1, 0.2], [1.1, 0.6],
                               [-0.1, 0.6]])
    occluder = sprite.Sprite(x=0.,
                             y=0.,
                             shape=occluder_shape,
                             scale=1.,
                             c0=0.6,
                             c1=1.,
                             c2=1.)

    # Prey
    prey_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.8),
            distribs.Continuous('x_vel', -0.01, 0.01)
        ],
        y=1.2,
        y_vel=-0.007,
        shape='circle',
        scale=0.07,
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Walls
    left_wall = [[0.05, -0.2], [0.05, 2], [-1, 2], [-1, -0.2]]
    right_wall = [[0.95, -0.2], [0.95, 2], [2, 2], [2, -0.2]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=0., c1=0., c2=0.5)
        for v in [left_wall, right_wall]
    ]

    def state_initializer():
        agent = sprite.Sprite(x=0.5,
                              y=0.1,
                              shape='square',
                              aspect_ratio=0.2,
                              scale=0.1,
                              c0=0.33,
                              c1=1.,
                              c2=0.66)
        state = collections.OrderedDict([
            ('walls', walls),
            ('prey', [sprite.Sprite(**prey_factors.sample())]),
            ('agent', [agent]),
            ('occluder', [occluder]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    asymmetric_collision = physics_lib.Collision(elasticity=1.,
                                                 symmetric=False,
                                                 update_angle_vel=False)
    inelastic_collision = physics_lib.Collision(elasticity=0.,
                                                symmetric=False,
                                                update_angle_vel=False)
    physics = physics_lib.Physics(
        (agent_friction_force, 'agent'),
        (inelastic_collision, 'agent', 'walls'),
        (asymmetric_collision, 'prey', 'walls'),
        updates_per_env_step=10,
    )

    ############################################################################
    # Task
    ############################################################################

    contact_task = tasks.ContactReward(1., layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: all([s.y < 0. for s in state['prey']]),
        steps_after_condition=15,
    )
    task = tasks.CompositeTask(contact_task, reset_task)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(scaling_factor=0.002,
                                          action_layers='agent',
                                          constrained_lr=True)

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(image_size=(64, 64),
                                     anti_aliasing=1,
                                     color_to_rgb='hsv_to_rgb')

    ############################################################################
    # Game rules
    ############################################################################

    prey_vanish = game_rules.VanishOnContact(
        vanishing_layer='prey',
        contacting_layer='agent',
    )
    rules = (prey_vanish, )

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': rules,
    }
    return config
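
A hedged sketch of left/right-only control for the config above: with constrained_lr=True the vertical joystick component is presumably ignored, so only the horizontal component is sampled (constructor call and action format are assumptions).

import numpy as np
from moog import environment  # assumed package layout for these examples

config = get_config(None)                        # the argument is unused
env = environment.Environment(**config)          # constructor call is an assumption
env.reset()
for _ in range(50):
    lr = np.random.uniform(-1, 1)
    env.step(np.array([lr, 0.]))                 # vertical component assumed ignored under constrained_lr
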
Example No. 17
def get_config(num_targets):
    """Get environment config.
    
    Args:
        num_targets: Int. Number of targets.
    """

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Target circles
    target_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9),
            RadialVelocity(speed=0.01),
        ],
        scale=0.1,
        shape='circle',
        c0=0.,
        c1=0.,
        c2=0.9,
    )

    # Target bars
    bar_factors = dict(scale=0.1,
                       shape='square',
                       aspect_ratio=0.3,
                       c0=0.,
                       c1=0.,
                       c2=0.2)

    # Walls
    bottom_wall = [[-1, 0], [2, 0], [2, -1], [-1, -1]]
    top_wall = [[-1, 1], [2, 1], [2, 2], [-1, 2]]
    left_wall = [[0, -1], [0, 4], [-1, 4], [-1, -1]]
    right_wall = [[1, -1], [1, 4], [2, 4], [2, -1]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=0., c1=0., c2=0.5)
        for v in [bottom_wall, top_wall, left_wall, right_wall]
    ]

    # Occluder
    occluder_factors = dict(x=0.5, y=0.5, c0=0.6, c1=0.25, c2=0.5, opacity=0)

    # Cross shape for agent and fixation cross
    cross_shape = 0.1 * np.array([[-5, 1], [-1, 1], [-1, 5], [1, 5], [1, 1],
                                  [5, 1], [5, -1], [1, -1], [1, -5], [-1, -5],
                                  [-1, -1], [-5, -1]])

    def state_initializer():

        fixation = sprite.Sprite(x=0.5,
                                 y=0.5,
                                 shape=cross_shape,
                                 scale=0.1,
                                 c0=0.,
                                 c1=0.,
                                 c2=0.)
        screen = sprite.Sprite(x=0.5,
                               y=0.5,
                               shape='square',
                               scale=2.,
                               c0=0.,
                               c1=0.,
                               c2=1.)

        agent = sprite.Sprite(x=0.5,
                              y=0.5,
                              scale=0.04,
                              shape=cross_shape,
                              c0=0.33,
                              c1=1.,
                              c2=1.)
        occluder_shape = shapes.annulus_vertices(0.13, 2.)
        occluder = sprite.Sprite(shape=occluder_shape, **occluder_factors)

        targets = [
            sprite.Sprite(**target_factors.sample())
            for _ in range(num_targets)
        ]

        bar_angles = 0.5 * np.pi * np.random.binomial(1, 0.5, (num_targets))
        bars = [
            sprite.Sprite(x=s.x,
                          y=s.y,
                          x_vel=s.x_vel,
                          y_vel=s.y_vel,
                          angle=angle,
                          **bar_factors)
            for s, angle in zip(targets, bar_angles)
        ]

        state = collections.OrderedDict([
            ('walls', walls),
            ('targets', targets),
            ('bars', bars),
            ('occluder', [occluder]),
            ('screen', [screen]),
            ('fixation', [fixation]),
            ('agent', [agent]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    elastic_collision = physics_lib.Collision(elasticity=1.,
                                              symmetric=False,
                                              update_angle_vel=False)
    tether = physics_lib.TetherZippedLayers(layer_names=('targets', 'bars'),
                                            update_angle_vel=False)
    physics = physics_lib.Physics(
        (elastic_collision, 'targets', 'walls'),
        updates_per_env_step=10,
        corrective_physics=[tether],
    )

    ############################################################################
    # Task
    ############################################################################

    def _reward_condition(_, meta_state):
        return meta_state['phase'] == 'reward'

    task = tasks.Reset(
        condition=_reward_condition,
        reward_fn=lambda _: 1,
        steps_after_condition=10,
    )

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.SetPosition(action_layers=('agent',
                                                            'occluder'))

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb=observers.color_maps.hsv_to_rgb,
    )

    ############################################################################
    # Game rules
    ############################################################################

    # Fixation phase

    fixation_rule = gr.Fixation('agent', 'fixation', _FIXATION_THRESHOLD,
                                'fixation_duration')

    def _should_end_fixation(_, meta_state):
        return meta_state['fixation_duration'] >= 15

    fixation_phase = gr.Phase(
        continual_rules=fixation_rule,
        end_condition=_should_end_fixation,
        name='fixation',
    )

    # Visible Phase

    vanish_fixation = gr.VanishByFilter('fixation', lambda _: True)
    vanish_screen = gr.VanishByFilter('screen', lambda _: True)

    visible_phase = gr.Phase(
        one_time_rules=[vanish_fixation, vanish_screen],
        duration=5,
        name='visible',
    )

    # Tracking Phase

    def _make_opaque(s):
        s.opacity = 255

    appear_occluder = gr.ModifySprites('occluder', _make_opaque)

    tracking_phase = gr.Phase(
        one_time_rules=appear_occluder,
        duration=lambda: np.random.randint(40, 80),
        name='tracking',
    )

    # Change Phase

    fixation_response_rule = gr.Fixation('agent', 'targets',
                                         _FIXATION_THRESHOLD,
                                         'response_duration')

    def _should_end_change(_, meta_state):
        return meta_state['response_duration'] >= 30

    change_phase = gr.Phase(
        one_time_rules=ChangeTargetFeature(),
        continual_rules=fixation_response_rule,
        name='change',
        end_condition=_should_end_change,
    )

    # Reward Phase

    def _make_transparent(s):
        s.opacity = 0

    disappear_occluder = gr.ModifySprites('occluder', _make_transparent)

    def _glue(s):
        s.velocity = np.zeros(2)

    glue_targets = gr.ModifySprites(('targets', 'bars'), _glue)

    reward_phase = gr.Phase(
        one_time_rules=(disappear_occluder, glue_targets),
        name='reward',
    )

    phase_sequence = gr.PhaseSequence(
        fixation_phase,
        visible_phase,
        tracking_phase,
        change_phase,
        reward_phase,
        meta_state_phase_name_key='phase',
    )

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer,
            'state': observers.RawState()
        },
        'game_rules': (phase_sequence, ),
        'meta_state_initializer': lambda: {
            'phase': ''
        },
    }
    return config
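
A hedged sketch that builds this environment and reads both observers after reset; the action shape SetPosition expects over two layers isn't shown in the example, so no step is taken here (constructor call and TimeStep fields are assumptions, and num_targets=4 is illustrative).

from moog import environment  # assumed package layout for these examples

config = get_config(num_targets=4)               # illustrative value
env = environment.Environment(**config)          # constructor call is an assumption
timestep = env.reset()
frame = timestep.observation['image']            # rendered 64x64 frame
raw_state = timestep.observation['state']        # sprite layers from observers.RawState()
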