Example #1
0
    def __init__(self,
                 observation_space,
                 action_space,
                 eval_mode=False,
                 ignore_response=True,
                 discretization_bounds=(0.0, 10.0),
                 number_bins=100,
                 exploration_policy='epsilon_greedy',
                 exploration_temperature=0.99,
                 learning_rate=0.1,
                 gamma=0.99,
                 **kwargs):
        """TabularQAgent init.

    Args:
      observation_space: a gym.spaces object specifying the format of
        observations.
      action_space: a gym.spaces object that specifies the format of actions.
      eval_mode: Boolean indicating whether the agent is in training or eval
        mode.
      ignore_response: Boolean indicating whether the agent should ignore the
        response part of the observation.
      discretization_bounds: pair of real numbers indicating the min and max
        value for continuous attributes discretization. Values below the min
        will all be grouped in the first bin, while values above the max will
        all be grouped in the last bin. See the documentation of numpy.digitize
        for further details.
      number_bins: positive integer number of bins used to discretize continuous
        attributes.
      exploration_policy: either one of ['epsilon_greedy', 'min_count'] or a
        custom function. TODO(mmladenov): formalize requirements of this
          function.
      exploration_temperature: a real number passed as parameter to the
        exploration policy.
      learning_rate: a real number between 0 and 1 indicating how much to update
        Q-values, i.e. Q_t+1(s,a) = (1 - learning_rate) * Q_t(s, a)
                                     + learning_rate * (R(s,a) + ...).
      gamma: real value between 0 and 1 indicating the discount factor of the
        MDP.
      **kwargs: additional arguments like eval_mode.
    """
        self._kwargs = kwargs
        super(TabularQAgent, self).__init__(action_space)
        # hard params
        self._gamma = gamma
        self._eval_mode = eval_mode
        self._previous_slate = None
        self._learning_rate = learning_rate
        # storage
        self._q_value_table = {}
        self._state_action_counts = {}
        self._previous_state_action_index = None
        # discretization and spaces
        self._discretization_bins = np.linspace(discretization_bounds[0],
                                                discretization_bounds[1],
                                                num=number_bins)
        single_doc_space = list(
            observation_space.spaces['doc'].spaces.values())[0]
        slate_tuple = tuple([single_doc_space] * self._slate_size)
        action_space = spaces.Tuple(slate_tuple)
        self._ignore_response = ignore_response
        state_action_space = {
            'user': observation_space.spaces['user'],
            'action': action_space
        }
        if not self._ignore_response:
            state_action_space['response'] = observation_space.spaces[
                'response']
        self._state_action_space = spaces.Dict(state_action_space)
        self._observation_featurizer = agent_utils.GymSpaceWalker(
            self._state_action_space, self._discretize_gym_leaf)
        # exploration
        self._exploration_policy = exploration_policy
        self._exploration_temperature = exploration_temperature
        self._base_exploration_temperature = self._exploration_temperature
        self._exploration_functions = {
            'epsilon_greedy':
            lambda observation: agent_utils.epsilon_greedy_exploration(  # pylint: disable=g-long-lambda
                self._enumerate_state_action_indices(observation),
                self._q_value_table, self._exploration_temperature),
            'min_count':
            lambda observation: agent_utils.min_count_exploration(  # pylint: disable=g-long-lambda
                self._enumerate_state_action_indices(observation),
                self._state_action_counts)
        }
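
The learning_rate and gamma semantics in the docstring above are those of the standard tabular Q-learning update; a minimal standalone sketch, assuming the elided "..." term is the usual discounted bootstrap (the helper below is hypothetical and not part of TabularQAgent):

def q_update(q_table, sa_index, reward, max_next_q, learning_rate, gamma):
    # Q_{t+1}(s, a) = (1 - lr) * Q_t(s, a) + lr * (R(s, a) + gamma * max_a' Q_t(s', a'))
    old_q = q_table.get(sa_index, 0.0)
    q_table[sa_index] = (1 - learning_rate) * old_q + learning_rate * (
        reward + gamma * max_next_q)
    return q_table[sa_index]

q = {}
q_update(q, ('s0', 'a0'), reward=1.0, max_next_q=0.0, learning_rate=0.1, gamma=0.99)
assert abs(q[('s0', 'a0')] - 0.1) < 1e-9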
Example #2
0
    def __init__(self, config, home_team, away_team, opp_actor=None):
        self.__version__ = "0.0.3"
        self.config = config
        self.config.competition_mode = False
        self.config.fast_mode = True
        self.game = None
        self.team_id = None
        self.ruleset = load_rule_set(config.ruleset, all_rules=False)
        self.home_team = home_team
        self.away_team = away_team
        self.actor = Agent("Gym Learner", human=True)
        self.opp_actor = opp_actor if opp_actor is not None else RandomBot(
            "Random")
        self._seed = None
        self.seed()
        self.root = None
        self.cv = None
        self.last_obs = None
        self.last_report_idx = 0
        self.last_ball_team = None
        self.last_ball_x = None
        self.own_team = None
        self.opp_team = None

        self.layers = [
            OccupiedLayer(),
            OwnPlayerLayer(),
            OppPlayerLayer(),
            OwnTackleZoneLayer(),
            OppTackleZoneLayer(),
            UpLayer(),
            StunnedLayer(),
            UsedLayer(),
            RollProbabilityLayer(),
            BlockDiceLayer(),
            ActivePlayerLayer(),
            TargetPlayerLayer(),
            MALayer(),
            STLayer(),
            AGLayer(),
            AVLayer(),
            MovemenLeftLayer(),
            BallLayer(),
            OwnHalfLayer(),
            OwnTouchdownLayer(),
            OppTouchdownLayer(),
            SkillLayer(Skill.BLOCK),
            SkillLayer(Skill.DODGE),
            SkillLayer(Skill.SURE_HANDS),
            SkillLayer(Skill.CATCH),
            SkillLayer(Skill.PASS)
        ]

        for action_type in FFAIEnv.positional_action_types:
            self.layers.append(AvailablePositionLayer(action_type))

        arena = load_arena(self.config.arena)

        self.observation_space = spaces.Dict({
            'board':
            spaces.Box(low=0,
                       high=1,
                       shape=(len(self.layers), arena.height, arena.width)),
            'state':
            spaces.Box(low=0, high=1, shape=(50, )),
            'procedures':
            spaces.Box(low=0, high=1, shape=(len(FFAIEnv.procedures), )),
            'available-action-types':
            spaces.Box(low=0, high=1, shape=(len(FFAIEnv.actions), ))
        })

        self.action_space = spaces.Dict({
            'action-type':
            spaces.Discrete(len(FFAIEnv.actions)),
            'x':
            spaces.Discrete(arena.width),
            'y':
            spaces.Discrete(arena.height)
        })
Example #3
0
    def __init__(self,
                 env,
                 pixels_only=True,
                 render_kwargs=None,
                 normalize=False,
                 observation_key='pixels',
                 camera_ids=(-1, )):
        """Initializes a new pixel Wrapper.

        Args:
            env: The environment to wrap.
            pixels_only: If `True` (default), the original observation returned
                by the wrapped environment will be discarded, and a dictionary
                observation will only include pixels. If `False`, the
                observation dictionary will contain both the original
                observations and the pixel observations.
            render_kwargs: Optional `dict` containing keyword arguments passed
                to the `self.render` method.
            observation_key: Optional custom string specifying the pixel
                observation's key in the `OrderedDict` of observations.
                Defaults to 'pixels'.

        Raises:
            ValueError: If `env`'s observation spec is not compatible with the
                wrapper. Supported formats are a single array, or a dict of
                arrays.
            ValueError: If `env`'s observation already contains the specified
                `observation_key`.
        """
        super(PixelObservationWrapper, self).__init__(env)
        if render_kwargs is None:
            render_kwargs = {}

        render_mode = render_kwargs.pop('mode', 'rgb_array')
        assert render_mode == 'rgb_array', render_mode
        render_kwargs['mode'] = 'rgb_array'

        # Specify number of cameras and their ids to render with
        # The observation will become a depthwise concatenation of all the
        # images gathered from these specified cameras.
        self._camera_ids = camera_ids
        self._render_kwargs_per_camera = [{
            'mode': 'rgb_array',
            'width': render_kwargs['width'],
            'height': render_kwargs['height'],
            'camera_id': camera_id,
        } for camera_id in self._camera_ids]

        wrapped_observation_space = env.observation_space

        self._env = env
        self._pixels_only = pixels_only
        self._render_kwargs = render_kwargs
        self._observation_key = observation_key
        if 'box_warp' in render_kwargs.keys():
            self._box_warp = render_kwargs.pop('box_warp')
        else:
            self._box_warp = False

        if isinstance(wrapped_observation_space, spaces.Box):
            self._observation_is_dict = False
            invalid_keys = set([STATE_KEY])
        elif isinstance(wrapped_observation_space,
                        (spaces.Dict, collections.abc.MutableMapping)):
            self._observation_is_dict = True
            invalid_keys = set(wrapped_observation_space.spaces.keys())
        else:
            raise ValueError("Unsupported observation space structure.")

        if not pixels_only and observation_key in invalid_keys:
            raise ValueError(
                "Duplicate or reserved observation key {!r}.".format(
                    observation_key))

        if pixels_only:
            self.observation_space = spaces.Dict()
        elif self._observation_is_dict:
            self.observation_space = copy.deepcopy(wrapped_observation_space)
        else:
            self.observation_space = spaces.Dict()
            self.observation_space.spaces[
                STATE_KEY] = wrapped_observation_space

        self._normalize = normalize

        # Extend observation space with pixels.
        pixels = self._get_pixels()

        if np.issubdtype(pixels.dtype, np.integer):
            low, high = (0, 255)
        elif np.issubdtype(pixels.dtype, np.floating):
            # Unbounded so that values normalized to [-1, 1] stay in range.
            low, high = (-float('inf'), float('inf'))
        else:
            raise TypeError(pixels.dtype)

        pixels_space = spaces.Box(shape=pixels.shape,
                                  low=low,
                                  high=high,
                                  dtype=pixels.dtype)
        self.observation_space.spaces[observation_key] = pixels_space
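
The dtype-dependent bounds above generalize to any rendered image; a minimal standalone sketch of the same rule (the helper name is illustrative, not part of the wrapper):

import numpy as np
from gym import spaces

def pixels_to_space(pixels):
    # Integer images get [0, 255] bounds; float images are left unbounded,
    # mirroring the dtype check in the wrapper above.
    if np.issubdtype(pixels.dtype, np.integer):
        low, high = 0, 255
    elif np.issubdtype(pixels.dtype, np.floating):
        low, high = -float('inf'), float('inf')
    else:
        raise TypeError(pixels.dtype)
    return spaces.Box(low=low, high=high, shape=pixels.shape, dtype=pixels.dtype)

space = pixels_to_space(np.zeros((64, 64, 3), dtype=np.uint8))
assert space.contains(np.full((64, 64, 3), 255, dtype=np.uint8))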
Example #4
0
tf1, tf, tfv = try_import_tf()
_, nn = try_import_torch()

DICT_SPACE = spaces.Dict({
    "sensors":
    spaces.Dict({
        "position":
        spaces.Box(low=-100, high=100, shape=(3, )),
        "velocity":
        spaces.Box(low=-1, high=1, shape=(3, )),
        "front_cam":
        spaces.Tuple((spaces.Box(low=0, high=1, shape=(10, 10, 3)),
                      spaces.Box(low=0, high=1, shape=(10, 10, 3)))),
        "rear_cam":
        spaces.Box(low=0, high=1, shape=(10, 10, 3)),
    }),
    "inner_state":
    spaces.Dict({
        "charge":
        spaces.Discrete(100),
        "job_status":
        spaces.Dict({
            "task": spaces.Discrete(5),
            "progress": spaces.Box(low=0, high=100, shape=()),
        })
    })
})

DICT_SAMPLES = [DICT_SPACE.sample() for _ in range(10)]
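
Each entry of DICT_SAMPLES mirrors the nesting of DICT_SPACE as plain (ordered) dicts; a quick sanity check, as a sketch:

sample = DICT_SAMPLES[0]
assert DICT_SPACE.contains(sample)
assert sample["sensors"]["position"].shape == (3,)
assert 0 <= sample["inner_state"]["charge"] < 100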

TUPLE_SPACE = spaces.Tuple([
Example #5
0
    def __init__(self, env=None):
        # BowV1Env attributes
        self.env_id = 'NovelGridworld-Bow-v1'
        self.env = env  # env to restore in reset
        self.map_size = 10
        self.map = np.zeros((self.map_size, self.map_size),
                            dtype=int)  # 2D Map
        self.agent_location = (1, 1)  # row, column
        self.direction_id = {'NORTH': 0, 'SOUTH': 1, 'WEST': 2, 'EAST': 3}
        self.agent_facing_str = 'NORTH'
        self.agent_facing_id = self.direction_id[self.agent_facing_str]
        self.block_in_front_str = 'air'
        self.block_in_front_id = 0  # air
        self.block_in_front_location = (0, 0)  # row, column
        self.items = {
            'air', 'bow', 'crafting_table', 'plank', 'stick', 'string',
            'tree_log', 'wall', 'wool'
        }
        self.items_id = self.set_items_id(
            self.items
        )  # {'crafting_table': 1, 'plank': 2, ...}  # air's ID is 0
        self.unbreakable_items = {'air', 'wall'}
        self.goal_item_to_craft = 'bow'
        # items_quantity when the episode starts, do not include wall, quantity must be more than 0
        self.items_quantity = {'crafting_table': 1, 'tree_log': 3, 'wool': 2}
        self.inventory_items_quantity = {item: 0 for item in self.items}
        self.selected_item = ''
        self.entities = set()
        self.available_locations = []  # locations that do not have item placed
        self.not_available_locations = [
        ]  # locations that have item placed or are above, below, left, right to an item

        # Action Space
        self.actions_id = dict()
        self.manipulation_actions_id = {
            'Forward': 0,
            'Left': 1,
            'Right': 2,
            'Break': 3,
            'Extract_string': 4
        }
        self.actions_id.update(self.manipulation_actions_id)
        self.recipes = {
            'bow': {
                'input': {
                    'stick': 3,
                    'string': 3
                },
                'output': {
                    'bow': 1
                }
            },
            'stick': {
                'input': {
                    'plank': 2
                },
                'output': {
                    'stick': 4
                }
            },
            'plank': {
                'input': {
                    'tree_log': 1
                },
                'output': {
                    'plank': 4
                }
            }
        }
        # Add a Craft action for each recipe
        self.craft_actions_id = {
            'Craft_' + item: len(self.actions_id) + i
            for i, item in enumerate(sorted(self.recipes.keys()))
        }
        self.actions_id.update(self.craft_actions_id)
        # Add a Select action for each item except unbreakable items
        self.select_actions_id = {
            'Select_' + item: len(self.actions_id) + i
            for i, item in enumerate(
                sorted(list(self.items ^ self.unbreakable_items)))
        }
        self.actions_id.update(self.select_actions_id)
        self.action_space = spaces.Discrete(len(self.actions_id))

        self.last_action = 'Forward'  # last action executed
        self.step_count = 0  # no. of steps taken
        self.last_step_cost = 0  # last received step_cost

        # Observation Space
        self.max_items = 20
        self.observation_space = spaces.Box(low=0,
                                            high=self.max_items,
                                            shape=(self.map_size,
                                                   self.map_size, 1))
        self.observation_space = spaces.Dict({'map': self.observation_space})

        # Reward
        self.last_reward = 0  # last received reward
        self.reward_intermediate = 10
        self.reward_done = 50

        self.last_done = False  # last done
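
With the defaults above, action ids are assigned contiguously: the five manipulation actions, then one Craft_* action per recipe in sorted order, then one Select_* action per breakable item. A standalone recomputation of the resulting layout (values follow directly from the code above):

manipulation = {'Forward': 0, 'Left': 1, 'Right': 2, 'Break': 3, 'Extract_string': 4}
recipes = ['bow', 'plank', 'stick']  # sorted recipe names
breakable = ['bow', 'crafting_table', 'plank', 'stick', 'string', 'tree_log', 'wool']
actions_id = dict(manipulation)
actions_id.update({'Craft_' + item: len(actions_id) + i for i, item in enumerate(recipes)})
actions_id.update({'Select_' + item: len(actions_id) + i for i, item in enumerate(breakable)})
assert actions_id['Craft_bow'] == 5 and actions_id['Select_wool'] == 14
assert len(actions_id) == 15  # hence spaces.Discrete(15)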
Example #6
0
    def __init__(
            self,
            render_dt_msec=0,
            action_l2norm_penalty=0,  # disabled for now
            render_onscreen=False,
            render_size=84,
            reward_type="dense",
            action_scale=1.0,
            target_radius=0.60,
            boundary_dist=4,
            ball_radius=0.50,
            include_colors_in_obs=False,
            walls=None,
            num_colors=8,
            change_colors=True,
            fixed_colors=False,
            fixed_goal=None,
            randomize_position_on_reset=True,
            images_are_rgb=False,  # else black and white
            show_goal=True,
            use_env_labels=False,
            num_objects=1,
            include_white=False,
            **kwargs):
        if walls is None:
            walls = []
        if fixed_goal is not None:
            fixed_goal = np.array(fixed_goal)
        if len(kwargs) > 0:
            LOGGER = logging.getLogger(__name__)
            LOGGER.warning("Ignoring kwargs: %s", kwargs)
        self.quick_init(locals())
        self.render_dt_msec = render_dt_msec
        self.action_l2norm_penalty = action_l2norm_penalty
        self.render_onscreen = render_onscreen
        self.render_size = render_size
        self.reward_type = reward_type
        self.action_scale = action_scale
        self.target_radius = target_radius
        self.boundary_dist = boundary_dist
        self.ball_radius = ball_radius
        self.walls = walls
        self.fixed_goal = fixed_goal
        self.randomize_position_on_reset = randomize_position_on_reset
        self.images_are_rgb = images_are_rgb
        self.show_goal = show_goal
        self.num_objects = num_objects
        self.max_target_distance = self.boundary_dist - self.target_radius
        self.change_colors = change_colors
        self.fixed_colors = fixed_colors
        self.num_colors = num_colors
        self._target_position = None
        self._position = np.zeros((2))
        self.include_white = include_white
        self.initial_pass = False
        self.white_passed = False

        if change_colors:
            self.randomize_colors()
        else:
            self.object_colors = [Color('blue')]

        u = np.ones(2)
        self.action_space = spaces.Box(-u, u, dtype=np.float32)
        self.include_colors_in_obs = include_colors_in_obs
        o = self.boundary_dist * np.ones(2)
        ohe_range = spaces.Box(np.zeros(self.num_colors),
                               np.ones(self.num_colors),
                               dtype='float32')
        if include_colors_in_obs and self.fixed_colors:
            high = np.concatenate((o, np.ones(self.num_colors)))
            low = np.concatenate((-o, np.zeros(self.num_colors)))
        else:
            high = o
            low = -o

        self.obs_range = spaces.Box(low, high, dtype='float32')
        self.use_env_labels = use_env_labels
        if self.use_env_labels:

            self.observation_space = spaces.Dict([
                ('label', ohe_range),
                ('observation', self.obs_range),
                ('desired_goal', self.obs_range),
                ('achieved_goal', self.obs_range),
                ('state_observation', self.obs_range),
                ('state_desired_goal', self.obs_range),
                ('state_achieved_goal', self.obs_range),
            ])

        else:
            self.observation_space = spaces.Dict([
                ('observation', self.obs_range),
                ('desired_goal', self.obs_range),
                ('achieved_goal', self.obs_range),
                ('state_observation', self.obs_range),
                ('state_desired_goal', self.obs_range),
                ('state_achieved_goal', self.obs_range),
            ])

        self.drawer = None
        self.render_drawer = None

        self.color_index = 0
        self.colors = [
            Color('green'),
            Color('red'),
            Color('blue'),
            Color('black'),
            Color('purple'),
            Color('brown'),
            Color('pink'),
            Color('orange'),
            Color('grey'),
            Color('yellow')
        ]
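
In the branch above where colors are included in the observation, the 2-D position bounds are simply extended with a one-hot color block; a quick shape check using the default arguments shown above (values are illustrative):

import numpy as np
from gym import spaces

boundary_dist, num_colors = 4, 8
o = boundary_dist * np.ones(2)
high = np.concatenate((o, np.ones(num_colors)))
low = np.concatenate((-o, np.zeros(num_colors)))
obs_range = spaces.Box(low, high, dtype='float32')
assert obs_range.shape == (2 + num_colors,)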
Example #7
0
    def _init_train(self):
        if self.config.RL.DDPPO.force_distributed:
            self._is_distributed = True

        if is_slurm_batch_job():
            add_signal_handlers()

        if self._is_distributed:
            local_rank, tcp_store = init_distrib_slurm(
                self.config.RL.DDPPO.distrib_backend
            )
            if rank0_only():
                logger.info(
                    "Initialized DD-PPO with {} workers".format(
                        torch.distributed.get_world_size()
                    )
                )

            self.config.defrost()
            self.config.TORCH_GPU_ID = local_rank
            self.config.SIMULATOR_GPU_ID = local_rank
            # Multiply by the number of simulators to make sure they also get unique seeds
            self.config.TASK_CONFIG.SEED += (
                torch.distributed.get_rank() * self.config.NUM_ENVIRONMENTS
            )
            self.config.freeze()

            random.seed(self.config.TASK_CONFIG.SEED)
            np.random.seed(self.config.TASK_CONFIG.SEED)
            torch.manual_seed(self.config.TASK_CONFIG.SEED)
            self.num_rollouts_done_store = torch.distributed.PrefixStore(
                "rollout_tracker", tcp_store
            )
            self.num_rollouts_done_store.set("num_done", "0")

        if rank0_only() and self.config.VERBOSE:
            logger.info(f"config: {self.config}")

        profiling_wrapper.configure(
            capture_start_step=self.config.PROFILING.CAPTURE_START_STEP,
            num_steps_to_capture=self.config.PROFILING.NUM_STEPS_TO_CAPTURE,
        )

        self._init_envs()

        ppo_cfg = self.config.RL.PPO
        if torch.cuda.is_available():
            self.device = torch.device("cuda", self.config.TORCH_GPU_ID)
            torch.cuda.set_device(self.device)
        else:
            self.device = torch.device("cpu")

        if rank0_only() and not os.path.isdir(self.config.CHECKPOINT_FOLDER):
            os.makedirs(self.config.CHECKPOINT_FOLDER)

        self._setup_actor_critic_agent(ppo_cfg)
        if self._is_distributed:
            self.agent.init_distributed(find_unused_params=True)

        logger.info(
            "agent number of parameters: {}".format(
                sum(param.numel() for param in self.agent.parameters())
            )
        )

        obs_space = self.obs_space
        if self._static_encoder:
            self._encoder = self.actor_critic.net.visual_encoder
            obs_space = spaces.Dict(
                {
                    "visual_features": spaces.Box(
                        low=np.finfo(np.float32).min,
                        high=np.finfo(np.float32).max,
                        shape=self._encoder.output_shape,
                        dtype=np.float32,
                    ),
                    **obs_space.spaces,
                }
            )

        self._nbuffers = 2 if ppo_cfg.use_double_buffered_sampler else 1
        self.rollouts = RolloutStorage(
            ppo_cfg.num_steps,
            self.envs.num_envs,
            obs_space,
            self.envs.action_spaces[0],
            ppo_cfg.hidden_size,
            num_recurrent_layers=self.actor_critic.net.num_recurrent_layers,
            is_double_buffered=ppo_cfg.use_double_buffered_sampler,
        )
        self.rollouts.to(self.device)

        observations = self.envs.reset()
        batch = batch_obs(
            observations, device=self.device, cache=self._obs_batching_cache
        )
        batch = apply_obs_transforms_batch(batch, self.obs_transforms)

        if self._static_encoder:
            with torch.no_grad():
                batch["visual_features"] = self._encoder(batch)

        self.rollouts.buffers["observations"][0] = batch

        self.current_episode_reward = torch.zeros(self.envs.num_envs, 1)
        self.running_episode_stats = dict(
            count=torch.zeros(self.envs.num_envs, 1),
            reward=torch.zeros(self.envs.num_envs, 1),
        )
        self.window_episode_stats = defaultdict(
            lambda: deque(maxlen=ppo_cfg.reward_window_size)
        )

        self.env_time = 0.0
        self.pth_time = 0.0
        self.t_start = time.time()
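
The TASK_CONFIG.SEED bump above (rank * NUM_ENVIRONMENTS) is meant, per the inline comment, to hand each rank a block of consecutive seeds so every simulator across all ranks is seeded uniquely; a quick arithmetic check with hypothetical numbers:

base_seed, num_environments, world_size = 100, 4, 3
seeds = [
    base_seed + rank * num_environments + i
    for rank in range(world_size)
    for i in range(num_environments)
]
assert len(set(seeds)) == world_size * num_environments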
Example #8
0
    def __init__(
        self,
        wad_name='semantic_goal_map_dev.wad',
        input_types=(RGB, TIME, AUTOMAP),
        episode_start_time=14,
        episode_timeout=1000,
        max_actions=None,
        set_window_visible=False,
        interactive=False,
        randomize_textures=1,
        randomize_maps=None,
        repeat_count=3,
        n_actions=DEFAULT_N_ACTIONS,
        set_automap_buffer_enabled=True,
        set_render_crosshair=False,
        set_render_corpses=False,
        set_render_decals=False,
        set_render_effects_sprites=False,
        set_render_hud=False,
        set_render_messages=False,
        set_render_minimal_hud=False,
        set_render_particles=False,
        set_render_weapon=True,
        set_screen_resolution=ScreenResolution.RES_320X240,
        set_sound_enabled=False,
        max_onscreen_objects=30,
    ):
        '''
            Args:
                wad_name: .wad file to use
                input_types: subset of the input type constants used below
                    (e.g. RGB, DEPTH, LABELS, TIME, AUTOMAP)
                    RGB: RGB image as uint8
                    DEPTH: depth as uint8
                    LABELS: labels (semantic, pixelwise)
                randomize_textures: wad must support randomizing textures
                randomize_maps: (map_number_low, map_number_high)
        '''
        game = DoomGame()
        scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
        game_path = os.path.join(os.path.dirname(__file__), 'freedoom2.wad')
        assert os.path.isfile(game_path)
        game.set_doom_scenario_path(os.path.join(scenarios_dir, wad_name))

        # if map_cfg is None:
        #     raise ValueError("map_cfg cannot be none--needed to normalize agent coords.")

        # game.set_doom_game_path(game_path)
        game.set_doom_map("map01")

        # Get observation spaces
        _obs_space = {}
        if set_screen_resolution not in VIABLE_SCREEN_RESOLUTIONS:
            raise NotImplementedError(
                'Some screen resolutions do not work with gym.Monitor. Screen resolution must be one of {}'
                .format(VIABLE_SCREEN_RESOLUTIONS))
        else:
            game.set_screen_resolution(set_screen_resolution)
        game.set_screen_format(ScreenFormat.RGB24)
        game.set_render_hud(set_render_hud)
        game.set_render_minimal_hud(
            set_render_minimal_hud)  # If hud is enabled
        game.set_render_crosshair(set_render_crosshair)
        game.set_render_weapon(set_render_weapon)
        game.set_render_decals(set_render_decals)
        game.set_render_particles(set_render_particles)
        game.set_render_effects_sprites(set_render_effects_sprites)
        game.set_render_messages(set_render_messages)
        game.set_render_corpses(set_render_corpses)
        game.add_available_game_variable(GameVariable.AMMO2)
        game.set_episode_timeout(episode_timeout)
        game.set_episode_start_time(episode_start_time)
        game.set_window_visible(set_window_visible)
        game.set_sound_enabled(set_sound_enabled)
        if interactive:
            game.set_mode(Mode.SPECTATOR)
        else:
            game.set_mode(Mode.PLAYER)

        assert n_actions == 3, "Number of actions must be 3"
        game.add_available_button(Button.TURN_LEFT)
        game.add_available_button(Button.TURN_RIGHT)
        game.add_available_button(Button.MOVE_FORWARD)

        #game.add_game_args("-host 1 -deathmatch +sv_forcerespawn 1 +sv_noautoaim 1\
        #+sv_respawnprotect 1 +sv_cheats 1")  #  +sv_spawnfarthest
        game.add_game_args("+sv_cheats 1")  #  +sv_spawnfarthest 1

        if RGB in input_types:
            _obs_space[INPUT_TYPE_TO_SENSOR_NAME[RGB]] = spaces.Box(
                0,
                255, (game.get_screen_height(), game.get_screen_width(),
                      game.get_screen_channels()),
                dtype=np.uint8)

        game.set_depth_buffer_enabled(DEPTH in input_types)
        if DEPTH in input_types:
            _obs_space[INPUT_TYPE_TO_SENSOR_NAME[DEPTH]] = spaces.Box(
                0,
                255, (game.get_screen_height(), game.get_screen_width(), 1),
                dtype=np.uint8)

        game.set_labels_buffer_enabled(LABELS in input_types)
        if LABELS in input_types:
            _obs_space[INPUT_TYPE_TO_SENSOR_NAME[LABELS]] = spaces.Box(
                0,
                255, (game.get_screen_height(), game.get_screen_width(), 1),
                dtype=np.uint8)
            _obs_space[INPUT_TYPE_TO_SENSOR_NAME[OBJECT_ID_NUM]] = spaces.Box(
                0, 255, (max_onscreen_objects, ), dtype=np.uint8)
            _obs_space[
                INPUT_TYPE_TO_SENSOR_NAME[OBJECT_LOC_BBOX]] = spaces.Box(
                    0, 255, (max_onscreen_objects, 4), dtype=np.uint8)
        if TIME in input_types:
            _obs_space[INPUT_TYPE_TO_SENSOR_NAME[TIME]] = spaces.Box(
                0, episode_timeout, (1, ), dtype=np.float32)

        if GOAL in input_types:
            raise NotImplementedError(
                "GOAL observation not defined in base environment")

        game.set_automap_buffer_enabled(AUTOMAP in input_types)
        #set_automap_buffer_enabled)
        if AUTOMAP in input_types:
            game.set_automap_mode(AutomapMode.OBJECTS_WITH_SIZE)
            game.add_game_args("+viz_am_center 1")  # Fixed map
            _obs_space[INPUT_TYPE_TO_SENSOR_NAME[AUTOMAP]] = spaces.Box(
                0,
                255, (game.get_screen_height(), game.get_screen_width(),
                      game.get_screen_channels()),
                dtype=np.uint8)
        self.observation_space = spaces.Dict(_obs_space)
        self.action_space = spaces.Discrete(n_actions)
        self.game = game
        self.max_actions = max_actions
        self.max_onscreen_objects = max_onscreen_objects
        self.episode_number = 0
        self.randomize_maps = randomize_maps
        self.randomize_textures = randomize_textures
        self.visualize = set_window_visible
        self.interactive = interactive
        self.interactive_delay = 0.02
        self.repeat_count = repeat_count
        self.info = {'skip.repeat_count': self.repeat_count}
        self.screen_resolution = set_screen_resolution
        # self.viewer = None

        self.game.init()
        super().__init__()
Example #9
0
def dict2dict_space(d):
    if isinstance(d, dict):
        return spaces.Dict({k: dict2dict_space(v) for k, v in d.items()})
    else:
        return d
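
A minimal usage sketch for dict2dict_space, assuming gym.spaces is imported as spaces (the nested layout below is illustrative):

import numpy as np
from gym import spaces

nested = {
    'sensors': {
        'position': spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
        'charge': spaces.Discrete(100),
    },
    'step': spaces.Discrete(10),
}
space = dict2dict_space(nested)
assert isinstance(space, spaces.Dict)
assert isinstance(space.spaces['sensors'], spaces.Dict)
assert space.contains(space.sample())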
Example #10
0
tf = try_import_tf()
_, nn = try_import_torch()

DICT_SPACE = spaces.Dict({
    "sensors":
    spaces.Dict({
        "position":
        spaces.Box(low=-100, high=100, shape=(3, )),
        "velocity":
        spaces.Box(low=-1, high=1, shape=(3, )),
        "front_cam":
        spaces.Tuple((spaces.Box(low=0, high=1, shape=(10, 10, 3)),
                      spaces.Box(low=0, high=1, shape=(10, 10, 3)))),
        "rear_cam":
        spaces.Box(low=0, high=1, shape=(10, 10, 3)),
    }),
    "inner_state":
    spaces.Dict({
        "charge":
        spaces.Discrete(100),
        "job_status":
        spaces.Dict({
            "task": spaces.Discrete(5),
            "progress": spaces.Box(low=0, high=100, shape=()),
        })
    })
})

DICT_SAMPLES = [DICT_SPACE.sample() for _ in range(10)]

TUPLE_SPACE = spaces.Tuple([
Example #11
0
    def __init__(self, config_file, **kwargs):
        gym.Env.__init__(self)
        Receptor.__init__(self)

        self.config = config = Configuration.from_file(config_file)

        # Add any optional args
        for k, v in kwargs.items():
            config.set(k, v)

        self.name = config.name
        self._max_episode_steps = config.get('max_episode_steps') if 'max_episode_steps' in config else None
        self.hot_start = config.get('hot_start', 1)

        if config.get('render', False):
            distance = config.get('camera_distance', 2.0)
            yaw = config.get('camera_yaw', 180)
            pitch = config.get('camera_pitch', -41)
            target_position = config.get('camera_target_position', [0.0, 0.20, 0.50])
            cId = p.connect(p.GUI)#, options="--opengl2")
            # cId = p.connect(p.GUI, "window_backend=2") # HACK
            p.resetDebugVisualizerCamera(distance, yaw, pitch, target_position)
            p.configureDebugVisualizer(p.COV_ENABLE_SEGMENTATION_MARK_PREVIEW, 0)
        else:
            cId = p.connect(p.SHARED_MEMORY)
            if cId < 0: p.connect(p.DIRECT)
            # p.loadPlugin("eglRendererPlugin") # HACK

        p.setAdditionalSearchPath(pybullet_data.getDataPath())

        p.resetSimulation()

        timestep = config.get('timestep', 1 / 240.)
        sub_steps = int(1. / config.get('update_freq', 100) / timestep)
        iterations = config.get('solver_iterations', 150)
        p.setPhysicsEngineParameter(numSolverIterations=iterations, numSubSteps=sub_steps, fixedTimeStep=timestep)

        gravity = config.get('gravity', [0.0, 0.0, -9.81])
        p.setGravity(gravity[0], gravity[1], gravity[2])

        self.models = OrderedDict(
            sorted({child.name: Model(child)
                    for child in config.find_all('model')}.items(), key=lambda t: t[0]))
        self.addons = OrderedDict(
            sorted(
                {child.name: AddonFactory.build(child.get('addon'), self, child)
                 for child in config.find_all('addon')}.items(),
                key=lambda t: t[0]))
        self.receptors = OrderedDict(sorted({**self.models, self.name: self}.items(), key=lambda t: t[0]))

        self.collapse_rewards_func = sum if config.get('sum_rewards', False) else None
        self.collapse_terminals_func = any if config.get(
            'terminal_if_any', False) else all if config.get('terminal_if_all', False) else None
        self.flatten_observations = config.get('flatten_observations', False)
        self.flatten_actions = config.get('flatten_actions', False)
        self.action_repeat = config.get('action_repeat', 1)

        self.seed()
        self.reset()

        self.observation_space, self.action_space = spaces.Dict({}), spaces.Dict({})

        for name, receptor in self.receptors.items():
            obs_space, act_space = receptor.build_spaces()

            if len(obs_space.spaces):
                self.observation_space.spaces[name] = obs_space

            if len(act_space.spaces):
                self.action_space.spaces[name] = act_space

        if self.flatten_observations:
            lows, highs = [flatten(get_bounds_for_space(self.observation_space, opt)) for opt in [True, False]]
            self.original_observation_space = self.observation_space
            self.observation_space = gym.spaces.Box(low=lows, high=highs)

        if self.flatten_actions:
            lows, highs = (flatten(get_bounds_for_space(self.action_space, opt)) for opt in [True, False])
            self.original_action_space = self.action_space
            self.action_space = gym.spaces.Box(low=lows, high=highs)
Example #12
0
    def __init__(
        self, env, pixels_only=True, render_kwargs=None, pixel_keys=("pixels",)
    ):
        """Initializes a new pixel Wrapper.

        Args:
            env: The environment to wrap.
            pixels_only: If `True` (default), the original observation returned
                by the wrapped environment will be discarded, and a dictionary
                observation will only include pixels. If `False`, the
                observation dictionary will contain both the original
                observations and the pixel observations.
            render_kwargs: Optional `dict` containing keyword arguments passed
                to the `self.render` method.
            pixel_keys: Optional custom string specifying the pixel
                observation's key in the `OrderedDict` of observations.
                Defaults to 'pixels'.

        Raises:
            ValueError: If `env`'s observation spec is not compatible with the
                wrapper. Supported formats are a single array, or a dict of
                arrays.
            ValueError: If `env`'s observation already contains any of the
                specified `pixel_keys`.
        """

        super(PixelObservationWrapper, self).__init__(env)

        if render_kwargs is None:
            render_kwargs = {}

        for key in pixel_keys:
            render_kwargs.setdefault(key, {})

            render_mode = render_kwargs[key].pop("mode", "rgb_array")
            assert render_mode == "rgb_array", render_mode
            render_kwargs[key]["mode"] = "rgb_array"

        wrapped_observation_space = env.observation_space

        if isinstance(wrapped_observation_space, spaces.Box):
            self._observation_is_dict = False
            invalid_keys = set([STATE_KEY])
        elif isinstance(wrapped_observation_space, (spaces.Dict, MutableMapping)):
            self._observation_is_dict = True
            invalid_keys = set(wrapped_observation_space.spaces.keys())
        else:
            raise ValueError("Unsupported observation space structure.")

        if not pixels_only:
            # Make sure that no keys in `pixel_keys` overlap with
            # `observation_keys`.
            overlapping_keys = set(pixel_keys) & set(invalid_keys)
            if overlapping_keys:
                raise ValueError(
                    "Duplicate or reserved pixel keys {!r}.".format(overlapping_keys)
                )

        if pixels_only:
            self.observation_space = spaces.Dict()
        elif self._observation_is_dict:
            self.observation_space = copy.deepcopy(wrapped_observation_space)
        else:
            self.observation_space = spaces.Dict()
            self.observation_space.spaces[STATE_KEY] = wrapped_observation_space

        # Extend observation space with pixels.

        pixels_spaces = {}
        for pixel_key in pixel_keys:
            pixels = self.env.render(**render_kwargs[pixel_key])

            if np.issubdtype(pixels.dtype, np.integer):
                low, high = (0, 255)
            elif np.issubdtype(pixels.dtype, np.floating):
                low, high = (-float("inf"), float("inf"))
            else:
                raise TypeError(pixels.dtype)

            pixels_space = spaces.Box(
                shape=pixels.shape, low=low, high=high, dtype=pixels.dtype
            )
            pixels_spaces[pixel_key] = pixels_space

        self.observation_space.spaces.update(pixels_spaces)

        self._env = env
        self._pixels_only = pixels_only
        self._render_kwargs = render_kwargs
        self._pixel_keys = pixel_keys
Example #13
0
    def __init__(self,
                 task_class,
                 observation_mode='state',
                 action_mode='joint',
                 multi_action_space=False,
                 discrete_action_space=False):
        self.num_skip_control = 20
        self.epi_obs = []
        self.obs_record = True
        self.obs_record_id = None

        self._observation_mode = observation_mode
        self.obs_config = ObservationConfig()
        if observation_mode == 'state':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
        elif observation_mode == 'state-goal':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
            self.obs_config.set_goal_info(True)
        elif observation_mode == 'vision':
            self.obs_config.set_all(False)
            self.obs_config.set_camera_rgb(True)
        elif observation_mode == 'both':
            self.obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)

        self._action_mode = action_mode
        self.ac_config = None
        if action_mode == 'joint':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.ABS_JOINT_POSITION)
        elif action_mode == 'trigon':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
                is_discrete=discrete_action_space)
        else:
            raise ValueError('Unrecognised action_mode: %s.' % action_mode)

        self.env = Environment(action_config=self.ac_config,
                               obs_config=self.obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        self.max_episode_steps = self.task.episode_len
        _, obs = self.task.reset()

        if action_mode == 'joint':
            self.action_space = spaces.Box(
                low=-1.7,
                high=1.7,
                shape=(self.ac_config.action_size, ),
                dtype=np.float32)
        elif action_mode == 'trigon':
            if multi_action_space:
                # action_space1 = spaces.MultiBinary(n=1)
                low1 = np.array([-0.8, -0.8])
                high1 = np.array([0.8, 0.8])
                action_space1 = spaces.Box(low=low1,
                                           high=high1,
                                           dtype=np.float32)
                # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low2 = np.array([1.0])
                high2 = np.array([3.0])
                action_space2 = spaces.Box(low=low2,
                                           high=high2,
                                           dtype=np.float32)
                self.action_space = spaces.Tuple(
                    (action_space1, action_space2))
            elif discrete_action_space:
                self.action_space = spaces.MultiDiscrete([4, 4, 4])
            else:
                # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low = np.array([-1, -1, -1])
                high = np.array([1, 1, 1])
                self.action_space = spaces.Box(low=low,
                                               high=high,
                                               dtype=np.float32)

        if observation_mode == 'state' or observation_mode == 'state-goal':
            self.observation_space = spaces.Box(
                low=-np.inf,
                high=np.inf,
                shape=(obs.get_low_dim_data().shape[0] *
                       self.num_skip_control, ))
            if observation_mode == 'state-goal':
                self.goal_dim = obs.get_goal_dim()
        elif observation_mode == 'vision':
            self.observation_space = spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape)
        elif observation_mode == 'both':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "rattler_eye_rgb":
                spaces.Box(low=0, high=1, shape=obs.head_camera_rgb.shape),
                "rattler_eye_depth":
                spaces.Box(low=0, high=1, shape=obs.head_camera_depth.shape),
            })

        self._gym_cam = None
Example #14
0
    def __init__(self, map_size_vertical, map_size_horizontal, no_goals,
                 view_straightforward, view_left_right, start_position_x,
                 start_position_y, start_orientation, reward_range,
                 max_no_steps):

        # set some parameters
        self.map_size_vertical = map_size_vertical
        self.map_size_horizontal = map_size_horizontal
        self.no_goals = no_goals
        self.view_straightforward = view_straightforward
        self.view_left_right = view_left_right
        self.reward_range = reward_range
        self.max_no_steps = max_no_steps
        self.living_reward = 0.5 / self.max_no_steps

        # determine goal rewards
        if self.no_goals == 2:
            self.goal_rewards = [0.25, 0.75]
        elif self.no_goals == 3:
            self.goal_rewards = [0.125, 0.125, 0.75]
        elif self.no_goals == 4:
            self.goal_rewards = [0.1, 0.1, 0.1, 0.7]

        # build map with dummy surrounding
        self.map = np.zeros(
            (self.no_goals + 1, map_size_vertical + 2 *
             (self.view_straightforward - 1), map_size_horizontal + 2 *
             (self.view_straightforward - 1)),
            dtype=np.float32)
        self.map[0, :, :] = 1
        self.map[0, (self.view_straightforward -
                     1):(1 - self.view_straightforward),
                 (self.view_straightforward -
                  1):(1 - self.view_straightforward)] = 0

        # shift positions due to dummy surrounding
        self.start_position_x = self.position_x = start_position_x + self.view_straightforward - 1
        self.start_position_y = self.position_y = start_position_y + self.view_straightforward - 1
        self.start_orientation = self.orientation = start_orientation  # down=0, right=1, up=2, left=3

        # determine random goal positions and place corresponding indicators in the map
        self.start_position_index = (start_position_y *
                                     map_size_horizontal) + start_position_x
        self.max_goal_index = (map_size_horizontal * map_size_vertical) - 1

        goal1_index = np.random.randint(0, self.max_goal_index + 1)  # inclusive of max_goal_index
        while goal1_index == self.start_position_index:
            goal1_index = np.random.randint(0, self.max_goal_index + 1)
        self.goal1_position_x = (goal1_index % self.map_size_horizontal +
                                 self.view_straightforward - 1)
        self.goal1_position_y = (goal1_index // self.map_size_horizontal +
                                 self.view_straightforward - 1)
        self.map[1, self.goal1_position_y, self.goal1_position_x] = 1.

        goal2_index = np.random.randint(0, self.max_goal_index + 1)
        while goal2_index == goal1_index:
            goal2_index = np.random.randint(0, self.max_goal_index + 1)
        self.goal2_position_x = (goal2_index % self.map_size_horizontal +
                                 self.view_straightforward - 1)
        self.goal2_position_y = (goal2_index // self.map_size_horizontal +
                                 self.view_straightforward - 1)
        self.map[2, self.goal2_position_y, self.goal2_position_x] = 1.

        goal3_index = np.random.randint(0, self.max_goal_index + 1)
        while goal3_index == goal2_index:
            goal3_index = np.random.randint(0, self.max_goal_index + 1)
        self.goal3_position_x = (goal3_index % self.map_size_horizontal +
                                 self.view_straightforward - 1)
        self.goal3_position_y = (goal3_index // self.map_size_horizontal +
                                 self.view_straightforward - 1)
        if self.no_goals > 2:
            self.map[3, self.goal3_position_y, self.goal3_position_x] = 1.

        goal4_index = np.random.randint(0, self.max_goal_index + 1)
        while goal4_index == goal3_index:
            goal4_index = np.random.randint(0, self.max_goal_index + 1)
        self.goal4_position_x = (goal4_index % self.map_size_horizontal +
                                 self.view_straightforward - 1)
        self.goal4_position_y = (goal4_index // self.map_size_horizontal +
                                 self.view_straightforward - 1)
        if self.no_goals > 3:
            self.map[4, self.goal4_position_y, self.goal4_position_x] = 1.

        # set observation and action space
        observation_shape = (self.map.shape[0], self.view_straightforward,
                             2 * self.view_left_right + 1)
        self.observation_space = spaces.Dict({
            "position":
            spaces.MultiDiscrete([self.map.shape[2], self.map.shape[1], 4]),
            "observation":
            spaces.Box(low=0.,
                       high=1.,
                       shape=observation_shape,
                       dtype=np.float32)
        })
        self.action_space = spaces.Discrete(3)

        self.last_done = False
        self.last_obs = np.zeros(observation_shape)

        # init internal state and step counter
        self.internal_state = 'Empty'
        self.step_counter = 0.
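
Goal positions above are drawn as flat indices over the un-padded map and decoded with the row stride map_size_horizontal, matching the encoding of start_position_index; a tiny arithmetic check with hypothetical sizes:

map_size_horizontal = 5
index = 2 * map_size_horizontal + 3       # row 2, column 3 -> 13
assert index % map_size_horizontal == 3   # column (x)
assert index // map_size_horizontal == 2  # row (y)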
Example #15
0
    def __init__(self,
                 config: Config,
                 dataset: Optional[Dataset] = None) -> None:
        """Constructor. Mimics the :ref:`Env` constructor.

        Args:
            :param config: config for the environment. Should contain id for
            simulator and ``task_name`` for each task, which are passed into ``make_sim`` and
            ``make_task``.
            :param dataset: reference to dataset used for first task instance level
            information. Can be defined as :py:`None` in which case
            dataset will be built using ``make_dataset`` and ``config``.
        """
        # let superclass instantiate current task by merging first task in TASKS to TASK
        if len(config.MULTI_TASK.TASKS):
            logger.info(
                "Overwriting config.TASK ({}) with first entry in config.MULTI_TASK.TASKS ({})."
                .format(config.TASK.TYPE, config.MULTI_TASK.TASKS[0].TYPE))
            config.defrost()
            config.TASK.merge_from_other_cfg(config.MULTI_TASK.TASKS[0])
            config.freeze()
            # TASKS[0] dataset has higher priority than the default one (if specified), instantiate it first
            if dataset is None and config.MULTI_TASK.TASKS[0].DATASET.TYPE:
                dataset = make_dataset(
                    id_dataset=config.MULTI_TASK.TASKS[0].DATASET.TYPE,
                    config=config.MULTI_TASK.TASKS[0].DATASET,
                )
        # initialize first task leveraging Env
        super().__init__(config, dataset=dataset)
        # instantiate other tasks
        self._tasks = [self._task]
        self._curr_task_idx = 0
        # keep each tasks episode iterator to avoid re-creation
        self._episode_iterators = [self._episode_iterator]
        for task in config.MULTI_TASK.TASKS[1:]:
            self._tasks.append(
                make_task(
                    task.TYPE,
                    config=task,
                    sim=self._sim,
                    # each task gets its dataset
                    dataset=make_dataset(  # TODO: lazy make_dataset support
                        id_dataset=task.DATASET.TYPE,
                        config=task.DATASET),
                ))
            # get task episode iterator
            iter_option_dict = {
                k.lower(): v
                for k, v in task.EPISODE_ITERATOR_OPTIONS.items()
            }
            iter_option_dict["seed"] = self._config.SEED
            task_ep_iterator: EpisodeIterator = self._tasks[
                -1]._dataset.get_episode_iterator(**iter_option_dict)
            self._episode_iterators.append(task_ep_iterator)
        # episode counter
        self._eps_counter = -1
        self._cumulative_steps_counter = 0
        # when and how to change task is defined here
        self._task_sampling_behavior = (
            config.MULTI_TASK.TASK_ITERATOR.TASK_SAMPLING)
        self._change_task_behavior = (
            config.MULTI_TASK.TASK_ITERATOR.TASK_CHANGE_TIMESTEP)
        # custom task label can be specified
        self._curr_task_label = self._task._config.get("TASK_LABEL",
                                                       self._curr_task_idx)
        # add task_idx to observation space
        self.observation_space = spaces.Dict({
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
            "task_idx":
            spaces.Discrete(len(self._tasks)),
        })
Example #16
0
    def __init__(self):
        """
        Initialise the environment
        """

        self.random_goal = True
        self.goal_oriented = False
        self.obs_type = 5
        self.reward_type = 1
        self.action_type = 1

        # Define action space
        self.action_space = spaces.Box(
            low=np.float32(
                np.array([-0.5, -0.25, -0.25, -0.25, -0.5, -0.005]) / 30),
            high=np.float32(
                np.array([0.5, 0.25, 0.25, 0.25, 0.5, 0.005]) / 30),
            dtype=np.float32)

        # Define observation space
        if self.obs_type == 1:
            self.obs_space_low = np.float32(
                np.concatenate((MIN_END_EFF_COORDS, JOINT_MIN), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate((MAX_END_EFF_COORDS, JOINT_MAX), axis=0))

        elif self.obs_type == 2:
            self.obs_space_low = np.float32(
                np.concatenate((MIN_GOAL_COORDS, JOINT_MIN), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate((MAX_GOAL_COORDS, JOINT_MAX), axis=0))

        elif self.obs_type == 3:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 6, JOINT_MIN), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 6, JOINT_MAX), axis=0))

        elif self.obs_type == 4:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 3, JOINT_MIN), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 3, JOINT_MAX), axis=0))

        elif self.obs_type == 5:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 6, MIN_GOAL_COORDS, JOINT_MIN),
                               axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 6, MAX_GOAL_COORDS, JOINT_MAX),
                               axis=0))

        self.observation_space = spaces.Box(low=self.obs_space_low,
                                            high=self.obs_space_high,
                                            dtype=np.float32)

        if self.goal_oriented:
            self.observation_space = spaces.Dict(
                dict(desired_goal=spaces.Box(low=np.float32(MIN_GOAL_COORDS),
                                             high=np.float32(MAX_GOAL_COORDS),
                                             dtype=np.float32),
                     achieved_goal=spaces.Box(
                         low=np.float32(MIN_END_EFF_COORDS),
                         high=np.float32(MAX_END_EFF_COORDS),
                         dtype=np.float32),
                     observation=self.observation_space))

        # Initialise goal position
        if self.random_goal:
            self.goal = self.sample_random_goal()
        else:
            self.goal = FIXED_GOAL_COORDS

        # Connect to physics client. By default, do not render
        self.physics_client = p.connect(p.DIRECT)

        # Load URDFs
        self.create_world()

        # reset environment
        self.reset()
Example #17
0
    def __init__(
        self,
        model_path: str,
        frame_skip: int,
        camera_settings: Optional[Dict] = None,
    ):
        """Initializes a new MuJoCo environment.

        Args:
            model_path: The path to the MuJoCo XML file.
            frame_skip: The number of simulation steps per environment step. On
              hardware this influences the duration of each environment step.
            camera_settings: Settings to initialize the simulation camera. This
              can contain the keys `distance`, `azimuth`, and `elevation`.
        """
        self._seed()
        if not os.path.isfile(model_path):
            raise IOError('[MujocoEnv]: Model path does not exist: {}'.format(
                model_path))
        self.frame_skip = frame_skip

        self.sim_robot = MujocoSimRobot(model_path,
                                        camera_settings=camera_settings)
        self.sim = self.sim_robot.sim
        self.model = self.sim_robot.model
        self.data = self.sim_robot.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array', 'depth_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
        assert not done

        bounds = self.model.actuator_ctrlrange.copy()
        act_upper = bounds[:, 1]
        act_lower = bounds[:, 0]

        # Define the action and observation spaces.
        # HACK: MJRL is still using gym 0.9.x so we can't provide a dtype.
        try:
            self.action_space = spaces.Box(act_lower,
                                           act_upper,
                                           dtype=np.float32)
            if isinstance(observation, collections.Mapping):
                self.observation_space = spaces.Dict({
                    k: spaces.Box(-np.inf,
                                  np.inf,
                                  shape=v.shape,
                                  dtype=np.float32)
                    for k, v in observation.items()
                })
            else:
                self.obs_dim = np.sum([
                    o.size for o in observation
                ]) if type(observation) is tuple else observation.size
                self.observation_space = spaces.Box(-np.inf,
                                                    np.inf,
                                                    (observation.shape[0], ),
                                                    dtype=np.float32)

        except TypeError:
            # Fallback case for gym 0.9.x
            self.action_space = spaces.Box(act_lower, act_upper)
            assert not isinstance(
                observation, collections.Mapping
            ), 'gym 0.9.x does not support dictionary observation.'
            self.obs_dim = np.sum([
                o.size for o in observation
            ]) if type(observation) is tuple else observation.size
            self.observation_space = spaces.Box(-np.inf, np.inf,
                                                observation.shape)
Example #18
0
    def __init__(self, env_id,
                 using_gym_wrapper = True,
                 state_list=None,
                 max_steps=50,
                 reward="sparse",
                 using_demo_init=False,
                 demo_path = "demos",
                 render = False):
        """
        :param env_id: env name
        :param using_gym_wrapper: whether using gym wrapper provided by SURREAL
        :param using_demo_init: whether apply initialization of demonstration states
        :param state_list: select some dimension of states for the observation (None means all)
                NOTE: this option is only used when not using gym wrapper (using_gym_wrapper = False)
        """
        self.reward_type = reward

        self.env = suite.make(env_id,
                    has_renderer=render,
                    has_offscreen_renderer=False,
                    use_object_obs=True,
                    use_camera_obs=False,
                    reward_shaping=False if reward == "sparse" else True,
                    control_freq=10,
                    ignore_done=True,
                    )

        if using_demo_init:
            self.env = DemoSamplerWrapper(self.env,
                                          demo_path = os.path.join(demo_path, env_id),
                                          scheme_ratios = [0.5, 0.5])

        self.using_gym_wrapper = using_gym_wrapper
        if using_gym_wrapper:
            self.env = GymWrapper(
                self.env,
                keys=state_list if state_list is not None
                else ["robot-state", "object-state"])

        ob, rew, done, _ = self.env.step(np.random.rand(self.env.dof))

        if not using_gym_wrapper:
            # ob = self.env.sim.get_state().flatten()
            self.state_list = state_list
            if self.state_list:
                ob = np.concatenate([ob[i] for i in self.state_list])

        self.max_episode_steps = max_steps
        self.n_steps = 0

        self.n_actions = self.env.dof
        self.d_observations = ob.size
        self.d_goals = self.get_d_goals()

        self.observation_space = spaces.Dict({
            # "observation": self.env.observation_space,
            "observation": spaces.Box(- np.inf * np.ones(self.d_observations), np.inf * np.ones(self.d_observations)),
            "desired_goal": spaces.Box(- np.inf * np.ones(self.d_goals), np.inf * np.ones(self.d_goals)),
            "achieved_goal": spaces.Box(- np.inf * np.ones(self.d_goals), np.inf * np.ones(self.d_goals)),
        })
        self.action_space = self.env.action_space

        self.acc_rew = 0
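A small, hedged sketch of the same pattern in isolation — probe the wrapped env with one step to size the observation, then declare unbounded goal-conditioned spaces. The helper below is illustrative only and not part of the SURREAL wrapper:

import numpy as np
from gym import spaces

def make_goal_dict_space(d_obs, d_goal):
    # Unbounded Box spaces, mirroring the observation_space built above.
    def unbounded(d):
        return spaces.Box(-np.inf * np.ones(d), np.inf * np.ones(d), dtype=np.float32)
    return spaces.Dict({
        "observation": unbounded(d_obs),
        "desired_goal": unbounded(d_goal),
        "achieved_goal": unbounded(d_goal),
    })

# e.g. after one probe step the observation had 32 entries and the goal is 3-D:
print(make_goal_dict_space(32, 3))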
Example #19
0
from gym import spaces
import numpy as np
import pytest

base_spaces = [
    spaces.Discrete(n=10),
    spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
    spaces.Tuple([
        spaces.Discrete(n=10),
        spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
    ]),
    spaces.Dict({
        "x":
        spaces.Tuple([
            spaces.Discrete(n=10),
            spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
        ]),
        "t":
        spaces.Discrete(1),
    }),
]


def equals(value, expected) -> bool:
    assert type(value) == type(expected)
    if isinstance(value, (int, float, bool)):
        return value == expected
    if isinstance(value, np.ndarray):
        return value.tolist() == expected.tolist()
    if isinstance(value, (tuple, list)):
        assert len(value) == len(expected)
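        # The snippet is cut off here. A hedged guess at the natural continuation:
        # recurse element-wise over tuples/lists (and over dict values), reusing the
        # scalar/ndarray cases handled above.
        return all(equals(v, e) for v, e in zip(value, expected))
    if isinstance(value, dict):
        assert value.keys() == expected.keys()
        return all(equals(value[k], expected[k]) for k in value)
    raise NotImplementedError(f"Unsupported type: {type(value)}")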
Example #20
0
File: point2d.py Project: kylehkhsu/umrl
    def __init__(
            self,
            render_dt_msec=0,
            action_l2norm_penalty=0,  # disabled for now
            render_onscreen=True,
            render_size=84,
            reward_type="dense",
            target_radius=0.5,
            boundary_dist=4,
            ball_radius=0.25,
            walls=None,
            fixed_goal=None,
            randomize_position_on_reset=True,
            images_are_rgb=False,  # else black and white
            show_goal=True,
            action_limit=1,
            initial_position=(0, 0),
            **kwargs
    ):
        if walls is None:
            walls = []
        if len(kwargs) > 0:
            import logging
            LOGGER = logging.getLogger(__name__)
            LOGGER.warning("WARNING, ignoring kwargs: %s", kwargs)
        self.quick_init(locals())
        self.render_dt_msec = render_dt_msec
        self.action_l2norm_penalty = action_l2norm_penalty
        self.render_onscreen = render_onscreen
        self.render_size = render_size
        self.reward_type = reward_type
        self.target_radius = target_radius
        self.boundary_dist = boundary_dist
        self.ball_radius = ball_radius
        self.walls = walls
        self.fixed_goal = fixed_goal
        self.randomize_position_on_reset = randomize_position_on_reset
        self.images_are_rgb = images_are_rgb
        self.show_goal = show_goal
        self.action_limit = action_limit

        self._max_episode_steps = 50
        self.max_target_distance = self.boundary_dist - self.target_radius

        self._target_position = None
        self._position = np.zeros((2))

        u = self.action_limit * np.ones(2)
        self.action_space = spaces.Box(-u, u, dtype=np.float32)

        o = self.boundary_dist * np.ones(2)
        self.obs_range = spaces.Box(-o, o, dtype='float32')
        self.observation_space = spaces.Dict([
            ('observation', self.obs_range),
            ('desired_goal', self.obs_range),
            ('achieved_goal', self.obs_range),
            ('state_observation', self.obs_range),
            ('state_desired_goal', self.obs_range),
            ('state_achieved_goal', self.obs_range),
        ])
        self.observation_space.shape = (2,)   # hack
        self._step = 0
        self.initial_position = initial_position

        self.drawer = None
Example #21
0
    def __init__(self,
                 action_mode="cart",
                 action_buckets=[-1, 0, 1],
                 action_stepsize=1.0,
                 reward_type="sparse"):
        """
        Parameters:
            action_mode {"cart","cartmixed","cartprod","impulse","impulsemixed"}
            action_stepsize: Step size of the action to perform.
                            Int for cart and impulse
                            List for cartmixed and impulsemixed
            action_buckets: List of buckets used when mode is cartprod
            reward_mode = {"sparse","dense"}

        Reward Mode:
            `sparse` rewards are like the standard HPG rewards.
            `dense` rewards (from the paper/gym) give -(distance to goal) at every timestep.

        Modes:
            `cart` is for manhattan style movement where an action moves the arm in one direction
                for every action.

            `impulse` treats the action dimensions as velocity and adds/decreases
                the velocity by action_stepsize depending on the direction picked.
                Adds current direction
                velocity to state


            `impulsemixed` and `cartmixed` does the above with multiple magnitudes of action_stepsize.
                Needs the action_stepsize as a list.

            `cartprod` takes combinations of actions as input
        """

        try:
            self.env = gym.make("FetchReach-v1")
        except Exception as e:
            print(
                "You do not have the latest version of gym (gym-0.10.5). Falling back to v0 with movable table"
            )
            self.env = gym.make("FetchReach-v0")

        self.action_directions = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                           [0, 0, 1, 0]])
        self.valid_action_directions = np.float32(
            np.any(self.action_directions, axis=0))

        self.distance_threshold = self.env.distance_threshold
        # self.distance_threshold = 0.06
        self.action_mode = action_mode
        self.num_actions = self.generate_action_map(action_buckets,
                                                    action_stepsize)

        obs_dim = 10 + 4 * (action_mode == "impulse"
                            or action_mode == "impulsemixed")
        self.observation_space = spaces.Dict(
            dict(
                desired_goal=spaces.Box(-np.inf,
                                        np.inf,
                                        shape=(3, ),
                                        dtype='float32'),
                achieved_goal=spaces.Box(-np.inf,
                                         np.inf,
                                         shape=(3, ),
                                         dtype='float32'),
                observation=spaces.Box(-np.inf,
                                       np.inf,
                                       shape=(obs_dim, ),
                                       dtype='float32'),
            ))
        self._max_episode_steps = self.env._max_episode_steps
        self.reward_type = reward_type
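Since generate_action_map is not shown above, the following is only a hypothetical sketch of what the "cart" mode could look like: each discrete action picks one of the three movable axes and a sign, and is converted into a small displacement along that axis.

import numpy as np

def cart_action_to_delta(action_index, directions, stepsize=1.0):
    # Hypothetical layout: 3 axes x 2 signs -> 6 discrete actions.
    axis, sign = divmod(action_index, 2)
    delta = directions[axis] * stepsize
    return delta if sign == 0 else -delta

directions = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]], dtype=np.float32)
print(cart_action_to_delta(3, directions))  # a unit step along -y (4th dim would be the gripper)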
Example #22
0
from ..LoggedTestCase import LoggedTestCase
from aienvs.Environment import Env
from unittest.mock import Mock
from aiagents.AgentFactory import createAgent, classForName, classForNameTyped
from gym import spaces

compoundclass = 'aiagents.multi.BasicComplexAgent.BasicComplexAgent'
randomclass = 'aiagents.single.RandomAgent.RandomAgent'

action_space = spaces.Dict({'robot1': spaces.Discrete(4)})


class testComplexAgentComponent(LoggedTestCase):
    def test_smoke_no_agents(self):
        params = {}
        with self.assertRaises(Exception) as context:
            createAgent(action_space, None, params)
        self.assertEquals("Parameters must have key 'class' but got {}",
                          str(context.exception))

    def test_smoke_agents_no_dict(self):
        params = {'class': randomclass, 'parameters': {}, 'subAgentList': 3}
        with self.assertRaises(Exception) as context:
            createAgent(action_space, None, params)
        self.assertEquals(
            "the subAgentList parameter must contain a list, but got {'class': 'aiagents.single.RandomAgent.RandomAgent', 'parameters': {}, 'subAgentList': 3}",
            str(context.exception))

    def test_smoke_bad_parameters(self):
        params = {
            'class': randomclass,
Example #23
0
    def __init__(self):
        """
        Initialise the environment
        """

        self.random_goal = True
        self.goal_oriented = False
        self.obs_type = 4
        self.reward_type = 1
        self.action_type = 1
        self.joint_limits = "small"
        self.action_coeff = 30

        self.endeffector_pos = None
        self.torso_pos = None
        self.end_torso = None
        self.end_goal = None
        self.joint_positions = None
        self.reward = None
        self.obs = None
        self.action = None
        self.new_joint_positions = None
        self.dist = None
        self.old_dist = None
        self.term1 = None
        self.term2 = None

        if self.joint_limits == "small":
            self.joint_min = np.array([-3.1, -1.6, -1.6, -1.8, -3.1, 0.0])
            self.joint_max = np.array([3.1, 1.6, 1.6, 1.8, 3.1, 0.0])
        elif self.joint_limits == "large":
            self.joint_min = np.array([-3.2, -3.2, -3.2, -3.2, -3.2, -3.2])
            self.joint_max = np.array([3.2, 3.2, 3.2, 3.2, 3.2, 3.2])

        # Define action space
        self.action_space = spaces.Box(
            low=np.float32(
                np.array([-0.5, -0.25, -0.25, -0.25, -0.5, -0.005]) /
                self.action_coeff),
            high=np.float32(
                np.array([0.5, 0.25, 0.25, 0.25, 0.5, 0.005]) /
                self.action_coeff),
            dtype=np.float32)

        # Define observation space
        if self.obs_type == 1:
            self.obs_space_low = np.float32(
                np.concatenate((MIN_END_EFF_COORDS, self.joint_min), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate((MAX_END_EFF_COORDS, self.joint_max), axis=0))

        elif self.obs_type == 2:
            self.obs_space_low = np.float32(
                np.concatenate((MIN_GOAL_COORDS, self.joint_min), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate((MAX_GOAL_COORDS, self.joint_max), axis=0))

        elif self.obs_type == 3:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 6, self.joint_min), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 6, self.joint_max), axis=0))

        elif self.obs_type == 4:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 3, self.joint_min), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 3, self.joint_max), axis=0))

        elif self.obs_type == 5:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 6, MIN_GOAL_COORDS, self.joint_min),
                               axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 6, MAX_GOAL_COORDS, self.joint_max),
                               axis=0))

        self.observation_space = spaces.Box(low=self.obs_space_low,
                                            high=self.obs_space_high,
                                            dtype=np.float32)

        if self.goal_oriented:
            self.observation_space = spaces.Dict(
                dict(desired_goal=spaces.Box(low=np.float32(MIN_GOAL_COORDS),
                                             high=np.float32(MAX_GOAL_COORDS),
                                             dtype=np.float32),
                     achieved_goal=spaces.Box(
                         low=np.float32(MIN_END_EFF_COORDS),
                         high=np.float32(MAX_END_EFF_COORDS),
                         dtype=np.float32),
                     observation=self.observation_space))

        # Initialise goal position
        if self.random_goal:
            self.goal = self.sample_random_goal()
        else:
            self.goal = FIXED_GOAL_COORDS

        # Connect to physics client. By default, do not render
        self.physics_client = p.connect(p.DIRECT)

        # Load URDFs
        self.create_world()

        # reset environment
        self.reset()
Example #24
0
            low = self.action_space.low
            high = self.action_space.high

            action = 2 * (action - low) / (high - low) - 1
            action = np.clip(action, low, high)

            return action

    max_steps = 200
    episodes = 50

    env = NormalizedEnv(gym.make("Pendulum-v0"))

    observation_space = spaces.Dict({
        'robot': env.observation_space,
        'task': spaces.Box(-1, 1, shape=(0, )),
        'goal': spaces.Box(-1, 1, shape=(1, ))
    })

    def reward_function(goal, **kwargs):
        reward = goal["achieved"]

        return reward

    agent = AgentSAC(agent_config, observation_space, env.action_space)

    task = np.array([]).reshape((0, ))
    goal = np.zeros(1)

    results_episodes = []
    rewards_episodes = []
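The fragment above rescales an action from the env's native bounds into [-1, 1]; the usual companion (assumed here, since it is not part of the fragment) is an ActionWrapper that maps the agent's [-1, 1] actions back into the env's bounds before stepping. A hedged sketch:

import gym
import numpy as np

class NormalizedEnvSketch(gym.ActionWrapper):
    """Illustrative only: the agent acts in [-1, 1], the wrapped env receives native-scale actions."""

    def __init__(self, env):
        super().__init__(env)
        self._low, self._high = env.action_space.low, env.action_space.high
        # the agent sees a [-1, 1] box; the wrapped env keeps its native bounds
        self.action_space = gym.spaces.Box(-np.ones_like(self._low),
                                           np.ones_like(self._high),
                                           dtype=np.float32)

    def action(self, action):
        action = self._low + (action + 1.0) * 0.5 * (self._high - self._low)  # [-1, 1] -> [low, high]
        return np.clip(action, self._low, self._high)

env = NormalizedEnvSketch(gym.make("Pendulum-v0"))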
Example #25
0
    def create_state_space(self):
        ''' Requires: self.n_shapes and self.n_bouncers. '''
        if self.moving_walls:
            n_objects = self.n_bouncers + 1  # plus walls
        else:
            n_objects = self.n_bouncers
        state_space = spaces.Dict({
            # a vector containing the index of every shape's body in self.space.bodies; -1 for static
            # (a sort of header: the index of every shape's body in the list)
            "shape_body_ids":
            spaces.MultiDiscrete([self.n_shapes] * self.n_shapes),
            # the type of each of the bodies in the list (excluding the static body)
            "body_types":
            spaces.MultiDiscrete([len(bc.ObjectTypes)] * (n_objects)),
            "bouncer_sizes":
            spaces.Box(low=-pymunk.inf,
                       high=pymunk.inf,
                       shape=(self.n_bouncers, 2),
                       dtype=np.float32),
            "bouncer_weight":
            spaces.Box(low=-pymunk.inf,
                       high=pymunk.inf,
                       shape=(self.n_bouncers, ),
                       dtype=np.float32),
            "positions":
            spaces.Box(low=-pymunk.inf,
                       high=pymunk.inf,
                       shape=(self.n_shapes, 2),
                       dtype=np.float32),
            "velocities":
            spaces.Box(low=-pymunk.inf,
                       high=pymunk.inf,
                       shape=(self.n_shapes, 2),
                       dtype=np.float32),
            "angles":
            spaces.Box(low=0.,
                       high=2. * np.pi,
                       shape=(self.n_shapes, 1),
                       dtype=np.float32),
            "angular_velocities":
            spaces.Box(low=-pymunk.inf,
                       high=pymunk.inf,
                       shape=(self.n_shapes, 1),
                       dtype=np.float32),
            # 3 coordinates: dx, dy - the distance between the centers of the objects, and the
            #  third coordinate contains the absolute distance ( ==  norm((dx,dy)) - o1.radius - o2.radius)
            "relative_position_matrix":
            spaces.Box(low=-pymunk.inf,
                       high=pymunk.inf,
                       shape=(self.n_shapes, self.n_shapes, 3),
                       dtype=np.float32),
            # estimate; maybe a non-wall collision comes first
            "next_wall_to_hit_id":
            spaces.MultiDiscrete([4] * self.n_bouncers),
            "next_wall_to_hit_where":
            spaces.Box(low=-pymunk.inf,
                       high=pymunk.inf,
                       shape=(self.n_bouncers, 2),
                       dtype=np.float32),
            "next_wall_to_hit_when":
            spaces.Box(low=-pymunk.inf,
                       high=pymunk.inf,
                       shape=(self.n_bouncers, 1),
                       dtype=np.float32)
        })
        # 1's for every pair that just started to collide this round
        # "collision_matrix":         spaces.Discrete(2, shape=(self.n_bouncers + self.n_iwalls, self.n_bouncers + self.n_iwalls)),
        # "observation":              self.observation_space})
        self.state_space = state_space
        self.state = OrderedDict({
            'shape_body_ids': None,
            'body_types': None,
            'bouncer_sizes': None,
            'bouncer_weight': None,
            'positions': None,
            'velocities': None,
            'angles': None,
            'angular_velocities': None,
            'relative_position_matrix': None,
            'next_wall_to_hit_id': None,
            'next_wall_to_hit_where': None,
            'next_wall_to_hit_when': None
        })

        return self.state_space, self.state
Example #26
0
    def response_space(cls):
        return spaces.Dict({
            'reward':
            spaces.Box(low=-10.0, high=5.0, shape=tuple(), dtype=np.float32)
        })
Example #27
0
class DevStrat_4_6(BTgymBaseStrategy):
    """
    Objectives:
        external state data feature search:
            time_embedded three-channeled vector:
                - `Open` channel is one time-step difference of Open price;
                - `High` and `Low` channels are differences
                  between current Open price and current High or Low prices respectively

        internal state data feature search:
            time_embedded concatenated vector of broker and portfolio statistics

        reward shaping search:
           potential-based shaping functions


    Data:
        synthetic/real
    """
    time_dim = 30  # NOTE: changing this also requires changing the UNREAL policy's aux. pixel-control upsampling params
    params = dict(
        # Note fake `Width` dimension to use 2d conv etc.:
        state_shape=
            {
                'external': spaces.Box(low=-1, high=1, shape=(time_dim, 1, 3)),
                'internal': spaces.Box(low=-2, high=2, shape=(time_dim, 1, 5)),
                'metadata': spaces.Dict(
                    {
                        'trial_num': spaces.Box(
                            shape=(),
                            low=0,
                            high=10**10
                        ),
                        'sample_num': spaces.Box(
                            shape=(),
                            low=0,
                            high=10**10
                        ),
                        'first_row': spaces.Box(
                            shape=(),
                            low=0,
                            high=10**10
                        )
                    }
                )
            },
        drawdown_call=5,
        target_call=19,
        portfolio_actions=('hold', 'buy', 'sell', 'close'),
        skip_frame=10,
        metadata={}
    )

    def __init__(self, **kwargs):
        """

        Args:
            **kwargs:   see BTgymBaseStrategy args.
        """
        #super(DevStrat001, self)._set_params(self.params)

        super(DevStrat_4_6, self).__init__(**kwargs)

        self.log.debug('DEV_state_shape: {}'.format(self.p.state_shape))
        self.log.debug('DEV_skip_frame: {}'.format(self.p.skip_frame))
        self.log.debug('DEV_portfolio_actions: {}'.format(self.p.portfolio_actions))
        self.log.debug('DEV_drawdown_call: {}'.format(self.p.drawdown_call))
        self.log.debug('DEV_target_call: {}'.format(self.p.target_call))
        self.log.debug('DEV_dataset_stat:\n{}'.format(self.p.dataset_stat))
        self.log.debug('DEV_episode_stat:\n{}'.format(self.p.episode_stat))

        # Define data channels:
        self.channel_O = bt.Sum(self.data.open, - self.data.open(-1))
        self.channel_H = bt.Sum(self.data.high, - self.data.open)
        self.channel_L = bt.Sum(self.data.low,  - self.data.open)

        # Episodic metadata:
        self.state['metadata'] = {
            'trial_num': np.asarray(self.p.metadata['trial_num']),
            'sample_num': np.asarray(self.p.metadata['sample_num']),
            'first_row': np.asarray(self.p.metadata['first_row'])
        }

    def get_state(self):

        T = 2e3  # EURUSD
        # T = 1e2 # EURUSD, Z-norm
        # T = 1 # BTCUSD

        x = np.stack(
            [
                np.frombuffer(self.channel_O.get(size=self.dim_time)),
                np.frombuffer(self.channel_H.get(size=self.dim_time)),
                np.frombuffer(self.channel_L.get(size=self.dim_time)),
            ],
            axis=-1
        )
        # Log-scale: NOT used. Seems to hurt performance.
        # x = log_transform(x)

        # Amplify and squash in [-1,1], seems to be best option as of 4.10.17:
        # T param is supposed to keep most of the signal in 'linear' part of tanh while squashing spikes.
        x_market = tanh(x * T)

        # Update inner state statistic and compose state:
        self.update_sliding_stat()

        x_broker = np.concatenate(
            [
                np.asarray(self.sliding_stat['unrealized_pnl'])[..., None],
                # max_unrealized_pnl[..., None],
                # min_unrealized_pnl[..., None],
                np.asarray(self.sliding_stat['realized_pnl'])[..., None],
                np.asarray(self.sliding_stat['broker_value'])[..., None],
                np.asarray(self.sliding_stat['broker_cash'])[..., None],
                np.asarray(self.sliding_stat['exposure'])[..., None],
                # norm_episode_duration, gamma=5)[...,None],
                # norm_position_duration, gamma=2)[...,None],
            ],
            axis=-1
        )

        self.state['external'] = x_market[:, None, :]
        self.state['internal'] = x_broker[:, None, :]

        return self.state

    def get_reward(self):
        """
        Shapes reward function as normalized single trade realized profit/loss,
        augmented with potential-based reward shaping functions in form of:
        F(s, a, s`) = gamma * FI(s`) - FI(s);

        - potential FI_1 is current normalized unrealized profit/loss;
        - potential FI_2 is current normalized broker value.

        Paper:
            "Policy invariance under reward transformations:
             Theory and application to reward shaping" by A. Ng et al., 1999;
             http://www.robotics.stanford.edu/~ang/papers/shaping-icml99.pdf
        """

        # All sliding statistics for this step are already updated by get_state().
        #
        # TODO: window size for stats averaging? Currently time_dim - 1; would another value work better?
        # TODO: pass the actual gamma as a strategy param, or maybe compute the reward on the algo side?

        # Potential-based shaping function 1:
        # based on potential of averaged profit/loss for current opened trade (unrealized p/l):
        unrealised_pnl = np.asarray(self.sliding_stat['unrealized_pnl'])
        f1 = .99 * np.average(unrealised_pnl[1:]) - np.average(unrealised_pnl[:-1])

        # Potential-based shaping function 2:
        # based on potential of averaged broker value, normalized w.r.t. max drawdown and target bounds.
        norm_broker_value = np.asarray(self.sliding_stat['broker_value'])
        f2 = .99 * np.average(norm_broker_value[1:]) - np.average(norm_broker_value[:-1])

        # Main reward function: normalized realized profit/loss:
        realized_pnl = np.asarray(self.sliding_stat['realized_pnl'])[-1]

        # Weights are subject to tune:
        self.reward = 1.0 * f1 + 1.0 * f2 + 10.0 * realized_pnl
        # TODO: ------ignore-----:
        # 'Close-at-the-end' shaping term:
        # - 1.0 * self.exp_scale(avg_norm_episode_duration, gamma=6) * abs_max_norm_exposure
        # 'Do-not-expose-for-too-long' shaping term:
        # - 1.0 * self.exp_scale(avg_norm_position_duration, gamma=3)

        self.reward = np.clip(self.reward, -1, 1)

        return self.reward
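A standalone illustration of the potential-based shaping term used above, F(s, a, s') = gamma * FI(s') - FI(s), with the potential approximated by a sliding-window average as in get_reward(). The numbers below are made up purely for illustration:

import numpy as np

gamma = 0.99
unrealized_pnl = np.array([0.00, 0.01, 0.02, 0.015])  # hypothetical sliding window of the statistic
phi_next = np.average(unrealized_pnl[1:])    # FI(s')
phi_prev = np.average(unrealized_pnl[:-1])   # FI(s)
f1 = gamma * phi_next - phi_prev             # shaping term, as computed in get_reward()
print(f1)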
Example #28
0
    def _change_task(self, is_reset=False):
        """Change current task according to specified loop behaviour.

        Args:
            is_reset (bool, optional):  Whether we're changing task during a ``reset()``.
            When false, we need to handle mid-episode change. Defaults to False.
        """
        prev_task_idx = self._curr_task_idx
        if self._task_sampling_behavior == TI_TASK_SAMPLING.SEQUENTIAL.name:
            self._curr_task_idx = (self._curr_task_idx + 1) % len(self._tasks)
        elif self._task_sampling_behavior == TI_TASK_SAMPLING.RANDOM.name:
            # sample from other tasks with equal probability
            self._curr_task_idx = np.random.choice(
                [
                    i for i in range(len(self._tasks))
                    if i != self._curr_task_idx
                ],
                1,
            ).item()

        self._task = self._tasks[self._curr_task_idx]
        self._episode_iterator = self._episode_iterators[self._curr_task_idx]

        self.action_space = self._task.action_space
        # task observation space may change too
        self.observation_space = spaces.Dict({
            **self._sim.sensor_suite.observation_spaces.spaces,
            **self._task.sensor_suite.observation_spaces.spaces,
            "task_idx":
            spaces.Discrete(len(self._tasks)),
        })
        self._curr_task_label = self._task._config.get("TASK_LABEL",
                                                       self._curr_task_idx)

        # point to new dataset and episodes
        self._dataset = self._task._dataset
        self.number_of_episodes = len(self._dataset.episodes)
        self._episodes = self._dataset.episodes

        logger.info(
            "Current task changed from {} (id: {}) to {} (id: {}).".format(
                self._tasks[prev_task_idx].__class__.__name__,
                prev_task_idx,
                self._task.__class__.__name__,
                self._curr_task_idx,
            ))
        # handle mid-episode change of task
        if not is_reset:
            # keep current position and sim state only if scene does not change
            # if same exact task is passed, you'll get same episode back
            if (self.current_episode.scene_id ==
                    self._episode_iterator.episodes[0].scene_id):
                self._episode_iterator.episodes[
                    0].start_position = self.current_episode.start_position
                self._episode_iterator.episodes[
                    0].start_rotation = self.current_episode.start_rotation
                # self._episode_iterator._iterator = iter(self._episode_iterator.episodes)
                # self.reconfigure(self._config)

                # observations = self.task.reset(episode=self.current_episode)
                # update current episode
                self._current_episode = next(self._episode_iterator)
                # reset prev position
                self._task.measurements.reset_measures(
                    episode=self.current_episode, task=self.task)
            else:
                # scene is different, can't keep old position, end episode
                self._episode_over = True
Example #29
0
    def __init__(self,
                 display=False,
                 steps_to_roll=10,
                 episode_len=600,
                 pause_frac=0.66,
                 n_cells=1000):

        self.display = display
        if self.display:
            p.connect(p.GUI)
        else:
            p.connect(p.DIRECT)

        # set up the world, endEffector is the tip of the left finger
        self.baxterId, self.endEffectorId, self.objectId = setUpWorld()

        # change friction  of object
        p.changeDynamics(self.objectId, -1, lateralFriction=1)

        # save the state of the environment for future reset (save time on URDF loading)
        self.savestate = p.saveState()

        # get joint ranges
        self.lowerLimits, self.upperLimits, self.jointRanges, self.restPoses = getJointRanges(
            self.baxterId, includeFixed=False)
        # deal with display
        if self.display:
            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, 1)
            p.resetDebugVisualizerCamera(2., 180, 0., [0.52, 0.2, np.pi / 4.])
            p.getCameraImage(320, 200, renderer=p.ER_BULLET_HARDWARE_OPENGL)
            sleep(1.)

        # number of pybullet steps to roll at each gym step
        self.steps_to_roll = steps_to_roll

        # number of gym steps to roll before the end of the movement
        self.move_len = int(episode_len * pause_frac)

        # number of pybullet steps to roll after the movement keeping the same action
        self.pause_steps = (episode_len - self.move_len) * steps_to_roll

        # counts the number of gym steps
        self.step_counter = 0

        # variable to store grasping orientation
        self.diff_or_at_grab = None

        # create the discretisation for the goal spaces (quaternion spaces)
        bounds = [[-1, 1], [-1, 1], [-1, 1], [-1, 1]]
        self.cvt = utils.CVT(n_cells, bounds)

        self.grasped = False

        # observation and action space
        self.observation_space = spaces.Dict({
            'observation':
            spaces.Box(-1, 1, shape=(34, )),
            'desired_goal':
            spaces.Discrete(n_cells),
            'achieved_goal':
            spaces.Discrete(n_cells)
        })
        self.action_space = spaces.Box(-1, 1, shape=(8, ))
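utils.CVT is not shown above, so the following is only a hedged sketch of the underlying idea: map a continuous goal (here a grasp-orientation quaternion) to the index of the nearest of n_cells pre-sampled centroids, which is what makes the Discrete(n_cells) goal spaces workable.

import numpy as np

rng = np.random.default_rng(0)
centroids = rng.uniform(-1, 1, size=(1000, 4))  # stand-in for CVT cell centres

def goal_to_cell(quaternion, centroids):
    # index of the nearest centroid = discrete achieved/desired goal id
    return int(np.argmin(np.linalg.norm(centroids - quaternion, axis=1)))

print(goal_to_cell(np.array([0.0, 0.0, 0.0, 1.0]), centroids))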
Example #30
0
    def observation_space(cls):
        return spaces.Dict({'user_id': spaces.Discrete(utils.NUM_USERS)})