Example #1
from typing import Tuple

import gym
import numpy as np
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel

UNITY_ENV_EXE_FILE = "FooCar"  # assumed: path to the built Unity player for this project

class FooCarEnv(gym.Env):

	# Path-space modes. Only the distinction between 'xyz' (3-D) and the planar
	# modes matters below; all planar modes share the same observation layout.
	PathSpace = {
		'xyz': 0,
		'xy': 2,
		'yz': 2,
		'xz': 2
	}

	def __init__(self, no_graphics: bool = False, seed: int = 1, **config):
		self._config = config
		# Each instance gets its own side channel so parallel envs don't share state.
		self._channel = EnvironmentParametersChannel()
		worker_id = config.get('worker_id', 0)
		self._unity_env = UnityEnvironment(
			file_name=UNITY_ENV_EXE_FILE,
			# file_name=None, # Unity Editor Mode (debug)
			no_graphics=no_graphics,
			seed=seed, 
			side_channels=[self._channel],
			worker_id=worker_id
		)
		for key, value in config.items():
			self._channel.set_float_parameter(key, float(value))
		
		self._gym_env = UnityToGymWrapper(self._unity_env)

	def step(self, action):
		obs, reward, done, info = self._gym_env.step(action)
		size = self.observation_size

		return obs[:size], reward, done, info

	def reset(self):
		obs = self._gym_env.reset()
		size = self.observation_size
		return obs[:size]

	def render(self, mode="rgb_array"):
		return self._gym_env.render(mode=mode)
	
	def seed(self, seed=None):
		self._gym_env.seed(seed=seed)  # the wrapper cannot reseed a running Unity env; it only logs a warning

	def close(self):
		self._gym_env.close()

	@property
	def metadata(self):
		return self._gym_env.metadata

	@property
	def reward_range(self) -> Tuple[float, float]:
		return self._gym_env.reward_range

	@property
	def action_space(self):
		return self._gym_env.action_space

	@property
	def observation_space(self):
		config = self._config
		space = self.PathSpace

		path_space = config.get('path_space', space['xz'])
		r = config.get('radius_anchor_circle', 8.0)
		r_e = config.get('radius_epsilon_ratio', 0.7)
		h = config.get('max_anchor_height', 1.0)

		xyz_mode = (path_space == space['xyz'])
		bound = max(r * (1 + r_e), h if xyz_mode else 0)
		shape = (self.observation_size,)
		
		return gym.spaces.Box(-bound, +bound, dtype=np.float32, shape=shape)
		
	@property
	def observation_size(self):
		# Mirrors the read-only Unity property FooCar/CarAgent.ObservationSize
		config = self._config
		space = self.PathSpace

		path_space = config.get('path_space', space['xz'])
		ticker_end = config.get('ticker_end', 5)
		ticker_start = config.get('ticker_start', -3)

		xyz_mode = (path_space == space['xyz'])
		basic_num = 6
		point_dim = 3 if xyz_mode else 2

		return basic_num + 2 * point_dim * (ticker_end - ticker_start + 1)
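
A minimal usage sketch for the wrapper above (assumes the imports and UNITY_ENV_EXE_FILE defined at the top; the random policy is illustrative, not part of the original example):

if __name__ == '__main__':
	# Smoke test: run one episode with random actions.
	env = FooCarEnv(no_graphics=True, worker_id=0)
	obs = env.reset()
	done = False
	total_reward = 0.0
	while not done:
		obs, reward, done, info = env.step(env.action_space.sample())
		total_reward += reward
	print('episode reward:', total_reward)
	env.close()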
Example #2
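This snippet is a fragment of a DDPG-style training script. It assumes roughly the imports below (a sketch; `DDPGMLPActor`, `MLPQsaNet`, and `train_tools` are project-specific modules, and `args` comes from an argparse parser):

# Assumed context for this fragment; project-specific names are not part of ml-agents.
import numpy as np
import torch
import torch.nn as nn
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
from gym_unity.envs import UnityToGymWrapper
# from <project> import DDPGMLPActor, MLPQsaNet, train_tools  # project-specific
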
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    engine_configuration_channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(side_channels=[engine_configuration_channel],
                                 file_name=args.env)
    engine_configuration_channel.set_configuration_parameters(
        width=200,
        height=200,
        quality_level=5,
        time_scale=1 if args.show else 20,
        target_frame_rate=-1,
        capture_frame_rate=60)

    env = UnityToGymWrapper(unity_env=unity_env)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    train_tools.EVAL_SEED = args.seed

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    act_bound = env.action_space.high[0]

    # create nets
    actor_net = DDPGMLPActor(obs_dim=obs_dim,
                             act_dim=act_dim,
                             act_bound=act_bound,
                             hidden_size=[400, 300],
                             hidden_activation=nn.ReLU)

    critic_net1 = MLPQsaNet(obs_dim=obs_dim,