def __init__(self, cash_flow_flag=0, dg_random_seed=1, num_sim=500002,
             sabr_flag=False, continuous_action_flag=False, ticksize=0.1,
             multi=1, init_ttm=5, trade_freq=1, num_contract=1, sim_flag=True,
             mu=0.05, vol=0.2, S=100, K=100, r=0, q=0, beta=1, rho=-0.4,
             volvol=0.6, ds=0.001, k=0):
    """
    cash_flow_flag: 1 if the reward is defined using cash flow, 0 if profit and loss;
    dg_random_seed: random seed for simulation;
    num_sim: number of paths to simulate;
    sabr_flag: whether to use the SABR model;
    continuous_action_flag: continuous or discrete action space;
    init_ttm: initial time to maturity, in days;
    trade_freq: trading frequency, in days;
    num_contract: number of call options to short;
    sim_flag: use simulated data (True) or market data (False);

    The trading cost is assumed to include spread cost and price impact cost:
        cost(n) = multi * ticksize * (|n| + 0.01 * n^2)
    The reward is (change in value) - k/2 * (change in value)^2,
    where k is the risk attitude; k = 0 if risk neutral.
    """
    # observation
    if sim_flag:
        # generate data now, passing the constructor arguments through
        # (the original hardcoded mu=0.05, vol=0.2, etc., silently ignoring the parameters)
        if sabr_flag:
            self.path, self.option_price_path, self.delta_path, self.bartlett_delta_path = get_sim_path_sabr(
                M=init_ttm, freq=trade_freq, np_seed=dg_random_seed, num_sim=num_sim,
                mu=mu, vol=vol, S=S, K=K, r=r, q=q, beta=beta, rho=rho, volvol=volvol, ds=ds)
        else:
            self.path, self.option_price_path, self.delta_path = get_sim_path(
                M=init_ttm, freq=trade_freq, np_seed=dg_random_seed, num_sim=num_sim,
                mu=mu, vol=vol, S=S, K=K, r=r, q=q)
    else:
        # use actual data ---> to be continued
        return

    # other attributes
    self.num_path = self.path.shape[0]

    # set num_period: initial time to maturity * daily trading freq + 1
    # (see get_sim_path() in simulation.py): (s0, s1, ..., sT) --> T + 1
    self.num_period = self.path.shape[1]
    # print("***", self.num_period)

    # time to maturity array
    self.ttm_array = np.arange(init_ttm, -trade_freq, -trade_freq)
    # print(self.ttm_array)

    # cost part
    self.ticksize = ticksize
    self.multi = multi  # change to correlate with the time of day -- 6 trading hours

    # risk attitude
    self.k = k

    # step function initialization depending on cash_flow_flag
    if cash_flow_flag == 1:
        self.step = self.step_cash_flow  # see step_cash_flow() definition below
    else:
        self.step = self.step_profit_loss

    self.num_contract = num_contract
    self.strike_price = K  # was hardcoded to 100; use the K parameter

    # track the index of the simulated path in use
    self.sim_episode = -1

    # track the time step within an episode
    self.t = None

    # action space for holding:
    # with L contracts, each for 100 shares, one would not want to trade more than 100*L shares
    if continuous_action_flag:
        self.action_space = spaces.Box(low=np.array([0]), high=np.array([num_contract * 100]),
                                       dtype=np.float32)
    else:
        self.num_action = num_contract * 100 + 1
        self.action_space = spaces.Discrete(self.num_action)  # integers 0 to self.num_action - 1

    # state elements, assumed to be 3: current price, current holding, ttm
    self.num_state = 3
    self.state = []

    # seed and start
    self.seed()  # call this function when initializing
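# The cost and reward formulas from the docstring above, written out so they
# can be checked numerically. A minimal sketch assuming the same multi,
# ticksize, and k conventions; these helper names are illustrative, not part
# of the environment.
def trading_cost(n, multi=1.0, ticksize=0.1):
    # cost(n) = multi * ticksize * (|n| + 0.01 * n^2): spread plus price impact
    return multi * ticksize * (abs(n) + 0.01 * n ** 2)

def quadratic_utility_reward(dv, k=0.0):
    # reward = dv - k/2 * dv^2, where dv is the change in portfolio value
    return dv - 0.5 * k * dv ** 2

# The impact term makes trading 50 shares cost more than 10x trading 5 shares:
# trading_cost(5) == 0.525, trading_cost(50) == 7.5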
def action_space(self):
    return spaces.Discrete(5)
def __init__(self, env, lower_level_agent, timesteps_to_give_up_control_for, num_skills):
    Wrapper.__init__(self, env)
    self.action_space = spaces.Discrete(num_skills)
    self.lower_level_agent = lower_level_agent
    self.timesteps_to_give_up_control_for = timesteps_to_give_up_control_for
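# A minimal sketch of how a step() for this hierarchical wrapper might
# delegate control: the meta-action picks a skill, then the lower-level agent
# acts for a fixed number of timesteps. The act() method and the cached
# last_obs attribute are assumptions for illustration, not the wrapper's
# actual interface.
def step(self, skill):
    total_reward, done, info = 0.0, False, {}
    obs = self.last_obs  # assumes the wrapper caches the latest observation
    for _ in range(self.timesteps_to_give_up_control_for):
        action = self.lower_level_agent.act(obs, skill)
        obs, reward, done, info = self.env.step(action)
        total_reward += reward
        if done:
            break
    self.last_obs = obs
    return obs, total_reward, done, info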
def __init__(self):
    # import image of the city and convert values to height
    # Original image:
    # self.city_array = np.array((Image.open(r"../data/images/RasterAstanaCropped.png")), dtype=np.uint16)

    # crop the image with center at camera location
    # self.camera_location = (3073, 11684, 350)  # x,y,z coordinate  # (11685, 7074, 350) - RasterAstana.png
    # self.coverage_radius = 2000  # .. km square from the center
    # self.city_array = self.city_array[self.camera_location[1]-self.coverage_radius:self.camera_location[1]+self.coverage_radius,
    #                                   self.camera_location[0]-self.coverage_radius:self.camera_location[0]+self.coverage_radius]

    # Resized 1000 -> 250
    self.city_array = np.array((Image.open(r"../data/images/RasterAstanaCropped250x250.png")), dtype=np.uint16)
    self.city_array = self.city_array / 100 - 285  # convert to meters

    # crop the image with center at camera location
    self.camera_location = (126, 126, 350)  # x,y,z coordinate  # (11685, 7074, 350) - RasterAstana.png
    self.coverage_radius = 125  # radius from the center, in pixels

    # Get image params; shape is (height, width)
    self.im_height, self.im_width = self.city_array.shape

    # CAMERA params
    self.camera_number = 1
    self.camera_location_cropped = (int(self.coverage_radius), int(self.coverage_radius),
                                    (self.camera_location[2] - 313) / 4)  # 313 or 285
    self.observer_height = self.camera_location_cropped[2] + 2
    self.max_distance_min_zoom = 100 / 4   # at min zoom (20mm) the max distance is 50
    self.max_distance_max_zoom = 4000 / 4  # at max zoom (800mm) the max distance is 2000
    self.scale = 2
    self.horizon_fov_min = 0.5 * self.scale    # horizontal Field of View at max zoom, deg
    self.horizon_fov_max = 21 * self.scale     # horizontal Field of View at min zoom, deg
    self.vertical_fov_min = 0.3 * self.scale   # vertical Field of View at max zoom, deg
    self.vertical_fov_max = 11.8 * self.scale  # vertical Field of View at min zoom, deg

    # PTZ initialize
    self.pan_pos = 0     # 360*np.random.rand()
    self.tilt_pos = -45  # -25*np.random.rand()
    self.zoom_pos = 20   # 0 - 20mm (min), 1 - 800mm (max)
    print('ptz init: ', self.pan_pos, self.tilt_pos, self.zoom_pos)
    self.delta_pan = 5      # deg
    self.delta_tilt = 2     # deg
    self.delta_zoom = 1.25  # 1.25x times
    self.horizon_fov = self.horizon_fov_max    # Field of View, deg
    self.vertical_fov = self.vertical_fov_max  # Field of View, deg
    self.zoom_distance = self.max_distance_min_zoom

    # GYM env params
    self.observation_space = spaces.Box(low=0, high=255, shape=(self.im_width, self.im_height, 1),
                                        dtype=np.uint8)
    self.action_space = spaces.Discrete(6)  # 6 different actions
    self.action = 0

    # render
    self.max_render = 100
    self.is_render = True  # was the string 'True', which is always truthy
    self.iteration = 0
    self.info = 0

    # reward
    self.ratio_threshhold = 0.02
    self.reward_good_step = 1
    self.reward_bad_step = -0.05
    self.max_iter = 300
    self.reward_temp = 0
    self.reward_prev = 0.0

    # coverage
    # self.city_coverage = np.asarray(Image.open(r"../data/images/RasterTotalCoverage.png"))
    self.city_coverage = np.asarray(Image.open(r"../data/images/RasterAstanaCropped250x250CoverageBinary.png"))
    self.rad_matrix, self.angle_matrix = self.create_cartesian()

    # inputs
    self.state_visible_points = np.zeros((self.im_height, self.im_width))
    self.state_total_coverage = self.city_coverage  # np.zeros((self.im_height, self.im_width))
    self.state_gray_coverage = np.zeros((self.im_height, self.im_width))
def __init__(self, config, home_team, away_team, opp_actor=None):
    self.__version__ = "0.0.1"
    self.config = config
    self.config.competition_mode = False
    self.config.fast_mode = True
    self.config.time_limits = None
    self.game = None
    self.team_id = None
    self.ruleset = get_rule_set(config.ruleset, all_rules=False)
    self.home_team = home_team
    self.away_team = away_team
    self.actor = Agent("Gym Learner", human=True)
    self.opp_actor = opp_actor if opp_actor is not None else RandomBot("Random")
    self.seed()
    self.root = None
    self.cv = None
    self.last_obs = None
    self.layers = [
        OccupiedLayer(), OwnPlayerLayer(), OppPlayerLayer(),
        OwnTackleZoneLayer(), OppTackleZoneLayer(), UpLayer(), UsedLayer(),
        AvailablePlayerLayer(), AvailablePositionLayer(),
        RollProbabilityLayer(), BlockDiceLayer(), ActivePlayerLayer(),
        TargetPlayerLayer(), MALayer(), STLayer(), AGLayer(), AVLayer(),
        MovemenLeftLayer(), BallLayer(), OwnHalfLayer(), OwnTouchdownLayer(),
        OppTouchdownLayer(), SkillLayer(Skill.BLOCK), SkillLayer(Skill.DODGE),
        SkillLayer(Skill.SURE_HANDS), SkillLayer(Skill.PASS)
    ]
    arena = get_arena(self.config.arena)
    self.observation_space = spaces.Dict({
        'board': spaces.Box(low=0, high=1, shape=(arena.height, arena.width, len(self.layers))),
        'state': spaces.Box(low=0, high=1, shape=(50,)),
        'procedure': spaces.Box(low=0, high=1, shape=(len(FFAIEnv.procedures),)),
    })
    self.actions = FFAIEnv.actions
    self.action_space = spaces.Dict({
        'action-type': spaces.Discrete(len(self.actions)),
        'x': spaces.Discrete(arena.width),
        'y': spaces.Discrete(arena.height)
    })
def __init__(self, world, scenario, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None,
             shared_viewer=True, discrete_action_space=False, discrete_action_input=False):
    self.world = world
    self.scenario = scenario
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    # if true, the action space is discrete; otherwise it is continuous
    self.discrete_action_space = discrete_action_space
    # if true, the action is a number 0...N; otherwise it is a one-hot N-dimensional vector
    self.discrete_action_input = discrete_action_input
    # if true, even continuous actions are performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
    # if true, every agent gets the same (shared) reward
    self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
    self.time = 0

    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(world.dim_p,), dtype=np.float32)
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # if all action spaces are discrete, simplify to a MultiDiscrete action space
            if all(isinstance(act_space, spaces.Discrete) for act_space in total_action_space):
                act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
        agent.action.c = np.zeros(self.world.dim_c)

    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n
    self._reset_render()
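# Illustrative check of the per-agent space construction above, assuming a
# movable, non-silent agent with world.dim_p = 2, world.dim_c = 3, and
# discrete_action_space=True (values chosen for illustration only):
from gym import spaces
u_action_example = spaces.Discrete(2 * 2 + 1)  # no-op plus +/- per physical dim -> Discrete(5)
c_action_example = spaces.Discrete(3)          # one symbol per communication channel
# Both sub-spaces are Discrete, so the env collapses them into a single
# MultiDiscrete with ranges [[0, 4], [0, 2]] (its MultiDiscrete wrapper takes
# [min, max] pairs rather than sizes).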
def __init__(self, env):
    super(NoisyTVEnvWrapperMario, self).__init__(env)
    self.action_space = spaces.Discrete(15)
    self.noisy_action = 14
def __init__(self):
    low = np.array([0, 0])
    high = np.array([11, 11])
    self.observation_space = spaces.Box(low, high, dtype=np.int64)  # np.int is deprecated in recent NumPy
    self.action_space = spaces.Discrete(4)
def __init__(self, keep_mode=True, mode=NORMAL, verbose=False):
    """
    mode: the game mode: NORMAL, TRAIN_SHOOTING, TRAIN_DEFENSE
    keep_mode: whether the puck gets caught by a player;
        this can be changed later using the reset function
    """
    EzPickle.__init__(self)
    self.seed()
    self.viewer = None
    self.mode = mode
    self.keep_mode = keep_mode
    self.player1_has_puck = 0
    self.player2_has_puck = 0

    self.world = Box2D.b2World([0, 0])
    self.player1 = None
    self.player2 = None
    self.puck = None
    self.goal_player_1 = None
    self.goal_player_2 = None
    self.world_objects = []
    self.drawlist = []
    self.done = False
    self.winner = 0
    self.one_starts = True  # player one starts the game (alternating)

    self.timeStep = 1.0 / FPS
    self.time = 0
    self.max_timesteps = None  # see reset

    self.closest_to_goal_dist = 1000

    # Observation components:
    #  0  x pos player one
    #  1  y pos player one
    #  2  angle player one
    #  3  x vel player one
    #  4  y vel player one
    #  5  angular vel player one
    #  6  x pos player two
    #  7  y pos player two
    #  8  angle player two
    #  9  x vel player two  (the original comment repeated "y vel" here)
    # 10  y vel player two
    # 11  angular vel player two
    # 12  x pos puck
    # 13  y pos puck
    # 14  x vel puck
    # 15  y vel puck
    # Keep Puck Mode:
    # 16  time left player has puck
    # 17  time left other player has puck
    self.observation_space = spaces.Box(-np.inf, np.inf, shape=(18,), dtype=np.float32)

    # linear force in (x,y)-direction and torque
    self.num_actions = 3 if not self.keep_mode else 4
    self.action_space = spaces.Box(-1, +1, (self.num_actions * 2,), dtype=np.float32)

    # see discrete_to_continous_action()
    self.discrete_action_space = spaces.Discrete(7)

    self.verbose = verbose
    self.reset(self.one_starts)
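# A minimal sketch of the kind of mapping discrete_to_continous_action()
# could implement for the Discrete(7) space above: action 0 is a no-op, 1/2
# apply +/- force in x, 3/4 apply +/- force in y, 5/6 apply +/- torque. This
# is an illustrative guess at the convention, not the environment's actual
# table.
import numpy as np

def discrete_to_continuous_sketch(idx, num_actions=4):
    action = np.zeros(num_actions, dtype=np.float32)
    if idx in (1, 2):
        action[0] = 1.0 if idx == 1 else -1.0  # x force
    elif idx in (3, 4):
        action[1] = 1.0 if idx == 3 else -1.0  # y force
    elif idx in (5, 6):
        action[2] = 1.0 if idx == 5 else -1.0  # torque
    return action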
def _set_action_space(self):
    self.action_space = spaces.Tuple(
        tuple([spaces.Discrete(6)] +
              [spaces.Discrete(self._radio_vocab_size)] * self._radio_num_words))
def _set_config(
        self,
        game_color=1,          # state (frame) color option: 0 = RGB, 1 = grayscale, 2 = green only
        indicators=True,       # whether to show the bottom info panel
        frames_per_state=4,    # stacked (rolling history) frames per state [1-inf]; latest observation always first
        skip_frames=3,         # number of consecutive frames to skip between state saves [0-4]
        discre=ACT,            # action discretization table, format [[steer0, throttle0, brake0], [steer1, ...], ...]; None for continuous
        use_track=3,           # number of times to reuse the same track [1-100]; more than 20 carries a high risk of overfitting!
        episodes_per_track=5,  # number of evenly distributed starting points per track [1-20]; each reset() starts at the next point
        tr_complexity=12,      # generated track geometric complexity [6-20]
        tr_width=45,           # relative track width [30-50]
        patience=2.0,          # max time in seconds without progress [0.5-20]
        off_track=1.0,         # max time in seconds driving on grass [0.0-5]
        f_reward=CONT_REWARD,  # reward function coefficients; refer to the docs for details
        num_obstacles=5,       # obstacle objects placed on track [0-10]
        end_on_contact=False,  # stop episode on contact with an obstacle; not recommended early in training
        obst_location=0,       # array pre-setting obstacle locations, in % of track; negative = track's left-hand side; 0 for random
        oily_patch=False,      # make all obstacles low-friction road (oily patches)
        verbose=2):            # verbosity

    self.verbose = verbose

    # obstacle parameters
    self.num_obstacles = np.clip(num_obstacles, 0, 10)
    self.end_on_contact = end_on_contact
    self.oily_patch = oily_patch
    if obst_location != 0 and len(obst_location) < num_obstacles:
        print("#####################################")
        print("Warning: incomplete obstacle location")
        print("Defaulting to random placement")
        self.obst_location = 0  # None
    else:
        self.obst_location = np.array(obst_location)

    # reward coefficient verification
    if len(f_reward) < len(CONT_REWARD):
        print("####################################")
        print("Warning: incomplete reward function")
        print("Defaulting to predefined function!!!")
        self.f_reward = CONT_REWARD
    else:
        self.f_reward = f_reward

    # times to reuse the same track, up to 100; more than 20 carries a high risk of overfitting!
    self.repeat_track = np.clip(use_track, 1, 100)
    self.track_use = +np.inf

    # number of episodes on the same track, with evenly distributed starting points (max 20)
    self.episodes_per_track = np.clip(episodes_per_track, 1, 20)

    # track generation complexity
    self.complexity = np.clip(tr_complexity, 6, 20)

    # track width
    self.tr_width = np.clip(tr_width, 30, 50) / SCALE

    # max time without progress
    self.patience = np.clip(patience, 0.5, 20)

    # max time off-track
    self.off_track = np.clip(off_track, 0, 5)

    # whether to show the bottom info panel
    self.indicators = indicators

    # grayscale and acceptable frames
    self.grayscale = game_color
    if not self.grayscale:
        if frames_per_state > 1:
            print("####################################")
            print("Warning: making frames_per_state = 1")
            print("No support for several frames in RGB")
            frames_per_state = 1
            skip_frames = 0

    # frames to be skipped between state saves (max 4)
    self.skip_frames = np.clip(skip_frames + 1, 1, 5)

    # frames per state
    self.frames_per_state = frames_per_state if frames_per_state > 0 else 1
    if self.frames_per_state > 1:
        lst = list(range(0, self.frames_per_state * self.skip_frames, self.skip_frames))
        self._update_index = [lst[-1]] + lst[:-1]

    # gym spaces: observation and action
    self.discre = discre
    if discre is None:
        self.action_space = spaces.Box(
            np.array([-0.4, 0, 0]), np.array([+0.4, +1, +1]),
            dtype=np.float32)  # steer, gas, brake
    else:
        self.action_space = spaces.Discrete(len(discre))

    if game_color:  # grayscale (or green only): stack frames_per_state channels
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(STATE_H, STATE_W, self.frames_per_state),
                                            dtype=np.uint8)
    else:           # RGB: a single 3-channel frame
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)
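# A minimal illustration of the discretization-table format expected by
# `discre` -- each row is [steer, throttle, brake]. These five rows are an
# assumption for illustration, not the module's actual ACT constant.
ACT_EXAMPLE = [
    [0.0, 0.0, 0.0],   # coast
    [0.0, 0.8, 0.0],   # accelerate
    [0.0, 0.0, 0.6],   # brake
    [-0.4, 0.0, 0.0],  # steer left
    [0.4, 0.0, 0.0],   # steer right
]
# With this table the env would expose spaces.Discrete(5).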
def __init__(self, num_vehicles=50, task_num=30):
    self.num_vehicles = num_vehicles
    self.task_num_per_episode = task_num
    self.vehicle_count = 0
    self.maxR = 500  # m, max relative distance between request vehicle and other vehicles
    self.maxV = 30   # km/h, max relative velocity between request vehicle and other vehicles
    self.max_v = 50  # maximum vehicles in the communication range of the request vehicle
    self.max_local_task = 10
    self.bandwidth = 6  # MHz
    self.snr_ref = 1    # reference SNR, used to compute the rate as B*log2(1 + snr_ref * d^-alpha)
    self.snr_alpha = 2
    self.vehicle_F = range(5, 11)  # GHz
    self.data_size = [0.05, 0.1]   # MBytes
    self.comp_size = [0.2, 0.4]    # GHz
    self.tau = [0.5, 1, 2, 4]      # s
    self.max_datasize = max(self.data_size)
    self.max_compsize = max(self.comp_size)
    self.max_tau = max(self.tau)
    self.priority = [0.5, 1]
    self.ref_price = 0.1
    self.price = 0.1
    self.price_level = 10
    self.service_threshold = 0.1
    self.local_priority = 0.01
    self.distance_factor = 1
    self.high_priority_factor = -np.log(1 + self.max_tau)
    self.low_priority_factor = np.log(1 + np.min(self.tau))

    self.action_space = spaces.Discrete(self.num_vehicles * self.price_level)
    self.observation_space = spaces.Dict({
        "snr": spaces.Box(0, self.snr_ref, shape=(self.max_v,), dtype='float32'),
        # "time_remain": spaces.Box(0, 100, shape=(self.max_v,), dtype='float32'),
        "freq_remain": spaces.Box(0, 6, shape=(self.max_v,), dtype='float32'),
        "serv_prob": spaces.Box(0, 1, shape=(self.max_v,), dtype='float32'),
        "u_max": spaces.Box(0, self.max_local_task * self.max_tau, shape=(self.max_v,), dtype='float32'),
        "task": spaces.Box(0, max(self.max_datasize, self.max_compsize, self.max_tau, max(self.priority)),
                           shape=(4,), dtype='float32')
    })
    self.seed()

    self.reward_threshold = 0.0
    self.trials = 100
    self.max_episode_steps = 100
    self._max_episode_steps = 100
    self.id = "VEC"
    self.high_count = [0, 0, 0, 0]
    self.high_delay = [0, 0, 0, 0]
    self.low_count = [0, 0, 0, 0]
    self.low_delay = [0, 0, 0, 0]
    self.count_file = "sac.txt"
    self.utility = 0

    self.vehicles = []  # vehicles in the range
    self.tasks = []     # tasks for offloading
    self.init_vehicles()
    # self.generate_offload_tasks()
    self.generate_local_tasks()
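# The flat Discrete(num_vehicles * price_level) action encodes a (vehicle,
# price) pair, and the snr_ref comment above gives the achievable-rate
# formula. A small sketch of both, assuming row-major encoding (an
# assumption -- the env's own decoding may differ):
import numpy as np

def decode_action(a, price_level=10):
    vehicle_idx, price_idx = divmod(a, price_level)
    return vehicle_idx, price_idx

def rate_mbps(d, bandwidth=6.0, snr_ref=1.0, alpha=2.0):
    # B * log2(1 + snr_ref * d^-alpha), B in MHz, d in meters
    return bandwidth * np.log2(1.0 + snr_ref * d ** (-alpha))

# decode_action(137) == (13, 7): offload to vehicle 13 at price level 7
# rate_mbps(2.0) is about 1.932 Mbps at distance 2 m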
def __init__(
    self,
    environment_name: str,
    num_spawn_envs: int = 1,
    worker_id: int = 0,
    marathon_envs_path: str = None,
    no_graphics: bool = False,
    use_editor: bool = False,
    inference: bool = False,
):
    """
    Environment initialization
    :param environment_name: The Marathon Environment
    :param num_spawn_envs: The number of environments to spawn per instance
    :param worker_id: Worker number for environment.
    :param marathon_envs_path: alternative path for environment
    :param no_graphics: Whether to run the Unity simulator in no-graphics mode
    :param use_editor: If True, assume the Unity Editor is the environment (use for debugging)
    :param inference: If True, run in inference mode (normal framerate)
    """
    use_visual: bool = False
    uint8_visual: bool = False
    multiagent: bool = True  # force multiagent
    flatten_branched: bool = False
    allow_multiple_visual_obs: bool = False
    base_port = 5005

    # use if we want to work with the Unity Editor
    if use_editor:
        base_port = DEFAULT_EDITOR_PORT
        marathon_envs_path = None
    elif marathon_envs_path is None:
        marathon_envs_path = 'Tennis'
        if platform == "win32":
            marathon_envs_path = os.path.join('Tennis_Windows_x86_64', 'Tennis.exe')

    self._env = UnityEnvironment(marathon_envs_path, worker_id=worker_id,
                                 base_port=base_port, no_graphics=no_graphics)

    # Take a single step so that the brain information will be sent over
    if not self._env.get_agent_groups():
        self._env.step()

    self.visual_obs = None
    self._n_agents = -1
    self.agent_mapper = AgentIdIndexMapper()

    # Save the step result from the last time all Agents requested decisions.
    self._previous_step_result: BrainInfo = None
    self._multiagent = multiagent
    self._flattener = None
    # Hidden flag used by Atari environments to determine if the game is over
    self.game_over = False
    self._allow_multiple_visual_obs = allow_multiple_visual_obs

    # Check brain configuration
    if len(self._env.get_agent_groups()) != 1:
        raise MarathonEnvsException(
            "There can only be one brain in a UnityEnvironment "
            "if it is wrapped in a gym.")
    self.brain_name = self._env.get_agent_groups()[0]
    self.name = self.brain_name
    self.group_spec = self._env.get_agent_group_spec(self.brain_name)

    if use_visual and self._get_n_vis_obs() == 0:
        raise MarathonEnvsException(
            "`use_visual` was set to True, however there are no"
            " visual observations as part of this environment.")
    self.use_visual = self._get_n_vis_obs() >= 1 and use_visual

    if not use_visual and uint8_visual:
        logger.warning(
            "`uint8_visual` was set to true, but visual observations are not in use. "
            "This setting will not have any effect.")
        self.uint8_visual = False  # the original left the attribute unset on this branch
    else:
        self.uint8_visual = uint8_visual

    if self._get_n_vis_obs() > 1 and not self._allow_multiple_visual_obs:
        logger.warning(
            "The environment contains more than one visual observation. "
            "You must define allow_multiple_visual_obs=True to receive them all. "
            "Otherwise, please note that only the first will be provided in the observation.")

    # Check for number of agents in scene.
    self._env.reset()
    step_result = self._env.get_step_result(self.brain_name)
    self._check_agents(step_result.n_agents())
    self._previous_step_result = step_result
    self.agent_mapper.set_initial_agents(list(self._previous_step_result.agent_id))

    # Set observation and action spaces
    if self.group_spec.is_action_discrete():
        branches = self.group_spec.discrete_action_branches
        if self.group_spec.action_shape == 1:
            self._action_space = spaces.Discrete(branches[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(branches)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(branches)
    else:
        if flatten_branched:
            logger.warning(
                "The environment has a non-discrete action space. It will "
                "not be flattened.")
        high = np.array([1] * self.group_spec.action_shape)
        self._action_space = spaces.Box(-high, high, dtype=np.float32)

    high = np.array([np.inf] * self._get_vec_obs_size())
    if self.use_visual:
        shape = self._get_vis_obs_shape()
        if uint8_visual:
            self._observation_space = spaces.Box(0, 255, dtype=np.uint8, shape=shape)
        else:
            self._observation_space = spaces.Box(0, 1, dtype=np.float32, shape=shape)
    else:
        self._observation_space = spaces.Box(-high, high, dtype=np.float32)
def __init__(self): """ This Task Env is designed for having the TurtleBot2 in some kind of maze. It will learn how to move around the maze without crashing. """ # Only variable needed to be set here number_actions = rospy.get_param('/turtlebot2/n_actions') self.action_space = spaces.Discrete(number_actions) # We set the reward range, which is not compulsory but here we do it. self.reward_range = (-numpy.inf, numpy.inf) #number_observations = rospy.get_param('/turtlebot2/n_observations') """ We set the Observation space for the 6 observations cube_observations = [ round(current_disk_roll_vel, 0), round(y_distance, 1), round(roll, 1), round(pitch, 1), round(y_linear_speed,1), round(yaw, 1), ] """ # Actions and Observations self.dec_obs = rospy.get_param( "/turtlebot2/number_decimals_precision_obs", 1) self.linear_forward_speed = rospy.get_param( '/turtlebot2/linear_forward_speed') self.linear_turn_speed = rospy.get_param( '/turtlebot2/linear_turn_speed') self.angular_speed = rospy.get_param('/turtlebot2/angular_speed') self.init_linear_forward_speed = rospy.get_param( '/turtlebot2/init_linear_forward_speed') self.init_linear_turn_speed = rospy.get_param( '/turtlebot2/init_linear_turn_speed') self.n_observations = rospy.get_param('/turtlebot2/n_observations') self.min_range = rospy.get_param('/turtlebot2/min_range') self.max_laser_value = rospy.get_param('/turtlebot2/max_laser_value') self.min_laser_value = rospy.get_param('/turtlebot2/min_laser_value') # Here we will add any init functions prior to starting the MyRobotEnv super(TurtleBot2MazeEnv, self).__init__() # We create two arrays based on the binary values that will be assigned # In the discretization method. #laser_scan = self._check_laser_scan_ready() laser_scan = self.get_laser_scan() rospy.logdebug("laser_scan len===>" + str(len(laser_scan.ranges))) # Laser data self.laser_scan_frame = laser_scan.header.frame_id # Number of laser reading jumped self.new_ranges = int( math.ceil( float(len(laser_scan.ranges)) / float(self.n_observations))) rospy.logdebug("n_observations===>" + str(self.n_observations)) rospy.logdebug("new_ranges, jumping laser readings===>" + str(self.new_ranges)) high = numpy.full((self.n_observations), self.max_laser_value) low = numpy.full((self.n_observations), self.min_laser_value) # We only use two integers self.observation_space = spaces.Box(low, high) rospy.logdebug("ACTION SPACES TYPE===>" + str(self.action_space)) rospy.logdebug("OBSERVATION SPACES TYPE===>" + str(self.observation_space)) # Rewards self.forwards_reward = rospy.get_param("/turtlebot2/forwards_reward") self.turn_reward = rospy.get_param("/turtlebot2/turn_reward") self.end_episode_points = rospy.get_param( "/turtlebot2/end_episode_points") self.cumulated_steps = 0.0 self.laser_filtered_pub = rospy.Publisher( '/turtlebot2/laser/scan_filtered', LaserScan, queue_size=1)
def __init__(self):
    super().__init__()
    self.action_space = spaces.Discrete(self.N_JOINTS * 2)
    self.observation_space = spaces.Box(low=-self.JOINT_LIMITS, high=self.JOINT_LIMITS)
def __init__(
    self,
    environment_filename=None,
    worker_id=0,
    retro=True,
    timeout_wait=30,
    realtime_mode=False,
    config=None,
    greyscale=False,
):
    """
    Arguments:
        environment_filename: The file path to the Unity executable. Does not require the extension.
        worker_id: The index of the worker in the case where multiple environments are running.
            Each environment reserves port (5005 + worker_id) for communication with the Unity executable.
        retro: Resize visual observation to 84x84 (int8) and flatten the action space.
        timeout_wait: Time for the python interface to wait for the environment to connect.
        realtime_mode: Whether to render the environment window image and run the environment in realtime.
    """
    self.reset_parameters = EnvironmentParametersChannel()
    self.engine_config = EngineConfigurationChannel()
    if environment_filename is None:
        registry = UnityEnvRegistry()
        registry.register_from_yaml(self._REGISTRY_YAML)
        self._env = registry["ObstacleTower"].make(
            worker_id=worker_id,
            timeout_wait=timeout_wait,
            side_channels=[self.reset_parameters, self.engine_config])
    else:
        self._env = UnityEnvironment(
            environment_filename,
            worker_id,
            timeout_wait=timeout_wait,
            side_channels=[self.reset_parameters, self.engine_config],
        )

    if realtime_mode:
        self.engine_config.set_configuration_parameters(time_scale=1.0)
        self.reset_parameters.set_float_parameter("train-mode", 0.0)
    else:
        self.engine_config.set_configuration_parameters(time_scale=20.0)
        self.reset_parameters.set_float_parameter("train-mode", 1.0)

    self._env.reset()
    behavior_name = list(self._env.behavior_specs)[0]
    split_name = behavior_name.split("-v")
    if len(split_name) == 2 and split_name[0] == "ObstacleTowerAgent":
        self.name, self.version = split_name
    else:
        raise UnityGymException("Attempting to launch non-Obstacle Tower environment")

    if self.version not in self.ALLOWED_VERSIONS:
        raise UnityGymException(
            "Invalid Obstacle Tower version. Your build is v" + self.version +
            " but only the following versions are compatible with this gym: " +
            str(self.ALLOWED_VERSIONS))

    self.visual_obs = None
    self._n_agents = None
    self._flattener = None
    self._greyscale = greyscale

    # Environment reset parameters
    self._seed = None
    self._floor = None

    self.realtime_mode = realtime_mode
    # Hidden flag used by Atari environments to determine if the game is over
    self.game_over = False
    self.retro = retro
    self.config = config  # the original's `if config != None: ... else: self.config = None` reduces to this

    flatten_branched = self.retro
    uint8_visual = self.retro

    # Check behavior configuration
    if len(self._env.behavior_specs) != 1:
        raise UnityGymException(
            "There can only be one agent in this environment "
            "if it is wrapped in a gym.")
    self.behavior_name = behavior_name
    behavior_spec = self._env.behavior_specs[behavior_name]

    if len(behavior_spec) < 2:
        raise UnityGymException("Environment provides too few observations.")

    self.uint8_visual = uint8_visual

    # Check for number of agents in scene.
    initial_info, terminal_info = self._env.get_steps(behavior_name)
    self._check_agents(len(initial_info))

    # Set observation and action spaces
    if len(behavior_spec.action_shape) == 1:
        self._action_space = spaces.Discrete(behavior_spec.action_shape[0])
    else:
        if flatten_branched:
            self._flattener = ActionFlattener(behavior_spec.action_shape)
            self._action_space = self._flattener.action_space
        else:
            self._action_space = spaces.MultiDiscrete(behavior_spec.action_shape)

    if self._greyscale:
        depth = 1
    else:
        depth = 3

    image_space_max = 1.0
    image_space_dtype = np.float32
    camera_height = behavior_spec.observation_shapes[0][0]
    camera_width = behavior_spec.observation_shapes[0][1]
    if self.retro:
        image_space_max = 255
        image_space_dtype = np.uint8
        camera_height = 84
        camera_width = 84

    image_space = spaces.Box(
        0, image_space_max,
        dtype=image_space_dtype,
        shape=(camera_height, camera_width, depth),
    )

    if self.retro:
        self._observation_space = image_space
    else:
        max_float = np.finfo(np.float32).max
        keys_space = spaces.Discrete(5)
        time_remaining_space = spaces.Box(low=0.0, high=max_float, shape=(1,), dtype=np.float32)
        floor_space = spaces.Discrete(9999)
        self._observation_space = spaces.Tuple(
            (image_space, keys_space, time_remaining_space, floor_space))
def __init__(self, env_name, num_envs, color_mode='rgb', device='cpu', rescale=False,
             frameskip=1, repeat_prob=0.25, episodic_life=False, max_noop_steps=30,
             max_episode_length=10000):
    """Initialize the ALE class with a given environment

    Args:
        env_name (str): The name of the Atari rom
        num_envs (int): The number of environments to run
        color_mode (str): RGB ('rgb') or grayscale ('gray') observations
        device (str): Torch device to map the ALEs to ('cpu' or 'cuda')
        rescale (bool): Rescale grayscale observations to 84x84
        frameskip (int): Number of frames to skip during training
        repeat_prob (float): Probability of repeating the previous action
        episodic_life (bool): Set 'done' on end of life
        max_noop_steps (int): Maximum number of no-op steps at reset
        max_episode_length (int): Maximum number of steps per episode
    """
    assert (color_mode == 'rgb') or (color_mode == 'gray')
    if color_mode == 'rgb' and rescale:
        raise ValueError('Rescaling is only valid in grayscale color mode')

    self.cart = Rom(env_name)
    super(Env, self).__init__(self.cart, num_envs, max_noop_steps)

    self.device = torch.device(device)
    self.num_envs = num_envs
    self.rescale = rescale
    self.frameskip = frameskip
    self.repeat_prob = repeat_prob
    self.is_cuda = self.device.type == 'cuda'
    self.is_training = False
    self.episodic_life = episodic_life

    self.height = 84 if self.rescale else self.cart.screen_height()
    self.width = 84 if self.rescale else self.cart.screen_width()
    self.num_channels = 3 if color_mode == 'rgb' else 1

    self.action_set = torch.Tensor([int(s) for s in self.cart.minimal_actions()]).to(self.device).byte()
    # check if FIRE is in the action set
    self.fire_reset = int(torchcule_atari.FIRE) in self.action_set

    self.action_space = spaces.Discrete(self.action_set.size(0))
    self.observation_space = spaces.Box(low=0, high=255,
                                        shape=(self.num_channels, self.height, self.width),
                                        dtype=np.uint8)

    self.observations1 = torch.zeros((num_envs, self.height, self.width, self.num_channels),
                                     device=self.device, dtype=torch.uint8)
    self.observations2 = torch.zeros((num_envs, self.height, self.width, self.num_channels),
                                     device=self.device, dtype=torch.uint8)
    self.done = torch.zeros(num_envs, device=self.device, dtype=torch.bool)
    self.actions = torch.zeros(num_envs, device=self.device, dtype=torch.uint8)
    self.last_actions = torch.zeros(num_envs, device=self.device, dtype=torch.uint8)
    self.lives = torch.zeros(num_envs, device=self.device, dtype=torch.int32)
    self.rewards = torch.zeros(num_envs, device=self.device, dtype=torch.float32)

    self.states = torch.zeros((num_envs, self.state_size()), device=self.device, dtype=torch.uint8)
    self.frame_states = torch.zeros((num_envs, self.frame_state_size()), device=self.device, dtype=torch.uint8)
    self.ram = torch.randint(0, 255, (num_envs, self.cart.ram_size()), device=self.device, dtype=torch.uint8)
    self.tia = torch.zeros((num_envs, self.tia_update_size()), device=self.device, dtype=torch.int32)
    self.frame_buffer = torch.zeros((num_envs, 300 * self.cart.screen_width()), device=self.device, dtype=torch.uint8)
    self.cart_offsets = torch.zeros(num_envs, device=self.device, dtype=torch.int32)
    self.rand_states = torch.randint(0, np.iinfo(np.int32).max, (num_envs,), device=self.device, dtype=torch.int32)

    self.cached_states = torch.zeros((max_noop_steps, self.state_size()), device=self.device, dtype=torch.uint8)
    self.cached_ram = torch.randint(0, 255, (max_noop_steps, self.cart.ram_size()), device=self.device, dtype=torch.uint8)
    self.cached_frame_states = torch.zeros((max_noop_steps, self.frame_state_size()), device=self.device, dtype=torch.uint8)
    self.cached_tia = torch.zeros((max_noop_steps, self.tia_update_size()), device=self.device, dtype=torch.int32)
    self.cache_index = torch.zeros((num_envs,), device=self.device, dtype=torch.int32)

    self.set_cuda(self.is_cuda, self.device.index if self.is_cuda else -1)
    self.initialize(self.states.data_ptr(), self.frame_states.data_ptr(), self.ram.data_ptr(),
                    self.tia.data_ptr(), self.frame_buffer.data_ptr(), self.cart_offsets.data_ptr(),
                    self.action_set.data_ptr(), self.rand_states.data_ptr(),
                    self.cached_states.data_ptr(), self.cached_ram.data_ptr(),
                    self.cached_frame_states.data_ptr(), self.cached_tia.data_ptr(),
                    self.cache_index.data_ptr())
def __init__(self, case_files, dyn_sim_config_file, rl_config_file,
             server_port_num=25333, cnts=[2, 2, 2]):
    global gateway
    gateway = JavaGateway(gateway_parameters=GatewayParameters(port=server_port_num, auto_convert=True))
    global ipss_app
    ipss_app = gateway.entry_point

    from gym import spaces

    _case_files = case_files
    _dyn_sim_config_file = dyn_sim_config_file
    _rl_config_file = rl_config_file

    # initialize the power system simulation service;
    # returns {observation_history_length, observation_space_dim, action_location_num, action_level_num}
    dim_ary = ipss_app.initStudyCase(case_files, dyn_sim_config_file, rl_config_file)
    print('observation_history_length, observation_space_dim, action_location_num, action_level_num =')
    print(dim_ary[0], dim_ary[1], dim_ary[2], dim_ary[3])

    observation_history_length = dim_ary[0]
    observation_space_dim = dim_ary[1]
    action_location_num = dim_ary[2]
    action_level_num = dim_ary[3]

    num = action_level_num ** action_location_num
    self.action_space = spaces.Discrete(num)
    self.cnts = cnts

    # define action and observation spaces
    """
    if action_location_num == 1:
        self.action_space = spaces.Discrete(action_level_num)  # Discrete, 1-D dimension
    else:
        # print('N-D dimension Discrete Action space is not supported yet...TODO')
        # the following is based on the latest gym dev version:
        # action_def_vector = np.ones(action_location_num, dtype=np.int32) * action_level_num

        # for gym version 0.10.4, MultiDiscrete is parametrized by passing an array of
        # [min, max] pairs for each discrete action space,
        # for example, MultiDiscrete([[0, 4], [0, 1], [0, 1]])
        action_def_vector = np.ones((action_location_num, 2), dtype=np.int32)
        action_def_vector[:, 1] = action_level_num - 1
        aa = np.asarray(action_def_vector, dtype=np.int32)
        self.action_space = spaces.MultiDiscrete(action_def_vector)  # Discrete, N-D dimension
    """

    self.observation_space = spaces.Box(-999, 999,
                                        shape=(observation_history_length * observation_space_dim,))  # Continuous

    self._seed()

    # TODO: get the initial states
    self.state = None
    self.steps_beyond_done = None
    self.restart_simulation = True
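# The flattened Discrete(action_level_num ** action_location_num) space packs
# one level per location into a single index via base-`action_level_num`
# digits. A minimal decoding sketch (the digit order is an assumption; the
# Java side may use the reverse convention):
def decode_flat_action(a, action_location_num, action_level_num):
    levels = []
    for _ in range(action_location_num):
        a, level = divmod(a, action_level_num)
        levels.append(level)
    return levels  # levels[i] is the action level at location i

# With 3 locations and 4 levels, index 27 -> [3, 2, 1], since 27 = 3 + 2*4 + 1*16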
def __init__(self, image_paths, true_bboxes, playout_episode=False, premasking=True,
             mode='train', max_steps_per_image=200, seed=None, bbox_scaling=0.125,
             bbox_transformer='base', has_termination_action=True,
             ior_marker_type='cross', history_length=10):
    """
    :param image_paths: The paths to the individual images
    :param true_bboxes: The true bounding boxes for each image
    :type image_paths: String or list
    :type true_bboxes: numpy.ndarray
    """
    # Determines whether the agent is training or testing;
    # optimizations can be applied during training that are not allowed for testing
    self.mode = mode
    # Factor for scaling all bounding boxes relative to their size
    self.bbox_scaling = bbox_scaling
    # Whether IoR markers will be placed upfront after loading the image
    self.premasking = premasking
    # Whether an episode terminates after a single trigger or is played out until the end
    self.playout_episode = playout_episode
    # Episodes will be terminated automatically after reaching max steps
    self.max_steps_per_image = max_steps_per_image
    # Whether a termination action should be provided in the action set
    self.has_termination_action = has_termination_action
    # The type of IoR marker to be used when masking trigger regions
    self.ior_marker_type = ior_marker_type
    # Length of history in state & agent model
    self.history_length = history_length

    # Initialize action space
    self.bbox_transformer = create_bbox_transformer(bbox_transformer)
    self.action_space = spaces.Discrete(len(self.action_set))
    # 224*224*3 (RGB image) + 9 * 10 (one-hot-encoded history) = 150618
    self.observation_space = spaces.Tuple([
        spaces.Box(low=0, high=255, shape=(224, 224, 3)),  # was high=256; 255 is the max pixel value
        spaces.Box(low=0, high=1, shape=(self.history_length, len(self.action_set)))
    ])

    # Initialize dataset
    if type(image_paths) is not list:
        image_paths = [image_paths]
    self.image_paths = image_paths
    self.true_bboxes = [[TextLocEnv.to_standard_box(b) for b in bboxes] for bboxes in true_bboxes]

    # For registering a handler that will be executed once after a step
    self.post_step_handler = None

    # Episode-specific state:
    # Image for the current episode
    self.episode_image = None
    # Ground truth bounding boxes for the current episode image
    self.episode_true_bboxes = None
    # Predicted bounding boxes for the current episode image
    self.episode_pred_bboxes = None
    # IoU values for each trigger in the current episode
    self.episode_trigger_ious = None
    # List of indices of masked bounding boxes for the current episode image
    self.episode_masked_indices = []
    # Number of trigger actions used so far
    self.num_triggers_used = 0

    self.seed(seed=seed)
    self.reset()
"cifar100": (32, 32, 3), "cifarfellowship": (32, 32, 3), "imagenet100": (224, 224, 3), "imagenet1000": (224, 224, 3), # "permutedmnist": (28, 28, 1), # "rotatedmnist": (28, 28, 1), "core50": (224, 224, 3), "core50-v2-79": (224, 224, 3), "core50-v2-196": (224, 224, 3), "core50-v2-391": (224, 224, 3), "synbols": (224, 224, 3), }.items() } reward_spaces: Dict[str, Space] = { "mnist": spaces.Discrete(10), "fashionmnist": spaces.Discrete(10), "kmnist": spaces.Discrete(10), "emnist": spaces.Discrete(10), "qmnist": spaces.Discrete(10), "mnistfellowship": spaces.Discrete(30), "cifar10": spaces.Discrete(10), "cifar100": spaces.Discrete(100), "cifarfellowship": spaces.Discrete(110), "imagenet100": spaces.Discrete(100), "imagenet1000": spaces.Discrete(1000), "permutedmnist": spaces.Discrete(10), "rotatedmnist": spaces.Discrete(10), "core50": spaces.Discrete(50), "core50-v2-79": spaces.Discrete(50), "core50-v2-196": spaces.Discrete(50),
def _init_action_space(self):
    return spaces.Discrete(4)
def __post_init__(self):
    """Initializes the fields of the Setting (and LightningDataModule),
    including the transforms, shapes, etc.
    """
    if isinstance(self.increment, list) and len(self.increment) == 1:
        # This can happen when parsing a list from the command-line.
        self.increment = self.increment[0]

    base_observations_space = base_observation_spaces[self.dataset]
    base_reward_space = reward_spaces[self.dataset]
    # action space = reward space by default
    base_action_space = base_reward_space

    if isinstance(base_action_space, spaces.Discrete):
        # Classification dataset
        self.num_classes = base_action_space.n
        # Set the number of tasks depending on the increment, and vice-versa
        # (as only one of the two should be used).
        if self.nb_tasks == 0:
            self.nb_tasks = self.num_classes // self.increment
        else:
            self.increment = self.num_classes // self.nb_tasks
    else:
        raise NotImplementedError(f"TODO: (issue #43)")

    if not self.class_order:
        self.class_order = list(range(self.num_classes))

    # Test values default to the same as train.
    self.test_increment = self.test_increment or self.increment
    self.test_initial_increment = self.test_initial_increment or self.test_increment
    self.test_class_order = self.test_class_order or self.class_order

    # TODO: For now we assume a fixed, equal number of classes per task, for the
    # sake of simplicity. We could take out this assumption, but it might
    # make things a bit more complicated.
    assert isinstance(self.increment, int)
    assert isinstance(self.test_increment, int)

    self.n_classes_per_task: int = self.increment
    action_space = spaces.Discrete(self.n_classes_per_task)
    reward_space = spaces.Discrete(self.n_classes_per_task)

    super().__post_init__(
        # observation_space=observation_space,
        action_space=action_space,
        reward_space=reward_space,  # the labels always have shape (1,)
    )

    self.train_datasets: List[_ContinuumDataset] = []
    self.val_datasets: List[_ContinuumDataset] = []
    self.test_datasets: List[_ContinuumDataset] = []

    # This will be set by the Experiment, or passed to the `apply` method.
    # TODO: This could be a bit cleaner.
    self.config: Config
    # Default path to which the datasets will be downloaded.
    self.data_dir: Optional[Path] = None

    self.train_env: PassiveEnvironment = None  # type: ignore
    self.val_env: PassiveEnvironment = None    # type: ignore
    self.test_env: PassiveEnvironment = None   # type: ignore
def __init__(self, env, keys):
    super(DiscreteToFixedKeysVNCActions, self).__init__(env)
    self._keys = keys
    self._generate_actions()
    self.action_space = spaces.Discrete(len(self._actions))
def space(self):
    return spaces.Discrete(self.max_value + self.scale_factor)
def __init__(self): self.__version__ = "0.0.3" print("AbadiaEnv - Version {}".format(self.__version__)) self.url = "http://localhost:4477" self.server = "http://localhost" self.port = "4477" self.num_episodes = 100 self.num_steps = 1500 self.gameName = "" self.actionsName = "" self.checkpointName = None self.dump_path = "partidas/now/" # Define what the agent can do # 0 -> STEP ORI 0 # 1 -> STEP ORI 1 # 2 -> STEP ORI 2 # 3 -> STEP ORI 3 # 4 -> RIGHT # 5 -> LEFT # 6 -> DOWN # 7 -> NOP self.action_space = spaces.Discrete(7) # json from the dump state of the episode self.json_dump = {} self.actions_list = ("cmd/A", "cmd/A", "cmd/A", "cmd/A", "cmd/D", "cmd/I", "cmd/B", "cmd/N") self.obsequium = -1 self.porcentaje = -1 self.haFracasado = False self.prevPantalla = -1 self.rejilla = [] self.prev_ob = dict() self.estaGuillermo = False self.curr_step = -1 self.is_game_done = False self.Personajes = { "Guillermo": {}, "Adso": {}, "Abad": {}, "Malaquias": {}, "Berengario": {}, "Severino": {}, "Jorge": {}, "Bernardo": {} } self.listaPersonajes = ("Guillermo", "Adso", "Abad", "Malaquias", "Berengario", "Severino", "Jorge", "Bernardo") self.Visited = np.zeros([512, 512]) # Observation is the remaining time low = np.array([ 0.0, # remaining_tries ]) high = np.array([ 512, ]) self.observation_space = spaces.Box(low, high) # Store what the agent tried self.curr_episode = -1 self.action_episode_memory = []
def space(self) -> spaces.Discrete:
    return spaces.Discrete(self.actions_per_axis ** self.size)
def __init__(self, step_cost=-0.01, n_max=4, collision_reward=-10, arrive_prob=0.5,
             full_observable: bool = False, max_steps: int = 100, n_junctions=1):
    assert 1 <= n_max <= 4, "n_max should be in the range [1,4]"
    assert n_junctions in AVAILABLE_JUNCTIONS, "Available junctions: {}, got {}".format(
        AVAILABLE_JUNCTIONS, n_junctions)
    assert 0 <= arrive_prob <= 1, "arrive probability should be in the range [0,1]"
    assert 1 <= max_steps, "max_steps should be at least 1"

    self._grid_shape = (14, 14)
    self.n_agents = n_max
    self._max_steps = max_steps
    self._step_count = 0  # environment step counter
    self._collision_reward = collision_reward
    self.wrong_destination_reward = -5
    self._total_episode_reward = None
    self._arrive_prob = arrive_prob
    self._n_max = n_max
    self._step_cost = step_cost
    self.curr_cars_count = 0
    self._n_routes = 3
    self._junction = to_array(AVAILABLE_JUNCTIONS[n_junctions])
    self._agent_view_mask = (3, 3)

    # entry gates where the cars spawn: generate all possible options;
    # _start_idx is used to pick exit gates for when agents reach a wrong
    # destination and get penalised
    self._entry_gates, self._exit_gates = start_and_end_positions(self._junction)
    self._start_idx = np.arange(len(self._entry_gates))
    np.random.shuffle(self._start_idx)

    # destination places for the cars to reach
    self._destination = []
    for i in range(self.n_agents):
        idx = self._start_idx[i]
        end_candidates = self._entry_gates[:idx] + self._entry_gates[idx + 1:]
        destination_idx = np.random.choice(np.arange(len(end_candidates)))
        self._destination.append(end_candidates[destination_idx])

    self._agent_step_count = [0 for _ in range(self.n_agents)]  # a step counter for each car

    self.action_space = MultiAgentActionSpace(
        [spaces.Discrete(5) for _ in range(self.n_agents)])
    self.agent_pos = {_: None for _ in range(self.n_agents)}
    self._on_the_road = [False for _ in range(self.n_agents)]  # flag if a car is on the road

    self._full_obs = self.__create_grid()
    self._base_img = self.__draw_base_img()
    self._agent_dones = [None for _ in range(self.n_agents)]
    self.viewer = None
    self.full_observable = full_observable

    # agent id (n_agents, one-hot), obs_mask (9), pos (2), route (3)
    mask_size = np.prod(self._agent_view_mask)
    self._obs_high = np.ones((mask_size * (self.n_agents + 2 + 2)))   # n_agents + destination + location
    self._obs_low = np.zeros((mask_size * (self.n_agents + 2 + 2)))   # n_agents + destination + location
    if self.full_observable:
        self._obs_high = np.tile(self._obs_high, self.n_agents)
        self._obs_low = np.tile(self._obs_low, self.n_agents)
    self.observation_space = MultiAgentObservationSpace([
        spaces.Box(self._obs_low, self._obs_high) for _ in range(self.n_agents)
    ])
def space(self) -> spaces.Space:
    return spaces.Discrete(len(self.actions))
def _set_action_space(self):
    self.action_space = spaces.Discrete(6)
def __init__(self, grid_shape, field_types, initial_state, initial_position,
             transition, hidden_reward, corrupt_reward, episode_length,
             print_field=lambda x: str(x)):
    self.action_space = spaces.Discrete(4)
    assert field_types >= 1
    # All field types plus the agent's position
    self.observation_space = spaces.MultiDiscrete(np.zeros(grid_shape) + field_types + 1)

    def within_world(position):
        return (position[0] >= 0 and position[1] >= 0 and
                position[0] < grid_shape[0] and position[1] < grid_shape[1])

    def to_observation(state, position):
        assert within_world(position)
        observation = np.array(state, dtype=np.int32)
        observation[position] = AGENT
        return observation

    assert self.observation_space.contains(to_observation(initial_state, initial_position))

    position = tuple(initial_position)
    state = np.array(initial_state)
    step = 0
    last_action = None

    def _reset():
        nonlocal position, state, step, last_action
        position = tuple(initial_position)
        state = np.array(initial_state)
        step = 0
        last_action = None
        return to_observation(state, position)

    def _transition(state, position, action):
        # Only move within the world; don't change anything else
        pos = np.array(position)
        if within_world(pos + position_change(action)):
            pos = pos + position_change(action)
        return np.array(state), tuple(pos)

    if transition is None:
        transition = _transition

    def _step(action):
        nonlocal position, state, step, last_action
        step += 1
        last_action = action
        state, position = transition(state, position, action)
        info = {'hidden_reward': hidden_reward(state, position)}
        reward = corrupt_reward(state, position)
        done = (step > episode_length)
        return (to_observation(state, position), reward, done, info)

    def _render(mode='human', close=False):
        observation = to_observation(state, position)
        observation_chars = [[print_field(observation[c, r]) for c in range(grid_shape[0])]
                             for r in reversed(range(grid_shape[1]))]
        additional_info = "A: " + str(last_action) + " S: " + str(step)
        if mode == 'text':
            sys.stdout.write("\n".join(''.join(line) for line in observation_chars) + "\n")
            sys.stdout.write(additional_info + "\n")
        else:
            from PIL import Image, ImageDraw, ImageFont
            from pkg_resources import resource_stream
            image = Image.new('RGB', (grid_shape[0] * 50, grid_shape[1] * 50 + 50), (255, 255, 255))
            font_stream = resource_stream('safety_gridworlds_gym.envs.common', 'unifont-11.0.02.ttf')
            font = ImageFont.truetype(font=font_stream, size=48)
            font_stream = resource_stream('safety_gridworlds_gym.envs.common', 'unifont-11.0.02.ttf')
            smaller_font = ImageFont.truetype(font=font_stream, size=36)
            drawing = ImageDraw.Draw(image)
            for r in range(grid_shape[1]):
                for c in range(grid_shape[0]):
                    # observation_chars is indexed [row][column]; the original
                    # indexed [c][r], which only worked for square grids
                    drawing.text((c * 50, r * 50), observation_chars[r][c], font=font, fill=(0, 0, 0))
            drawing.text((0, grid_shape[1] * 50), additional_info, font=smaller_font, fill=(0, 0, 0))
            if mode == 'human':
                import matplotlib.pyplot as plt
                plt.axis("off")
                plt.imshow(image)
                plt.pause(.1)
                plt.clf()
            return np.array(image)

    self.step = _step
    self.reset = _reset
    self.render = _render
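# A minimal usage sketch of the closure-based env above. The class name and
# callables here are illustrative assumptions: a 3x4 empty grid of one field
# type, a corrupt reward that the agent observes, and a hidden reward kept in
# info for evaluation only.
import numpy as np

env = CorruptGridworldEnv(  # hypothetical class name for the env above
    grid_shape=(3, 4),
    field_types=1,
    initial_state=np.zeros((3, 4), dtype=np.int32),
    initial_position=(0, 0),
    transition=None,  # fall back to the built-in movement-only transition
    hidden_reward=lambda s, p: float(p == (2, 3)),  # true objective
    corrupt_reward=lambda s, p: float(p[0] == 2),   # what the agent sees
    episode_length=20,
)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
print(reward, info['hidden_reward'])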