Example No. 1
    def __init__(self, env, directory, record_video_trigger, video_length=200):
        """
        # Arguments
            env: Env to wrap
            directory: Where to save videos
            record_video_trigger:
                Function that decides when to start recording.
                It takes the current step number and returns
                whether recording should start.
            video_length: Length of recorded video
        """

        Wrapper.__init__(self, env=env)
        self.record_video_trigger = record_video_trigger
        self.video_recorder = None

        self.directory = os.path.abspath(directory)
        logger.configure(self.directory)
        if not os.path.exists(self.directory):
            os.mkdir(self.directory)

        self.file_prefix = "env"
        self.file_infix = '{}'.format(os.getpid())
        self.step_id = 0
        self.video_length = video_length

        self.recording = False
        self.recorded_frames = 0
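
Usage sketch (the wrapper's class name is not shown in this excerpt; VideoRecorderWrapper below is a hypothetical stand-in):

import gym

env = gym.make("CartPole-v1")      # any env; illustrative
env = VideoRecorderWrapper(        # hypothetical name for the class above
    env,
    directory="./videos",
    record_video_trigger=lambda step: step % 1000 == 0,  # start every 1000 steps
    video_length=200,
)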
Example No. 2
    def __init__(self, env, filename, allow_early_resets=False):
        Wrapper.__init__(self, env=env)
        self.tstart = time.time()
        if filename is None:
            self.f = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                filename = filename + "." + Monitor.EXT
            self.f = open(filename, "wt")
            self.logger = JSONLogger(self.f)
            self.logger.writekvs({
                "t_start": self.tstart,
                "gym_version": gym.__version__,
                "env_id": env.spec.id if env.spec else 'Unknown'
            })

        self.reward_space = env.reward_space
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.total_steps = 0
        self.current_metadata = {}  # extra info that gets injected into each log entry
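
A hedged sketch of the end-of-episode record such a JSON monitor typically writes (the step/reset methods are not part of this excerpt; keys mirror the r/l/t convention of the CSV variants below):

# inside the wrapper, once an episode finishes (sketch)
ep_rew = sum(self.rewards)
ep_info = {"r": round(ep_rew, 6),
           "l": len(self.rewards),
           "t": round(time.time() - self.tstart, 6)}
ep_info.update(self.current_metadata)
self.logger.writekvs(ep_info)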
Example No. 3
 def __init__(self, env, n_frames=4):
     Wrapper.__init__(self, env)
     height, width, n_channels = env.observation_space.shape
     obs_shape = [height, width, n_channels * n_frames]
     self.env = env
     self.observation_space = Box(0.0, 1.0, obs_shape)
     self.framebuffer = np.zeros(obs_shape, 'float32')
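
Only the buffer allocation appears above; a minimal sketch of the companion update (assumed, not shown in the source) rolls old frames back along the channel axis and writes the newest frame in front:

import numpy as np

def update_buffer(framebuffer, new_frame, n_channels):
    # shift existing frames toward the back of the channel axis,
    # then overwrite the leading channels with the new frame (sketch)
    shifted = np.roll(framebuffer, shift=n_channels, axis=-1)
    shifted[..., :n_channels] = new_frame
    return shifted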
Example No. 4
 def __init__(self, env, filename, reset_keywords=(), info_keywords=()):
     Wrapper.__init__(self, env=env)
     self.tstart = time.time()
     if filename is None:
         self.f = None
         self.logger = None
     else:
         if not filename.endswith(Monitor.EXT):
             if osp.isdir(filename):
                 filename = osp.join(filename, Monitor.EXT)
             else:
                 filename = filename + "." + Monitor.EXT
         self.f = open(filename, "wt")
          self.f.write('#%s\n' % json.dumps({
              "t_start": self.tstart,
              'env_id': env.spec and env.spec.id
          }))
          self.logger = csv.DictWriter(
              self.f,
              fieldnames=('r', 'l', 't', 'latency') + reset_keywords + info_keywords)
         self.logger.writeheader()
         self.f.flush()
     self.reset_keywords = reset_keywords
     self.info_keywords = info_keywords
     self.rewards = []
     self.episode_rewards = []
     self.episode_lengths = []
     self.episode_hits = []
     self.max_reward = 0
     self.tfirst_hit = 0
      self.current_reset_info = {}  # extra info about the current episode, that was passed in during reset()
     self.tstart_hit = time.time()
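
The writer above produces a '#'-prefixed JSON header line followed by CSV rows; a hedged reading sketch (the filename is illustrative):

import json
import pandas as pd

with open("monitor.csv") as f:
    header = json.loads(f.readline()[1:])    # strip the leading '#'
df = pd.read_csv("monitor.csv", skiprows=1)  # columns: r, l, t, latency, ...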
Example No. 5
    def __init__(self,
                 env,
                 augment_observations=False,
                 reset_info_frequency='step',
                 min_text_area_width=300,
                 font_color=(255, 0, 0),
                 viewer=None):
        """
        Arguments:
            env: environment to wrap
            augment_observations: if True, human info will be included in observations. Recommended for playing mode.
            reset_info_frequency: if set to 'step', human info will be reset before taking every step
            min_text_area_width: min width of text area in pixels
            font_color: color of printed text
            viewer: optional viewer used for rendering
        """
        Wrapper.__init__(self, env=env)
        self.augment_observations = augment_observations
        self.reset_info_frequency = reset_info_frequency
        self.human_info = []
        self.font_color = font_color
        self.viewer = viewer
        self.min_text_area_width = min_text_area_width

        if self.augment_observations:
            ob_space = self.observation_space
            text_area_shape = self._get_text_area_shape(ob_space.shape)
            text_area_shape[1] += ob_space.shape[1]
            ob_space.shape = tuple(text_area_shape)
Example No. 6
 def __init__(self, env):
     Wrapper.__init__(self, env=env)
     self.tstart = time.time()
     self.total_steps = 0
     self.num_agent = self.env.num_agent
     self.rewards = [[] for _ in range(self.num_agent)]
     self.needs_reset = [False for _ in range(self.num_agent)]
Example No. 7
    def __init__(self, env, filename, allow_early_resets=False, cpu=None):
        Wrapper.__init__(self, env=env)
        self.tstart = time.time()
        self.filename = filename
        if filename is None:
            self.f = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                filename_local = filename + "." + Monitor.EXT_LOCAL
                filename = filename + "." + Monitor.EXT
            else:
                filename_local = filename[:-13] + "." + Monitor.EXT_LOCAL
            self.f = open(filename, "wt")
            self.f_local = open(filename_local, "wt")
            self.logger = JSONLogger(self.f, filename=filename)
            self.logger_local = JSONLogger(self.f_local, override=True)
            self.logger.writekvs({"t_start": self.tstart, "gym_version": gym.__version__,
                                  "env_id": env.spec.id if env.spec else 'Unknown'})
            self.logger_local.writekvs({"t_start": self.tstart, "gym_version": gym.__version__,
                                        "env_id": env.spec.id if env.spec else 'Unknown'})
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.total_steps = 0
        self.current_metadata = {}  # extra info that gets injected into each log entry
        # Useful for metalearning where we're modifying the environment externally
        # But want our logs to know about these modifications

        self.cpu = cpu
Example No. 8
    def __init__(self,
                 env,
                 ob=True,
                 ret=True,
                 *,
                 reuse=False,
                 clipob=10.,
                 cliprew=10.,
                 gamma=0.99,
                 epsilon=1e-8):
        Wrapper.__init__(self, env=env)
        global global_instance
        if reuse:  # We need the same normalization everywhere to use the same model on different environments
            self.ob_rms = global_instance.ob_rms
            self.ret_rms = global_instance.ret_rms
            self.ret = global_instance.ret
        else:
            self.ob_rms = RunningMeanStd(
                shape=self.observation_space.shape) if ob else None
            self.ret_rms = RunningMeanStd(shape=()) if ret else None
            self.ret = np.zeros(1)
            global_instance = self

        self.clipob = clipob
        self.cliprew = cliprew
        self.gamma = gamma
        self.epsilon = epsilon
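
A hedged sketch of how these attributes are usually applied at step time (the step method is not shown here; update/mean/var is the assumed RunningMeanStd interface):

import numpy as np

def normalize_obs(obs, ob_rms, clipob, epsilon):
    # standardize with running statistics, then clip (sketch)
    if ob_rms is None:
        return obs
    ob_rms.update(obs)
    return np.clip((obs - ob_rms.mean) / np.sqrt(ob_rms.var + epsilon),
                   -clipob, clipob)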
Example No. 9
 def __init__(self, env):
     Wrapper.__init__(self, env=env)
     self.env = env
     env_oss = env.observation_space.shape[0]
     self.observation_space = spaces.Box(-np.inf, np.inf,
                                         shape=(env_oss+1,),
                                         dtype=np.float32)
Example No. 10
    def __init__(self, env, filename, allow_early_resets=False, reset_keywords=(), report='test'):
        Wrapper.__init__(self, env=env)
        self.tstart = time.time()
        if filename is None:
            self.f = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                if osp.isdir(filename):
                    filename = osp.join(filename, Monitor.EXT)
                else:
                    filename = filename + "." + Monitor.EXT
            self.f = open(filename, "wt")
            self.f.write('#%s\n'%json.dumps({"t_start": self.tstart, "gym_version": gym.__version__,
                "env_id": env.spec.id if env.spec else 'Unknown'}))
            self.logger = csv.DictWriter(self.f, fieldnames=('steps-reward-terminal-info',)+reset_keywords)
            self.logger.writeheader()

        self.reset_keywords = reset_keywords
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.done = 0
        self.step_info = {}             # info per every step
        self.episode_info = {}          # info at the episode end
        self.total_steps = 0
        self.current_reset_info = {} # extra info about the current episode, that was passed in during reset()
        self.test = False
        self.report = report
        try:
            self.env.report(self.report)
        except AttributeError:
            print("report method is not supported by the environment")
Example No. 11
 def __init__(self, env):
     Wrapper.__init__(self, env)
     self.frame_stack = deque(maxlen=4)
     low = np.tile(env.observation_space.low[..., np.newaxis], 4)
     high = np.tile(env.observation_space.high[..., np.newaxis], 4)
     dtype = env.observation_space.dtype
     self.observation_space = Box(low=low, high=high, dtype=dtype)
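
Consistent with the (..., 4) bounds built above, a sketch of the companion reset logic (assumed, not shown): the deque is filled with the first frame and stacked along a new trailing axis.

import numpy as np

def on_reset(frame_stack, first_obs):
    # pre-fill so the stacked observation is well-defined from step 0 (sketch)
    frame_stack.clear()
    for _ in range(frame_stack.maxlen):
        frame_stack.append(first_obs)
    return np.stack(frame_stack, axis=-1)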
Example No. 12
    def __init__(self, env, filename, allow_early_resets=False, reset_keywords=()):
        Wrapper.__init__(self, env=env)
        self.tstart = time.time()
        print('Monitor: filename={}'.format(filename))
        if filename is None:
            self.f = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                if osp.isdir(filename):
                    filename = osp.join(filename, Monitor.EXT)
                else:
                    filename = filename + "." + Monitor.EXT
            self.f = open(filename, "wt")
            self.f.write('#%s\n'%json.dumps({"t_start": self.tstart, 'env_id' : env.spec and env.spec.id}))
            self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't')+reset_keywords)
            self.logger.writeheader()
            self.f.flush()

        self.reset_keywords = reset_keywords
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {} # extra info about the current episode, that was passed in during reset()
Example No. 13
    def __init__(self,
                 env,
                 filename,
                 allow_early_resets=False,
                 reset_keywords=(),
                 info_keywords=()):
        Wrapper.__init__(self, env=env)

        self.tstart = time.time()
        if filename:
            self.results_writer = ResultsWriter(
                filename,
                header={'t_start': time.time(), 'env_id': env.spec and env.spec.id},
                extra_keys=reset_keywords + info_keywords)
        else:
            self.results_writer = None

        self.reset_keywords = reset_keywords
        self.info_keywords = info_keywords
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {}
Example No. 14
    def __init__(self,
                 env,
                 prefix="$TMPDIR",
                 *prefixes,
                 file="monitor.metrics.pkl",
                 allow_early_resets=False):
        """ Monitor Wrapper

        This wrapper uses ml-logger to cache environment metrics.

        :param env: gym environment
        :param prefix: primary cache prefix; defaults to "$TMPDIR"
        :param prefixes: additional cache prefixes, used to direct the stored
            metrics to a specific cache namescope.
        :param file: filename under which the metrics are cached
        :param allow_early_resets: default False; if False, an exception is
            raised when a manual reset is detected.
        """
        Wrapper.__init__(self, env=env)
        from ml_logger import ML_Logger

        # dump into the temp directory
        self.logger = ML_Logger(prefix, *prefixes)
        self.file = file
        self.allow_early_resets = allow_early_resets
        self.now = self.t0 = time.time()
        self.rewards = []

        self.total_steps = 0
        # Useful for metalearning where we're modifying the environment externally
        # But want our logs to know about these modifications
        self.additional_key_values = {}  # extra info that gets injected into each log entry
Example No. 15
 def __init__(self, env, filename, initial_log_size=10000):
     Wrapper.__init__(self, env=env)
     self.filename = filename
     self.log_size = initial_log_size
     self.action_log = None
     self.reward_log = LogBuffer(initial_log_size, (), dtype=np.float32)
     self.done_log = LogBuffer(initial_log_size, (), dtype=np.int32)
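
LogBuffer is not defined in this listing; a minimal growable buffer consistent with the calls above (size, shape, dtype) might look like:

import numpy as np

class LogBuffer:
    # hypothetical append-only log that doubles its capacity when full
    def __init__(self, size, shape, dtype=np.float32):
        self.data = np.zeros((size,) + shape, dtype=dtype)
        self.length = 0

    def append(self, value):
        if self.length == len(self.data):
            self.data = np.concatenate([self.data, np.zeros_like(self.data)])
        self.data[self.length] = value
        self.length += 1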
Example No. 16
    def __init__(self, env, filename, allow_early_resets=False, reset_keywords=()):
        Wrapper.__init__(self, env=env)
        self.tstart = time.time()
        if filename is None:
            self.f = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                if osp.isdir(filename):
                    filename = osp.join(filename, Monitor.EXT)
                else:
                    filename = filename + "." + Monitor.EXT
            self.f = open(filename, "wt")
            self.f.write('#%s\n' % json.dumps({"t_start": self.tstart, 'env_id': env.spec and env.spec.id}))
            self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't') + reset_keywords)
            self.logger.writeheader()

        self.reset_keywords = reset_keywords
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {}  # extra info about the current episode, that was passed in during reset()
Example No. 17
 def __init__(self,
              env,
              filename,
              allow_early_resets=False,
              reset_keywords=(),
              info_keywords=()):
     Wrapper.__init__(self, env=env)
     self.tstart = time.time()
     self.results_writer = ResultsWriter(
         filename,
         header={
             "t_start": time.time(),
             'env_id': env.spec and env.spec.id
         },
         extra_keys=reset_keywords + info_keywords)
     self.reset_keywords = reset_keywords
     self.info_keywords = info_keywords
     self.allow_early_resets = allow_early_resets
     self.rewards = None
     self.needs_reset = True
     self.episode_rewards = []
     self.episode_lengths = []
     self.episode_times = []
     self.total_steps = 0
      self.current_reset_info = {}  # extra info about the current episode, that was passed in during reset()
Example No. 18
    def __init__(self, env, filename, allow_early_resets=False, reset_keywords=()):
        Wrapper.__init__(self, env=env)
        self.tstart = time.time()
        if filename is None:
            self.f = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                if osp.isdir(filename):
                    filename = osp.join(filename, Monitor.EXT)
                else:
                    filename = filename + "." + Monitor.EXT
            self.f = open(filename, "wt")
            self.f.write('#%s\n'%json.dumps({"t_start": self.tstart, "gym_version": gym.__version__,
                "env_id": env.spec.id if env.spec else 'Unknown'}))
            self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't')+reset_keywords)
            self.logger.writeheader()

        self.reset_keywords = reset_keywords
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.total_steps = 0
        self.current_reset_info = {} # extra info about the current episode, that was passed in during reset()

        ## Cambria specific
        self.sensor_space = env.sensor_space
Example No. 19
 def __init__(self, env: gym.Env) -> None:
     Wrapper.__init__(self, env=env)
     self.tstart = time.time()
     self.rewards: List[float] = []
     self.episode_rewards: List[float] = []
     self.episode_lengths: List[int] = []
     self.episode_times: List[float] = []
     self.total_steps = 0
Example No. 20
 def __init__(self, env):
     Wrapper.__init__(self, env=env)
     self.rewards = None
     self.needs_reset = True
     self.episode_rewards = []
     self.episode_lengths = []
     self.episode_times = []
     self.total_steps = 0
Example No. 21
    def __init__(self,
                 env,
                 filename,
                 allow_early_resets=False,
                 reset_keywords=(),
                 info_keywords=(),
                 max_episode_steps=None):
        """
        A monitor wrapper for Gym environments; it records the episode reward, length, time and other data.

        :param env: (Gym environment) The environment
        :param filename: (str) the location to save a log file, can be None for no log
        :param allow_early_resets: (bool) allows the reset of the environment before it is done
        :param reset_keywords: (tuple) extra keywords for the reset call, if extra parameters are needed at reset
        :param info_keywords: (tuple) extra information to log, from the information return of environment.step
        :param max_episode_steps: (int) optional episode-step limit; if None, the value from env.spec is used
        """
        Wrapper.__init__(self, env=env)
        self.t_start = time.time()
        if filename is None:
            self.file_handler = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                if os.path.isdir(filename):
                    filename = os.path.join(filename, Monitor.EXT)
                else:
                    filename = filename + "." + Monitor.EXT
            self.file_handler = open(filename, "wt")
            self.file_handler.write(
                '#%s\n' % json.dumps({
                    "t_start": self.t_start,
                    'env_id': env.spec and env.spec.id
                }))
            self.logger = csv.DictWriter(self.file_handler,
                                         fieldnames=('r', 'l', 't') +
                                         reset_keywords + info_keywords)
            self.logger.writeheader()
            self.file_handler.flush()

        self.reset_keywords = reset_keywords
        self.info_keywords = info_keywords
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []

        self.total_steps = 0
        self.current_reset_info = {
        }  # extra info about the current episode, that was passed in during reset()

        if max_episode_steps is None:
            max_episode_steps = env.spec.max_episode_steps
        self.env.spec.max_episode_steps = max_episode_steps
        self._max_episode_steps = max_episode_steps
        self._elapsed_steps = None
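
The _elapsed_steps/_max_episode_steps pair suggests TimeLimit-style truncation; a sketch of the enforcement that would pair with it (assumed; gym's TimeLimit uses the same convention):

def step(self, action):  # method of the same wrapper class (sketch)
    observation, reward, done, info = self.env.step(action)
    self._elapsed_steps += 1  # assumes reset() set this to 0
    if self._elapsed_steps >= self._max_episode_steps:
        info['TimeLimit.truncated'] = not done
        done = True
    return observation, reward, done, info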
Example No. 22
 def __init__(self, env, prob=(.2, .8), block_dur=200):
     Wrapper.__init__(self, env=env)
     self.env = env
     self.prob = prob
     # keeps track of the repeating prob of the current block
     self.curr_block = tasktools.choice(self.env.rng, [0, 1])
      # duration of a block (in number of trials)
     self.block_dur = block_dur
     self.prev_trial = self.env.trial['ground_truth']
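
A hedged sketch of the block logic this state supports (the new-trial hook itself is not shown): every block_dur trials the active block flips, changing the repeating probability.

def maybe_switch_block(self, trial_count):
    # flip between the two entries of self.prob every block_dur trials (sketch)
    if trial_count % self.block_dur == 0:
        self.curr_block = 1 - self.curr_block
    return self.prob[self.curr_block]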
Example No. 23
 def __init__(self, env, filename, initial_log_size=10000):
     Wrapper.__init__(self, env=env)
     self.filename = filename
     self.log_size = initial_log_size
     self.action_log = None
     self.reward_log = LogBuffer(initial_log_size, (), dtype=np.float32)
     self.raw_reward_log = LogBuffer(initial_log_size, (), dtype=np.float32)
     self.done_log = LogBuffer(initial_log_size, (), dtype=np.int32)
     self.reset_counter = 0
     atexit.register(self.save)
Example No. 24
 def __init__(self, env, filepath, exp_name):
     Wrapper.__init__(self, env=env)
     init_data = {'ep_reward': [0], 'ep_timesteps': [0], 'ep_time': [0]}
     self.monitor = pd.DataFrame(init_data)
     self.filepath = Path(filepath)
     self.exp_name = exp_name
     self.rewards = []
     self.episode_rewards = []
     self.episode_lengths = []
     self.episode_times = []
     self.total_steps = 0
Example No. 25
    def __init__(self, env, save_path, summary_interval=1028):
        Wrapper.__init__(self, env=env)
        self.episode_rewards = []
        self.total_nb_steps = 0
        self.rewards = None
        self.save_path = save_path

        self.summary_interval = summary_interval

        self.create_summaries(save_path)

        self.previous_summary_time = time.time()
Example No. 26
    def __init__(self, env, reward_type='sparse'):
        Wrapper.__init__(self, env=env)
        self.env = env
        self.action_space = env.action_space
        # observation
        for key in list(env.observation_space.spaces.keys()):
            if key not in self.observation_keys:
                del env.observation_space.spaces[key]

        self.observation_space = env.observation_space
        self.reward_type = reward_type
        self.env.reward_type = self.reward_type_dict[self.reward_type]
Example No. 27
 def __init__(self, env, k):
     """Stack k last frames.
     Returns lazy array, which is much more memory efficient.
     See Also
     --------
     baselines.common.atari_wrappers.LazyFrames
     """
     Wrapper.__init__(self, env)
     self.k = k
     self.frames = deque([], maxlen=k)
     n_channels, height, width = env.observation_space.shape
     self.observation_space = Box(low=0, high=255, shape=(n_channels * k, height, width))
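
The docstring points at baselines.common.atari_wrappers.LazyFrames; the reset/step pair standard for this wrapper family (sketched here as methods of the same class) feeds the deque and wraps it lazily:

from baselines.common.atari_wrappers import LazyFrames

def reset(self):  # method of the same wrapper class (sketch)
    obs = self.env.reset()
    for _ in range(self.k):
        self.frames.append(obs)
    return LazyFrames(list(self.frames))

def step(self, action):
    obs, reward, done, info = self.env.step(action)
    self.frames.append(obs)
    return LazyFrames(list(self.frames)), reward, done, info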
Example No. 28
 def __init__(self, env, allow_early_resets=False, reset_keywords=(), info_keywords=()):
     Wrapper.__init__(self, env=env)
     self.tstart = time.time()
     self.reset_keywords = reset_keywords
     self.info_keywords = info_keywords
     self.allow_early_resets = allow_early_resets
     self.rewards = None
     self.needs_reset = True
     self.episode_rewards = []
     self.episode_lengths = []
     self.episode_times = []
     self.total_steps = 0
     self.current_reset_info = {} # extra info about the current episode, that was passed in during reset()
Example No. 29
 def __init__(self,
              env,
              inst=0,
              plt_tr=True,
              folder=None,
              inst_to_save=[0]):
     Wrapper.__init__(self, env=env)
     self.env = env
     self.do = inst in inst_to_save
     self.action_space = self.env.action_space
     self.observation_space = self.env.observation_space
     if self.do:
         self.num_tr = 0
         self.inst = inst
         # data to save
         self.choice_mat = []
         self.gt_mat = []
         # for catch trials
         self.catch_tr_mat = []
         # for dual-task
         self.config_mat = []
         # for RDM + trial history
         self.rep_prob_mat = []
         self.stim_mat = []
         self.reward_mat = []
         self.cum_obs = 0
         self.cum_rew = 0
         # for rendering
         self.obs_mat = []
         self.act_mat = []
         self.gt_mat_render = []
         self.rew_mat = []
         self.num_tr_save = 100000
         self.max_num_samples = 200
         self.num_subplots = 3
         self.plt_tr = plt_tr and self.do
         if self.plt_tr:
             import matplotlib
             matplotlib.use('Agg')
             import matplotlib.pyplot as plt
             self.fig, self.ax = plt.subplots(self.num_subplots, 1)
         if folder is not None:
             self.folder = folder + '/'
         else:
             self.folder = "/tmp/"
         if not os.path.exists(self.folder):
             os.mkdir(self.folder)
         # seeding
         self.env.seed()
         self.saving_name = self.folder +\
             self.env.__class__.__name__ + str(self.inst)
Example No. 30
 def __init__(self, env):
     self.env = env
     self.env.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,))
     self.n = 2 + self.env.observable_sharks * 3 +\
         self.env.observable_fishes * 3 +\
         self.env.observable_walls * 2
     self.env.observation_space = spaces.Box(
         low=-1.0, high=1.0, shape=(self.n,)
     )
     self.env.reward_range = (-float('inf'), float('inf'))
     self.env.spec = None
     self.env.metadata = {'render.modes': ['human']}
     self.env.num_envs = 1
     Wrapper.__init__(self, env=env)
Example No. 31
    def __init__(self, env, t_max=2 * math.pi, t_cycle=50):
        if not isinstance(env.observation_space, Box):
            raise NotImplementedError("Use Box")

        self.t_max = t_max
        self.t_cycle = t_cycle

        low = env.observation_space.low
        low = np.append(low, [0])
        high = env.observation_space.high
        high = np.append(high, [self.t_max])
        env.observation_space = Box(low, high)

        Wrapper.__init__(self, env=env)
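
Matching the extra [0, t_max] dimension appended above, a sketch of the observation augmentation (assumed; the step method is not part of this excerpt):

import math
import numpy as np

def augment_with_time(obs, step_count, t_max=2 * math.pi, t_cycle=50):
    # append a cyclic time signal in [0, t_max) to the observation (sketch)
    t = (step_count % t_cycle) / t_cycle * t_max
    return np.append(obs, t)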
Example No. 32
 def __init__(self, env, catch_prob=0.01, stim_th=50):
     Wrapper.__init__(self, env=env)
     self.env = env
     # we get the original task, in case we are composing wrappers
     env_aux = env
     while env_aux.__class__.__module__.find('wrapper') != -1:
          env_aux = env_aux.env
     self.task = env_aux
     self.catch_prob = catch_prob
     if stim_th is not None:
         self.stim_th = np.percentile(self.task.cohs, stim_th)
     else:
         self.stim_th = None
     self.R_CORRECT_ORI = self.task.R_CORRECT
     self.catch_trial = False
Example No. 33
 def __init__(self, env, filename, allow_early_resets=False):
     Wrapper.__init__(self, env=env)
     self.tstart = time.time()
     if filename is None:
         self.f = None
         self.logger = None
     else:
         if not filename.endswith(Monitor.EXT):
             filename = filename + "." + Monitor.EXT
         self.f = open(filename, "wt")
         self.logger = JSONLogger(self.f)
         self.logger.writekvs({"t_start": self.tstart, "gym_version": gym.__version__,
             "env_id": env.spec.id if env.spec else 'Unknown'})
     self.allow_early_resets = allow_early_resets
     self.rewards = None
     self.needs_reset = True
     self.episode_rewards = []
     self.episode_lengths = []
     self.total_steps = 0
     self.current_metadata = {} # extra info that gets injected into each log entry