Пример #1
0
    def _flush(self, force=False):
        """Write the recorded stats and the training manifest to disk.

        Nothing is written unless ``self.write_upon_reset`` is set or
        ``force`` is true.
        """
        if not (self.write_upon_reset or force):
            return

        self.stats_recorder.flush()

        # The manifest gets a very distinctive file name because it has to be
        # picked up from the filesystem again later.
        manifest_name = '{}.manifest.{}.manifest.json'.format(
            self.file_prefix, self.file_infix)
        path = os.path.join(self.directory, manifest_name)
        logger.debug('Writing training manifest file to %s', path)
        with atomic_write.atomic_write(path) as f:
            # Only basenames are stored, so the manifest stays valid when the
            # training directory is moved around.
            stats_name = os.path.basename(self.stats_recorder.path)
            video_names = [(os.path.basename(video), os.path.basename(meta))
                           for video, meta in self.videos]
            manifest = {
                'stats': stats_name,
                'videos': video_names,
                'env_info': self._env_info(),
            }
            json.dump(manifest, f, default=json_encode_np)
Пример #2
0
    def close(self):
        """Shut the recorder down; skipping this call leaks the encoder process."""
        if not self.enabled:
            return

        if not self.encoder:
            # No frame was ever captured: remove the empty output file and
            # record that fact in the metadata.
            os.remove(self.path)
            if self.metadata is None:
                self.metadata = {}
            self.metadata['empty'] = True
        else:
            logger.debug('Closing video encoder: path=%s', self.path)
            self.encoder.close()
            self.encoder = None

        if self.broken:
            # A broken recording must not leave its output file behind.
            logger.info('Cleaning up paths for broken video recorder: path=%s metadata_path=%s', self.path, self.metadata_path)

            # The output file may never have been created (or was removed above).
            if os.path.exists(self.path):
                os.remove(self.path)

            if self.metadata is None:
                self.metadata = {}
            self.metadata['broken'] = True

        self.write_metadata()
Пример #3
0
    def _past_limit(self):
        """Tell whether the elapsed steps have reached the configured step limit.

        Returns False when no step limit is configured.
        """
        step_limit = self._max_episode_steps
        if step_limit is None or not (step_limit <= self._elapsed_steps):
            return False

        logger.debug("Env has passed the step limit defined by TimeLimit.")
        return True
Пример #4
0
    def compute_all_hulls(self, q_values, states_count):
        """
            Compute one convex hull per state, in a process pool unless
            config["cpu_processes"] is 1

        :param q_values: values sliced into consecutive chunks of
                         len(self.betas_for_discretisation) entries, one chunk per state
        :param states_count: number of chunks (states) to process
        :return: the hulls, one per state
        """
        logger.debug("[BFTQ] -Compute hulls")
        n_beta = len(self.betas_for_discretisation)

        # One argument tuple per state for compute_convex_hull_from_values.
        hull_params = []
        for state in range(states_count):
            first = state * n_beta
            hull_params.append((q_values[first:first + n_beta],
                                self.betas_for_discretisation,
                                self.config["hull_options"],
                                self.config["clamp_qc"]))

        if self.config["cpu_processes"] != 1:
            with Pool(self.config["cpu_processes"]) as pool:
                results = pool.starmap(compute_convex_hull_from_values,
                                       hull_params)
        else:
            results = [compute_convex_hull_from_values(*args)
                       for args in hull_params]
        # Each result is unpacked as a (hull, points) pair.
        hulls, all_points = zip(*results)

        torch.cuda.empty_cache()
        # Only the hulls of the first and the last state are plotted.
        for s in (0, -1):
            plot_title = "Hull {} batch {}".format(s, self.batch)
            plot_hull(hulls[s], all_points[s], self.writer, self.epoch,
                      title=plot_title)
        return hulls
Пример #5
0
    def capture_frame(self, context=None):
        """Render the env, optionally stamp `context` on the frame, and add it to the video.

        :param context: value forwarded to ``env.render``; when not None it is
                        also drawn onto the frame as ``'Context: <value>'``.

        A ``None`` frame is ignored when ``self._async`` is set; otherwise the
        recorder is marked as broken. In both cases nothing is encoded.
        """
        import cv2

        if not self.functional:
            return
        logger.debug('Capturing video frame: path=%s', self.path)

        # NOTE(review): the render mode is hard-coded to 'human' here instead
        # of being derived from self.ansi_mode ('ansi' / 'rgb_array') as in
        # the stock recorder -- confirm this is intended for this env.
        frame = self.env.render(mode='human', context=context)

        # Handle a missing frame first: the overlay below cannot work on None,
        # and doing it first would make this handling unreachable.
        if frame is None:
            if self._async:
                return
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn(
                'Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s',
                self.path, self.metadata_path)
            self.broken = True
            return

        if context is not None:
            # putText draws in place, so work on a contiguous uint8 array.
            frame = np.ascontiguousarray(frame, dtype=np.uint8)
            cv2.putText(frame, 'Context: {}'.format(context), (300, 300),
                        cv2.FONT_HERSHEY_SIMPLEX, 4, (255, 255, 255), 3,
                        cv2.LINE_AA)

        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
Пример #6
0
    def step(self, a):
        """Apply action `a` to the game and return ``(obs, reward, is_over, info)``.

        The game is resumed for the duration of the action and paused again
        afterwards. On game over the reward is ``self.death_reward``; otherwise
        it follows ``self.reward_strategy`` ('cosine', 'one' or 'cosine_thresh').

        :raises ValueError: if the game is not over and the reward strategy is unknown
        """
        self.game.resume()
        if a == ACTION_LEFT:
            self.game.tap_left()
        elif a == ACTION_RIGHT:
            self.game.tap_right()
        self._update_state()
        self.game.pause()

        is_over = self.state.status == GAME_OVER_SCREEN
        if is_over:
            reward = self.death_reward
        else:
            cosine = math.cos(self.state.position['angle'])
            strategy = self.reward_strategy
            if strategy == 'cosine':
                reward = cosine
            elif strategy == 'one':
                reward = 1.0
            elif strategy == 'cosine_thresh':
                # Above the threshold the cosine is used as is, otherwise it
                # is scaled by the stay-alive reward.
                if cosine > self.score_threshold:
                    reward = cosine
                else:
                    reward = cosine * self.stay_alive_reward
            else:
                raise ValueError(
                    'Invalid reward strategy: {}'.format(strategy))

        message = 'HiScore: {}, Score: {}, Action: {}, Reward: {}, GameOver: {}'.format(
            self.state.hiscore, self.state.score, ACTION_NAMES[a], reward,
            is_over)
        logger.debug(message)

        observation = self._get_obs()
        info = dict(score=self.state.score,
                    hiscore=self.state.hiscore,
                    position=self.state.position['angle'])
        return observation, reward, is_over, info
    def capture_frame(self):
        """Render the env, stamp an episode label on image frames, and add the frame to the video.

        The label reads ``Episode <1000 * int(self.base_path[-6:])>`` and is
        only drawn in image (non-ansi) mode. A ``None`` frame is ignored when
        ``self._async`` is set; otherwise the recorder is marked as broken.
        In both cases nothing is encoded.
        """
        if not self.functional:
            return
        logger.debug('Capturing video frame: path=%s', self.path)

        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
        frame = self.env.render(mode=render_mode)

        # Check for a missing frame before touching it: Image.fromarray cannot
        # handle None, and checking afterwards could never trigger.
        if frame is None:
            if self._async:
                return
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
            return

        if not self.ansi_mode:
            # Draw the label on a transparent layer and composite it over the
            # rendered image.
            im = Image.fromarray(frame).convert('RGBA')
            txt = Image.new('RGBA', im.size, (255, 255, 255, 0))
            fnt = ImageFont.truetype('RL/BipedalWalker/BRLNSR.TTF', 40)
            d = ImageDraw.Draw(txt)
            d.text((10, 350), f"Episode {1000 * int(self.base_path[-6:])}", font=fnt, fill=(240, 248, 255, 255))
            frame = np.asarray(Image.alpha_composite(im, txt).convert('RGB'))

        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
Пример #8
0
    def start(self):
        """Build the encoder command line and launch it with a pipe on stdin.

        The command is stored in ``self.cmdline`` and the process in ``self.proc``.
        """
        args = [self.backend, '-nostats']
        args += ['-loglevel', 'error']  # suppress warnings
        args += ['-y']
        args += ['-r', '%d' % self.frames_per_sec]

        # input
        args += ['-f', 'rawvideo']
        args += ['-s:v', '{}x{}'.format(*self.wh)]
        args += ['-pix_fmt', 'rgb32' if self.includes_alpha else 'rgb24']
        args += ['-i', '-']  # this used to be /dev/stdin, which is not Windows-friendly

        # output
        args += ['-vf', 'scale=trunc(iw/2)*2:trunc(ih/2)*2']
        args += ['-vcodec', 'libx264']
        args += ['-pix_fmt', 'yuv420p']
        args += [self.output_path]
        self.cmdline = tuple(args)

        logger.debug('Starting ffmpeg with "%s"', ' '.join(self.cmdline))
        popen_options = {'stdin': subprocess.PIPE}
        if hasattr(os, 'setsid'):  # setsid not present on Windows
            popen_options['preexec_fn'] = os.setsid
        self.proc = subprocess.Popen(self.cmdline, **popen_options)
Пример #9
0
    def capture_frame(self):
        """Render one frame from the env and pass it to the matching encoder.

        Nothing happens when the recorder is not functional or was already
        closed. A ``None`` frame is ignored when ``self._async`` is set;
        otherwise the recorder is marked as broken.
        """
        if not self.functional:
            return
        if self._closed:
            logger.warn(
                "The video recorder has been closed and no frames will be captured anymore."
            )
            return
        logger.debug("Capturing video frame: path=%s", self.path)

        frame = self.env.render(mode="ansi" if self.ansi_mode else "rgb_array")

        if frame is not None:
            self.last_frame = frame
            if self.ansi_mode:
                self._encode_ansi_frame(frame)
            else:
                self._encode_image_frame(frame)
            return

        if self._async:
            return
        # A None frame points at a bug in the environment; flag the recorder
        # instead of raising.
        logger.warn(
            "Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s",
            self.path,
            self.metadata_path,
        )
        self.broken = True
    def capture_frame(self):
        """Render the env and add every returned frame to the video.

        Returns the rendered array when the env returned an ``np.ndarray``,
        an empty array when it returned anything else, and ``None`` when the
        recorder is not functional or a ``None`` frame is met while
        ``self._async`` is set.
        """
        if not self.functional:
            return
        logger.debug('Capturing video frame: path=%s', self.path)

        frames = self.env.render(mode='ansi' if self.ansi_mode else 'rgb_array')
        if not isinstance(frames, np.ndarray):
            return np.array([])

        for frame in frames:  # multiple frames
            if frame is not None:
                self.last_frame = frame
                if self.ansi_mode:
                    self._encode_ansi_frame(frame)
                else:
                    self._encode_image_frame(frame)
                continue

            if self._async:
                return
            # A None frame points at a bug in the environment; flag the
            # recorder instead of raising and carry on with the next frame.
            logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
        return frames
Пример #11
0
 def plan(self, state, observation):
     """Run config['iterations'] planning iterations and return the resulting plan.

     Every iteration works on a fresh copy of `state`; progress is logged
     every tenth iteration.
     """
     for completed in range(1, self.config['iterations'] + 1):
         if completed % 10 == 0:
             progress = '{} / {}'.format(completed, self.config['iterations'])
             logger.debug(progress)
         self.run(safe_deepcopy_env(state), observation)
     return self.get_plan()
Пример #12
0
    def _fit(self, states_betas, actions, target_r, target_c):
        """
            Regress the network Q(state, action, beta) = (Qr, Qc) onto target values
        :param states_betas: batch of states and betas
        :param actions: batch of actions
        :param target_r: batch of target reward-values
        :param target_c: batch of target cost-values
        :return: the loss between the model and the target values, measured
                 before any gradient step (and before the optional network reset)
        """
        logger.debug("[BFTQ] Fit model")
        batch = (states_betas, actions, target_r, target_c)

        # Bellman residual of the current model, computed without gradients
        with torch.no_grad():
            delta = self._compute_loss(*batch).detach().item()
            torch.cuda.empty_cache()

        # Optionally start the regression from a freshly reset network
        if self.config["reset_network_each_epoch"]:
            self.reset_network()

        # Gradient descent; the per-step losses are collected but not used further
        losses = [self._gradient_step(*batch)
                  for _ in range(self.config["regression_epochs"])]
        torch.cuda.empty_cache()

        return delta
Пример #13
0
    def capture_frame(self):
        """Render the env and add the resulting frame to the video.

        For particle envs (``self.particle_env``) only the first element of the
        rendered result is used. A ``None`` frame is ignored when
        ``self._async`` is set; otherwise the recorder is marked as broken.
        """
        if not self.functional:
            return
        logger.debug('Capturing video frame: path=%s', self.path)

        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
        frame = self.env.render(mode=render_mode)

        # multiagent particle envs returns list here instead of ndarray
        # because of option for multiple agent views
        # take just primary view for video
        # (skipped for None so that the missing-frame handling below is
        # reached instead of failing on None[0])
        if self.particle_env and frame is not None:
            frame = frame[0]

        if frame is None:
            if self._async:
                return
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn(
                'Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s',
                self.path, self.metadata_path)
            self.broken = True
            return

        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
Пример #14
0
    def capture_frame(self):
        """Render the env once and feed the frame to the video encoder.

        A ``None`` frame is ignored when ``self._async`` is set; otherwise the
        recorder is marked as broken.
        """
        if not self.functional:
            return
        logger.debug('Capturing video frame: path=%s', self.path)

        if self.ansi_mode:
            render_mode = 'ansi'
        else:
            render_mode = 'rgb_array'
        frame = self.env.render(mode=render_mode)

        if frame is None and self._async:
            return
        if frame is None:
            # A None frame points at a bug in the environment; flag the
            # recorder rather than raising.
            logger.warn(
                'Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s',
                self.path, self.metadata_path)
            self.broken = True
            return

        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
    def start(self):
        """Build the encoder command line and launch it with a pipe on stdin.

        The command is stored in ``self.cmdline`` and the process in ``self.proc``.
        """
        self.cmdline = (
            self.backend,
            '-nostats',
            '-loglevel',
            'error',  # suppress warnings
            '-y',
            '-r',
            '%d' % self.frames_per_sec,

            # input
            '-f',
            'rawvideo',
            '-s:v',
            '{}x{}'.format(*self.wh),
            '-pix_fmt',
            ('rgb32' if self.includes_alpha else 'rgb24'),
            '-i',
            '-',  # this used to be /dev/stdin, which is not Windows-friendly

            # output
            # libx264 with yuv420p needs even dimensions, so truncate odd
            # widths/heights to the nearest even value (no-op for even sizes).
            '-vf',
            'scale=trunc(iw/2)*2:trunc(ih/2)*2',
            '-vcodec',
            'libx264',
            '-pix_fmt',
            'yuv420p',
            self.output_path)

        logger.debug('Starting ffmpeg with "%s"', ' '.join(self.cmdline))
        if hasattr(os, 'setsid'):  # setsid not present on Windows
            self.proc = subprocess.Popen(self.cmdline,
                                         stdin=subprocess.PIPE,
                                         preexec_fn=os.setsid)
        else:
            self.proc = subprocess.Popen(self.cmdline, stdin=subprocess.PIPE)
Пример #16
0
    def close(self):
        """Close the recorder manually; otherwise the encoder process is leaked."""
        if not self.enabled:
            return

        has_encoder = bool(self.encoder)
        if has_encoder:
            logger.debug('Closing video encoder: path=%s', self.path)
            self.encoder.close()
            self.encoder = None

        if not has_encoder:
            # Without an encoder no frames were captured: delete the empty
            # output file and note it in the metadata.
            os.remove(self.path)
            if self.metadata is None:
                self.metadata = {}
            self.metadata['empty'] = True

        if self.broken:
            # Remove the output of a broken recording so it is not leaked.
            logger.info(
                'Cleaning up paths for broken video recorder: path=%s metadata_path=%s',
                self.path, self.metadata_path)

            # The file may be missing already, e.g. after a crash before the
            # output was started.
            if os.path.exists(self.path):
                os.remove(self.path)

            if self.metadata is None:
                self.metadata = {}
            self.metadata['broken'] = True

        self.write_metadata()
Пример #17
0
    def compute_next_values(self, next_states):
        """
            Evaluate the value network on all (next state, budget) pairs
        :param next_states: batch of next state
        :return: Q values at next states, as a numpy array
        """
        logger.debug("[BFTQ] -Forward pass")
        n_betas = len(self.betas_for_discretisation)
        n_states = len(next_states)

        # Cartesian product of all next states with all budgets
        states = next_states.squeeze().repeat((1, n_betas))
        states = states.view(
            (n_states * n_betas, self._value_network.size_state))
        budgets = torch.from_numpy(self.betas_for_discretisation).float()
        budgets = budgets.unsqueeze(1).to(device=self.device)
        budgets = budgets.repeat((n_states, 1))
        states_budgets = torch.cat((states, budgets), dim=1).unsqueeze(1)

        # The batch is processed in several minibatches to avoid memory spikes
        batch_sizes = near_split(x=len(states_budgets),
                                 num_bins=self.config["split_batches"])
        q_values = []
        for index in range(self.config["split_batches"]):
            begin = sum(batch_sizes[:index])
            end = sum(batch_sizes[:index + 1])
            q_values.append(self._value_network(states_budgets[begin:end]))
            torch.cuda.empty_cache()
        return torch.cat(q_values).detach().cpu().numpy()
Пример #18
0
    async def restart(self):
        """Assign a new game id, bring the game to a restartable screen and start it.

        :raises ValueError: if the game is neither over nor on the start screen
        """
        self.game_id = str(uuid.uuid4())
        self.state_id = 0

        if self.state.status == GAME_SCREEN:
            # A game is still running: keep tapping left and refreshing the
            # state until the game is over.
            while not self.is_over():
                logger.debug('suiciding')
                for _ in range(3):
                    await self.tap_left()
                await self.get_state()

        if not self.is_over():
            if self.state.status != START_SCREEN:
                raise ValueError('Unknown state: {}'.format(self.state.status))
            logger.debug('start screen')
        else:
            await self._wait_until_replay_button_is_active()
            # Click horizontally centred, one seventh of the height above the
            # bottom edge (presumably where the replay button sits).
            click_x = self.x + self.width // 2
            click_y = self.y + self.height - self.height // 7
            await self.page.mouse.click(click_x, click_y)

        await self.start()
Пример #19
0
    def generate_video(self,
                       frames_dict,
                       extra_info_dict,
                       require_text=True,
                       gif_mode=None):
        """Lay the given frames out on a grid and encode the result.

        :param frames_dict: frames to compose. The original author's notes
            sketch it as ``{VIDEO_NAME: {'frames': FRAME, 'pos': (ROW, COL)}, ...}``;
            the notes also mention "row_names", "col_names" and "frame_info"
            entries (verify against the callers).
        :param extra_info_dict: extra layout information; on first use its
            ``'frame_info'`` entry is read for ``'width'`` and ``'height'``.
        :param require_text: forwarded to ``_build_grid_of_frames``.
        :param gif_mode: forwarded to ``_generate_gif`` in GIF mode.
        :return: the result of ``_generate_gif`` when ``self.generate_gif`` is
            set, the first composed frame when ``self.test_mode`` is set,
            otherwise ``self.path``.
        """
        logger.debug('Capturing video frame: path=%s', self.path)

        if self.generate_gif:
            return self._generate_gif(frames_dict, extra_info_dict, gif_mode)

        if not self.initialized:
            frame_info = extra_info_dict['frame_info']
            self.width = frame_info['width']
            self.height = frame_info['height']
            self._build_frame_range()
            self.initialized = True

        self._build_background(frames_dict)
        self._build_grid_of_frames(frames_dict, extra_info_dict, require_text)
        if self.test_mode:
            return self.background[0]

        started = time.time()
        lap_started = started

        for count, frame in enumerate(self.background, start=1):
            # Report progress every 100 frames: time since the previous
            # report and total time so far.
            if count % 100 == 0:
                print(
                    "Current Frames: {}/{} (T +{:.1f}s Total {:.1f}s)".format(
                        count, len(self.background),
                        time.time() - lap_started,
                        time.time() - started))
                lap_started = time.time()
            self.last_frame = frame
            self._encode_image_frame(frame)

        self._close()
        return self.path
Пример #20
0
    def plan(self, state, observation):
        """Run config['episodes'] planning episodes and return the resulting plan.

        The current episode index is kept in ``self.episode``; every episode
        runs on a fresh copy of `state`. Progress is logged about ten times
        over the whole run. `observation` is not used here.
        """
        for episode in range(self.config['episodes']):
            self.episode = episode
            report_every = max(self.config['episodes'] // 10, 1)
            if (episode + 1) % report_every == 0:
                progress = '{} / {}'.format(episode + 1,
                                            self.config['episodes'])
                logger.debug(progress)
            self.run(safe_deepcopy_env(state))

        return self.get_plan()
    def train(self, render=True, full_memory=True):
        """Run ``self.num_runs`` epsilon-greedy training episodes on ``self.env``.

        Every step picks a random action with probability ``self.epsilon`` and
        otherwise the arg-max of the target network's prediction, stores the
        transition in ``self.memory``, trains ``self.train_network`` every
        ``self.train_step`` steps and copies its weights to the target network
        every ``self.copy_step`` steps. The cumulative reward of each episode
        is appended to ``self.rewards_train``.

        :param render: render the environment before every step
        :param full_memory: train on all stored experiences when true,
                            otherwise on a sample of ``self.train_step`` experiences

        A KeyboardInterrupt stops training early; the environment is closed
        afterwards in either case.
        """
        if self.run_started:
            # Use the callable log method; `logger.WARN` is not one.
            logger.warn('You should not run a single experiment twice!!')
        self.run_started = True
        cum_count = 0  # steps taken over all episodes
        try:
            for i_episode in tqdm.tqdm(range(self.num_runs)):
                observation = self.env.reset()
                cumulative_reward = 0.
                for t in range(1, self.max_steps_in_run + 1):
                    cum_count += 1
                    if render:
                        self.env.render()
                    if random.random() < self.epsilon:
                        action_index = random.randint(
                            0,
                            len(self.discrete_actions) - 1)
                    else:
                        prediction = self.target_network.predict(
                            np.reshape(observation, (1, self.num_states)))
                        action_index = np.argmax(prediction)

                    action = self.discrete_actions[action_index]
                    self.actions.append(action)
                    prev_observation = observation
                    observation, reward, done, info = self.env.step(action)
                    cumulative_reward += reward
                    self.memory.append(
                        Experience(prev_observation, action_index, reward,
                                   observation, done))
                    if cum_count % self.train_step == 0:
                        try:
                            if full_memory:
                                batch_train = self.memory.all_entries()
                            else:
                                batch_train = self.memory.sample(
                                    self.train_step)
                            self.train_network.train(batch_train,
                                                     self.target_network)
                        except ValueError:
                            # Not enough samples in memory yet. Skip training
                            # only: the weight copy and the `done` check
                            # below must still run for this step.
                            pass

                    # Sync the target network once every `copy_step` steps.
                    if cum_count % self.copy_step == 0:
                        self.target_network.copy_weights(self.train_network)

                    if done:
                        # `t` starts at 1, so it already counts the steps taken.
                        logger.debug(
                            f"Episode {i_episode} finished after {t} timesteps"
                        )
                        break

                self.rewards_train.append(cumulative_reward)
        except KeyboardInterrupt:
            # Deliberate: allow the user to stop training with Ctrl-C.
            pass
        self.env.close()
Пример #22
0
 def key_up(self, key: str) -> None:
     """Release `key` through xdotool if it is currently tracked as pressed.
     See https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h.
     """
     logger.debug(f"[KBD] Up {key}")
     keycode = self._translate(key)
     if keycode not in self._pressed_keys:
         # Not tracked as pressed: nothing to release.
         return
     self._pressed_keys.remove(keycode)
     subprocess.call(['xdotool', 'keyup', keycode])
Пример #23
0
 def update_design_success(self,goal,stage,metric):
     """Record whether the design goal was achieved.

     Goals without a "success_threshold" in the environment config are left
     untouched; otherwise ``self.design_success[goal]`` is set to whether
     `metric` reached the threshold. `stage` is only used in the log message.
     """
     goal_config = self.environment_config['goals'][goal]
     if "success_threshold" not in goal_config:
         return

     if metric >= goal_config['success_threshold']:
         self.design_success.update({goal: True})
         logger.debug(f'{self.name}:Design was successful for {goal} in {stage} with value {metric:.3f} at step:{self.steps}.')
         return

     self.design_success.update({goal: False})
Пример #24
0
    def load_results(training_dir):
        """Load the stats, videos and env info recorded in `training_dir`.

        Returns None (after logging an error) when the directory does not
        exist or contains no manifests. With a single stats file, its JSON
        content is returned with 'manifests', 'env_info' and 'videos' added;
        with several, the merged stats are returned.
        """
        if not os.path.exists(training_dir):
            logger.error('Training directory %s not found', training_dir)
            return

        manifests = detect_training_manifests(training_dir)
        if not manifests:
            logger.error('No manifests found in training directory %s',
                         training_dir)
            return

        logger.debug('Uploading data from manifest %s', ', '.join(manifests))

        def in_training_dir(name):
            # Manifests store relative names; make them absolute again.
            return os.path.join(training_dir, name)

        stats_files, videos, env_infos = [], [], []
        for manifest in manifests:
            with open(manifest) as f:
                contents = json.load(f)
            stats_files.append(in_training_dir(contents['stats']))
            videos.extend([(in_training_dir(video), in_training_dir(meta))
                           for video, meta in contents['videos']])
            env_infos.append(contents['env_info'])

        env_info = collapse_env_infos(env_infos, training_dir)

        # A single stats file needs no merging, so all its fields are kept.
        if len(stats_files) == 1:
            with open(stats_files[0]) as f:
                content = json.load(f)
            content.update(manifests=manifests,
                           env_info=env_info,
                           videos=videos)
            return content

        (data_sources, initial_reset_timestamps, timestamps, episode_lengths,
         episode_rewards, episode_types,
         initial_reset_timestamp) = merge_stats_files(stats_files)

        return dict(
            manifests=manifests,
            env_info=env_info,
            data_sources=data_sources,
            timestamps=timestamps,
            episode_lengths=episode_lengths,
            episode_rewards=episode_rewards,
            episode_types=episode_types,
            initial_reset_timestamps=initial_reset_timestamps,
            initial_reset_timestamp=initial_reset_timestamp,
            videos=videos,
        )
Пример #25
0
    def _past_limit(self):
        """Tell whether the step limit or the seconds limit has been reached.

        A limit that is None is not checked.
        """
        steps_exceeded = (self._max_episode_steps is not None
                          and self._max_episode_steps <= self._elapsed_steps)
        if steps_exceeded:
            logger.debug("Env has passed the step limit defined by TimeLimit.")
            return True

        seconds_exceeded = (self._max_episode_seconds is not None
                            and self._max_episode_seconds <= self._elapsed_seconds)
        if seconds_exceeded:
            logger.debug("Env has passed the seconds limit defined by TimeLimit.")
            return True

        return False
Пример #26
0
 def check_design_convergence(self):
     """Check that every module in ``self.sx_design.status`` converged.

     When at least one module did not converge, an error is logged and
     ``self.convergence_failure`` is set to True.
     """
     status = self.sx_design.status
     failed_modules = [stage for stage, converged in status.items() if not converged]

     if failed_modules:
         logger.error(f'{self.name}:Equilibrium failed at step:{self.steps} due to non-convergence in following modules:{failed_modules} - Terminating environment!')
         self.convergence_failure = True
         return

     converged_modules = [stage for stage, converged in status.items() if converged]
     assert len(converged_modules) == len(status), 'All modules should converge'

     logger.debug(f'{self.name}:Equilibrium succeeded at step:{self.steps} for all modules:{converged_modules}')
Пример #27
0
def update_rollout_dict(spec, rollout_dict):
    """
    Takes as input the environment spec for which the rollout is to be generated,
    and the existing dictionary of rollouts. Returns True iff the dictionary was
    modified.

    The dictionary is left untouched when the spec is skipped (platform-dependent
    or nondeterministic), when generating the rollout raises an exception, or
    when the new hashes equal the stored ones.
    """
    # Skip platform-dependent
    if should_skip_env_spec_for_tests(spec):
        logger.info("Skipping tests for {}".format(spec.id))
        return False

    # Skip environments that are nondeterministic
    if spec.nondeterministic:
        logger.info("Skipping tests for nondeterministic env {}".format(
            spec.id))
        return False

    logger.info("Generating rollout for {}".format(spec.id))

    try:
        (
            observations_hash,
            actions_hash,
            rewards_hash,
            dones_hash,
        ) = generate_rollout_hash(spec)
    except Exception as exc:
        # If running the env generates an exception, don't write to the rollout file.
        # Only Exception is caught, so KeyboardInterrupt / SystemExit still
        # stop the run instead of being reported as a failed rollout.
        logger.warn(
            "Exception {} thrown while generating rollout for {}. Rollout not added."
            .format(type(exc), spec.id))
        return False

    rollout = {
        "observations": observations_hash,
        "actions": actions_hash,
        "rewards": rewards_hash,
        "dones": dones_hash,
    }

    existing = rollout_dict.get(spec.id)
    if existing:
        differs = any(existing[key] != new_hash
                      for key, new_hash in rollout.items())
        if not differs:
            logger.debug("Hashes match with existing for {}".format(spec.id))
            return False
        logger.warn("Got new hash for {}. Overwriting.".format(spec.id))

    rollout_dict[spec.id] = rollout
    return True
Пример #28
0
    def _calculate_brokerage_fee(self, amount):
        """Return the brokerage fee for `amount`, rounded to two decimals.

        The first entry of ``self.transaction_fee`` whose ``amount`` bound is
        not below `amount` decides the fee: a percentage of `amount` when
        ``is_percentage`` is set, otherwise the flat ``fee``. When no entry
        matches, the fee is 0.
        NOTE(review): picking the first match presumably relies on the entries
        being ordered by ascending bound -- confirm against where they are built.
        """
        fee = 0
        for tier in self.transaction_fee:
            if amount <= tier.amount:
                if tier.is_percentage:
                    fee = amount * tier.fee / 100.0
                else:
                    fee = tier.fee
                break

        fee = round(fee, 2)
        logger.debug(f'Brokerage Fee:{fee} for amount:{round(amount, 2)}')
        return fee
Пример #29
0
 def compute_all_optimal_mixtures(self, hulls, betas):
     """
         Compute the optimal mixture for every (hull, beta) pair, in a
         process pool unless config["cpu_processes"] is 1

     :param hulls: hulls, indexed like betas
     :param betas: tensors, each converted to a Python scalar with detach().item()
     :return: list with one optimal_mixture result per beta
     """
     logger.debug("[BFTQ] -Compute optimal mixtures")
     params = []
     for index, beta in enumerate(betas):
         params.append((hulls[index], beta.detach().item()))

     if self.config["cpu_processes"] != 1:
         with Pool(self.config["cpu_processes"]) as pool:
             return pool.starmap(optimal_mixture, params)
     return [optimal_mixture(hull, beta) for hull, beta in params]
Пример #30
0
def load_results(training_dir):
    """Gather the monitor output stored in ``training_dir`` into one dict.

    Every manifest reported by ``detect_training_manifests`` is read. The
    stats file and the (video, metadata) file names it lists are joined onto
    ``training_dir`` and its ``env_info`` entry is collected. The env infos
    are then reduced with ``collapse_env_infos`` and the stats files combined
    with ``merge_stats_files``.

    Returns:
        A dict with the keys ``manifests``, ``env_info``, ``data_sources``,
        ``timestamps``, ``episode_lengths``, ``episode_rewards``,
        ``episode_types``, ``initial_reset_timestamps``,
        ``initial_reset_timestamp`` and ``videos``; or ``None`` (after
        logging an error) when the directory does not exist or no manifest
        is found in it.
    """
    if not os.path.exists(training_dir):
        logger.error("Training directory %s not found", training_dir)
        return None

    manifests = detect_training_manifests(training_dir)
    if not manifests:
        logger.error("No manifests found in training directory %s",
                     training_dir)
        return None

    logger.debug("Uploading data from manifest %s", ", ".join(manifests))

    def in_training_dir(name):
        # Paths inside a manifest are relative; anchor them at training_dir.
        return os.path.join(training_dir, name)

    stats_files, videos, env_infos = [], [], []
    for manifest_path in manifests:
        with open(manifest_path) as handle:
            manifest = json.load(handle)
            stats_files.append(in_training_dir(manifest["stats"]))
            videos.extend([
                (in_training_dir(video), in_training_dir(meta))
                for video, meta in manifest["videos"]
            ])
            env_infos.append(manifest["env_info"])

    env_info = collapse_env_infos(env_infos, training_dir)
    merged = merge_stats_files(stats_files)
    (
        data_sources,
        initial_reset_timestamps,
        timestamps,
        episode_lengths,
        episode_rewards,
        episode_types,
        initial_reset_timestamp,
    ) = merged

    results = {
        "manifests": manifests,
        "env_info": env_info,
        "data_sources": data_sources,
        "timestamps": timestamps,
        "episode_lengths": episode_lengths,
        "episode_rewards": episode_rewards,
        "episode_types": episode_types,
        "initial_reset_timestamps": initial_reset_timestamps,
        "initial_reset_timestamp": initial_reset_timestamp,
        "videos": videos,
    }
    return results
Пример #31
0
 def step(self, action):
     """Apply ``action`` to the design and advance the episode by one step.

     While the episode is running, the action is performed (discrete or
     continuous, depending on ``environment_config['discrete_actions']``),
     the simulation is run and a reward is computed. On a convergence
     failure the previous design state is restored, the episode ends and
     the minimum configured reward is used. The episode also ends when
     ``max_episode_steps`` is reached, or when every design goal is met
     without a convergence failure (the maximum configured reward is then
     returned).

     Calling ``step`` on an episode that has already ended changes nothing:
     the reason is printed and a reward of 0.0 is returned.

     Returns:
         Tuple ``(observation, reward, done, info)`` where the observation
         is ``np.array(self.sx_design.x + self.sx_design.ree_mass)`` and
         ``info`` is always an empty dict.
     """
     if self.done:
         # Episode already over: tell the caller why and ask for a reset.
         if self.convergence_failure:
             print(f'Episode completed after {self.steps} steps due to Convergence failure - Reset environment to start new simulation!')
         elif self.steps >= self.max_episode_steps:
             print(f'Episode completed after {self.steps} steps since max steps were exceeded - Reset environment to start new simulation!')
         elif all(self.design_success.values()):
             print(f'Episode completed after {self.steps} steps since design goals was met - Reset environment to start new simulation!')
         else:
             print(f'Episode completed after {self.steps} steps due to unknown reason - Reset environment to start new simulation!')

         # No step was taken, so no reward is earned. An explicit value is
         # required here; otherwise the return would reference an unbound
         # local and raise UnboundLocalError.
         reward = 0.0
         return np.array(self.sx_design.x+self.sx_design.ree_mass), reward, self.done, {}

     self.steps += 1
     if self.steps == 1:  # first step of an episode
         self.episode_start_logic()

     logger.debug(f'{self.name}:Taking action {action} at step:{self.steps}')

     # Keep a copy so the design can be rolled back on convergence failure.
     prev_state = self.sx_design.x.copy()
     if self.environment_config['discrete_actions']:
         self.action_stats.update({action: self.action_stats[action] + 1})
         if self.action_dict[action]:  # discrete action exists
             self.perform_discrete_action(action)  # map action to variable and action type
             self.run_simulation()
         else:
             logger.info(f'{self.name}:No action found in:{self.action_dict[action]}')
     else:
         self.perform_continuous_action(action)  # map continuous action to variable type
         self.run_simulation()

     if not self.convergence_failure:
         reward = self.get_reward()
     else:
         # Roll back to the last good state and end the episode with the
         # minimum reward.
         self.sx_design.x = prev_state
         self.done = True
         reward = self.reward_config['min']

     logger.info(f'{self.name}:Completed action {action} at step {self.steps} and got reward {reward:.3f}.')
     if self.steps >= self.max_episode_steps:
         self.done = True
         logger.warn(f'{self.name}:Maximum episode steps exceeded after {self.steps} steps - Ending episode!')
     if all(self.design_success.values()) and not self.convergence_failure:
         self.done = True
         reward = self.reward_config['max']
         logger.warn(f'{self.name}:Design successful with recovery:{self.metric_dict["recovery"]}, purity:{self.metric_dict["purity"]},Reward:{reward} after {self.steps} steps - Ending episode!')
     if self.done:
         self.episode_end_logic()

     return np.array(self.sx_design.x+self.sx_design.ree_mass), reward, self.done, {}
Пример #32
0
    def step(self, a):
        """Run the game for one action and report the outcome.

        The game is resumed, tapped when ``a`` is ``ACTION_FLAP``, the
        cached state is refreshed, and the game is paused again.

        Returns:
            Tuple ``(observation, reward, is_over, info)`` where ``is_over``
            tells whether the game-over screen is showing and ``info``
            carries the current score under the key ``score``.
        """
        self.game.resume()
        if a == ACTION_FLAP:
            self.game.tap()
        self._update_state()
        self.game.pause()

        is_over = self.state.status == GAME_OVER_SCREEN
        reward = self.compute_reward(is_over)

        summary = 'HiScore: {}, Score: {}, Action: {}, Reward: {}, GameOver: {}'.format(
            self.state.hiscore,
            self.state.score,
            ACTION_NAMES[a],
            reward,
            is_over,
        )
        logger.debug(summary)

        observation = self._get_obs()
        info = {'score': self.state.score}
        return observation, reward, is_over, info
Пример #33
0
    def move(self):
        """Advance the ball by one tick, bouncing it off the screen edges.

        The horizontal speed is flipped when the next position would cross
        the right or left edge, and the vertical speed when it would cross
        the bottom or top edge.

        Returns:
            True when the ball reaches the bottom edge (the vertical speed
            is flipped but the position is left untouched); False otherwise,
            after the position has been advanced by the current speed.
        """
        next_x = self.x + self.speedx
        next_y = self.y + self.speedy

        # Horizontal edges: the far (right) edge accounts for the radius,
        # the near (left) edge does not.
        if next_x + self.radius >= self.args.env_width:
            logger.debug('ball collide with right side of screen')
            self.speedx = -self.speed_magnitude
        elif next_x <= 0:
            logger.debug('ball collide with left side of screen')
            self.speedx = self.speed_magnitude

        # Vertical edges follow the same convention: radius on the far
        # (bottom) edge only. Adding the radius to the top-edge test would
        # let the ball travel past the top before bouncing.
        if next_y + self.radius >= self.args.env_height:
            logger.debug('ball collide with bottom of screen')
            self.speedy = -self.speed_magnitude
            return True
        elif next_y <= 0:
            logger.debug('ball collide with top of screen')
            self.speedy = self.speed_magnitude

        # update the ball position using the (possibly flipped) speeds
        self.x += self.speedx
        self.y += self.speedy
        return False
Пример #34
0
def load_results(training_dir):
    """Load the stats, videos and env info recorded under ``training_dir``.

    Each manifest returned by ``detect_training_manifests`` contributes one
    stats file, a list of (video, metadata) file pairs and one ``env_info``
    entry; file names are joined onto ``training_dir``. The env infos are
    passed through ``collapse_env_infos`` and the stats files through
    ``merge_stats_files``.

    Returns:
        A dict of the merged results, or ``None`` (after logging an error)
        if ``training_dir`` does not exist or contains no manifests.
    """
    if not os.path.exists(training_dir):
        logger.error('Training directory %s not found', training_dir)
        return None

    manifests = detect_training_manifests(training_dir)
    if not manifests:
        logger.error('No manifests found in training directory %s', training_dir)
        return None

    logger.debug('Uploading data from manifest %s', ', '.join(manifests))

    # Accumulators filled from every manifest.
    stats_files = []
    videos = []
    env_infos = []

    for manifest_path in manifests:
        with open(manifest_path) as manifest_file:
            manifest = json.load(manifest_file)
            # Manifest paths are relative to training_dir; make them
            # absolute again.
            stats_path = os.path.join(training_dir, manifest['stats'])
            stats_files.append(stats_path)
            for video_name, meta_name in manifest['videos']:
                video_path = os.path.join(training_dir, video_name)
                meta_path = os.path.join(training_dir, meta_name)
                videos.append((video_path, meta_path))
            env_infos.append(manifest['env_info'])

    env_info = collapse_env_infos(env_infos, training_dir)
    (
        data_sources,
        initial_reset_timestamps,
        timestamps,
        episode_lengths,
        episode_rewards,
        episode_types,
        initial_reset_timestamp,
    ) = merge_stats_files(stats_files)

    return {
        'manifests': manifests,
        'env_info': env_info,
        'data_sources': data_sources,
        'timestamps': timestamps,
        'episode_lengths': episode_lengths,
        'episode_rewards': episode_rewards,
        'episode_types': episode_types,
        'initial_reset_timestamps': initial_reset_timestamps,
        'initial_reset_timestamp': initial_reset_timestamp,
        'videos': videos,
    }
Пример #35
0
    def capture_frame(self):
        """Render ``self.env`` once and append the frame to the video.

        Nothing happens when the recorder is not functional. The frame is
        rendered in ``'ansi'`` mode when ``self.ansi_mode`` is set, else in
        ``'rgb_array'`` mode. A ``None`` frame is ignored for async
        recorders; otherwise a warning is logged and the recorder is marked
        as broken.
        """
        if not self.functional:
            return
        logger.debug('Capturing video frame: path=%s', self.path)

        frame = self.env.render(mode='ansi' if self.ansi_mode else 'rgb_array')

        if frame is not None:
            # Remember the frame, then hand it to the matching encoder.
            self.last_frame = frame
            if self.ansi_mode:
                self._encode_ansi_frame(frame)
            else:
                self._encode_image_frame(frame)
            return

        if self._async:
            return

        # A None frame points at a bug in the environment; record that
        # instead of raising.
        logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
        self.broken = True
Пример #36
0
    def _flush(self, force=False):
        """Write the recorded stats and the training manifest to disk.

        Does nothing unless ``self.write_upon_reset`` is set or ``force`` is
        true. The manifest is a JSON file listing the stats file, the
        (video, metadata) file pairs and the env info.
        """
        if not (self.write_upon_reset or force):
            return

        self.stats_recorder.flush()

        # The file name is deliberately distinctive, because the manifest
        # has to be picked up from the filesystem again later.
        manifest_name = '{}.manifest.{}.manifest.json'.format(
            self.file_prefix, self.file_infix)
        path = os.path.join(self.directory, manifest_name)
        logger.debug('Writing training manifest file to %s', path)
        with atomic_write.atomic_write(path) as f:
            # Only relative paths (basenames) are written, since the
            # training directory may be moved around afterwards.
            manifest = {
                'stats': os.path.basename(self.stats_recorder.path),
                'videos': [
                    (os.path.basename(video), os.path.basename(meta))
                    for video, meta in self.videos
                ],
                'env_info': self._env_info(),
            }
            json.dump(manifest, f, default=json_encode_np)