def _flush(self, force=False):
    """Flush all relevant monitor information to disk."""
    if not self.write_upon_reset and not force:
        return

    self.stats_recorder.flush()

    # Give it a very distinguished name, since we need to pick it
    # up from the filesystem later.
    path = os.path.join(
        self.directory,
        '{}.manifest.{}.manifest.json'.format(self.file_prefix, self.file_infix))
    logger.debug('Writing training manifest file to %s', path)
    with atomic_write.atomic_write(path) as f:
        # We need to write relative paths here since people may
        # move the training_dir around. It would be cleaner to
        # already have the basenames rather than basename'ing
        # manually, but this works for now.
        json.dump({
            'stats': os.path.basename(self.stats_recorder.path),
            'videos': [(os.path.basename(v), os.path.basename(m))
                       for v, m in self.videos],
            'env_info': self._env_info(),
        }, f, default=json_encode_np)
def close(self): """Make sure to manually close, or else you'll leak the encoder process""" if not self.enabled: return if self.encoder: logger.debug('Closing video encoder: path=%s', self.path) self.encoder.close() self.encoder = None else: # No frames captured. Set metadata, and remove the empty output file. os.remove(self.path) if self.metadata is None: self.metadata = {} self.metadata['empty'] = True # If broken, get rid of the output file, otherwise we'd leak it. if self.broken: logger.info('Cleaning up paths for broken video recorder: path=%s metadata_path=%s', self.path, self.metadata_path) # Might have crashed before even starting the output file, don't try to remove in that case. if os.path.exists(self.path): os.remove(self.path) if self.metadata is None: self.metadata = {} self.metadata['broken'] = True self.write_metadata()
def _past_limit(self):
    """Return true if we are past our limit"""
    if self._max_episode_steps is not None and self._max_episode_steps <= self._elapsed_steps:
        logger.debug("Env has passed the step limit defined by TimeLimit.")
        return True
    return False
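# A minimal sketch of how a TimeLimit-style wrapper might consult _past_limit()
# from its step() method. The surrounding wrapper attributes (env, _elapsed_steps)
# are assumptions based on the fields referenced above, not code taken from this
# codebase.
def step(self, action):
    observation, reward, done, info = self.env.step(action)
    self._elapsed_steps += 1
    if self._past_limit():
        # Mark the episode as done once the step budget is exhausted.
        done = True
    return observation, reward, done, info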
def compute_all_hulls(self, q_values, states_count):
    """ Parallel computing of hulls """
    logger.debug("[BFTQ] -Compute hulls")
    n_beta = len(self.betas_for_discretisation)
    hull_params = [(q_values[state * n_beta:(state + 1) * n_beta],
                    self.betas_for_discretisation,
                    self.config["hull_options"],
                    self.config["clamp_qc"])
                   for state in range(states_count)]
    if self.config["cpu_processes"] == 1:
        results = [compute_convex_hull_from_values(*param) for param in hull_params]
    else:
        with Pool(self.config["cpu_processes"]) as p:
            results = p.starmap(compute_convex_hull_from_values, hull_params)
    hulls, all_points = zip(*results)
    torch.cuda.empty_cache()
    for s in [0, -1]:
        plot_hull(hulls[s], all_points[s], self.writer, self.epoch,
                  title="Hull {} batch {}".format(s, self.batch))
    return hulls
def capture_frame(self, context=None):
    """Render the given `env` and add the resulting frame to the video."""
    import cv2
    if not self.functional:
        return
    logger.debug('Capturing video frame: path=%s', self.path)

    render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    frame = self.env.render(mode=render_mode, context=context)

    if frame is None:
        if self._async:
            return
        else:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
    else:
        if context is not None:
            # Overlay the current context on the frame before encoding.
            frame = np.ascontiguousarray(frame, dtype=np.uint8)
            cv2.putText(frame, 'Context: {}'.format(context), (300, 300),
                        cv2.FONT_HERSHEY_SIMPLEX, 4, (255, 255, 255), 3, cv2.LINE_AA)
        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
def step(self, a):
    self.game.resume()
    if a == ACTION_LEFT:
        self.game.tap_left()
    elif a == ACTION_RIGHT:
        self.game.tap_right()
    self._update_state()
    self.game.pause()

    is_over = self.state.status == GAME_OVER_SCREEN
    if is_over:
        reward = self.death_reward
    else:
        angle = self.state.position['angle']
        cosine = math.cos(angle)
        if self.reward_strategy == 'cosine':
            reward = cosine
        elif self.reward_strategy == 'one':
            reward = 1.0
        elif self.reward_strategy == 'cosine_thresh':
            reward = cosine if cosine > self.score_threshold else cosine * self.stay_alive_reward
        else:
            raise ValueError('Invalid reward strategy: {}'.format(self.reward_strategy))

    logger.debug('HiScore: {}, Score: {}, Action: {}, Reward: {}, GameOver: {}'.format(
        self.state.hiscore, self.state.score, ACTION_NAMES[a], reward, is_over))

    return self._get_obs(), reward, is_over, dict(
        score=self.state.score,
        hiscore=self.state.hiscore,
        position=self.state.position['angle'])
def capture_frame(self):
    """Render the given `env` and add the resulting frame to the video."""
    if not self.functional:
        return
    logger.debug('Capturing video frame: path=%s', self.path)

    render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    frame = self.env.render(mode=render_mode)

    if frame is None:
        if self._async:
            return
        else:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
    else:
        # Overlay the episode number onto the frame before encoding.
        im = Image.fromarray(frame).convert('RGBA')
        txt = Image.new('RGBA', im.size, (255, 255, 255, 0))
        fnt = ImageFont.truetype('RL/BipedalWalker/BRLNSR.TTF', 40)
        d = ImageDraw.Draw(txt)
        d.text((10, 350), f"Episode {1000 * int(self.base_path[-6:])}", font=fnt, fill=(240, 248, 255, 255))
        frame = Image.alpha_composite(im, txt).convert('RGB')
        frame = np.asarray(frame)

        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
def start(self):
    self.cmdline = (
        self.backend,
        '-nostats',
        '-loglevel', 'error',  # suppress warnings
        '-y',
        '-r', '%d' % self.frames_per_sec,

        # input
        '-f', 'rawvideo',
        '-s:v', '{}x{}'.format(*self.wh),
        '-pix_fmt', ('rgb32' if self.includes_alpha else 'rgb24'),
        '-i', '-',  # this used to be /dev/stdin, which is not Windows-friendly

        # output
        '-vf', 'scale=trunc(iw/2)*2:trunc(ih/2)*2',
        '-vcodec', 'libx264',
        '-pix_fmt', 'yuv420p',
        self.output_path
    )

    logger.debug('Starting ffmpeg with "%s"', ' '.join(self.cmdline))
    if hasattr(os, 'setsid'):  # setsid not present on Windows
        self.proc = subprocess.Popen(self.cmdline, stdin=subprocess.PIPE, preexec_fn=os.setsid)
    else:
        self.proc = subprocess.Popen(self.cmdline, stdin=subprocess.PIPE)
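# A minimal sketch of how raw frames might be fed to the ffmpeg process spawned
# above: it expects packed rgb24/rgb32 bytes of the advertised size on stdin.
# The method name capture_frame here is an illustrative assumption, not
# necessarily the name used in this codebase.
def capture_frame(self, frame):
    if frame.shape[:2] != self.wh[::-1]:
        raise RuntimeError('Frame size {} does not match declared size {}'.format(frame.shape, self.wh))
    # Write one frame of raw pixel data to ffmpeg's stdin.
    self.proc.stdin.write(frame.tobytes())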
def capture_frame(self):
    """Render the given `env` and add the resulting frame to the video."""
    if not self.functional:
        return
    if self._closed:
        logger.warn("The video recorder has been closed and no frames will be captured anymore.")
        return
    logger.debug("Capturing video frame: path=%s", self.path)

    render_mode = "ansi" if self.ansi_mode else "rgb_array"
    frame = self.env.render(mode=render_mode)

    if frame is None:
        if self._async:
            return
        else:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn(
                "Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s",
                self.path,
                self.metadata_path,
            )
            self.broken = True
    else:
        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
def capture_frame(self):
    """Render the given `env` and add the resulting frame to the video."""
    if not self.functional:
        return
    logger.debug('Capturing video frame: path=%s', self.path)

    render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    frames = self.env.render(mode=render_mode)
    if isinstance(frames, np.ndarray):
        for frame in frames:  # multiple frames
            if frame is None:
                if self._async:
                    return
                else:
                    # Indicates a bug in the environment: don't want to raise
                    # an error here.
                    logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
                    self.broken = True
            else:
                self.last_frame = frame
                if self.ansi_mode:
                    self._encode_ansi_frame(frame)
                else:
                    self._encode_image_frame(frame)
        return frames
    else:
        return np.array([])
def plan(self, state, observation):
    for i in range(self.config['iterations']):
        if (i + 1) % 10 == 0:
            logger.debug('{} / {}'.format(i + 1, self.config['iterations']))
        self.run(safe_deepcopy_env(state), observation)
    return self.get_plan()
def _fit(self, states_betas, actions, target_r, target_c):
    """
    Fit a network Q(state, action, beta) = (Qr, Qc) to target values

    :param states_betas: batch of states and betas
    :param actions: batch of actions
    :param target_r: batch of target reward-values
    :param target_c: batch of target cost-values
    :return: the Bellman residual delta between the model and target values
    """
    logger.debug("[BFTQ] Fit model")

    # Initial Bellman residual
    with torch.no_grad():
        delta = self._compute_loss(states_betas, actions, target_r, target_c).detach().item()
        torch.cuda.empty_cache()

    # Reset network
    if self.config["reset_network_each_epoch"]:
        self.reset_network()

    # Gradient descent
    losses = []
    for nn_epoch in range(self.config["regression_epochs"]):
        loss = self._gradient_step(states_betas, actions, target_r, target_c)
        losses.append(loss)
    torch.cuda.empty_cache()

    return delta
def capture_frame(self):
    """Render the given `env` and add the resulting frame to the video."""
    if not self.functional:
        return
    logger.debug('Capturing video frame: path=%s', self.path)

    render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    frame = self.env.render(mode=render_mode)

    # multiagent particle envs returns list here instead of ndarray
    # because of option for multiple agent views;
    # take just primary view for video
    if self.particle_env:
        frame = frame[0]

    if frame is None:
        if self._async:
            return
        else:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
    else:
        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
def capture_frame(self):
    """Render the given `env` and add the resulting frame to the video."""
    if not self.functional:
        return
    logger.debug('Capturing video frame: path=%s', self.path)

    render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    frame = self.env.render(mode=render_mode)
    # self.frames.append(frame)
    # import matplotlib.pyplot as plt
    # plt.imshow(frame)
    # plt.show(block=True)

    if frame is None:
        if self._async:
            return
        else:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
    else:
        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
def start(self):
    self.cmdline = (
        self.backend,
        '-nostats',
        '-loglevel', 'error',  # suppress warnings
        '-y',
        '-r', '%d' % self.frames_per_sec,

        # input
        '-f', 'rawvideo',
        '-s:v', '{}x{}'.format(*self.wh),
        '-pix_fmt', ('rgb32' if self.includes_alpha else 'rgb24'),
        '-i', '-',  # this used to be /dev/stdin, which is not Windows-friendly

        # output
        '-vcodec', 'libx264',
        '-pix_fmt', 'yuv420p',
        self.output_path)

    logger.debug('Starting ffmpeg with "%s"', ' '.join(self.cmdline))
    if hasattr(os, 'setsid'):  # setsid not present on Windows
        self.proc = subprocess.Popen(self.cmdline, stdin=subprocess.PIPE, preexec_fn=os.setsid)
    else:
        self.proc = subprocess.Popen(self.cmdline, stdin=subprocess.PIPE)
def close(self): """Make sure to manually close, or else you'll leak the encoder process""" if not self.enabled: return if self.encoder: logger.debug('Closing video encoder: path=%s', self.path) self.encoder.close() self.encoder = None else: # No frames captured. Set metadata, and remove the empty output file. os.remove(self.path) if self.metadata is None: self.metadata = {} self.metadata['empty'] = True # If broken, get rid of the output file, otherwise we'd leak it. if self.broken: logger.info( 'Cleaning up paths for broken video recorder: path=%s metadata_path=%s', self.path, self.metadata_path) # Might have crashed before even starting the output file, don't try to remove in that case. if os.path.exists(self.path): os.remove(self.path) if self.metadata is None: self.metadata = {} self.metadata['broken'] = True self.write_metadata()
def compute_next_values(self, next_states):
    """
    Compute Q(s, beta) with a single forward pass

    :param next_states: batch of next states
    :return: Q values at next states
    """
    logger.debug("[BFTQ] -Forward pass")
    # Compute the cartesian product sb of all next states s with all budgets b
    ss = next_states.squeeze().repeat((1, len(self.betas_for_discretisation))) \
        .view((len(next_states) * len(self.betas_for_discretisation), self._value_network.size_state))
    bb = torch.from_numpy(self.betas_for_discretisation).float().unsqueeze(1).to(device=self.device)
    bb = bb.repeat((len(next_states), 1))
    sb = torch.cat((ss, bb), dim=1).unsqueeze(1)

    # To avoid spikes in memory, we actually split the batch in several minibatches
    batch_sizes = near_split(x=len(sb), num_bins=self.config["split_batches"])
    q_values = []
    for minibatch in range(self.config["split_batches"]):
        mini_batch = sb[sum(batch_sizes[:minibatch]):sum(batch_sizes[:minibatch + 1])]
        q_values.append(self._value_network(mini_batch))
        torch.cuda.empty_cache()
    return torch.cat(q_values).detach().cpu().numpy()
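# A minimal sketch of a near_split-style helper as used above, assuming it only
# needs to partition a count x into num_bins contiguous bin sizes that sum to x
# and differ by at most one. This is an illustrative assumption, not necessarily
# the helper shipped with this codebase.
def near_split(x, num_bins):
    # Distribute x items over num_bins bins as evenly as possible.
    quotient, remainder = divmod(x, num_bins)
    return [quotient + 1] * remainder + [quotient] * (num_bins - remainder)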
async def restart(self):
    self.game_id = str(uuid.uuid4())
    self.state_id = 0
    if self.state.status == GAME_SCREEN:
        # commit suicide
        while not self.is_over():
            logger.debug('suiciding')
            await self.tap_left()
            await self.tap_left()
            await self.tap_left()
            await self.get_state()
        if self.is_over():
            await self._wait_until_replay_button_is_active()
            x = self.x + self.width // 2
            y = self.y + self.height - self.height // 7
            await self.page.mouse.click(x, y)
    elif self.state.status == START_SCREEN:
        logger.debug('start screen')
    else:
        raise ValueError('Unknown state: {}'.format(self.state.status))
    await self.start()
def generate_video(self, frames_dict, extra_info_dict, require_text=True, gif_mode=None):
    """Render the given `env` and add the resulting frame to the video."""
    logger.debug('Capturing video frame: path=%s', self.path)
    # assert isinstance(frames_dict, OrderedDict)
    # first_row = next(iter(frames_dict.values()))
    # assert isinstance(first_row, OrderedDict)

    # frames_dict = {VIDEO_NAME: {
    #                    'frames': FRAME,
    #                    'pos': (ROW, COL)
    #                },
    #                ...,
    #                "row_names": [ROW1, ROW2, ..],
    #                "col_names": [COL1, COL2, ..],
    #                "frame_info": {'width': .., "height": .., }
    #               }

    if self.generate_gif:
        # self.scale = 1
        name_path_dict = self._generate_gif(frames_dict, extra_info_dict, gif_mode)
        return name_path_dict
        # return self.base_path

    if not self.initialized:
        info = extra_info_dict['frame_info']
        # tmp_frame = list(frames_dict.values())[0][0]
        self.width = info['width']
        self.height = info['height']
        self._build_frame_range()
        self.initialized = True

    self._build_background(frames_dict)
    self._build_grid_of_frames(frames_dict, extra_info_dict, require_text)

    if self.test_mode:
        return self.background[0]

    now = time.time()
    start = now
    for idx, frame in enumerate(self.background):
        if idx % 100 == 99:
            print("Current Frames: {}/{} (T +{:.1f}s Total {:.1f}s)".format(
                idx + 1, len(self.background), time.time() - now, time.time() - start))
            now = time.time()
        self.last_frame = frame
        self._encode_image_frame(frame)

    self._close()
    return self.path
def plan(self, state, observation):
    for self.episode in range(self.config['episodes']):
        if (self.episode + 1) % max(self.config['episodes'] // 10, 1) == 0:
            logger.debug('{} / {}'.format(self.episode + 1, self.config['episodes']))
        self.run(safe_deepcopy_env(state))
    return self.get_plan()
def train(self, render=True, full_memory=True):
    if self.run_started:
        logger.warn('You should not run a single experiment twice!!')
    self.run_started = True

    cum_count = 0
    try:
        for i_episode in tqdm.tqdm(range(self.num_runs)):
            observation = self.env.reset()
            cumulative_reward = 0.
            for t in range(1, self.max_steps_in_run + 1):
                cum_count += 1
                if render:
                    self.env.render()

                # Epsilon-greedy action selection
                if random.random() < self.epsilon:
                    action_index = random.randint(0, len(self.discrete_actions) - 1)
                else:
                    prediction = self.target_network.predict(
                        np.reshape(observation, (1, self.num_states)))
                    action_index = np.argmax(prediction)
                action = self.discrete_actions[action_index]
                self.actions.append(action)

                prev_observation = observation
                observation, reward, done, info = self.env.step(action)
                cumulative_reward += reward
                self.memory.append(
                    Experience(prev_observation, action_index, reward, observation, done))

                if cum_count % self.train_step == 0:
                    try:
                        if full_memory:
                            batch_train = self.memory.all_entries()
                        else:
                            batch_train = self.memory.sample(self.train_step)
                        self.train_network.train(batch_train, self.target_network)
                    except ValueError:
                        # Not enough samples in memory yet. Just wait
                        continue

                if cum_count % self.copy_step == 0:
                    self.target_network.copy_weights(self.train_network)

                if done:
                    logger.debug(f"Episode {i_episode} finished after {t + 1} timesteps")
                    break
            self.rewards_train.append(cumulative_reward)
    except KeyboardInterrupt:
        pass
    self.env.close()
def key_up(self, key: str) -> None:
    """Simulates a key up action on the keyboard for a given key.

    See https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h.
    """
    logger.debug(f"[KBD] Up {key}")
    keycode = self._translate(key)
    if keycode in self._pressed_keys:
        self._pressed_keys.remove(keycode)
    subprocess.call(['xdotool', 'keyup', keycode])
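# A minimal sketch of the matching key-down action, assuming the same _translate
# helper and a list-like _pressed_keys used by key_up above; this counterpart is
# an illustrative assumption rather than code taken from this codebase.
def key_down(self, key: str) -> None:
    """Simulates a key down action on the keyboard for a given key."""
    logger.debug(f"[KBD] Down {key}")
    keycode = self._translate(key)
    if keycode not in self._pressed_keys:
        self._pressed_keys.append(keycode)
    subprocess.call(['xdotool', 'keydown', keycode])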
def update_design_success(self, goal, stage, metric):
    """Check whether design goal was achieved."""
    if "success_threshold" in self.environment_config['goals'][goal]:
        if metric >= self.environment_config['goals'][goal]['success_threshold']:
            self.design_success.update({goal: True})
            logger.debug(f'{self.name}:Design was successful for {goal} in {stage} with value {metric:.3f} at step:{self.steps}.')
        else:
            self.design_success.update({goal: False})
def load_results(training_dir):
    if not os.path.exists(training_dir):
        logger.error('Training directory %s not found', training_dir)
        return

    manifests = detect_training_manifests(training_dir)
    if not manifests:
        logger.error('No manifests found in training directory %s', training_dir)
        return

    logger.debug('Uploading data from manifest %s', ', '.join(manifests))

    # Load up stats + video files
    stats_files = []
    videos = []
    env_infos = []
    for manifest in manifests:
        with open(manifest) as f:
            contents = json.load(f)
            # Make these paths absolute again
            stats_files.append(os.path.join(training_dir, contents['stats']))
            videos += [(os.path.join(training_dir, v), os.path.join(training_dir, m))
                       for v, m in contents['videos']]
            env_infos.append(contents['env_info'])

    env_info = collapse_env_infos(env_infos, training_dir)

    # If only one stats file is present, there is no need to merge and all fields are included
    if len(stats_files) == 1:
        with open(stats_files[0]) as f:
            content = json.load(f)
        content.update({
            'manifests': manifests,
            'env_info': env_info,
            'videos': videos
        })
        return content
    else:
        data_sources, initial_reset_timestamps, timestamps, episode_lengths, episode_rewards, \
            episode_types, initial_reset_timestamp = merge_stats_files(stats_files)
        return {
            'manifests': manifests,
            'env_info': env_info,
            'data_sources': data_sources,
            'timestamps': timestamps,
            'episode_lengths': episode_lengths,
            'episode_rewards': episode_rewards,
            'episode_types': episode_types,
            'initial_reset_timestamps': initial_reset_timestamps,
            'initial_reset_timestamp': initial_reset_timestamp,
            'videos': videos,
        }
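# A minimal sketch of the detect_training_manifests helper relied on above,
# assuming the manifests are simply the '<prefix>.manifest.<infix>.manifest.json'
# files written by _flush into the training directory; the matching rule here is
# an illustrative assumption.
def detect_training_manifests(training_dir, files=None):
    if files is None:
        files = os.listdir(training_dir)
    # Keep only files that look like monitor manifests, returned as absolute paths.
    return [os.path.join(training_dir, f)
            for f in files if '.manifest.' in f and f.endswith('.manifest.json')]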
def _past_limit(self):
    """Return true if we are past our limit"""
    if self._max_episode_steps is not None and self._max_episode_steps <= self._elapsed_steps:
        logger.debug("Env has passed the step limit defined by TimeLimit.")
        return True

    if self._max_episode_seconds is not None and self._max_episode_seconds <= self._elapsed_seconds:
        logger.debug("Env has passed the seconds limit defined by TimeLimit.")
        return True

    return False
def check_design_convergence(self):
    """Check if solvent extraction simulation is feasible."""
    if not all(self.sx_design.status.values()):
        failed_modules = [stage for stage, converged in self.sx_design.status.items() if not converged]
        logger.error(f'{self.name}:Equilibrium failed at step:{self.steps} due to non-convergence in following modules:{failed_modules} - Terminating environment!')
        self.convergence_failure = True
    else:
        converged_modules = [stage for stage, converged in self.sx_design.status.items() if converged]
        assert len(converged_modules) == len(self.sx_design.status), 'All modules should converge'
        logger.debug(f'{self.name}:Equilibrium succeeded at step:{self.steps} for all modules:{converged_modules}')
def update_rollout_dict(spec, rollout_dict):
    """
    Takes as input the environment spec for which the rollout is to be generated,
    and the existing dictionary of rollouts. Returns True iff the dictionary was
    modified.
    """
    # Skip platform-dependent
    if should_skip_env_spec_for_tests(spec):
        logger.info("Skipping tests for {}".format(spec.id))
        return False

    # Skip environments that are nondeterministic
    if spec.nondeterministic:
        logger.info("Skipping tests for nondeterministic env {}".format(spec.id))
        return False

    logger.info("Generating rollout for {}".format(spec.id))

    try:
        (
            observations_hash,
            actions_hash,
            rewards_hash,
            dones_hash,
        ) = generate_rollout_hash(spec)
    except:
        # If running the env generates an exception, don't write to the rollout file
        logger.warn(
            "Exception {} thrown while generating rollout for {}. Rollout not added.".format(
                sys.exc_info()[0], spec.id))
        return False

    rollout = {}
    rollout["observations"] = observations_hash
    rollout["actions"] = actions_hash
    rollout["rewards"] = rewards_hash
    rollout["dones"] = dones_hash

    existing = rollout_dict.get(spec.id)
    if existing:
        differs = False
        for key, new_hash in rollout.items():
            differs = differs or existing[key] != new_hash
        if not differs:
            logger.debug("Hashes match with existing for {}".format(spec.id))
            return False
        else:
            logger.warn("Got new hash for {}. Overwriting.".format(spec.id))

    rollout_dict[spec.id] = rollout
    return True
def _calculate_brokerage_fee(self, amount):
    fee = 0
    for transaction_fee in self.transaction_fee:
        if amount <= transaction_fee.amount:
            if transaction_fee.is_percentage:
                fee = amount * transaction_fee.fee / 100.0
            else:
                fee = transaction_fee.fee
            break
    fee = round(fee, 2)
    logger.debug(f'Brokerage Fee:{fee} for amount:{round(amount, 2)}')
    return fee
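# A hedged usage example of the tiered lookup above. The TransactionFee record
# and the tier values are illustrative assumptions (only the amount / fee /
# is_percentage fields are implied by the method), not data from this project.
from collections import namedtuple

TransactionFee = namedtuple('TransactionFee', ['amount', 'fee', 'is_percentage'])

# Flat fee up to 1000, then a 0.25 % fee up to 100000; tiers are checked in order.
example_fees = [
    TransactionFee(amount=1000, fee=5.0, is_percentage=False),
    TransactionFee(amount=100000, fee=0.25, is_percentage=True),
]
# With self.transaction_fee = example_fees, an amount of 800 would cost 5.00
# and an amount of 20000 would cost 20000 * 0.25 / 100 = 50.00.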
def compute_all_optimal_mixtures(self, hulls, betas):
    """ Parallel computing of optimal mixtures """
    logger.debug("[BFTQ] -Compute optimal mixtures")
    params = [(hulls[i], beta.detach().item()) for i, beta in enumerate(betas)]
    if self.config["cpu_processes"] == 1:
        optimal_policies = [optimal_mixture(*param) for param in params]
    else:
        with Pool(self.config["cpu_processes"]) as p:
            optimal_policies = p.starmap(optimal_mixture, params)
    return optimal_policies
def load_results(training_dir):
    if not os.path.exists(training_dir):
        logger.error("Training directory %s not found", training_dir)
        return

    manifests = detect_training_manifests(training_dir)
    if not manifests:
        logger.error("No manifests found in training directory %s", training_dir)
        return

    logger.debug("Uploading data from manifest %s", ", ".join(manifests))

    # Load up stats + video files
    stats_files = []
    videos = []
    env_infos = []
    for manifest in manifests:
        with open(manifest) as f:
            contents = json.load(f)
            # Make these paths absolute again
            stats_files.append(os.path.join(training_dir, contents["stats"]))
            videos += [(os.path.join(training_dir, v), os.path.join(training_dir, m))
                       for v, m in contents["videos"]]
            env_infos.append(contents["env_info"])

    env_info = collapse_env_infos(env_infos, training_dir)
    (
        data_sources,
        initial_reset_timestamps,
        timestamps,
        episode_lengths,
        episode_rewards,
        episode_types,
        initial_reset_timestamp,
    ) = merge_stats_files(stats_files)

    return {
        "manifests": manifests,
        "env_info": env_info,
        "data_sources": data_sources,
        "timestamps": timestamps,
        "episode_lengths": episode_lengths,
        "episode_rewards": episode_rewards,
        "episode_types": episode_types,
        "initial_reset_timestamps": initial_reset_timestamps,
        "initial_reset_timestamp": initial_reset_timestamp,
        "videos": videos,
    }
def step(self, action):
    # return observation, reward, done, info
    if not self.done:  # Only perform step if episode has not ended
        self.steps += 1
        if self.steps == 1:  # Logic for first step in an episode
            self.episode_start_logic()
        logger.debug(f'{self.name}:Taking action {action} at step:{self.steps}')
        prev_state = self.sx_design.x.copy()  # save previous state

        if self.environment_config['discrete_actions']:
            self.action_stats.update({action: self.action_stats[action] + 1})
            if self.action_dict[action]:  # Check if discrete action exists
                self.perform_discrete_action(action)  # map action to variable and action type
                self.run_simulation()
            else:
                logger.info(f'{self.name}:No action found in:{self.action_dict[action]}')
        else:
            self.perform_continuous_action(action)  # map continuous action to variable type
            self.run_simulation()

        if not self.convergence_failure:  # Calculate reward if there is no convergence failure
            reward = self.get_reward()
        else:
            self.sx_design.x = prev_state  # Replace with previous state if there is convergence failure
            self.done = True
            reward = self.reward_config['min']  # -100: assign minimum reward if convergence failure
        logger.info(f'{self.name}:Completed action {action} at step {self.steps} and got reward {reward:.3f}.')

        if self.steps >= self.max_episode_steps:  # Check if max episode steps reached
            self.done = True
            logger.warn(f'{self.name}:Maximum episode steps exceeded after {self.steps} steps - Ending episode!')

        if all(self.design_success.values()) and not self.convergence_failure:  # Check if design was successful
            self.done = True
            reward = self.reward_config['max']
            logger.warn(f'{self.name}:Design successful with recovery:{self.metric_dict["recovery"]}, purity:{self.metric_dict["purity"]},Reward:{reward} after {self.steps} steps - Ending episode!')

        if self.done:
            self.episode_end_logic()
    else:
        if self.convergence_failure:
            print(f'Episode completed after {self.steps} steps due to convergence failure - Reset environment to start new simulation!')
        elif self.steps >= self.max_episode_steps:
            print(f'Episode completed after {self.steps} steps since max steps were exceeded - Reset environment to start new simulation!')
        elif all(self.design_success.values()):
            print(f'Episode completed after {self.steps} steps since design goals were met - Reset environment to start new simulation!')
        else:
            print(f'Episode completed after {self.steps} steps due to unknown reason - Reset environment to start new simulation!')

    return np.array(self.sx_design.x + self.sx_design.ree_mass), reward, self.done, {}
def step(self, a):
    self.game.resume()
    if a == ACTION_FLAP:
        self.game.tap()
    self._update_state()
    self.game.pause()

    is_over = self.state.status == GAME_OVER_SCREEN
    reward = self.compute_reward(is_over)

    logger.debug('HiScore: {}, Score: {}, Action: {}, Reward: {}, GameOver: {}'.format(
        self.state.hiscore, self.state.score, ACTION_NAMES[a], reward, is_over))

    return self._get_obs(), reward, is_over, dict(score=self.state.score)
def move(self):
    # check for collision with the right side of the game screen
    if self.x + self.radius + self.speedx >= self.args.env_width:
        logger.debug('ball collide with right side of screen')
        self.speedx = -self.speed_magnitude
    # check for collision with the left hand side of the game screen
    elif self.x + self.speedx <= 0:
        logger.debug('ball collide with left side of screen')
        self.speedx = self.speed_magnitude

    # check for collision with the bottom of the game screen
    if self.y + self.radius + self.speedy >= self.args.env_height:
        logger.debug('ball collide with bottom of screen')
        self.speedy = -self.speed_magnitude
        return True
    # check for collision with the top of the game screen
    elif self.y + self.radius + self.speedy <= 0:
        logger.debug('ball collide with top of screen')
        self.speedy = self.speed_magnitude

    # update the ball position
    self.x += self.speedx
    self.y += self.speedy
    return False
def load_results(training_dir):
    if not os.path.exists(training_dir):
        logger.error('Training directory %s not found', training_dir)
        return

    manifests = detect_training_manifests(training_dir)
    if not manifests:
        logger.error('No manifests found in training directory %s', training_dir)
        return

    logger.debug('Uploading data from manifest %s', ', '.join(manifests))

    # Load up stats + video files
    stats_files = []
    videos = []
    env_infos = []
    for manifest in manifests:
        with open(manifest) as f:
            contents = json.load(f)
            # Make these paths absolute again
            stats_files.append(os.path.join(training_dir, contents['stats']))
            videos += [(os.path.join(training_dir, v), os.path.join(training_dir, m))
                       for v, m in contents['videos']]
            env_infos.append(contents['env_info'])

    env_info = collapse_env_infos(env_infos, training_dir)
    data_sources, initial_reset_timestamps, timestamps, episode_lengths, episode_rewards, \
        episode_types, initial_reset_timestamp = merge_stats_files(stats_files)

    return {
        'manifests': manifests,
        'env_info': env_info,
        'data_sources': data_sources,
        'timestamps': timestamps,
        'episode_lengths': episode_lengths,
        'episode_rewards': episode_rewards,
        'episode_types': episode_types,
        'initial_reset_timestamps': initial_reset_timestamps,
        'initial_reset_timestamp': initial_reset_timestamp,
        'videos': videos,
    }
def capture_frame(self):
    """Render the given `env` and add the resulting frame to the video."""
    if not self.functional:
        return
    logger.debug('Capturing video frame: path=%s', self.path)

    render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    frame = self.env.render(mode=render_mode)

    if frame is None:
        if self._async:
            return
        else:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
    else:
        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
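# A hedged usage sketch showing where capture_frame() typically fits in a rollout
# loop: one call per environment step, followed by close() to flush the encoder.
# The VideoRecorder constructor arguments and 'CartPole-v1' are illustrative
# assumptions, not taken from this codebase.
import gym

env = gym.make('CartPole-v1')
recorder = VideoRecorder(env, path='/tmp/rollout.mp4')
observation = env.reset()
done = False
while not done:
    observation, reward, done, info = env.step(env.action_space.sample())
    recorder.capture_frame()  # renders the env and appends the frame to the video
recorder.close()  # finishes the encoder process and writes metadata
env.close()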
def _flush(self, force=False):
    """Flush all relevant monitor information to disk."""
    if not self.write_upon_reset and not force:
        return

    self.stats_recorder.flush()

    # Give it a very distinguished name, since we need to pick it
    # up from the filesystem later.
    path = os.path.join(self.directory, '{}.manifest.{}.manifest.json'.format(self.file_prefix, self.file_infix))
    logger.debug('Writing training manifest file to %s', path)

    with atomic_write.atomic_write(path) as f:
        # We need to write relative paths here since people may
        # move the training_dir around. It would be cleaner to
        # already have the basenames rather than basename'ing
        # manually, but this works for now.
        json.dump({
            'stats': os.path.basename(self.stats_recorder.path),
            'videos': [(os.path.basename(v), os.path.basename(m))
                       for v, m in self.videos],
            'env_info': self._env_info(),
        }, f, default=json_encode_np)
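# A minimal sketch of the json_encode_np default hook passed to json.dump above,
# assuming its only job is to turn NumPy scalars and arrays into JSON-friendly
# Python values; the exact shipped implementation may differ.
import numpy as np

def json_encode_np(obj):
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # Fall back to json's own error for unsupported types.
    raise TypeError('Object of type {} is not JSON serializable'.format(type(obj)))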