def functional_pruning_fsm(self, bgru_net, bgru_dir, cuda):
    bgru_net_path = os.path.join(bgru_dir, 'model.p')
    bgru_plot_dir = tl.ensure_directory_exits(os.path.join(bgru_dir, 'Plots'))

    # TODO: the unminimized, partial MMN needs to be pruned here.
    # min_moore_machine_path = os.path.join(bgru_dir, 'full_min_moore_machine.p')
    # unmin_moore_machine_path = os.path.join(bgru_dir, 'full_unmin_moore_machine.p')
    unmin_moore_machine_path = os.path.join(bgru_dir, 'partial_unmin_moore_machine.p')

    bgru_net.load_state_dict(torch.load(bgru_net_path))
    # load the partial, unminimized Moore machine (the minimized variants above are commented out)
    moore_machine = pickle.load(open(unmin_moore_machine_path, 'rb'))
    bgru_net.eval()

    # TODO: keep store_obs set to False for now.
    pruned_machine = moore_machine.functional_pruning(bgru_net, self.env, log=True,
                                                      store_obs=False, path=bgru_dir)
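    # Illustrative sketch, not part of the original script: assuming
    # functional_pruning() returns the pruned Moore machine, it could be pickled
    # next to the other machines and re-scored with the existing evaluate()
    # method. The file name 'pruned_moore_machine.p' and the episode count below
    # are hypothetical placeholders.
    #
    #   pickle.dump(pruned_machine,
    #               open(os.path.join(bgru_dir, 'pruned_moore_machine.p'), 'wb'))
    #   score = pruned_machine.evaluate(bgru_net, self.env, total_episodes=10,
    #                                   log=True, cuda=cuda)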
                    default=1)
args = parser.parse_args()
args.cuda = torch.cuda.is_available() and (not args.no_cuda)

vis = visdom.Visdom() if args.render else None
env_fn = lambda: FruitCollection1D(hybrid=True, vis=vis)
_env = env_fn()
total_actions = _env.total_actions
policy_net = PolicyNet1D(_env.reset().shape[0], total_actions, _env.total_fruits)

# create directories to store results
result_dir = ensure_directory_exits(os.path.join(os.getcwd(), 'results'))
env_dir = ensure_directory_exits(
    os.path.join(result_dir, _env.name,
                 'Hybrid_Pg_decompose' if args.decompose else "Hybrid_Pg"))
plots_dir = ensure_directory_exits(os.path.join(env_dir, 'Plots'))
policy_net_path = os.path.join(env_dir, 'model.p')

# Let the game begin !! Broom.. Broom..
if args.cuda:
    policy_net = policy_net.cuda()
if os.path.exists(policy_net_path) and not args.scratch:
    policy_net.load_state_dict(torch.load(policy_net_path))

pg = HybridPolicyGraident(_env.total_fruits, args.decompose, vis)
if args.train:
def evaluate(self, net, env, total_episodes, log=True, render=False, inspect=False,
             store_obs=False, path=None, cuda=False):
    """
    Evaluate the trained network.

    :param net: trained Bottleneck GRU network
    :param env: environment
    :param total_episodes: number of episodes to test
    :param log: flag to print the evaluation log
    :param render: flag to render the environment
    :param inspect: flag to inspect the run (saves per-episode frames and videos)
    :param store_obs: flag to store the encountered observations on disk
    :param path: directory in which inspection artifacts are stored
    :param cuda: flag to use CUDA
    :return: evaluation performance of the given model (average episode reward)
    """
    net.eval()
    if inspect:
        obs_path = ensure_directory_exits(os.path.join(path, 'obs'))
        video_dir_path = ensure_directory_exits(os.path.join(path, 'eps_videos'))
        if len(os.listdir(video_dir_path)) > 0:
            sys.exit('Previous Video Files present: ' + video_dir_path)

    self.frequency = {s: {t: 0 for t in sorted(self.state_desc.keys())}
                      for s in sorted(self.state_desc.keys())}
    self.trajectory = []
    total_reward = 0
    for ep in range(total_episodes):
        if inspect:
            ep_video_path = ensure_directory_exits(os.path.join(video_dir_path, str(ep)))
            obs, org_obs = env.reset(inspect=True)
            _shape = (org_obs.shape[1], org_obs.shape[0])
        else:
            obs = env.reset()
        done = False
        ep_reward = 0
        ep_actions = []
        ep_obs = []
        curr_state = self.start_state
        while not done:
            ep_obs.append(obs)
            obs = torch.FloatTensor(obs).unsqueeze(0)
            obs = Variable(obs)
            if cuda:
                obs = obs.cuda()
            obs_x = list(net.obs_encode(obs).data.cpu().numpy()[0])
            _, obs_index = self._get_index(self.obs_space, obs_x, force=False)
            if store_obs:
                obs_dir = ensure_directory_exits(os.path.join(obs_path, str(obs_index)))
                scipy.misc.imsave(
                    os.path.join(obs_dir, str(obs_index) + '_' + str(random.randint(0, 100000)) + '.jpg'),
                    org_obs)
            if not self.minimized:
                (obs_index, pre_index) = (obs_index, None)
            else:
                try:
                    (obs_index, pre_index) = (self.obs_minobs_map[obs_index], obs_index)
                except Exception as e:
                    logger.error(e)
            next_state = self.transaction[curr_state][obs_index]
            if next_state is None:
                logger.info('None state encountered!')
                logger.info('Exiting the script!')
                sys.exit(0)
            if render and inspect:
                _text = 'Current State:{} \n Obs: {} \n Next State: {} \n\n\n Total States:{} \n Total Obs: {}'
                _text = _text.format(str(curr_state), (obs_index, pre_index).__str__(), str(next_state),
                                     len(self.state_desc.keys()), len(self.minobs_obs_map.keys()))
                _label_img = self.text_image(_shape, _text)
                _img = np.hstack((org_obs, _label_img))
                env.render(inspect=inspect, img=_img)
                if inspect:
                    frame_id = str(len(ep_obs))
                    frame_id = '0' * (10 - len(frame_id)) + frame_id
                    scipy.misc.imsave(os.path.join(ep_video_path, 'frame_' + frame_id + '.jpg'), _img)
                self.frequency[curr_state][next_state] += 1
                if ep == total_episodes - 1:
                    self.trajectory.append([len(ep_obs), curr_state, (obs_index, pre_index), next_state])
            elif render:
                env.render()
            curr_state = next_state
            action = int(self.state_desc[curr_state]['action'])
            obs, reward, done, info = env.step(action)
            org_obs = info['org_obs'] if 'org_obs' in info else obs
            ep_actions.append(action)
            ep_reward += reward

            # a quick hack to prevent the agent from getting stuck
            max_same_action = 5000
            if len(ep_actions) > max_same_action:
                actions_to_consider = ep_actions[-max_same_action:]
                if actions_to_consider.count(actions_to_consider[0]) == max_same_action:
                    done = True
        total_reward += ep_reward
        if log:
            logger.info("Episode => {} Score=> {}".format(ep, ep_reward))
        if inspect:
            # escape parentheses so the paths can be passed to the shell
            _parseable_path = ep_video_path.replace('(', '\(')
            _parseable_path = _parseable_path.replace(')', '\)')
            os.system("ffmpeg -f image2 -pattern_type glob -framerate 1 -i '{}*.jpg' {}{}.mp4".
                      format(os.path.join(_parseable_path, 'frame_'),
                             os.path.join(_parseable_path, 'video_'), ep))
            os.system("rm -rf {}/*.jpg".format(_parseable_path))

    if self.minimized and store_obs:
        logger.info('Combining Sub-Observations')
        combined_obs_path = ensure_directory_exits(os.path.join(path, 'combined_obs'))
        for k in sorted(self.minobs_obs_map.keys()):
            logger.info('Observation Class:' + str(k))
            max_images_per_comb = 250  # beyond this, images cannot be combined due to library/memory issues
            suffix = len(self.minobs_obs_map[k]) > max_images_per_comb
            total_parts = int(len(self.minobs_obs_map[k]) / max_images_per_comb)
            if len(self.minobs_obs_map[k]) % max_images_per_comb != 0:
                total_parts += 1
            for p_i in range(total_parts):
                k_image = None
                for o_i in self.minobs_obs_map[k][p_i:p_i + max_images_per_comb]:
                    o_path = os.path.join(obs_path, str(o_i))
                    o_files = [os.path.join(o_path, f) for f in os.listdir(o_path)
                               if os.path.isfile(os.path.join(o_path, f))]
                    o_i_image = scipy.misc.imread(random.choice(o_files))
                    o_i_image = np.hstack((self.text_image(_shape, str(o_i),
                                                           position=(_shape[0] // 2, _shape[1] // 2)),
                                           o_i_image))
                    for i in range(9):
                        o_i_image = np.hstack((o_i_image, scipy.misc.imread(random.choice(o_files))))
                    k_image = o_i_image if k_image is None else np.vstack((k_image, o_i_image))
                k_shape = (_shape[0], len(self.minobs_obs_map[k][p_i:p_i + max_images_per_comb]) * _shape[1])
                k_name_image = self.text_image(k_shape, str(k), position=(k_shape[0] // 2, 10), font_size=20)
                k_image = np.hstack((k_name_image, k_image))
                k_file_name = str(k) + (('_part_' + str(p_i + 1)) if suffix else '')
                scipy.misc.imsave(os.path.join(combined_obs_path, k_file_name + '.jpg'), k_image)

    if inspect:
        obs_path = obs_path.replace('(', '\(').replace(')', '\)')
        os.system("rm -rf {}".format(obs_path))
    return total_reward / total_episodes
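    # Illustrative usage, not part of the original file: a typical call into
    # evaluate() from a script that has already constructed or loaded a Moore
    # machine instance `moore_machine`, a trained GRU net `bgru_net`, an
    # environment `env`, and a results directory `result_dir`; all of these
    # names and the episode count are assumptions for the sketch.
    #
    #   avg_score = moore_machine.evaluate(bgru_net, env, total_episodes=20,
    #                                      log=True, render=False, inspect=False,
    #                                      store_obs=False, path=result_dir, cuda=False)
    #   logger.info('Average score over 20 episodes: {}'.format(avg_score))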
        hx = hx.cuda()
    _, _, _, (_, _, _, input_x) = self.gru_net((obs, hx), input_fn=self.obx_net,
                                               hx_fn=self.bhx_net, inspect=True)
    return input_x


if __name__ == '__main__':
    args = tl.get_args()
    env = gym.make(args.env)
    env.seed(args.env_seed)
    obs = env.reset()

    # create directories to store results
    result_dir = tl.ensure_directory_exits(os.path.join(args.result_dir, 'Classic_Control'))
    env_dir = tl.ensure_directory_exits(os.path.join(result_dir, args.env))
    gru_dir = tl.ensure_directory_exits(os.path.join(env_dir, 'gru_{}'.format(args.gru_size)))
    gru_net_path = os.path.join(gru_dir, 'model.p')
    gru_plot_dir = tl.ensure_directory_exits(os.path.join(gru_dir, 'Plots'))

    bhx_dir = tl.ensure_directory_exits(
        os.path.join(env_dir, 'gru_{}_bhx_{}{}'.format(args.gru_size, args.bhx_size, args.bhx_suffix)))
    bhx_net_path = os.path.join(bhx_dir, 'model.p')
    bhx_plot_dir = tl.ensure_directory_exits(os.path.join(bhx_dir, 'Plots'))

    ox_dir = tl.ensure_directory_exits(
                    help='Sleep time for render', default=10)
args = parser.parse_args()
args.cuda = torch.cuda.is_available() and (not args.no_cuda)

vis = visdom.Visdom() if args.render else None
env_fn = lambda: FruitCollection2D(hybrid=True, vis=vis)
_env = env_fn()
total_actions = _env.total_actions
net = ActorHybridCriticNet(_env.reset().shape[0], total_actions, _env.total_fruits)

# create directories to store results
result_dir = ensure_directory_exits(os.path.join(os.getcwd(), 'results'))
env_dir = ensure_directory_exits(os.path.join(result_dir, _env.name, 'Actor_Hybrid_Critic'))
plots_dir = ensure_directory_exits(os.path.join(env_dir, 'Plots'))
net_path = os.path.join(env_dir, 'model.p')

# Let the game begin !! Broom.. Broom..
if args.cuda:
    net = net.cuda()
if os.path.exists(net_path) and not args.scratch:
    net.load_state_dict(torch.load(net_path))

hac = AHC(vis, reward_types=_env.total_fruits)
if args.train:
    net.train()
else:
    env_fn = lambda: FruitCollection2D(vis=vis)

_env = env_fn()
total_actions = _env.total_actions
obs = _env.reset()
_env.close()

if args.env == "1D":
    print("Creating network for 1D FruitCollection...")
    policy_net = PolicyNet1D(obs.shape[0], total_actions)
else:
    print("Creating network for 2D FruitCollection...")
    policy_net = PolicyNet2D(obs.shape[0], total_actions)

# create directories to store results
result_dir = ensure_directory_exits(os.path.join(os.getcwd(), 'results'))
env_dir = ensure_directory_exits(os.path.join(result_dir, _env.name, 'policy_gradient'))
plots_dir = ensure_directory_exits(os.path.join(env_dir, 'Plots'))
policy_net_path = os.path.join(env_dir, 'model.p')

# Let the game begin !! Broom.. Broom..
if args.cuda:
    policy_net = policy_net.cuda()
if os.path.exists(policy_net_path) and not args.scratch:
    policy_net.load_state_dict(torch.load(policy_net_path))

pg = PolicyGraident(vis)
if args.train:
    policy_net.train()