示例#1
0
    def functional_pruning_fsm(self, bgru_net, bgru_dir, cuda):
        """
        Restore a trained network and its Moore machine from ``bgru_dir`` and
        run functional pruning on the machine.

        :param bgru_net: network whose weights are loaded from ``<bgru_dir>/model.p``
        :param bgru_dir: directory holding ``model.p`` and
            ``partial_unmin_moore_machine.p``; also handed to the pruning
            routine as its ``path`` argument
        :param cuda: not used by this method
        :return: None
        """
        bgru_net_path = os.path.join(bgru_dir, 'model.p')
        # Called only for its effect on the 'Plots' directory (presumably it
        # creates it); the returned path is not needed here.
        tl.ensure_directory_exits(os.path.join(bgru_dir, 'Plots'))

        # TODO: the unminimized, partial MMN is the one that has to be pruned.
        # Other machine files that were tried here:
        # 'full_min_moore_machine.p', 'full_unmin_moore_machine.p'.
        unmin_moore_machine_path = os.path.join(
            bgru_dir, 'partial_unmin_moore_machine.p')

        bgru_net.load_state_dict(torch.load(bgru_net_path))
        # NOTE(review): pickle can execute arbitrary code while loading; only
        # point this at machine files produced by this project.
        with open(unmin_moore_machine_path, 'rb') as machine_file:
            moore_machine = pickle.load(machine_file)
        bgru_net.eval()

        # TODO: store_obs is kept False for now.
        moore_machine.functional_pruning(bgru_net,
                                         self.env,
                                         log=True,
                                         store_obs=False,
                                         path=bgru_dir)
示例#2
0
                        default=1)

    # Parse the command line; CUDA is enabled only when torch reports it as
    # available and the `no_cuda` option was not given.
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and (not args.no_cuda)

    # A Visdom client is created only when rendering was requested.
    vis = visdom.Visdom() if args.render else None

    # Factory producing FruitCollection1D environments with hybrid=True that
    # all share the `vis` client created above.
    env_fn = lambda: FruitCollection1D(hybrid=True, vis=vis)

    # One environment instance is built up front so the network can be sized
    # from it: first dimension of a reset observation, number of actions and
    # number of fruits.
    _env = env_fn()
    total_actions = _env.total_actions
    policy_net = PolicyNet1D(_env.reset().shape[0], total_actions,
                             _env.total_fruits)

    # Result layout: <cwd>/results/<env name>/<Hybrid_Pg_decompose|Hybrid_Pg>/
    # containing a 'Plots' directory and the saved weights 'model.p'.
    result_dir = ensure_directory_exits(os.path.join(os.getcwd(), 'results'))
    env_dir = ensure_directory_exits(
        os.path.join(result_dir, _env.name,
                     'Hybrid_Pg_decompose' if args.decompose else "Hybrid_Pg"))
    plots_dir = ensure_directory_exits(os.path.join(env_dir, 'Plots'))
    policy_net_path = os.path.join(env_dir, 'model.p')

    # Move the network to the GPU first (when enabled), then restore saved
    # weights if a model file exists and `scratch` was not requested.
    if args.cuda:
        policy_net = policy_net.cuda()
    if os.path.exists(policy_net_path) and not args.scratch:
        policy_net.load_state_dict(torch.load(policy_net_path))

    # Policy-gradient driver, configured with the fruit count, the
    # `decompose` option and the (possibly None) Visdom client.
    pg = HybridPolicyGraident(_env.total_fruits, args.decompose, vis)

    if args.train:
示例#3
0
    def evaluate(self, net, env, total_episodes, log=True, render=False, inspect=False, store_obs=False, path=None, cuda=False):
        """
        Evaluate the trained network by driving the environment through this state machine.

        At every step the observation is encoded with ``net.obs_encode``, mapped to a discrete
        observation index (and, for a minimized machine, to its minimized observation class),
        and used to look up the next state in ``self.transaction``. The action stored for that
        state in ``self.state_desc`` is then executed in the environment.

        NOTE(review): ``scipy.misc.imsave`` / ``scipy.misc.imread`` were removed in SciPy 1.2, so
        the ``inspect`` / ``store_obs`` paths need an older SciPy. Confirm the pinned version.

        :param net: trained Bottleneck GRU network
        :param env: environment
        :param total_episodes: number of episodes to test
        :param log: check to print out evaluation log
        :param render: check to render environment
        :param inspect: check to write per-episode frames and an ffmpeg video under ``path``
            (frames, state frequencies and the last episode's trajectory are recorded only when
            ``render`` is set as well); the script exits if earlier videos are already present
        :param store_obs: check to store observation images grouped by observation index and,
            for a minimized machine, combined images per observation class; requires ``inspect``
        :param path: directory for inspection output; required when ``inspect`` is set
        :param cuda: check if cuda is available
        :return: average episode reward over ``total_episodes``
        :raises ValueError: if ``store_obs`` is set without ``inspect`` or ``inspect`` is set
            without ``path`` (both combinations previously failed later with an obscure error)
        """
        # Imported locally so the method does not depend on the module's import block.
        import shutil
        import subprocess

        if store_obs and not inspect:
            # The observation directory and the image shape only exist in inspect mode.
            raise ValueError('store_obs=True requires inspect=True')
        if inspect and path is None:
            raise ValueError('inspect=True requires a path')

        net.eval()
        if inspect:
            obs_path = ensure_directory_exits(os.path.join(path, 'obs'))
            video_dir_path = ensure_directory_exits(os.path.join(path, 'eps_videos'))
            if os.listdir(video_dir_path):
                sys.exit('Previous Video Files present: ' + video_dir_path)
            states = sorted(self.state_desc.keys())
            # frequency[s][t]: how often the transition s -> t was taken
            self.frequency = {s: {t: 0 for t in states} for s in states}
            self.trajectory = []

        # a quick hack to prevent the agent from getting stuck: an episode is cut off once the
        # last `max_same_action` actions are all identical
        max_same_action = 5000
        max_images_per_comb = 250  # beyond this images cannot be combined due to library/memory issues

        total_reward = 0
        for ep in range(total_episodes):
            if inspect:
                ep_video_path = ensure_directory_exits(os.path.join(video_dir_path, str(ep)))
                obs, org_obs = env.reset(inspect=True)
                _shape = (org_obs.shape[1], org_obs.shape[0])
            else:
                obs = env.reset()
            done = False
            ep_reward = 0
            ep_actions = []
            ep_obs = []
            curr_state = self.start_state
            while not done:
                ep_obs.append(obs)
                obs = Variable(torch.FloatTensor(obs).unsqueeze(0))
                if cuda:
                    obs = obs.cuda()
                obs_x = list(net.obs_encode(obs).data.cpu().numpy()[0])
                _, obs_index = self._get_index(self.obs_space, obs_x, force=False)
                if store_obs:
                    obs_dir = ensure_directory_exits(os.path.join(obs_path, str(obs_index)))
                    # random suffix keeps several samples of the same observation index apart
                    obs_file = '{}_{}.jpg'.format(obs_index, random.randint(0, 100000))
                    scipy.misc.imsave(os.path.join(obs_dir, obs_file), org_obs)

                # pre_index keeps the un-minimized index; it stays None when the machine is not
                # minimized or when the lookup below fails (instead of being unbound or stale)
                pre_index = None
                if self.minimized:
                    try:
                        (obs_index, pre_index) = (self.obs_minobs_map[obs_index], obs_index)
                    except Exception as e:
                        logger.error(e)

                next_state = self.transaction[curr_state][obs_index]
                if next_state is None:
                    logger.info('None state encountered!')
                    logger.info('Exiting the script!')
                    sys.exit(0)
                if render and inspect:
                    _text = 'Current State:{} \n Obs: {} \n Next State: {} \n\n\n Total States:{} \n Total Obs: {}'
                    _text = _text.format(str(curr_state), str((obs_index, pre_index)), str(next_state),
                                         len(self.state_desc.keys()), len(self.minobs_obs_map.keys()))
                    _label_img = self.text_image(_shape, _text)
                    _img = np.hstack((org_obs, _label_img))
                    env.render(inspect=inspect, img=_img)
                    # zero-padded frame number so the frames sort in playback order
                    frame_id = str(len(ep_obs)).zfill(10)
                    scipy.misc.imsave(os.path.join(ep_video_path, 'frame_' + frame_id + '.jpg'), _img)
                    self.frequency[curr_state][next_state] += 1
                    if ep == total_episodes - 1:
                        self.trajectory.append([len(ep_obs), curr_state, (obs_index, pre_index), next_state])
                elif render:
                    env.render()

                curr_state = next_state
                action = int(self.state_desc[curr_state]['action'])
                obs, reward, done, info = env.step(action)
                org_obs = info['org_obs'] if 'org_obs' in info else obs
                ep_actions.append(action)
                ep_reward += reward

                if len(ep_actions) > max_same_action:
                    actions_to_consider = ep_actions[-max_same_action:]
                    if actions_to_consider.count(actions_to_consider[0]) == max_same_action:
                        done = True

            total_reward += ep_reward
            if log:
                logger.info("Episode => {} Score=> {}".format(ep, ep_reward))
            if inspect:
                # Arguments go to ffmpeg as a list (no shell), so paths with spaces or
                # parentheses need no escaping and cannot be split into extra arguments.
                ffmpeg_cmd = ['ffmpeg', '-f', 'image2', '-pattern_type', 'glob', '-framerate', '1',
                              '-i', os.path.join(ep_video_path, 'frame_') + '*.jpg',
                              os.path.join(ep_video_path, 'video_') + '{}.mp4'.format(ep)]
                try:
                    subprocess.call(ffmpeg_cmd)
                except OSError as e:
                    # ffmpeg missing or not executable: report it and carry on, as the former
                    # shell invocation did
                    logger.error(e)
                # drop the raw frames of this episode, keeping only the encoded video
                for frame_name in os.listdir(ep_video_path):
                    if frame_name.endswith('.jpg') and not frame_name.startswith('.'):
                        os.remove(os.path.join(ep_video_path, frame_name))

        if self.minimized and store_obs:
            logger.info('Combining Sub-Observations')
            combined_obs_path = ensure_directory_exits(os.path.join(path, 'combined_obs'))
            for k in sorted(self.minobs_obs_map.keys()):
                logger.info('Observation Class:' + str(k))
                members = self.minobs_obs_map[k]
                suffix = len(members) > max_images_per_comb
                total_parts = (len(members) + max_images_per_comb - 1) // max_images_per_comb
                for p_i in range(total_parts):
                    # consecutive, non-overlapping chunks of at most max_images_per_comb members
                    part = members[p_i * max_images_per_comb:(p_i + 1) * max_images_per_comb]
                    k_image = None
                    for o_i in part:
                        o_path = os.path.join(obs_path, str(o_i))
                        o_files = [os.path.join(o_path, f) for f in os.listdir(o_path) if
                                   os.path.isfile(os.path.join(o_path, f))]
                        o_i_image = scipy.misc.imread(random.choice(o_files))

                        # one row per member: its index label followed by 10 random samples
                        o_i_image = np.hstack((self.text_image(_shape, str(o_i),
                                                               position=(_shape[0] // 2, _shape[1] // 2)),
                                               o_i_image))
                        for _ in range(9):
                            o_i_image = np.hstack((o_i_image, scipy.misc.imread(random.choice(o_files))))
                        k_image = o_i_image if k_image is None else np.vstack((k_image, o_i_image))
                    k_shape = (_shape[0], len(part) * _shape[1])
                    k_name_image = self.text_image(k_shape, str(k), position=(k_shape[0] // 2, 10), font_size=20)
                    k_image = np.hstack((k_name_image, k_image))
                    k_file_name = str(k) + (('_part_' + str(p_i + 1)) if suffix else '')
                    scipy.misc.imsave(os.path.join(combined_obs_path, k_file_name + '.jpg'), k_image)

            if inspect:
                # the per-index observation images are only an intermediate product
                shutil.rmtree(obs_path, ignore_errors=True)

        return total_reward / total_episodes
示例#4
0
                hx = hx.cuda()
        _, _, _, (_, _, _, input_x) = self.gru_net((obs, hx),
                                                   input_fn=self.obx_net,
                                                   hx_fn=self.bhx_net,
                                                   inspect=True)
        return input_x


# Script entry point: build and seed the Gym environment named on the command
# line, then lay out the directories in which results are stored.
if __name__ == '__main__':
    args = tl.get_args()
    env = gym.make(args.env)
    env.seed(args.env_seed)
    obs = env.reset()

    # create directories to store results:
    #   <args.result_dir>/Classic_Control/<args.env>/
    result_dir = tl.ensure_directory_exits(
        os.path.join(args.result_dir, 'Classic_Control'))
    env_dir = tl.ensure_directory_exits(os.path.join(result_dir, args.env))

    # gru_<gru_size>/ holds the GRU model file ('model.p') and its 'Plots'
    gru_dir = tl.ensure_directory_exits(
        os.path.join(env_dir, 'gru_{}'.format(args.gru_size)))
    gru_net_path = os.path.join(gru_dir, 'model.p')
    gru_plot_dir = tl.ensure_directory_exits(os.path.join(gru_dir, 'Plots'))

    # gru_<gru_size>_bhx_<bhx_size><bhx_suffix>/ holds the bhx model file
    # ('model.p') and its 'Plots'
    bhx_dir = tl.ensure_directory_exits(
        os.path.join(
            env_dir, 'gru_{}_bhx_{}{}'.format(args.gru_size, args.bhx_size,
                                              args.bhx_suffix)))
    bhx_net_path = os.path.join(bhx_dir, 'model.p')
    bhx_plot_dir = tl.ensure_directory_exits(os.path.join(bhx_dir, 'Plots'))
    ox_dir = tl.ensure_directory_exits(
示例#5
0
                        help='Sleep time for render',
                        default=10)
    # Parse the command line; CUDA is enabled only when torch reports it as
    # available and the `no_cuda` option was not given.
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and (not args.no_cuda)

    # A Visdom client is created only when rendering was requested.
    vis = visdom.Visdom() if args.render else None

    # Factory producing FruitCollection2D environments with hybrid=True that
    # all share the `vis` client created above.
    env_fn = lambda: FruitCollection2D(hybrid=True, vis=vis)

    # One environment instance is built up front so the network can be sized
    # from it: first dimension of a reset observation, number of actions and
    # number of fruits.
    _env = env_fn()
    total_actions = _env.total_actions
    net = ActorHybridCriticNet(_env.reset().shape[0], total_actions,
                               _env.total_fruits)

    # Result layout: <cwd>/results/<env name>/Actor_Hybrid_Critic/ containing
    # a 'Plots' directory and the saved weights 'model.p'.
    result_dir = ensure_directory_exits(os.path.join(os.getcwd(), 'results'))
    env_dir = ensure_directory_exits(
        os.path.join(result_dir, _env.name, 'Actor_Hybrid_Critic'))
    plots_dir = ensure_directory_exits(os.path.join(env_dir, 'Plots'))
    net_path = os.path.join(env_dir, 'model.p')

    # Move the network to the GPU first (when enabled), then restore saved
    # weights if a model file exists and `scratch` was not requested.
    if args.cuda:
        net = net.cuda()
    if os.path.exists(net_path) and not args.scratch:
        net.load_state_dict(torch.load(net_path))

    # Training/evaluation driver; the number of fruits is passed as its
    # `reward_types` argument.
    hac = AHC(vis, reward_types=_env.total_fruits)

    if args.train:
        net.train()
示例#6
0
    else:
        env_fn = lambda: FruitCollection2D(vis=vis)
    # Probe environment: used to read the action count and one reset
    # observation, then closed. Its `name` attribute is still read further
    # down when the result path is built.
    _env = env_fn()
    total_actions = _env.total_actions
    obs = _env.reset()
    _env.close()

    # Pick the policy network matching the selected environment; both are
    # sized by the first dimension of the observation and the action count.
    if args.env == "1D":
        print("Creating network for 1D FruitCollection...")
        policy_net = PolicyNet1D(obs.shape[0], total_actions)
    else:
        print("Creating network for 2D FruitCollection...")
        policy_net = PolicyNet2D(obs.shape[0], total_actions)

    # Result layout: <cwd>/results/<env name>/policy_gradient/ containing a
    # 'Plots' directory and the saved weights 'model.p'.
    result_dir = ensure_directory_exits(os.path.join(os.getcwd(), 'results'))
    env_dir = ensure_directory_exits(
        os.path.join(result_dir, _env.name, 'policy_gradient'))
    plots_dir = ensure_directory_exits(os.path.join(env_dir, 'Plots'))
    policy_net_path = os.path.join(env_dir, 'model.p')

    # Move the network to the GPU first (when enabled), then restore saved
    # weights if a model file exists and `scratch` was not requested.
    if args.cuda:
        policy_net = policy_net.cuda()
    if os.path.exists(policy_net_path) and not args.scratch:
        policy_net.load_state_dict(torch.load(policy_net_path))

    # Policy-gradient driver sharing the (possibly None) Visdom client.
    pg = PolicyGraident(vis)

    if args.train:
        policy_net.train()