Example #1
 def _log_stats(self, epoch):
     logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
     """
     Replay Buffer
     """
     logger.record_dict(self.replay_buffer.get_diagnostics(),
                        prefix='replay_buffer/')
     """
     Trainer
     """
     logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/')
     """
     Exploration
     """
     logger.record_dict(self.expl_data_collector.get_diagnostics(),
                        prefix='exploration/')
     expl_paths = self.expl_data_collector.get_epoch_paths()
     if hasattr(self.expl_env, 'get_diagnostics'):
         logger.record_dict(
             self.expl_env.get_diagnostics(expl_paths),
             prefix='exploration/',
         )
     logger.record_dict(
         eval_util.get_generic_path_information(expl_paths),
         prefix="exploration/",
     )
     """
     Evaluation
     """
     logger.record_dict(
         self.eval_data_collector.get_diagnostics(),
         prefix='evaluation/',
     )
     eval_paths = self.eval_data_collector.get_epoch_paths()
     if hasattr(self.eval_env, 'get_diagnostics'):
         logger.record_dict(
             self.eval_env.get_diagnostics(eval_paths),
             prefix='evaluation/',
         )
     # Get path information.
     logger.record_dict(
         eval_util.get_generic_path_information(eval_paths),
         prefix="evaluation/",
     )
     """
     Misc
     """
     gt.stamp('logging')
     logger.record_dict(_get_epoch_timings())
     logger.record_tabular('Epoch', epoch)
     logger.dump_tabular(with_prefix=False, with_timestamp=False)
Example #2
    def _try_to_eval(self, epoch):
        logger.save_extra_data(self.get_extra_data_to_save(epoch))
        if self._can_evaluate():
            self.evaluate(epoch)

            params = self.get_epoch_snapshot(epoch)
            logger.save_itr_params(epoch, params)
            table_keys = logger.get_table_key_set()
            if self._old_table_keys is not None:
                # Debug output: show the current and previous table keys and
                # their set differences before asserting that they match.
                print('$$$$$$$$$$$$$$$')
                print(table_keys)
                print('\n' * 4)
                print(self._old_table_keys)
                print('$$$$$$$$$$$$$$$')
                print(set(table_keys) - set(self._old_table_keys))
                print(set(self._old_table_keys) - set(table_keys))
                assert table_keys == self._old_table_keys, (
                    "Table keys cannot change from iteration to iteration.")
            self._old_table_keys = table_keys

            logger.record_tabular(
                "Number of train steps total",
                self._n_train_steps_total,
            )
            logger.record_tabular(
                "Number of env steps total",
                self._n_env_steps_total,
            )
            logger.record_tabular(
                "Number of rollouts total",
                self._n_rollouts_total,
            )

            times_itrs = gt.get_times().stamps.itrs
            train_time = times_itrs['train'][-1]
            sample_time = times_itrs['sample'][-1]
            eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
            epoch_time = train_time + sample_time + eval_time
            total_time = gt.get_times().total

            logger.record_tabular('Train Time (s)', train_time)
            logger.record_tabular('(Previous) Eval Time (s)', eval_time)
            logger.record_tabular('Sample Time (s)', sample_time)
            logger.record_tabular('Epoch Time (s)', epoch_time)
            logger.record_tabular('Total Train Time (s)', total_time)

            logger.record_tabular("Epoch", epoch)
            logger.dump_tabular(with_prefix=False, with_timestamp=False)
        else:
            logger.log("Skipping eval for now.")
Example #3
    def evaluate(self, epoch):
        """
        Evaluate the policy, e.g. save/print progress.
        :param epoch:
        :return:
        """
        statistics = OrderedDict()
        try:
            statistics.update(self.eval_statistics)
            self.eval_statistics = None
        except (TypeError, AttributeError):
            # eval_statistics was None (or not set), so there is nothing to add
            print('No Stats to Eval')

        logger.log("Collecting samples for evaluation")
        test_paths = self.eval_sampler.obtain_samples()

        statistics.update(
            eval_util.get_generic_path_information(
                test_paths,
                stat_prefix="Test",
            ))
        statistics.update(
            eval_util.get_generic_path_information(
                self._exploration_paths,
                stat_prefix="Exploration",
            ))

        if hasattr(self.env, "log_diagnostics"):
            self.env.log_diagnostics(test_paths)
        if hasattr(self.env, "log_statistics"):
            statistics.update(self.env.log_statistics(test_paths))
        if epoch % self.freq_log_visuals == 0:
            if hasattr(self.env, "log_visuals"):
                self.env.log_visuals(test_paths, epoch,
                                     logger.get_snapshot_dir())

        average_returns = eval_util.get_average_returns(test_paths)
        statistics['AverageReturn'] = average_returns
        for key, value in statistics.items():
            logger.record_tabular(key, value)

        best_statistic = statistics[self.best_key]
        if best_statistic > self.best_statistic_so_far:
            self.best_statistic_so_far = best_statistic
            if self.save_best and epoch >= self.save_best_starting_from_epoch:
                data_to_save = {'epoch': epoch, 'statistics': statistics}
                data_to_save.update(self.get_epoch_snapshot(epoch))
                logger.save_extra_data(data_to_save, 'best.pkl')
                print('\n\nSAVED BEST\n\n')
Example #4
    def step(self, action):
        """
        :param action: joint position controls in action space (action bounds), then scaled to joint space
        """
        assert np.shape(action) == (self.n_actions, )
        # action = np.clip(action, self.action_space.low, self.action_space.high)
        action = self.process_action(action)

        p.configureDebugVisualizer(p.COV_ENABLE_SINGLE_STEP_RENDERING)
        forces = np.array([100] * 7 + [60] * 2)
        p.setJointMotorControlArray(self.pandaUid,
                                    list(range(7)) + [9, 10],
                                    p.POSITION_CONTROL,
                                    action,
                                    forces=forces)

        p.stepSimulation()

        self.observation, _ = self.get_obs()

        done = False
        done, reward, _ = self.get_reward(done)
        # `done` here means the task was completed; once completed, the flag
        # stays set until env.reset().
        self.completed = self.completed or done

        self.step_counter += 1

        if self.step_counter > self._max_episode_steps:
            reward = 0
            done = True

        info = {
            "obj_pos":
            np.array(p.getBasePositionAndOrientation(self.objectUid)[0]),
            "obj_ori":
            np.array(p.getBasePositionAndOrientation(self.objectUid)[1]),
            "hand_pos":
            np.array(p.getLinkState(self.pandaUid, 11)[0]),
            "fingers_joint":
            np.array([
                p.getJointState(self.pandaUid, 9)[0],
                p.getJointState(self.pandaUid, 10)[0]
            ]),
            "completed":
            self.completed,
        }
        if self.completed and self.verbose:
            logger.log("Completed!")
        return self.observation, reward, done, info
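The docstring for step() says the incoming action is expressed in the action-space bounds and then scaled to joint space. The body of self.process_action is not shown above, so the following is only a minimal sketch of such a rescaling, assuming a simple linear map from action bounds to joint limits (the helper name and its parameters are hypothetical, not from the original code):

import numpy as np

def process_action_sketch(action, act_low, act_high, joint_low, joint_high):
    # Clip to the action-space bounds, then linearly rescale each dimension
    # into the corresponding joint range.
    action = np.clip(action, act_low, act_high)
    frac = (action - act_low) / (act_high - act_low)
    return joint_low + frac * (joint_high - joint_low)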
Example #5
 def train(self):
     self.fix_data_set()
     logger.log("Done creating dataset.")
     num_batches_total = 0
     for epoch in range(self.num_epochs):
         for _ in range(self.num_batches_per_epoch):
             self.qf.train(True)
             self._do_training()
             num_batches_total += 1
         logger.push_prefix('Iteration #%d | ' % epoch)
         self.qf.train(False)
         self.evaluate(epoch)
         params = self.get_epoch_snapshot(epoch)
         logger.save_itr_params(epoch, params)
         logger.log("Done evaluating")
         logger.pop_prefix()
Example #6
    def _try_to_eval(self, epoch):

        if self._can_evaluate():
            # save if it's time to save
            if epoch % self.freq_saving == 0 or epoch + 1 >= self.num_epochs:
                # if epoch + 1 >= self.num_epochs:
                #     epoch = 'final'
                logger.save_extra_data(self.get_extra_data_to_save(epoch))
                params = self.get_epoch_snapshot(epoch)
                logger.save_itr_params(epoch, params)

            self.evaluate(epoch)

            logger.record_tabular(
                "Number of train calls total",
                self._n_train_steps_total,
            )
            logger.record_tabular(
                "Number of env steps total",
                self._n_env_steps_total,
            )
            logger.record_tabular(
                "Number of rollouts total",
                self._n_rollouts_total,
            )

            times_itrs = gt.get_times().stamps.itrs
            train_time = times_itrs['train'][-1]
            sample_time = times_itrs['sample'][-1]
            eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
            epoch_time = train_time + sample_time + eval_time
            total_time = gt.get_times().total

            logger.record_tabular('Train Time (s)', train_time)
            logger.record_tabular('(Previous) Eval Time (s)', eval_time)
            logger.record_tabular('Sample Time (s)', sample_time)
            logger.record_tabular('Epoch Time (s)', epoch_time)
            logger.record_tabular('Total Train Time (s)', total_time)

            logger.record_tabular("Epoch", epoch)
            logger.dump_tabular(with_prefix=False, with_timestamp=False)
        else:
            logger.log("Skipping eval for now.")
Example #7
def execute_actions(actions,
                    polygons,
                    asset_path,
                    sim_steps=2000,
                    img_dim=64,
                    hold_last_action=False):
    polygon_map = {ind: ply for ind, ply in enumerate(polygons)}

    xml = XML(asset_path)
    names = []
    for action in actions:
        ply_ind, pos, axangle, scale, rgb = action
        ply = polygon_map[ply_ind]
        rgba = rgb.tolist() + [1]
        #print('Dropping {} | pos: {} | axangle: {} | scale: {} | rgb: {} '.format(ply, pos, axangle, scale, rgb))

        name = xml.add_mesh(ply,
                            pos=pos,
                            axangle=axangle,
                            scale=scale,
                            rgba=rgba)
        names.append(name)

    xml_str = xml.instantiate()
    model = mjc.load_model_from_xml(xml_str)
    sim = mjc.MjSim(model)

    log_steps = len(actions) + 1
    logger = Logger(xml, sim, steps=log_steps, img_dim=img_dim)
    logger.log(0)

    for act_ind, act in enumerate(actions):
        hold_objects = names[act_ind + 1:]
        drop_object = names[act_ind]
        if act_ind == len(actions) - 1 and hold_last_action:
            logger.hold_drop_execute(hold_objects, drop_object, 1)
        else:
            logger.hold_drop_execute(hold_objects, drop_object, sim_steps)
        logger.log(act_ind + 1, hold_objects)

    data, images, masks = logger.get_logs()
    images = images / 255.

    return data, images, masks
Example #8
File: high_low.py  Project: jcoreyes/erl
 def render(self):
     logger.push_prefix("HighLow(sign={0})\t".format(self._sign))
     if self._last_action is None:
         logger.log("No action taken.")
     else:
         if self._last_t == 0:
             logger.log("--- New Episode ---")
         logger.push_prefix("t={0}\t".format(self._last_t))
         with np_print_options(precision=4, suppress=False):
             logger.log("Action: {0}".format(self._last_action, ))
         logger.log("Reward: {0}".format(self._last_reward, ))
         logger.pop_prefix()
     logger.pop_prefix()
Example #9
    def _try_to_eval(self, epoch):
        if epoch % self.freq_saving == 0:
            logger.save_extra_data(self.get_extra_data_to_save(epoch))
        if self._can_evaluate():
            self.evaluate(epoch)

            if epoch % self.freq_saving == 0:
                params = self.get_epoch_snapshot(epoch)
                logger.save_itr_params(epoch, params)
            table_keys = logger.get_table_key_set()

            # logger.record_tabular(
            #     "Number of train steps total",
            #     self._n_policy_train_steps_total,
            # )
            logger.record_tabular(
                "Number of env steps total",
                self._n_env_steps_total,
            )
            logger.record_tabular(
                "Number of rollouts total",
                self._n_rollouts_total,
            )

            times_itrs = gt.get_times().stamps.itrs
            train_time = times_itrs['train'][-1]
            sample_time = times_itrs['sample'][-1]
            eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
            epoch_time = train_time + sample_time + eval_time
            total_time = gt.get_times().total

            logger.record_tabular('Train Time (s)', train_time)
            logger.record_tabular('(Previous) Eval Time (s)', eval_time)
            logger.record_tabular('Sample Time (s)', sample_time)
            logger.record_tabular('Epoch Time (s)', epoch_time)
            logger.record_tabular('Total Train Time (s)', total_time)

            logger.record_tabular("Epoch", epoch)
            logger.dump_tabular(with_prefix=False, with_timestamp=False)
        else:
            logger.log("Skipping eval for now.")
Example #10
    def sim_first_step(self, actions):
        asset_path = '/home/jcoreyes/objects/object-oriented-prediction/o2p2/data/stl'
        img_dim = 64
        xml = XML(asset_path)
        names = []
        for action in actions:
            ply_ind, pos, axangle, scale, rgb = (
                action[0], action[1:4], action[4:8], action[8], action[9:12])
            ply = self.polygon_map[ply_ind]
            rgba = rgb.tolist() + [1]
            print('Dropping {} | pos: {} | axangle: {} | scale: {} | rgb: {} '.
                  format(ply, pos, axangle, scale, rgb))

            name = xml.add_mesh(ply,
                                pos=pos,
                                axangle=axangle,
                                scale=scale,
                                rgba=rgba)
            names.append(name)

        xml_str = xml.instantiate()
        model = mjc.load_model_from_xml(xml_str)
        sim = mjc.MjSim(model)
        log_steps = 2
        sim_steps = 1
        logger = Logger(xml, sim, steps=log_steps, img_dim=img_dim)
        logger.log(0)

        for act_ind, act in enumerate(actions):
            hold_objects = names[act_ind + 1:]
            drop_object = names[act_ind]
            logger.hold_drop_execute(hold_objects, drop_object, sim_steps)
            logger.log(act_ind + 1, hold_objects)
            break

        data, images, masks = logger.get_logs()
        images = images / 255.

        # Drop into the debugger to inspect the logged data, images, and masks.
        import pdb
        pdb.set_trace()
Example #11
    def evaluate(self, epoch, eval_paths=None):
        statistics = OrderedDict()
        statistics.update(self.eval_statistics)

        logger.log("Collecting samples for evaluation")
        if eval_paths:
            test_paths = eval_paths
        else:
            test_paths = self.get_eval_paths()
        statistics.update(
            eval_util.get_generic_path_information(
                test_paths,
                stat_prefix="Test",
            ))
        if len(self._exploration_paths) > 0:
            statistics.update(
                eval_util.get_generic_path_information(
                    self._exploration_paths,
                    stat_prefix="Exploration",
                ))
        if hasattr(self.env, "log_diagnostics"):
            self.env.log_diagnostics(test_paths, logger=logger)
        if hasattr(self.env, "get_diagnostics"):
            statistics.update(self.env.get_diagnostics(test_paths))

        for path in test_paths:
            self.env.update_rewards(path)

        statistics['AverageReturn'] = eval_util.get_average_returns(test_paths)
        statistics['AverageEnvironmentReturn'] = (
            eval_util.get_average_environment_returns(test_paths))
        statistics['AverageUnsupervisedReturn'] = (
            eval_util.get_average_unsupervised_returns(test_paths))

        for key, value in statistics.items():
            logger.record_tabular(key, value)
        self.need_to_update_eval_statistics = True
Example #12
    def train(self):
        for epoch in range(self.num_epochs):
            logger.push_prefix('Iteration #%d | ' % epoch)

            start_time = time.time()
            for _ in range(self.num_steps_per_epoch):
                batch = self.get_batch()
                train_dict = self.get_train_dict(batch)

                self.policy_optimizer.zero_grad()
                policy_loss = train_dict['Policy Loss']
                policy_loss.backward()
                self.policy_optimizer.step()
            logger.log("Train time: {}".format(time.time() - start_time))

            start_time = time.time()
            self.evaluate(epoch)
            logger.log("Eval time: {}".format(time.time() - start_time))

            params = self.get_epoch_snapshot(epoch)
            logger.save_itr_params(epoch, params)
            logger.pop_prefix()
Example #13
def run_task(variant):
    from rlkit.core import logger
    print(variant)
    logger.log("Hello from script")
    logger.log("variant: " + str(variant))
    logger.record_tabular("value", 1)
    logger.dump_tabular()
    logger.log("snapshot_dir:", logger.get_snapshot_dir())
Example #14
    def _backtracking_line_search(self, params, descent_step, f_loss,
                                  f_constraint):
        prev_params = [p.clone() for p in params]
        ratio_list = self._backtrack_ratio**np.arange(self._max_backtracks)
        loss_before = f_loss()

        param_shapes = [p.shape or torch.Size([1]) for p in params]
        descent_step = unflatten_tensors(descent_step, param_shapes)
        assert len(descent_step) == len(params)

        for ratio in ratio_list:
            for step, prev_param, param in zip(descent_step, prev_params,
                                               params):
                step = ratio * step
                new_param = prev_param.data - step
                param.data = new_param.data

            loss = f_loss()
            constraint_val = f_constraint()
            if (loss < loss_before
                    and constraint_val <= self._max_constraint_value):
                break

        if ((torch.isnan(loss) or torch.isnan(constraint_val)
             or loss >= loss_before
             or constraint_val >= self._max_constraint_value)
                and not self._accept_violation):
            logger.log('Line search condition violated. Rejecting the step!')
            if torch.isnan(loss):
                logger.log('Violated because loss is NaN')
            if torch.isnan(constraint_val):
                logger.log('Violated because constraint is NaN')
            if loss >= loss_before:
                logger.log('Violated because loss not improving')
            if constraint_val >= self._max_constraint_value:
                logger.log('Violated because constraint is violated')
            for prev, cur in zip(prev_params, params):
                cur.data = prev.data
Example #15
    def evaluate(self, epoch, exploration_paths):
        """
        Perform evaluation for this algorithm.

        :param epoch: The epoch number.
        :param exploration_paths: List of dicts, each representing a path.
        """
        logger.log("Collecting samples for evaluation")
        paths = self._sample_eval_paths(epoch)
        statistics = OrderedDict()

        statistics.update(self._statistics_from_paths(paths, "Test"))
        statistics.update(self._get_other_statistics())
        statistics.update(
            self._statistics_from_paths(exploration_paths, "Exploration"))

        statistics['AverageReturn'] = get_average_returns(paths)
        statistics['Epoch'] = epoch

        for key, value in statistics.items():
            logger.record_tabular(key, value)

        self.log_diagnostics(paths)
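The docstring above describes exploration_paths as a list of dicts, each representing a path. The snippet does not show the dict layout; a minimal illustration of the kind of per-path dict typically produced by rlkit-style rollouts (key names and shapes assumed here, with T = 2 transitions) would be:

import numpy as np

# Hypothetical path dict; the actual keys depend on the sampler used.
path = dict(
    observations=np.zeros((2, 4)),
    actions=np.zeros((2, 1)),
    rewards=np.zeros((2, 1)),
    next_observations=np.zeros((2, 4)),
    terminals=np.zeros((2, 1), dtype=bool),
)
exploration_paths = [path]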
Example #16
def train_amortized_goal_chooser(
    goal_chooser,
    goal_conditioned_model,
    argmax_q,
    discount,
    replay_buffer,
    learning_rate=1e-3,
    batch_size=32,
    num_updates=1000,
):
    def get_loss(training=False):
        buffer = replay_buffer.get_replay_buffer(training)
        batch = buffer.random_batch(batch_size)
        obs = ptu.np_to_var(batch['observations'], requires_grad=False)
        goals = ptu.np_to_var(batch['goal_states'], requires_grad=False)
        goal = goal_chooser(obs, goals)
        actions = argmax_q(obs, goal, discount)
        final_state_predicted = goal_conditioned_model(
            obs,
            actions,
            goal,
            discount,
        ) + obs
        rewards = goal_chooser.reward_function(final_state_predicted, goals)
        return -rewards.mean()

    discount = ptu.np_to_var(discount * np.ones((batch_size, 1)))
    optimizer = optim.Adam(goal_chooser.parameters(), learning_rate)
    for i in range(num_updates):
        optimizer.zero_grad()
        loss = get_loss()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            logger.log("Number updates: {}".format(i))
            logger.log("Train loss: {}".format(float(ptu.get_numpy(loss))))
            logger.log("Validation loss: {}".format(
                float(ptu.get_numpy(get_loss(training=False)))))
Example #17
    def _try_to_eval(self, epoch, eval_paths=None):
        if MPI and MPI.COMM_WORLD.Get_rank() == 0:
            if epoch % self.save_extra_data_interval == 0:
                logger.save_extra_data(self.get_extra_data_to_save(epoch))

            if epoch % self.num_epochs_per_param_save == 0:
                print("Attempting itr param save...")
                params = self.get_epoch_snapshot(epoch)
                logger.save_itr_params(epoch, params)
                print(f"Itr{epoch} param saved!")

        if self._can_evaluate():
            self.evaluate(epoch, eval_paths=eval_paths)

            logger.record_tabular(
                "Number of train steps total",
                self._n_train_steps_total,
            )
            logger.record_tabular(
                "Number of env steps total",
                self._n_env_steps_total,
            )
            logger.record_tabular(
                "Number of rollouts total",
                self._n_rollouts_total,
            )

            times_itrs = gt.get_times().stamps.itrs
            # train_time = times_itrs['train'][-1]
            training_loops = ['get_batch', 'update_normalizer', 'forward', 'compute_losses',
                              'qf1_loop', 'policy_loss_forward', 'policy_loop', 'vf_loop']
            # Sum the latest timing of every recorded stamp (not just training_loops)
            # to estimate this epoch's training time.
            train_time = sum(times_itrs[key][-1] for key in times_itrs.keys())

            sample_time = times_itrs['sample'][-1]

            if epoch > 0:
                eval_time = times_itrs['eval'][-1]
            else:
                times_itrs['eval'] = [0]  # ensure 'eval' exists for the sums/loops over times_itrs below
                eval_time = 0
            epoch_time = train_time + sample_time + eval_time
            total_time = gt.get_times().total

            # logger.record_tabular('Get Batch (s)', times_itrs['get_batch'][-1])
            # logger.record_tabular('Update Normalizer (s)', times_itrs['update_normalizer'][-1])
            # logger.record_tabular('Forward (s)', times_itrs['forward'][-1])
            # logger.record_tabular('Compute Losses (s)', times_itrs['compute_losses'][-1])
            # logger.record_tabular('QF1 Loop (s)', times_itrs['qf1_loop'][-1])
            # logger.record_tabular('QF2 Loop (s)', times_itrs['qf2_loop'][-1])
            # logger.record_tabular("Policy Forward (s)", times_itrs['policy_loss_forward'][-1])
            # logger.record_tabular('Policy Loop (s)', times_itrs['policy_loop'][-1])
            # logger.record_tabular('VF Loop (s)', times_itrs['vf_loop'][-1])

            for key in times_itrs.keys():
                logger.record_tabular(key.title(), times_itrs[key][-1])

            logger.record_tabular('Train Time (s) ---', train_time)
            logger.record_tabular('(Previous) Eval Time (s) ---', eval_time)
            logger.record_tabular('Sample Time (s) ---', sample_time)
            logger.record_tabular('Epoch Time (s)', epoch_time)
            logger.record_tabular('Total Train Time (s)', total_time)
            logger.record_tabular("Epoch", epoch)

            table_keys = logger.get_table_key_set()
            if self._old_table_keys is not None and table_keys != self._old_table_keys:
                # assert table_keys == self._old_table_keys, (
                #     "Table keys cannot change from iteration to iteration."
                # )
                print("Table keys have changed. Rewriting header and filling with 0s")
                logger.update_header()
                raise NotImplementedError
            self._old_table_keys = table_keys

            logger.dump_tabular(with_prefix=False, with_timestamp=False)
        else:
            logger.log("Skipping eval for now.")
Example #18
def example(variant):
    import mujoco_py
    import torch
    logger.log(torch.__version__)
    date_format = '%m/%d/%Y %H:%M:%S %Z'
    date = datetime.now(tz=pytz.utc)
    logger.log("start")
    logger.log('Current date & time is: {}'.format(date.strftime(date_format)))
    if torch.cuda.is_available():
        x = torch.randn(3)
        logger.log(str(x.to(ptu.device)))

    date = date.astimezone(timezone('US/Pacific'))
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))
    for i in range(variant['num_seconds']):
        logger.log("Tick, {}".format(i))
        time.sleep(1)
    logger.log("end")
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))

    logger.log("start mujoco")
    from gym.envs.mujoco import HalfCheetahEnv
    e = HalfCheetahEnv()
    img = e.sim.render(32, 32)
    logger.log(str(sum(img)))
    logger.log("end mujocoy")
Example #19
        time.sleep(1)
    logger.log("end")
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))

    logger.log("start mujoco")
    from gym.envs.mujoco import HalfCheetahEnv
    e = HalfCheetahEnv()
    img = e.sim.render(32, 32)
    logger.log(str(sum(img)))
    logger.log("end mujocoy")


if __name__ == "__main__":
    # noinspection PyTypeChecker
    date_format = '%m/%d/%Y %H:%M:%S %Z'
    date = datetime.now(tz=pytz.utc)
    logger.log("start")
    variant = dict(
        num_seconds=10,
        launch_time=str(date.strftime(date_format)),
    )
    run_experiment(
        example,
        exp_prefix='test-gpu-local-singularity',
        mode='local_singularity',
        variant=variant,
        # use_gpu=True,
        use_gpu=False,
        verbose=True,
    )
Example #20
def setup_logger(
        exp_prefix="default",
        variant=None,
        text_log_file="debug.log",
        variant_log_file="variant.json",
        tabular_log_file="progress.csv",
        snapshot_mode="last",
        snapshot_gap=1,
        log_tabular_only=False,
        log_dir=None,
        git_infos=None,
        script_name=None,
        **create_log_dir_kwargs
):
    """
    Set up logger to have some reasonable default settings.

    Will save log output to

        base_log_dir/exp_prefix/exp_name.

    exp_name will be auto-generated to be unique.

    If log_dir is specified, then that directory is used as the output dir.

    :param exp_prefix: The sub-directory for this specific experiment.
    :param variant:
    :param text_log_file:
    :param variant_log_file:
    :param tabular_log_file:
    :param snapshot_mode:
    :param log_tabular_only:
    :param snapshot_gap:
    :param log_dir:
    :param git_infos:
    :param script_name: If set, save the script name to this.
    :return:
    """
    if git_infos is None:
        git_infos = get_git_infos(conf.CODE_DIRS_TO_MOUNT)
    first_time = log_dir is None
    if first_time:
        log_dir = create_log_dir(exp_prefix, **create_log_dir_kwargs)

    if variant is not None:
        logger.log("Variant:")
        logger.log(json.dumps(dict_to_safe_json(variant), indent=2))
        variant_log_path = osp.join(log_dir, variant_log_file)
        logger.log_variant(variant_log_path, variant)

    tabular_log_path = osp.join(log_dir, tabular_log_file)
    text_log_path = osp.join(log_dir, text_log_file)

    logger.add_text_output(text_log_path)
    if first_time:
        logger.add_tabular_output(tabular_log_path)
    else:
        logger._add_output(tabular_log_path, logger._tabular_outputs,
                           logger._tabular_fds, mode='a')
        for tabular_fd in logger._tabular_fds:
            logger._tabular_header_written.add(tabular_fd)
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(snapshot_gap)
    logger.set_log_tabular_only(log_tabular_only)
    exp_name = log_dir.split("/")[-1]
    logger.push_prefix("[%s] " % exp_name)

    if git_infos is not None:
        for (
            directory, code_diff, code_diff_staged, commit_hash, branch_name
        ) in git_infos:
            if directory[-1] == '/':
                directory = directory[:-1]
            diff_file_name = directory[1:].replace("/", "-") + ".patch"
            diff_staged_file_name = (
                directory[1:].replace("/", "-") + "_staged.patch"
            )
            if code_diff is not None and len(code_diff) > 0:
                with open(osp.join(log_dir, diff_file_name), "w") as f:
                    f.write(code_diff + '\n')
            if code_diff_staged is not None and len(code_diff_staged) > 0:
                with open(osp.join(log_dir, diff_staged_file_name), "w") as f:
                    f.write(code_diff_staged + '\n')
            with open(osp.join(log_dir, "git_infos.txt"), "a") as f:
                f.write("directory: {}\n".format(directory))
                f.write("git hash: {}\n".format(commit_hash))
                f.write("git branch name: {}\n\n".format(branch_name))
    if script_name is not None:
        with open(osp.join(log_dir, "script_name.txt"), "w") as f:
            f.write(script_name)
    return log_dir
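For reference, a minimal usage sketch of the setup_logger function above (the import path, experiment name, variant values, and the seed keyword forwarded through **create_log_dir_kwargs are assumptions based on the rlkit-style imports seen elsewhere in these examples):

from rlkit.core import logger

log_dir = setup_logger(
    exp_prefix='my-experiment',          # sub-directory that groups runs of this experiment
    variant=dict(algo='SAC', lr=3e-4),   # dumped to variant.json inside the log dir
    snapshot_mode='last',                # keep only the latest itr params
    seed=0,                              # assumed create_log_dir kwarg
)
logger.log('Logging to: ' + log_dir)
logger.record_tabular('Epoch', 0)
logger.dump_tabular()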
Example #21
    def _log_stats(self, epoch):
        logger.log(f"Epoch {epoch} finished", with_timestamp=True)

        """
        Replay Buffer
        """
        logger.record_dict(
            self.replay_buffer.get_diagnostics(), prefix="replay_buffer/"
        )

        """
        Trainer
        """
        logger.record_dict(self.trainer.get_diagnostics(), prefix="trainer/")

        """
        Exploration
        """
        logger.record_dict(
            self.expl_data_collector.get_diagnostics(), prefix="exploration/"
        )

        expl_paths = self.expl_data_collector.get_epoch_paths()
        if len(expl_paths) > 0:
            if hasattr(self.expl_env, "get_diagnostics"):
                logger.record_dict(
                    self.expl_env.get_diagnostics(expl_paths),
                    prefix="exploration/",
                )

            logger.record_dict(
                eval_util.get_generic_path_information(expl_paths),
                prefix="exploration/",
            )

        """
        Evaluation
        """
        logger.record_dict(
            self.eval_data_collector.get_diagnostics(),
            prefix="evaluation/",
        )
        eval_paths = self.eval_data_collector.get_epoch_paths()
        if hasattr(self.eval_env, "get_diagnostics"):
            logger.record_dict(
                self.eval_env.get_diagnostics(eval_paths),
                prefix="evaluation/",
            )

        logger.record_dict(
            eval_util.get_generic_path_information(eval_paths),
            prefix="evaluation/",
        )

        """
        Misc
        """
        gt.stamp("logging")
        timings = _get_epoch_timings()
        timings["time/training and exploration (s)"] = self.total_train_expl_time
        logger.record_dict(timings)

        logger.record_tabular("Epoch", epoch)
        logger.dump_tabular(with_prefix=False, with_timestamp=False)
Example #22
 def pretrain(self):
     logger.log('Pretraining ...')
     for ep in range(self.num_pretrain_updates):
         for t in range(self.num_update_loops_per_train_call):
             self._do_update_step(ep, use_expert_buffer=True)
Example #23
def setup_logger(
    exp_prefix="default",
    exp_id=0,
    seed=0,
    variant=None,
    base_log_dir=None,
    text_log_file="debug.log",
    variant_log_file="variant.json",
    tabular_log_file="progress.csv",
    snapshot_mode="last",
    snapshot_gap=1,
    log_tabular_only=False,
    log_dir=None,
    git_info=None,
    script_name=None,
):
    """
    Set up logger to have some reasonable default settings.

    Will save log output to

        base_log_dir/exp_prefix/exp_name.

    exp_name will be auto-generated to be unique.

    If log_dir is specified, then that directory is used as the output dir.

    :param exp_prefix: The sub-directory for this specific experiment.
    :param exp_id: The number of the specific experiment run within this
    experiment.
    :param variant:
    :param base_log_dir: The directory where all log should be saved.
    :param text_log_file:
    :param variant_log_file:
    :param tabular_log_file:
    :param snapshot_mode:
    :param log_tabular_only:
    :param snapshot_gap:
    :param log_dir:
    :param git_info:
    :param script_name: If set, save the script name to this.
    :return:
    """
    first_time = log_dir is None
    if first_time:
        log_dir = create_log_dir(exp_prefix,
                                 exp_id=exp_id,
                                 seed=seed,
                                 base_log_dir=base_log_dir)

    if variant is not None:
        logger.log("Variant:")
        logger.log(json.dumps(dict_to_safe_json(variant), indent=2))
        variant_log_path = osp.join(log_dir, variant_log_file)
        logger.log_variant(variant_log_path, variant)

    tabular_log_path = osp.join(log_dir, tabular_log_file)
    text_log_path = osp.join(log_dir, text_log_file)

    logger.add_text_output(text_log_path)
    if first_time:
        logger.add_tabular_output(tabular_log_path)
    else:
        logger._add_output(tabular_log_path,
                           logger._tabular_outputs,
                           logger._tabular_fds,
                           mode='a')
        for tabular_fd in logger._tabular_fds:
            logger._tabular_header_written.add(tabular_fd)
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(snapshot_gap)
    logger.set_log_tabular_only(log_tabular_only)
    exp_name = log_dir.split("/")[-1]
    logger.push_prefix("[%s] " % exp_name)

    if git_info is not None:
        code_diff, commit_hash, branch_name = git_info
        if code_diff is not None:
            with open(osp.join(log_dir, "code.diff"), "w") as f:
                f.write(code_diff)
        with open(osp.join(log_dir, "git_info.txt"), "w") as f:
            f.write("git hash: {}".format(commit_hash))
            f.write('\n')
            f.write("git branch name: {}".format(branch_name))
    if script_name is not None:
        with open(osp.join(log_dir, "script_name.txt"), "w") as f:
            f.write(script_name)
    return log_dir
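This variant of setup_logger takes a single git_info triple rather than a list; its layout follows the unpacking inside the function above. A minimal usage sketch (the prefix, variant values, and placeholder git/script values are assumptions):

log_dir = setup_logger(
    exp_prefix='half-cheetah-sac',
    exp_id=0,
    seed=1,
    variant={'lr': 3e-4},
    git_info=('', 'abc1234', 'master'),  # (code_diff, commit_hash, branch_name)
    script_name='examples/sac.py',       # placeholder script path
)
logger.record_tabular('Epoch', 0)
logger.dump_tabular()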
Example #24
 def _log_stats(self, epoch):
     logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
     """
     Replay Buffer
     """
     logger.record_dict(
         self.replay_buffer.get_diagnostics(),
         global_step=epoch,
         prefix="replay_buffer/",
     )
     """
     Trainer
     """
     logger.record_dict(self.trainer.get_diagnostics(),
                        global_step=epoch,
                        prefix="trainer/")
     """
     Exploration
     """
     logger.record_dict(
         self.expl_data_collector.get_diagnostics(),
         global_step=epoch,
         prefix="exploration/",
     )
     expl_paths = self.expl_data_collector.get_epoch_paths()
     if hasattr(self.expl_env, "get_diagnostics"):
         logger.record_dict(
             self.expl_env.get_diagnostics(expl_paths),
             global_step=epoch,
             prefix="exploration/",
         )
     logger.record_dict(
         eval_util.get_generic_path_information(expl_paths),
         global_step=epoch,
         prefix="exploration/",
     )
     """
     Evaluation
     """
     logger.record_dict(
         self.eval_data_collector.get_diagnostics(),
         global_step=epoch,
         prefix="evaluation/",
     )
     eval_paths = self.eval_data_collector.get_epoch_paths()
     if hasattr(self.eval_env, "get_diagnostics"):
         logger.record_dict(
             self.eval_env.get_diagnostics(eval_paths),
             global_step=epoch,
             prefix="evaluation/",
         )
     logger.record_dict(
         eval_util.get_generic_path_information(eval_paths),
         global_step=epoch,
         prefix="evaluation/",
     )
     """
     Misc
     """
     gt.stamp("logging")
     logger.record_dict(_get_epoch_timings(), global_step=epoch)
     logger.record_tabular("Epoch", epoch)
     logger.dump_tabular(with_prefix=False, with_timestamp=False)
Example #25
    def evaluate(self, epoch):
        statistics = OrderedDict()
        statistics.update(self.eval_statistics)
        self.eval_statistics = None

        # statistics.update(eval_util.get_generic_path_information(
        #     self._exploration_paths, stat_prefix="Exploration",
        # ))

        for mode in ['meta_train', 'meta_test']:
            logger.log("Collecting samples for evaluation")
            test_paths = self.obtain_eval_samples(epoch, mode=mode)

            statistics.update(
                eval_util.get_generic_path_information(
                    test_paths,
                    stat_prefix="Test " + mode,
                ))
            # print(statistics.keys())
            if hasattr(self.env, "log_diagnostics"):
                self.env.log_diagnostics(test_paths)
            if hasattr(self.env, "log_statistics"):
                log_stats = self.env.log_statistics(test_paths)
                new_log_stats = OrderedDict(
                    (k + ' ' + mode, v) for k, v in log_stats.items())
                statistics.update(new_log_stats)

            average_returns = rlkit.core.eval_util.get_average_returns(
                test_paths)
            statistics['AverageReturn ' + mode] = average_returns

            if self.render_eval_paths:
                self.env.render_paths(test_paths)

        # meta_test_this_epoch = statistics['Percent_Solved meta_test']
        # meta_test_this_epoch = statistics['Avg Run Rew meta_test']
        # meta_test_this_epoch = statistics['L2AverageClosest meta_test']
        meta_test_this_epoch = statistics['Perc Success meta_test']
        # meta_test_this_epoch = 100.0
        # meta_test_this_epoch = statistics['AverageReturn meta_test']
        if meta_test_this_epoch > self.best_meta_test:
            # make sure you set save_algorithm to true then call save_extra_data
            prev_save_alg = self.save_algorithm
            self.save_algorithm = True
            if self.save_best:
                if epoch > self.save_best_after_epoch:
                    temp_rb = self.replay_buffer
                    self.replay_buffer = None
                    logger.save_extra_data(self.get_extra_data_to_save(epoch),
                                           'best_meta_test.pkl')
                    self.replay_buffer = temp_rb
                    self.best_meta_test = meta_test_this_epoch
                    print('\n\nSAVED ALG AT EPOCH %d\n\n' % epoch)
            self.save_algorithm = prev_save_alg

        if epoch in self.custom_save_epoch:
            prev_save_alg = self.save_algorithm
            self.save_algorithm = True
            logger.save_extra_data(self.get_extra_data_to_save(epoch),
                                   'custom_save_epoch_%d.pkl' % epoch)
            self.save_algorithm = prev_save_alg

        for key, value in statistics.items():
            logger.record_tabular(key, value)

        if self.plotter:
            self.plotter.draw()
Example #26
    def evaluate(self, epoch):
        """
        Evaluate the policy, e.g. save/print progress.
        :param epoch:
        :return:
        """

        statistics = OrderedDict()
        try:
            statistics.update(self.eval_statistics)
            self.eval_statistics = None
        except (TypeError, AttributeError):
            # eval_statistics was None (or not set), so there is nothing to add
            print('No Stats to Eval')

        logger.log("Collecting random samples for evaluation")

        eval_steps = self.num_steps_per_eval

        test_paths = self.eval_sampler.obtain_samples(eval_steps)
        obs = torch.Tensor(
            np.squeeze(np.vstack([path["observations"]
                                  for path in test_paths])))
        acts = torch.Tensor(
            np.squeeze(np.vstack([path["actions"] for path in test_paths])))
        if len(acts.shape) < 2:
            acts = torch.unsqueeze(acts, 1)
        random_input = torch.cat([obs, acts], dim=1).to(ptu.device)

        exp_batch = self.get_batch(eval_steps,
                                   keys=['observations', 'actions'],
                                   use_expert_buffer=True)
        # exp_batch = {'observations':torch.Tensor([[0.],[1.],[2.],[3.],[4.],[5.],[6.],[7.],[8.],[9.],[10.]]), 'actions':torch.Tensor([[0.5]]*11)}

        obs = exp_batch['observations']
        acts = exp_batch['actions']
        exp_input = torch.cat([obs, acts], dim=1).to(ptu.device)

        statistics['random_avg_energy'] = self.ebm(random_input).mean().item()
        statistics['expert_avg_energy'] = self.get_energy(
            exp_input).mean().item()
        statistics['expert*20_avg_energy'] = self.get_energy(
            exp_input * 20).mean().item()

        statistics["random_expert_diff"] = (
            statistics["random_avg_energy"] - statistics["expert_avg_energy"])

        for key, value in statistics.items():
            logger.record_tabular(key, value)

        best_statistic = statistics[self.best_key]

        if best_statistic > self.best_statistic_so_far:
            self.best_statistic_so_far = best_statistic
            self.best_epoch = epoch
            self.best_random_avg_energy = statistics['random_avg_energy']
            self.best_expert_avg_energy = statistics['expert_avg_energy']
            logger.record_tabular("Best Model Epoch", self.best_epoch)
            logger.record_tabular("Best Random Energy",
                                  self.best_random_avg_energy)
            logger.record_tabular("Best Expert Energy",
                                  self.best_expert_avg_energy)
            if self.save_best and epoch >= self.save_best_starting_from_epoch:
                data_to_save = {'epoch': epoch, 'statistics': statistics}
                data_to_save.update(self.get_epoch_snapshot(epoch))
                logger.save_extra_data(data_to_save, 'best.pkl')
                print('\n\nSAVED BEST\n\n')
        logger.record_tabular("Best Model Epoch", self.best_epoch)
        logger.record_tabular("Best Random Energy",
                              self.best_random_avg_energy)
        logger.record_tabular("Best Expert Energy",
                              self.best_expert_avg_energy)
Example #27
    def _try_to_eval(self, epoch):
        if epoch % self.logging_period != 0:
            return
        if epoch in self.save_extra_manual_epoch_set:
            logger.save_extra_data(
                self.get_extra_data_to_save(epoch),
                file_name='extra_snapshot_itr{}'.format(epoch),
                mode='cloudpickle',
            )
        if self._save_extra_every_epoch:
            logger.save_extra_data(self.get_extra_data_to_save(epoch))
        gt.stamp('save-extra')
        if self._can_evaluate():
            self.evaluate(epoch)
            gt.stamp('eval')

            params = self.get_epoch_snapshot(epoch)
            logger.save_itr_params(epoch, params)
            gt.stamp('save-snapshot')
            table_keys = logger.get_table_key_set()
            if self._old_table_keys is not None:
                assert table_keys == self._old_table_keys, (
                    "Table keys cannot change from iteration to iteration.")
            self._old_table_keys = table_keys

            logger.record_dict(
                self.trainer.get_diagnostics(),
                prefix='trainer/',
            )

            logger.record_tabular(
                "Number of train steps total",
                self._n_train_steps_total,
            )
            logger.record_tabular(
                "Number of env steps total",
                self._n_env_steps_total,
            )
            logger.record_tabular(
                "Number of rollouts total",
                self._n_rollouts_total,
            )

            times_itrs = gt.get_times().stamps.itrs
            train_time = times_itrs['train'][-1]
            sample_time = times_itrs['sample'][-1]
            save_extra_time = times_itrs['save-extra'][-1]
            save_snapshot_time = times_itrs['save-snapshot'][-1]
            eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
            epoch_time = train_time + sample_time + save_extra_time + eval_time
            total_time = gt.get_times().total

            logger.record_tabular('in_unsupervised_model',
                                  float(self.in_unsupervised_phase))
            logger.record_tabular('Train Time (s)', train_time)
            logger.record_tabular('(Previous) Eval Time (s)', eval_time)
            logger.record_tabular('Sample Time (s)', sample_time)
            logger.record_tabular('Save Extra Time (s)', save_extra_time)
            logger.record_tabular('Save Snapshot Time (s)', save_snapshot_time)
            logger.record_tabular('Epoch Time (s)', epoch_time)
            logger.record_tabular('Total Train Time (s)', total_time)

            logger.record_tabular("Epoch", epoch)
            logger.dump_tabular(with_prefix=False, with_timestamp=False)
        else:
            logger.log("Skipping eval for now.")
Example #28
def example(variant):
    import torch

    import rlkit.torch.pytorch_util as ptu

    print("Starting")
    logger.log(torch.__version__)
    date_format = "%m/%d/%Y %H:%M:%S %Z"
    date = datetime.now(tz=pytz.utc)
    logger.log("start")
    logger.log("Current date & time is: {}".format(date.strftime(date_format)))
    logger.log("Cuda available: {}".format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        x = torch.randn(3)
        logger.log(str(x.to(ptu.device)))

    date = date.astimezone(timezone("US/Pacific"))
    logger.log("Local date & time is: {}".format(date.strftime(date_format)))
    for i in range(variant["num_seconds"]):
        logger.log("Tick, {}".format(i))
        time.sleep(1)
    logger.log("end")
    logger.log("Local date & time is: {}".format(date.strftime(date_format)))

    logger.log("start mujoco")
    from gym.envs.mujoco import HalfCheetahEnv

    e = HalfCheetahEnv()
    img = e.sim.render(32, 32)
    logger.log(str(sum(img)))
    logger.log("end mujoco")

    logger.record_tabular("Epoch", 1)
    logger.dump_tabular()
    logger.record_tabular("Epoch", 2)
    logger.dump_tabular()
    logger.record_tabular("Epoch", 3)
    logger.dump_tabular()
    print("Done")
Example #29
 def _end_epoch(self):
     logger.log("Epoch Duration: {0}".format(time.time() -
                                             self._epoch_start_time))
     logger.log("Started Training: {0}".format(self._can_train()))
     logger.pop_prefix()
Example #30
    def _try_to_eval(self, epoch):
        logger.save_extra_data(self.get_extra_data_to_save(epoch))
        if self._can_evaluate():
            self.evaluate(epoch)

            params = self.get_epoch_snapshot(epoch)
            logger.save_itr_params(epoch, params)
            table_keys = logger.get_table_key_set()
            #print("TABLE KEYS")
            #print(table_keys)
            #if self._old_table_keys is not None:
            #    assert table_keys == self._old_table_keys, (
            #        "Table keys cannot change from iteration to iteration."
            #    )
            self._old_table_keys = table_keys

            logger.record_tabular(
                "Number of train steps total",
                self._n_train_steps_total,
            )
            logger.record_tabular(
                "Number of env steps total",
                self._n_env_steps_total,
            )
            logger.record_tabular(
                "Number of rollouts total",
                self._n_rollouts_total,
            )

            times_itrs = gt.get_times().stamps.itrs
            train_time = times_itrs['train'][-1]
            sample_time = times_itrs['sample'][-1]
            eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
            epoch_time = train_time + sample_time + eval_time
            total_time = gt.get_times().total

            logger.record_tabular('Train Time (s)', train_time)
            logger.record_tabular('(Previous) Eval Time (s)', eval_time)
            logger.record_tabular('Sample Time (s)', sample_time)
            logger.record_tabular('Epoch Time (s)', epoch_time)
            logger.record_tabular('Total Train Time (s)', total_time)

            logger.record_tabular("Epoch", epoch)

            # tensorboard stuff
            _writer = self._writer
            for k, v_str in logger._tabular:
                if k == 'Epoch':
                    continue

                v = float(v_str)
                if k.endswith('Loss'):
                    _writer.add_scalar('Loss/{}'.format(k), v, epoch)
                elif k.endswith('Max'):
                    prefix = k[:-4]
                    _writer.add_scalar('{}/{}'.format(prefix, k), v, epoch)
                elif k.endswith('Min'):
                    prefix = k[:-4]
                    _writer.add_scalar('{}/{}'.format(prefix, k), v, epoch)
                elif k.endswith('Std'):
                    prefix = k[:-4]
                    _writer.add_scalar('{}/{}'.format(prefix, k), v, epoch)
                elif k.endswith('Mean'):
                    prefix = k[:-5]
                    _writer.add_scalar('{}/{}'.format(prefix, k), v, epoch)
                elif 'Time' in k:
                    _writer.add_scalar('Time/{}'.format(k), v, epoch)
                elif k.startswith('Num'):
                    _writer.add_scalar('Number/{}'.format(k), v, epoch)
                elif k.startswith('Exploration'):
                    _writer.add_scalar('Exploration/{}'.format(k), v, epoch)
                elif k.startswith('Test'):
                    _writer.add_scalar('Test/{}'.format(k), v, epoch)
                else:
                    _writer.add_scalar(k, v, epoch)

            _writer.file_writer.flush()

            logger.dump_tabular(with_prefix=False, with_timestamp=False)
        else:
            logger.log("Skipping eval for now.")