Example #1
    def logging(self):
        """Logging the training information.
        """
        if self.params.log_tabular and self.iter <= self.params.n_itr:
            if self.step_count % self.params.log_interval == 0:
                self.iter += 1
                logger.log(' ')
                tabular.record('StepNum', self.step_count)
                record_num = 0
                if self.params.log_dir is not None:
                    if self.step_count == self.params.log_interval:  # first time logging
                        best_actions = []
                    else:
                        with open(self.params.log_dir + '/best_actions.p',
                                  'rb') as f:
                            best_actions = pickle.load(f)

                    best_actions.append(
                        np.array([x.get() for x in self.top_paths.pq[0][0]]))
                    with open(self.params.log_dir + '/best_actions.p',
                              'wb') as f:
                        pickle.dump(best_actions, f)

                for (topi, path) in enumerate(self.top_paths):
                    tabular.record('reward ' + str(topi), path[1])
                    record_num += 1

                for topi_left in range(record_num, self.top_paths.N):
                    tabular.record('reward ' + str(topi_left), 0)
                logger.log(tabular)
                logger.dump_all(self.step_count)
                tabular.clear()
Example #2
    def _train(self,
               n_epochs,
               n_epoch_cycles,
               batch_size,
               plot,
               store_paths,
               pause_for_plot,
               start_epoch=0):
        """Start actual training.

        Args:
            n_epochs(int): Number of epochs.
            n_epoch_cycles(int): Number of batches of samples in each epoch.
                This is only useful for off-policy algorithms.
                For on-policy algorithms this value should always be 1.
            batch_size(int): Number of steps in batch.
            plot(bool): Visualize policy by doing rollout after each epoch.
            store_paths(bool): Save paths in snapshot.
            pause_for_plot(bool): Pause for plot.
            start_epoch (int): (internal) The starting epoch.
                Used for resuming an experiment.

        Returns:
            The average return in the last epoch cycle.

        """
        assert self.has_setup, ('Use Runner.setup() to setup runner before '
                                'training.')

        # Save arguments for restore
        self.train_args = SimpleNamespace(n_epochs=n_epochs,
                                          n_epoch_cycles=n_epoch_cycles,
                                          batch_size=batch_size,
                                          plot=plot,
                                          store_paths=store_paths,
                                          pause_for_plot=pause_for_plot,
                                          start_epoch=start_epoch)

        self.start_worker()

        self.start_time = time.time()
        itr = start_epoch * n_epoch_cycles

        last_return = None
        for epoch in range(start_epoch, n_epochs):
            self.itr_start_time = time.time()
            paths = None
            with logger.prefix('epoch #%d | ' % epoch):
                for cycle in range(n_epoch_cycles):
                    paths = self.obtain_samples(itr, batch_size)
                    last_return = self.algo.train_once(itr, paths)
                    itr += 1
                self.save(epoch, paths if store_paths else None)
                self.log_diagnostics(pause_for_plot)
                logger.dump_all(itr)
                tabular.clear()

        self.shutdown_worker()

        return last_return
Example #3
    def step_epochs(self):
        """Step through each epoch.

        This function returns a magic generator. When iterated through, this
        generator automatically performs services such as snapshotting and log
        management. It is used inside train() in each algorithm.

        The generator initializes two variables: `self.step_itr` and
        `self.step_path`. To use the generator, these two have to be
        updated manually in each epoch, as the example shows below.

        Yields:
            int: The next training epoch.

        Examples:
            for epoch in runner.step_epochs():
                runner.step_path = runner.obtain_samples(...)
                self.train_once(...)
                runner.step_itr += 1

        """
        self._start_worker()
        self._start_time = time.time()
        self.step_itr = self._stats.total_itr
        self.step_path = None

        # Used by integration tests to ensure examples can run one epoch.
        n_epochs = int(
            os.environ.get('GARAGE_EXAMPLE_TEST_N_EPOCHS',
                           self._train_args.n_epochs))

        logger.log('Obtaining samples...')

        suffix = str(uuid.uuid1())
        src = Path(self._snapshotter.snapshot_dir)
        dstfile = f"{src.name}_{suffix}.tar.xz"
        for epoch in range(self._train_args.start_epoch, n_epochs):
            self._itr_start_time = time.time()
            with logger.prefix('epoch #%d | ' % epoch):
                yield epoch
                save_path = (self.step_path
                             if self._train_args.store_paths else None)

                self._stats.last_path = save_path
                self._stats.total_epoch = epoch
                self._stats.total_itr = self.step_itr

                self.save(epoch)
                self.log_diagnostics(self._train_args.pause_for_plot)
                logger.dump_all(self.step_itr)
                tabular.clear()
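The bookkeeping after yield only runs once control returns from the caller's loop body, so it sees the step_itr and step_path values the caller assigned during that epoch. A minimal, self-contained sketch of the same generator pattern, using a toy Runner class with illustrative names rather than the garage API:

import time


class Runner:
    """Toy runner showing the yield-based epoch loop (illustrative, not the garage API)."""

    def __init__(self, n_epochs):
        self.n_epochs = n_epochs
        self.step_itr = 0      # the caller increments this inside its loop body
        self.step_path = None  # the caller stores the latest samples here

    def step_epochs(self):
        start = time.time()
        for epoch in range(self.n_epochs):
            yield epoch  # control passes to the caller's loop body here
            # Runs after the caller's body, so it sees the updated attributes.
            print('epoch %d: itr=%d, path=%s, %.3fs elapsed'
                  % (epoch, self.step_itr, self.step_path, time.time() - start))


runner = Runner(n_epochs=3)
for epoch in runner.step_epochs():
    runner.step_path = ['sample-%d' % epoch]  # stand-in for obtain_samples(...)
    runner.step_itr += 1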
Example #4
    def train(self, num_iter, dump=False):
        """Train for num_iter iterations, optionally dumping tabular logs each step."""

        start = time.time()
        for i in range(num_iter):
            with logger.prefix(' | Iteration {} |'.format(i)):
                t1 = time.time()
                self.train_step()
                t2 = time.time()
                print('total time of one step', t2 - t1)
                print('iter ', i, ' done')
                if dump:
                    logger.log(tabular)
                    logger.dump_all(i)
                    tabular.clear()
        return
Example #5
    def step_epochs(self):
        """Step through each epoch.

        This function returns a magic generator. When iterated through, this
        generator automatically performs services such as snapshotting and log
        management. It is used inside train() in each algorithm.

        The generator initializes two variables: `self.step_itr` and
        `self.step_path`. To use the generator, these two have to be
        updated manually in each epoch, as the example shows below.

        Yields:
            int: The next training epoch.

        Examples:
            for epoch in runner.step_epochs():
                runner.step_path = runner.obtain_samples(...)
                self.train_once(...)
                runner.step_itr += 1

        """
        try:
            self._start_worker()
            self._start_time = time.time()
            self.step_itr = (self.train_args.start_epoch *
                             self.train_args.n_epoch_cycles)
            self.step_path = None

            for epoch in range(self.train_args.start_epoch,
                               self.train_args.n_epochs):
                self._itr_start_time = time.time()
                with logger.prefix('epoch #%d | ' % epoch):
                    yield epoch
                    save_path = (self.step_path
                                 if self.train_args.store_paths else None)
                    print("save_path:", save_path)
                    self.save(epoch, save_path)
                    self.log_diagnostics(self.train_args.pause_for_plot)
                    logger.dump_all(self.step_itr)
                    tabular.clear()
        finally:
            self._shutdown_worker()
Example #6
    def step_epochs(self):
        """Generator for training.

        This function serves as a generator. It is used to separate
        services such as snapshotting, sampler control from the actual
        training loop. It is used inside train() in each algorithm.

        The generator initializes two variables: `self.step_itr` and
        `self.step_path`. To use the generator, these two have to be
        updated manually in each epoch, as the example shows below.

        Yields:
            int: The next training epoch.

        Examples:
            for epoch in runner.step_epochs():
                runner.step_path = runner.obtain_samples(...)
                self.train_once(...)
                runner.step_itr += 1

        """
        try:
            self._start_worker()
            self._start_time = time.time()
            self.step_itr = (self.train_args.start_epoch *
                             self.train_args.n_epoch_cycles)
            self.step_path = None

            for epoch in range(self.train_args.start_epoch,
                               self.train_args.n_epochs):
                self._itr_start_time = time.time()
                with logger.prefix('epoch #%d | ' % epoch):
                    yield epoch
                    save_path = (self.step_path
                                 if self.train_args.store_paths else None)
                    self.save(epoch, save_path)
                    self.log_diagnostics(self.train_args.pause_for_plot)
                    logger.dump_all(self.step_itr)
                    tabular.clear()
        finally:
            self._shutdown_worker()
Example #7
"""This example demonstrates how to log a simple progress metric using dowel.

The metric is simultaneously sent to the screen, a CSV file, a text log file
and TensorBoard.
"""
import time

import dowel
from dowel import logger, tabular

logger.add_output(dowel.StdOutput())
logger.add_output(dowel.CsvOutput('progress.csv'))
logger.add_output(dowel.TextOutput('progress.txt'))
logger.add_output(dowel.TensorBoardOutput('tensorboard_logdir'))

logger.log('Starting up...')
for i in range(1000):
    logger.push_prefix('itr {}: '.format(i))
    logger.log('Running training step')

    time.sleep(0.01)  # Tensorboard doesn't like output to be too fast.

    tabular.record('itr', i)
    tabular.record('loss', 100.0 / (2 + i))
    logger.log(tabular)

    logger.pop_prefix()
    logger.dump_all()

logger.remove_all()
Example #8
def main(args):
    import dowel
    from dowel import logger, tabular
    training.utility.set_up_logging()
    stages = {'500k': 'model.ckpt-2502500'}
    # stages = {'1000k': 'model.ckpt-5005000'}
    num_traj = 10
    # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500'}
    # stages = {'1M': 'model.ckpt-5005000'}
    # stages = {'final': 'model.ckpt-2652650'}
    # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500', '1M': 'model.ckpt-5005000'}
    # stages = {'100k': 'model.ckpt-600500', '500k': 'model.ckpt-3002500',
    #           'final':'model.ckpt-3182650'}
    # methods = ['weighted_100']
    # methods = ['aug7']
    methods = ['baseline3']
    # rival_method = 'baseline3'
    rival_method = 'aug7'
    rival_runs = 5
    base_dir = 'benchmark'
    envs = ['finger_spin']
    # envs = ['cartpole_swingup']
    # envs = ['finger_spin', 'cartpole_swingup','cheetah_run', 'cup_catch']
    # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run']
    # envs = ['cartpole_swingup', 'cheetah_run', 'walker_walk', 'cup_catch']
    # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run', 'walker_walk', 'cup_catch']
    if not check_finish(base_dir, stages, methods, envs, args.num_runs):
        exit()

    for pref, chkpt in stages.items():
        print(pref, 'begin')
        logger.add_output(dowel.StdOutput())
        logger.add_output(dowel.CsvOutput('benchmark_{}.csv'.format(pref)))
        for env in envs:
            tabular.record('Env', env)
            for method in methods:
                for id in range(rival_runs):
                    means, stds, all_scores = [], [], []
                    with args.params.unlocked:
                        args.params.chkpt = chkpt
                        args.params.tasks = [env]
                        args.params.planner_horizon = 12
                        args.params.eval_ratio = 1 / num_traj
                        # args.params.r_loss = 'contra'
                        # args.params.aug = 'rad'
                        args.params.planner = 'dual2'
                        args.params.rival = '{}/{}/00{}'.format(
                            env, rival_method, id + 1)

                    experiment = training.Experiment(
                        os.path.join(base_dir, env, method),
                        process_fn=functools.partial(process, args=args),
                        num_runs=args.num_runs,
                        ping_every=args.ping_every,
                        resume_runs=args.resume_runs,
                        planner=args.params.planner,
                        task_str=env)
                    for i, run in enumerate(experiment):
                        scores = []
                        # Use a separate index so the run index `i` is not shadowed.
                        for traj_idx, unused_score in enumerate(run):
                            print('unused', unused_score)
                            scores.append(unused_score)
                            if traj_idx == num_traj - 1:
                                break
                        means.append(np.mean(scores))
                        stds.append(np.std(scores))
                        all_scores.append(scores)
                        print(means)
                        # if args.params.planner != 'cem':
                        #     exit()
                        if args.params.planner == 'cem_eval':
                            np.save(
                                os.path.join(
                                    args.logdir, env, method,
                                    '00{}/scores_{}_cem.npy'.format(i, pref)),
                                np.array(all_scores))
                    mean, std = np.mean(means), np.std(means)
                    print('{}    {}+/-{}'.format(method, int(mean), int(std)))
                    if mean > 0:
                        tabular.record(method,
                                       '{}+/-{}'.format(int(mean), int(std)))
                    np.save(
                        os.path.join(args.logdir, env, method,
                                     'scores_{}.npy'.format(pref)),
                        np.array(all_scores))
            logger.log(tabular)
            logger.dump_all()
        logger.remove_all()
Example #9
def main(args):

    if args.output_folder is not None:
        if not os.path.exists(args.output_folder):
            raise ValueError(
                "The folder with the training files does not exist")

    policy_filename = os.path.join(args.output_folder, 'policy.th')
    dynamics_filename = os.path.join(args.output_folder, 'dynamics.th')
    config_filename = os.path.join(args.output_folder, 'config.json')
    # eval_filename = os.path.join(args.output_folder, 'eval.npz')

    text_log_file = os.path.join(args.output_folder, 'test_log.txt')
    tabular_log_file = os.path.join(args.output_folder, 'test_result.csv')

    output_test_folder = os.path.join(args.output_folder, 'test')

    if os.path.exists(output_test_folder):
        shutil.rmtree(output_test_folder)
    os.makedirs(output_test_folder)

    # Set up logger
    logger.add_output(dowel.StdOutput())
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(
        dowel.TensorBoardOutput(output_test_folder, x_axis='Batch'))
    logger.log('Logging to {}'.format(output_test_folder))

    with open(config_filename, 'r') as f:
        config = json.load(f)

    seed = config["seed"] if "seed" in config else args.seed
    if seed is not None:
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        random.seed(args.seed)

    # Metaworld
    if config['env-name'].startswith('Metaworld'):
        env_name = config['env-name'].replace("Metaworld-", "")
        metaworld = __import__('metaworld')
        class_ = getattr(metaworld, env_name)
        metaworld_benchmark = class_()
        for name, env_cls in metaworld_benchmark.train_classes.items():
            env = env_cls()
            env.close()
        benchmark = metaworld_benchmark
    # Other gym envs
    else:
        env_name = config['env-name']
        env = gym.make(config['env-name'], **config.get('env-kwargs', {}))
        env.close()
        benchmark = None

    # Policy
    policy = get_policy_for_env(env,
                                hidden_sizes=config['hidden-sizes'],
                                nonlinearity=config['nonlinearity'])

    with open(policy_filename, 'rb') as f:
        state_dict = torch.load(f, map_location=torch.device(args.device))
        policy.load_state_dict(state_dict)
    policy.share_memory()

    # Dynamics
    dynamics = get_dynamics_for_env(env,
                                    config['use_vime'],
                                    config['use_inv_vime'],
                                    args.device,
                                    config,
                                    benchmark=benchmark)
    inverse_dynamics = config['use_inv_vime']
    use_dynamics = config["use_vime"] or config["use_inv_vime"]

    if use_dynamics:
        with open(dynamics_filename, 'rb') as f:
            state_dict = torch.load(f, map_location=torch.device(args.device))
            dynamics.load_state_dict(state_dict)
        dynamics.share_memory()

    # Eta
    if config['adapt_eta']:
        eta_value = torch.Tensor([config["adapted-eta"]])
    else:
        eta_value = torch.Tensor([config["eta"]])
    eta_value = torch.log(eta_value / (1 - eta_value))
    eta = EtaParameter(eta_value, adapt_eta=config['adapt_eta'])
    eta.share_memory()

    # Baseline
    baseline = LinearFeatureBaseline(get_input_size(env))

    # Observation/action normalization statistics
    normalize_spaces = config.get("normalize-spaces", True)
    act_prev_mean = mp.Manager().list()
    obs_prev_mean = mp.Manager().list()

    # Sampler
    if normalize_spaces:
        obs_prev_mean.append({
            "mean": torch.Tensor(config["obs_mean"]),
            "std": torch.Tensor(config["obs_std"])
        })
        act_prev_mean.append({
            "mean": torch.Tensor(config["act_mean"]),
            "std": torch.Tensor(config["act_std"])
        })

    epochs_counter = mp.Value('i', 100)

    sampler = MultiTaskSampler(
        config['env-name'],
        env_kwargs=config.get('env-kwargs', {}),
        batch_size=config['fast-batch-size'],  # TODO
        policy=policy,
        baseline=baseline,
        dynamics=dynamics,
        inverse_dynamics=inverse_dynamics,
        env=env,
        seed=args.seed,
        num_workers=args.num_workers,
        epochs_counter=epochs_counter,
        act_prev_mean=act_prev_mean,
        obs_prev_mean=obs_prev_mean,
        # rew_prev_mean=rew_prev_mean,
        eta=eta,
        benchmark=benchmark,
        normalize_spaces=normalize_spaces)

    logs = {'tasks': []}
    train_returns, valid_returns = [], []
    for batch in trange(args.num_batches):
        tasks = sampler.sample_test_tasks(num_tasks=config['meta-batch-size'])
        train_episodes, valid_episodes = sampler.sample(
            tasks,
            num_steps=args.num_steps,
            fast_lr=config['fast-lr'],
            gamma=config['gamma'],
            gae_lambda=config['gae-lambda'],
            device=args.device)

        logs['tasks'].extend(tasks)
        train_returns.append(get_returns(train_episodes[0]))
        valid_returns.append(get_returns(valid_episodes))

        logs['train_returns'] = np.concatenate(train_returns, axis=0)
        logs['valid_returns'] = np.concatenate(valid_returns, axis=0)

        tabular.record("Batch", batch)

        log_returns(train_episodes,
                    valid_episodes,
                    batch,
                    log_dynamics=use_dynamics,
                    benchmark=benchmark,
                    env=env,
                    env_name=env_name,
                    is_testing=True)
        log_trajectories(config['env-name'], output_test_folder,
                         train_episodes, valid_episodes, batch)

        logger.log(tabular)

        logger.dump_all()

        # with open(eval_filename + "_" + str(batch), 'wb') as f:
        #     np.savez(f, **logs)

    logger.remove_all()
Example #10
    def train(self,
              n_epochs,
              batch_size=None,
              plot=False,
              store_episodes=False,
              pause_for_plot=False):
        """Start training.

        Args:
            n_epochs (int): Number of epochs.
            batch_size (int or None): Number of environment steps in one batch.
            plot (bool): Visualize an episode from the policy after each epoch.
            store_episodes (bool): Save episodes in snapshot.
            pause_for_plot (bool): Pause for plot.

        Raises:
            NotSetupError: If train() is called before setup().

        Returns:
            float: The average return in the last epoch cycle.

        """
        self.batch_size = batch_size
        self.store_episodes = store_episodes
        self.pause_for_plot = pause_for_plot
        if not self._has_setup:
            raise NotSetupError(
                'Use setup() to setup trainer before training.')

        self._plot = plot

        returns = []
        for itr in range(self.start_itr, self.n_itr):
            with logger.prefix(f'itr #{itr} | '):

                # train policy
                self._algo.train(self)

                # compute irl and update reward function
                logger.log('Obtaining paths...')
                paths = self.obtain_samples(itr)
                logger.log('Processing paths...')
                paths = self._train_irl(paths, itr=itr)
                samples_data = self.process_samples(itr, paths)

                logger.log('Logging diagnostics...')
                logger.log('Time %.2f s' % (time.time() - self._start_time))
                logger.log('EpochTime %.2f s' %
                           (time.time() - self._itr_start_time))
                tabular.record('TotalEnvSteps', self._stats.total_env_steps)
                self.log_diagnostics(paths)
                logger.log('Optimizing policy...')

                logger.log('Saving snapshot...')
                self.save(itr, paths=paths)
                logger.log('Saved')
                tabular.record('Time', time.time() - self._start_time)
                tabular.record('ItrTime', time.time() - self._itr_start_time)
                logger.dump_all(self.step_itr)
                tabular.clear()

        self._shutdown_worker()

        return
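Every example above ends a training step with the same dowel sequence: record values into tabular, pass the table to logger, flush all attached outputs with dump_all, then clear the table. A distilled sketch of that recurring pattern, using only a standard-output sink and illustrative values:

import dowel
from dowel import logger, tabular

logger.add_output(dowel.StdOutput())  # CSV, text or TensorBoard sinks attach the same way

for step in range(5):
    tabular.record('StepNum', step)
    tabular.record('loss', 1.0 / (1 + step))
    logger.log(tabular)    # queue the current table for output
    logger.dump_all(step)  # flush every attached output at this step
    tabular.clear()        # start the next step with an empty table

logger.remove_all()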