Example no. 1
class TrainingLogger(object):
    def __init__(self, log_dir):
        self._writer = SummaryWriter(log_dir)
        self._metrics = defaultdict(tf.metrics.Mean)

    def __getitem__(self, item):
        return self._metrics[item]

    def __setitem__(self, key, value):
        self._metrics[key] = value

    def log_evaluation_summary(self, summary, step):
        for k, v in summary.items():
            self._writer.add_scalar(k, float(v), step)
        self._writer.flush()

    def log_metrics(self, step):
        print("Training step {} summary:".format(step))
        for k, v in self._metrics.items():
            print("{:<40} {:<.2f}".format(k, float(v.result())))
            self._writer.add_scalar(k, float(v.result()), step)
            v.reset_states()
        self._writer.flush()

    def log_video(self, images, step):
        video = np.expand_dims(np.transpose(images, [0, 3, 1, 2]), axis=0)
        self._writer.add_video('Evaluation policy', video, step)
        self._writer.flush()
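
A minimal usage sketch, assuming the imports the class depends on (`defaultdict` from `collections`, TensorFlow 2.x for `tf.metrics.Mean`, and tensorboardX's `SummaryWriter`); names and values below are illustrative:

import numpy as np

train_logger = TrainingLogger('./logs')
train_logger['loss'](0.42)  # each call feeds the running tf.metrics.Mean
train_logger['loss'](0.38)
train_logger.log_metrics(step=100)  # prints, writes scalars, resets the means
frames = np.random.randint(0, 256, size=(16, 64, 64, 3), dtype=np.uint8)
train_logger.log_video(frames, step=100)  # [T, H, W, C] -> [1, T, C, H, W]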
Example no. 2
def test_tensorboardX(run_manager):
    wandb.tensorboard.patch(tensorboardX=True)

    fig = plt.figure()
    c1 = plt.Circle((0.2, 0.5), 0.2, color='r')

    ax = plt.gca()
    ax.add_patch(c1)
    plt.axis('scaled')

    writer = SummaryWriter()
    writer.add_figure('matplotlib', fig, 0)
    writer.add_video('video', np.random.random(size=(1, 5, 3, 28, 28)), 0)
    writer.add_scalars('data/scalar_group', {'foo': 10, 'bar': 100}, 1)
    writer.close()
    run_manager.test_shutdown()
    rows = run_manager.run.history.rows
    events = []
    for root, dirs, files in os.walk(run_manager.run.dir):
        print("ROOT", root, files)
        for file in files:
            if "tfevent" in file:
                events.append(file)
    assert rows[0]["matplotlib"]['width'] == 640
    assert rows[0]["matplotlib"]['height'] == 480
    assert rows[0]["matplotlib"]['_type'] == 'images'
    assert rows[0]["video"]['_type'] == 'videos'
    assert rows[1]["data/scalar_group/foo"] == 10
    assert rows[1]["data/scalar_group/bar"] == 100
    assert len(events) == 3
Example no. 3
class TrainingLogger:
    """
    Copy-pasted from 'Berkely CS285'
    (https://github.com/yardenas/berkeley-deep-rl/tree/f741338c085ee5b329f3c9dd05e93e89bc43574a)
    and used for dumping statistics to to TensorBoard readable file.
    """
    def __init__(self, log_dir, fps):
        self._log_dir = log_dir
        self.fps = fps
        logger.info('Logging training data to: ' + log_dir)
        self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1)

    def log_scalar(self, scalar, name, step):
        self._summ_writer.add_scalar('{}'.format(name), scalar, step)

    def log_scalars(self, scalar_dict, group_name, step):
        """Will log all scalars in the same plot."""
        self._summ_writer.add_scalars('{}'.format(group_name), scalar_dict,
                                      step)

    def log_image(self, image, name, step):
        assert (len(image.shape) == 3)  # [C, H, W]
        self._summ_writer.add_image('{}'.format(name), image, step)

    def log_video(self, video_frames, name, step):
        assert len(
            video_frames.shape
        ) == 5, "Need [N, T, C, H, W] input tensor for video logging!"
        self._summ_writer.add_video('{}'.format(name),
                                    video_frames,
                                    step,
                                    fps=self.fps)

    def log_figures(self, figure, name, step):
        """figure: matplotlib.pyplot figure handle"""
        assert figure.shape[
            0] > 0, "Figure logging requires input shape [batch x figures]!"
        self._summ_writer.add_figure('{}'.format(name), figure, step)

    def log_figure(self, figure, name, step):
        """figure: matplotlib.pyplot figure handle"""
        self._summ_writer.add_figure('{}'.format(name), figure, step)

    def log_graph(self, graph, step, phase):
        """graph: computational graph to add (step and phase are unused)."""
        self._summ_writer.add_graph(graph)

    def log_histogram(self, data, name, step):
        self._summ_writer.add_histogram(name, data, step)

    def dump_scalars(self, log_path=None):
        log_path = os.path.join(
            self._log_dir,
            "scalar_data.json") if log_path is None else log_path
        self._summ_writer.export_scalars_to_json(log_path)

    def flush(self):
        self._summ_writer.flush()
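
A hedged driver sketch for this logger (it assumes the module-level `logger`, plus the `os`, `numpy`, and tensorboardX imports the class relies on; paths and values are illustrative):

import numpy as np

train_logger = TrainingLogger('./logs', fps=10)
for step in range(3):
    train_logger.log_scalar(0.9 ** step, 'train/loss', step)
video = np.random.rand(2, 16, 3, 64, 64)  # [N, T, C, H, W], floats in [0, 1]
train_logger.log_video(video, 'rollout', step=2)
train_logger.dump_scalars()  # writes ./logs/scalar_data.json
train_logger.flush()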
Example no. 4
import torch
from clearml import Task
from tensorboardX import SummaryWriter


def main():

    # Connecting ClearML with the current process,
    # from here on everything is logged automatically
    task = Task.init(project_name="examples",
                     task_name="pytorch with video tensorboardX")

    writer = SummaryWriter("runs")
    writer.add_text("TEXT", "This is some text", 0)

    # Make a video that simply fades grey colors
    video = (torch.sin(torch.arange(0, 1000) / 100) + 1) / 2 * 255
    video = video.byte().view(1, -1, 1, 1, 1).expand(1, -1, 3, 64, 64)

    writer.add_video("my_video", video, 0, fps=50)
    writer.close()
Example no. 5
class TBXLoggerCallback(TrainingSingleWorkerLoggingCallback):
    """Logs Train results in TensorboardX format.

    Args:
        logdir (Optional[str]): Path to directory where the results file
            should be. If None, will be set by the Trainer.
        worker_to_log (int): Worker index to log. By default, will log the
            worker with index 0.
    """

    VALID_SUMMARY_TYPES: Tuple[type, ...] = (int, float, np.float32,
                                             np.float64, np.int32, np.int64)
    IGNORE_KEYS: Set[str] = {PID, TIMESTAMP, TIME_TOTAL_S, TRAINING_ITERATION}

    def start_training(self, logdir: str, **info):
        super().start_training(logdir)

        try:
            from tensorboardX import SummaryWriter
        except ImportError:
            if log_once("tbx-install"):
                warnings.warn(
                    "pip install 'tensorboardX' to see TensorBoard files.")
            raise

        self._file_writer = SummaryWriter(self.logdir, flush_secs=30)

    def handle_result(self, results: List[Dict], **info):
        result = results[self._workers_to_log]
        step = result[TRAINING_ITERATION]
        result = {k: v for k, v in result.items() if k not in self.IGNORE_KEYS}
        flat_result = flatten_dict(result, delimiter="/")
        path = ["ray", "train"]

        # same logic as in ray.tune.logger.TBXLogger
        for attr, value in flat_result.items():
            full_attr = "/".join(path + [attr])
            if (isinstance(value, self.VALID_SUMMARY_TYPES)
                    and not np.isnan(value)):
                self._file_writer.add_scalar(full_attr,
                                             value,
                                             global_step=step)
            elif ((isinstance(value, list) and len(value) > 0)
                  or (isinstance(value, np.ndarray) and value.size > 0)):

                # Must be video
                if isinstance(value, np.ndarray) and value.ndim == 5:
                    self._file_writer.add_video(full_attr,
                                                value,
                                                global_step=step,
                                                fps=20)
                    continue

                try:
                    self._file_writer.add_histogram(full_attr,
                                                    value,
                                                    global_step=step)
                # In case TensorboardX still doesn't think it's a valid value
                # (e.g. `[[]]`), warn and move on.
                except (ValueError, TypeError):
                    if log_once("invalid_tbx_value"):
                        warnings.warn(
                            "You are trying to log an invalid value ({}={}) "
                            "via {}!".format(full_attr, value,
                                             type(self).__name__))
        self._file_writer.flush()

    def finish_training(self, error: bool = False, **info):
        self._file_writer.close()
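
This callback (like the TBXLogger further below) leans on `flatten_dict` to turn nested result dicts into slash-delimited scalar tags. A minimal stand-in with the same observable behavior (the real helper ships with Ray; this sketch is only illustrative):

def flatten_dict(d, delimiter="/", prefix=""):
    """Flatten {"a": {"b": 1}} into {"a/b": 1}."""
    out = {}
    for k, v in d.items():
        key = prefix + delimiter + k if prefix else k
        if isinstance(v, dict):
            out.update(flatten_dict(v, delimiter, key))
        else:
            out[key] = v
    return out

assert flatten_dict({"a": {"b": 1, "c": 2}}) == {"a/b": 1, "a/c": 2}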
Example no. 6
import wandb
import numpy as np
from tensorboardX import SummaryWriter

wandb.init(tensorboard=True)

writer = SummaryWriter()

writer.add_video("video", np.random.random(size=(1, 5, 3, 28, 28)))

wandb.log({"acc": 1})
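
tensorboardX's `add_video` expects a 5-D tensor shaped (N, T, C, H, W), so the random array above is one 5-frame, 3-channel, 28x28 clip. A hedged variant of the same call that also pins the step and frame rate (both optional parameters of `add_video`):

writer.add_video("video", np.random.random(size=(1, 5, 3, 28, 28)),
                 global_step=0, fps=4)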
Example no. 7
class BaseLogger(object):
    def __init__(self, args, dataset_len, pixel_dict):
        def round_down(x, m):
            """Round x down to a multiple of m."""
            return int(m * round(float(x) / m))

        self.args = args
        self.batch_size = args.batch_size
        self.dataset_len = dataset_len
        self.device = args.device
        self.img_format = args.img_format
        self.save_dir = args.save_dir if args.is_training else args.results_dir
        self.do_classify = args.do_classify
        self.num_visuals = args.num_visuals
        self.log_path = os.path.join(self.save_dir, '{}.log'.format(args.name))
        log_dir = os.path.join(
            'logs', args.name + '_' + datetime.now().strftime('%b%d_%H%M'))
        self.summary_writer = SummaryWriter(log_dir=log_dir)

        self.epoch = args.start_epoch
        # Current iteration in epoch (i.e., # examples seen in the current epoch)
        self.iter = 0
        # Current iteration overall (i.e., total # of examples seen)
        self.global_step = round_down((self.epoch - 1) * dataset_len,
                                      args.batch_size)
        self.iter_start_time = None
        self.epoch_start_time = None
        self.pixel_dict = pixel_dict

    def _log_scalars(self, scalar_dict, print_to_stdout=True):
        """Log all values in a dict as scalars to TensorBoard."""
        for k, v in scalar_dict.items():
            if print_to_stdout:
                self.write('[{}: {:.3g}]'.format(k, v))
            k = k.replace('_', '/')  # Group in TensorBoard by phase
            self.summary_writer.add_scalar(k, v, self.global_step)

    def _plot_curves(self, curves_dict):
        """Plot all curves in a dict as RGB images to TensorBoard."""
        for name, curve in curves_dict.items():
            fig = plt.figure()
            ax = plt.gca()

            plot_type = name.split('_')[-1]
            ax.set_title(plot_type)
            if plot_type == 'PRC':
                precision, recall, _ = curve
                ax.step(recall, precision, color='b', alpha=0.2, where='post')
                ax.fill_between(recall,
                                precision,
                                step='post',
                                alpha=0.2,
                                color='b')
                ax.set_xlabel('Recall')
                ax.set_ylabel('Precision')
            elif plot_type == 'ROC':
                false_positive_rate, true_positive_rate, _ = curve
                ax.plot(false_positive_rate, true_positive_rate, color='b')
                ax.plot([0, 1], [0, 1], 'r--')
                ax.set_xlabel('False Positive Rate')
                ax.set_ylabel('True Positive Rate')
            else:
                ax.plot(curve[0], curve[1], color='b')

            ax.set_ylim([0.0, 1.05])
            ax.set_xlim([0.0, 1.0])

            fig.canvas.draw()

            # tostring_rgb() yields bytes in row-major [H, W, C] order
            curve_img = np.frombuffer(fig.canvas.tostring_rgb(),
                                      dtype=np.uint8)
            curve_img = curve_img.reshape(
                fig.canvas.get_width_height()[::-1] + (3, ))
            curve_img = curve_img.transpose((2, 0, 1))  # [C, H, W] for add_image
            self.summary_writer.add_image(name.replace('_', '/'),
                                          curve_img,
                                          global_step=self.global_step)

    def visualize(self,
                  inputs,
                  cls_logits,
                  targets_dict,
                  phase,
                  unique_id=None):
        """Visualize predictions and targets in TensorBoard.

        Args:
            inputs: Inputs to the model.
            cls_logits: Classification logits predicted by the model.
            targets_dict: Dictionary of information about the target labels.
            phase: One of 'train', 'val', or 'test'.
            unique_id: A unique ID to append to every image title. Allows
              for displaying all visualizations separately on TensorBoard.

        Returns:
            Number of examples visualized to TensorBoard.
        """

        if self.pixel_dict is None:
            # Set pixel_dict to None to bypass visualization
            return 0

        cls_logits = cls_logits.detach().to('cpu')

        cls_probs = torch.sigmoid(cls_logits).numpy()

        is_3d = inputs.dim() > 4
        num_visualized = 0
        for i in range(self.num_visuals):
            if i >= inputs.shape[0]:
                break

            input_np = util.un_normalize(inputs[i], self.img_format,
                                         self.pixel_dict)
            input_np = input_np.astype(np.float32) / 255.

            mask_np = None
            output_np = None

            label = 'abnormal' if targets_dict['is_abnormal'][i] else 'normal'
            visuals_np = input_np
            title = 'input'

            tag = '{}/{}/{}_{}_{:.4f}'.format(phase, title, label,
                                              targets_dict['dset_path'][i],
                                              cls_probs[i][0])
            if unique_id is not None:
                tag += '_{}'.format(unique_id)

            # Reshaping to B, C, T, H, W
            visuals_np = np.expand_dims(visuals_np, 0)
            if is_3d:
                self.summary_writer.add_video(tag, visuals_np,
                                              self.global_step)
            else:
                self.summary_writer.add_image(tag, visuals_np,
                                              self.global_step)

            num_visualized += 1

        return num_visualized

    def write(self, message, print_to_stdout=True):
        """Write a message to the log. If print_to_stdout is True, also print to stdout."""
        with open(self.log_path, 'a') as log_file:
            log_file.write(message + '\n')
        if print_to_stdout:
            print(message)

    def start_iter(self):
        """Log info for start of an iteration."""
        raise NotImplementedError

    def end_iter(self):
        """Log info for end of an iteration."""
        raise NotImplementedError

    def start_epoch(self):
        """Log info for start of an epoch."""
        raise NotImplementedError

    def end_epoch(self, metrics, curves):
        """Log info for end of an epoch. Save model parameters and update learning rate."""
        raise NotImplementedError
Example no. 8
class Logger(object):
    def __init__(self, log_dir, use_tb=True, config='rl'):
        self._log_dir = log_dir
        if use_tb:
            tb_dir = os.path.join(log_dir, 'tb')
            if os.path.exists(tb_dir):
                shutil.rmtree(tb_dir)
            self._sw = SummaryWriter(tb_dir)
        else:
            self._sw = None
        self._train_mg = MetersGroup(
            os.path.join(log_dir, 'train.log'),
            formating=FORMAT_CONFIG[config]['train']
        )
        self._eval_mg = MetersGroup(
            os.path.join(log_dir, 'eval.log'),
            formating=FORMAT_CONFIG[config]['eval']
        )

    def _try_sw_log(self, key, value, step):
        if self._sw is not None:
            self._sw.add_scalar(key, value, step)

    def _try_sw_log_image(self, key, image, step):
        if self._sw is not None:
            assert image.dim() == 3
            grid = torchvision.utils.make_grid(image.unsqueeze(1))
            self._sw.add_image(key, grid, step)

    def _try_sw_log_video(self, key, frames, step):
        if self._sw is not None:
            frames = torch.from_numpy(np.array(frames))
            frames = frames.unsqueeze(0)
            self._sw.add_video(key, frames, step, fps=30)

    def _try_sw_log_histogram(self, key, histogram, step):
        if self._sw is not None:
            self._sw.add_histogram(key, histogram, step)

    def log(self, key, value, step, n=1):
        assert key.startswith('train') or key.startswith('eval')
        if type(value) == torch.Tensor:
            value = value.item()
        self._try_sw_log(key, value / n, step)
        mg = self._train_mg if key.startswith('train') else self._eval_mg
        mg.log(key, value, n)

    def log_param(self, key, param, step):
        self.log_histogram(key + '_w', param.weight.data, step)
        if hasattr(param.weight, 'grad') and param.weight.grad is not None:
            self.log_histogram(key + '_w_g', param.weight.grad.data, step)
        if hasattr(param, 'bias') and param.bias is not None:
            self.log_histogram(key + '_b', param.bias.data, step)
            if hasattr(param.bias, 'grad') and param.bias.grad is not None:
                self.log_histogram(key + '_b_g', param.bias.grad.data, step)

    def log_image(self, key, image, step):
        assert key.startswith('train') or key.startswith('eval')
        self._try_sw_log_image(key, image, step)

    def log_video(self, key, frames, step):
        assert key.startswith('train') or key.startswith('eval')
        self._try_sw_log_video(key, frames, step)

    def log_histogram(self, key, histogram, step):
        assert key.startswith('train') or key.startswith('eval')
        self._try_sw_log_histogram(key, histogram, step)

    def dump(self, step):
        self._train_mg.dump(step, 'train')
        self._eval_mg.dump(step, 'eval')
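
A hedged usage sketch (it assumes the project-specific `MetersGroup` and `FORMAT_CONFIG` helpers are importable; keys must carry a 'train' or 'eval' prefix to satisfy the assertions):

train_logger = Logger('./exp', use_tb=True)
train_logger.log('train/actor_loss', 0.7, step=10)
train_logger.log('eval/episode_reward', 123.0, step=10)
train_logger.dump(step=10)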
Example no. 9
class TBXLogger(Logger):
    """TensorBoardX Logger.

    Note that hparams will be written only after a trial has terminated.
    This logger automatically flattens nested dicts to show on TensorBoard:

        {"a": {"b": 1, "c": 2}} -> {"a/b": 1, "a/c": 2}
    """

    VALID_HPARAMS = (str, bool, np.bool_, int, np.integer, float, list,
                     type(None))

    def _init(self):
        try:
            from tensorboardX import SummaryWriter
        except ImportError:
            if log_once("tbx-install"):
                logger.info(
                    "pip install 'ray[tune]' to see TensorBoard files.")
            raise
        self._file_writer = SummaryWriter(self.logdir, flush_secs=30)
        self.last_result = None

    def on_result(self, result: Dict):
        step = result.get(TIMESTEPS_TOTAL) or result[TRAINING_ITERATION]

        tmp = result.copy()
        for k in [
                "config", "pid", "timestamp", TIME_TOTAL_S, TRAINING_ITERATION
        ]:
            if k in tmp:
                del tmp[k]  # not useful to log these

        flat_result = flatten_dict(tmp, delimiter="/")
        path = ["ray", "tune"]
        valid_result = {}

        for attr, value in flat_result.items():
            full_attr = "/".join(path + [attr])
            if (isinstance(value, tuple(VALID_SUMMARY_TYPES))
                    and not np.isnan(value)):
                valid_result[full_attr] = value
                self._file_writer.add_scalar(full_attr,
                                             value,
                                             global_step=step)
            elif ((isinstance(value, list) and len(value) > 0)
                  or (isinstance(value, np.ndarray) and value.size > 0)):
                valid_result[full_attr] = value

                # Must be video
                if isinstance(value, np.ndarray) and value.ndim == 5:
                    self._file_writer.add_video(full_attr,
                                                value,
                                                global_step=step,
                                                fps=20)
                    continue

                try:
                    self._file_writer.add_histogram(full_attr,
                                                    value,
                                                    global_step=step)
                # In case TensorboardX still doesn't think it's a valid value
                # (e.g. `[[]]`), warn and move on.
                except (ValueError, TypeError):
                    if log_once("invalid_tbx_value"):
                        logger.warning(
                            "You are trying to log an invalid value ({}={}) "
                            "via {}!".format(full_attr, value,
                                             type(self).__name__))

        self.last_result = valid_result
        self._file_writer.flush()

    def flush(self):
        if self._file_writer is not None:
            self._file_writer.flush()

    def close(self):
        if self._file_writer is not None:
            if self.trial and self.trial.evaluated_params and self.last_result:
                flat_result = flatten_dict(self.last_result, delimiter="/")
                scrubbed_result = {
                    k: value
                    for k, value in flat_result.items()
                    if isinstance(value, tuple(VALID_SUMMARY_TYPES))
                }
                self._try_log_hparams(scrubbed_result)
            self._file_writer.close()

    def _try_log_hparams(self, result):
        # TBX currently errors if the hparams value is None.
        flat_params = flatten_dict(self.trial.evaluated_params)
        scrubbed_params = {
            k: v
            for k, v in flat_params.items()
            if isinstance(v, self.VALID_HPARAMS)
        }

        removed = {
            k: v
            for k, v in flat_params.items()
            if not isinstance(v, self.VALID_HPARAMS)
        }
        if removed:
            logger.info(
                "Removed the following hyperparameter values when "
                "logging to tensorboard: %s", str(removed))

        from tensorboardX.summary import hparams
        try:
            experiment_tag, session_start_tag, session_end_tag = hparams(
                hparam_dict=scrubbed_params, metric_dict=result)
            self._file_writer.file_writer.add_summary(experiment_tag)
            self._file_writer.file_writer.add_summary(session_start_tag)
            self._file_writer.file_writer.add_summary(session_end_tag)
        except Exception:
            logger.exception("TensorboardX failed to log hparams. "
                             "This may be due to an unsupported type "
                             "in the hyperparameter values.")
Example no. 10
class BaseAgent:
    def __init__(self,
                 ENV,
                 logdir_root='logs',
                 n_experience_episodes=1,
                 gamma=0.999,
                 epochs=1,
                 lr=0.001,
                 hidden_layer_neurons=128,
                 EPISODES=2000,
                 eval_period=50,
                 algorithm='REINFORCE',
                 noise=1.0,
                 gif_to_board=False,
                 fps=50,
                 batch_size=128,
                 LOSS_CLIPPING=LOSS_CLIPPING,
                 ENTROPY_LOSS=ENTROPY_LOSS):
        self.LOSS_CLIPPING = LOSS_CLIPPING
        self.ENTROPY_LOSS = ENTROPY_LOSS
        self.hidden_layer_neurons = hidden_layer_neurons
        self.batch_size = batch_size
        self.fps = fps
        self.gif_to_board = gif_to_board
        self.noise = noise
        self.last_eval = 0
        self.best_return = -np.inf
        self.eval_period = eval_period
        self.writer = None
        self.epsilon = 1e-12
        self.logdir_root = logdir_root
        self.EPISODES = EPISODES
        self.n_experience_episodes = n_experience_episodes
        self.episode = 0
        self.gamma = gamma
        self.epochs = epochs
        self.lr = lr
        self.logdir = self.get_log_name(ENV, algorithm, logdir_root)
        self.env = gym.make(ENV)

        if type(self.env.action_space) != gym.spaces.box.Box:
            self.nA = self.env.action_space.n
        else:
            print('Warning: the action space is continuous')
            self.nA = self.env.action_space.shape[0]
            self.logdir = self.logdir + '_' + str(self.noise)

        if type(self.env.observation_space) == gym.spaces.box.Box:
            self.nS = self.env.observation_space.shape[0]
        else:
            print('Warning: the observation space is not continuous')
        self.model_train, self.model_predict = self.get_policy_model(
            lr=lr,
            hidden_layer_neurons=hidden_layer_neurons,
            input_shape=[self.nS],
            output_shape=self.nA)

        state_space_samples = np.array(
            [self.env.observation_space.sample() for x in range(10000)])
        self.scaler = sklearn.preprocessing.StandardScaler()
        self.scaler.fit(state_space_samples)

        self.reset_env()

    def get_policy_model(self,
                         lr=0.001,
                         hidden_layer_neurons=128,
                         input_shape=[4],
                         output_shape=2):
        pass

    def get_log_name(self, ENV, algorithm, logdir_root):
        name = logdir_root + '/'
        name += ENV + '/' + algorithm + '/'
        name += str(self.n_experience_episodes) + '_'
        name += str(self.epochs) + '_'
        name += str(self.batch_size) + '_'
        name += str(self.gamma) + '_'
        name += str(self.lr) + '_' + str(int(time()))
        return name

    def reset_env(self):
        # Increment the episode count
        self.episode += 1
        # Observe the first state
        self.observation = self.env.reset()
        # Reset the list of rewards
        self.reward = []

    def get_experience_episodes(self, return_ts=False):
        # Before calling this function, make sure the env has been reset
        last_observations = []
        observations = []
        observations_list = []
        actions = []
        actions_list = []
        predictions = []
        predictions_list = []
        rewards = []
        rewards_list = []
        discounted_rewards = []
        episodes_returns = []
        episodes_lenghts = []
        time_steps = []
        time_steps_list = []
        exp_episodes = 0
        ts_count = 0
        # Play n_experience_episodes episodes
        while exp_episodes < self.n_experience_episodes:
            # Get an action
            action, action_one_hot, prediction = self.get_action(eval=False)

            # Execute the action
            observation, reward, done, info = self.env.step(action)

            # Store the reward obtained for the action
            self.reward.append(reward)

            # Note that the previous observation is stored
            observations.append(self.observation)

            actions.append(action_one_hot)
            predictions.append(prediction.flatten())
            rewards.append(reward)
            self.observation = observation
            ts_count += 1
            time_steps.append(ts_count)
            if done:
                observations.append(self.observation)
                exp_episodes += 1
                discounted_reward = self.get_discounted_rewards(self.reward)
                discounted_rewards.append(
                    np.array(discounted_reward).reshape(-1, 1))
                rewards_list.append(np.array(rewards).reshape(-1, 1))
                observations_list.append(np.array(observations))
                actions_list.append(np.array(actions))
                predictions_list.append(np.array(predictions))
                time_steps_list.append(np.array(time_steps).reshape(-1, 1))
                ep_len = len(discounted_reward)
                episodes_lenghts.append(ep_len)
                episodes_returns = episodes_returns + [discounted_reward[0]]
                last_observations.append(self.observation)
                self.reset_env()
                ts_count = 0
                rewards = []
                observations = []
                actions = []
                predictions = []
                time_steps = []
        if return_ts:
            return observations_list, actions_list, predictions_list, discounted_rewards, rewards_list, np.array(
                episodes_returns), np.array(episodes_lenghts), time_steps_list
        else:
            return observations_list, actions_list, predictions_list, discounted_rewards, rewards_list, np.array(
                episodes_returns), np.array(episodes_lenghts)

    def log_data(self,
                 episode,
                 loss,
                 ep_len_mean,
                 entropy,
                 rv,
                 actor_loss,
                 deltaT,
                 ep_return,
                 critic_loss=None):
        if self.writer is None:
            self.writer = SummaryWriter(self.logdir)
            print(
                f"run from the command line: tensorboard --logdir {self.logdir_root}/"
            )

        print(f'\rEpisode: {episode}', end='')
        self.writer.add_scalar('loss', loss, episode)
        self.writer.add_scalar('episode_len', ep_len_mean, episode)
        if entropy is not None:
            self.writer.add_scalar('entropy', entropy, episode)
        self.writer.add_scalar('running_var', rv, episode)
        self.writer.add_scalar('episode_return', ep_return, episode)
        if actor_loss is not None:
            self.writer.add_scalar('actor_loss', actor_loss, episode)
        self.writer.add_scalar('time', deltaT, episode)
        if critic_loss is not None:
            self.writer.add_scalar('critic_loss', critic_loss, episode)
        if self.episode - self.last_eval >= self.eval_period:
            if self.gif_to_board:
                obs, actions, preds, disc_sum_rews, rewards, ep_returns, ep_len, frames = self.get_eval_episode(
                    return_frames=self.gif_to_board)
            else:
                obs, actions, preds, disc_sum_rews, rewards, ep_returns, ep_len = self.get_eval_episode(
                    return_frames=self.gif_to_board)
            if self.best_return <= ep_returns[-1]:
                self.best_weights = self.model_predict.get_weights()
                self.model_predict.save(self.logdir + '.hdf5')
                print()
                print(
                    f'Model on episode {self.episode - 1} improved from {self.best_return} to {ep_returns[-1]}. Saved!'
                )
                self.best_return = ep_returns[-1]
                if self.gif_to_board:
                    video = frames.reshape((1, ) + frames.shape)
                    gif_name = self.logdir.replace('logs/', '').replace(
                        '/', '_') + '_' + str(self.episode) + '_' + str(
                            int(self.best_return * 100) / 100)
                    self.writer.add_video(gif_name,
                                          np.rollaxis(video, 4, 2),
                                          fps=self.fps)
            else:
                print()
                print(
                    f'Model on episode {self.episode - 1} did not improve ({ep_returns[-1]}). Best saved: {self.best_return}'
                )


#                 print('Loading best_weights model')
#                 self.model_predict.set_weights(self.best_weights)

            self.writer.add_scalar('eval_episode_steps', len(obs),
                                   self.episode)
            self.writer.add_scalar('eval_episode_return', ep_returns[-1],
                                   episode)
            self.last_eval = self.episode
            self.writer.flush()

    def get_eval_episode(self, gif_name=None, fps=50, return_frames=False):
        frames = []
        self.reset_env()
        observations = []
        actions = []
        predictions = []
        rewards = []
        discounted_rewards = []
        episodes_returns = []
        episodes_lenghts = []
        exp_episodes = 0
        if gif_name is not None or return_frames:
            frames.append(self.env.render(mode='rgb_array'))
        while True:
            # Play episodes until a minimum buffer size has been collected
            action, action_one_hot, prediction = self.get_action(eval=True)

            observation, reward, done, info = self.env.step(action)
            self.reward.append(reward)

            # Note that the previous observation is stored
            observations.append(self.observation)
            actions.append(action_one_hot)
            predictions.append(prediction.flatten())
            rewards.append(reward)
            self.observation = observation
            if gif_name is not None or return_frames:
                frames.append(self.env.render(mode='rgb_array'))
            if done:
                exp_episodes += 1
                discounted_reward = self.get_discounted_rewards(self.reward)
                discounted_rewards = np.hstack(
                    [discounted_rewards, discounted_reward])
                ep_len = len(discounted_reward)
                episodes_lenghts.append(ep_len)
                episodes_returns = episodes_returns + [discounted_reward[0]
                                                       ] * ep_len
                self.reset_env()
                if gif_name is not None:
                    clip = mpy.ImageSequenceClip(frames, fps=fps)
                    clip.write_gif(gif_name,
                                   fps=fps,
                                   verbose=False,
                                   logger=None)
                if return_frames:
                    return np.array(observations), np.array(actions), np.array(
                        predictions), np.array(discounted_rewards), np.array(
                            rewards), np.array(episodes_returns), np.array(
                                episodes_lenghts), np.array(frames)
                return np.array(observations), np.array(actions), np.array(
                    predictions), np.array(discounted_rewards), np.array(
                        rewards), np.array(episodes_returns), np.array(
                            episodes_lenghts)
Example no. 11
# Assumes torchvision's MNIST dataset and an open SummaryWriter:
import torch
from torchvision import datasets
from tensorboardX import SummaryWriter

writer = SummaryWriter()

dataset = datasets.MNIST('mnist', train=False, download=True)
images = dataset.test_data[:100].float()
label = dataset.test_labels[:100]
features = images.view(100, 784)
writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))
writer.add_embedding(features, global_step=1, tag='noMetadata')
dataset = datasets.MNIST('mnist', train=True, download=True)
images_train = dataset.train_data[:100].float()
labels_train = dataset.train_labels[:100]
features_train = images_train.view(100, 784)

all_features = torch.cat((features, features_train))
all_labels = torch.cat((label, labels_train))
all_images = torch.cat((images, images_train))
dataset_label = ['test'] * 100 + ['train'] * 100
all_labels = list(zip(all_labels, dataset_label))

writer.add_embedding(all_features,
                     metadata=all_labels,
                     label_img=all_images.unsqueeze(1),
                     metadata_header=['digit', 'dataset'],
                     global_step=2)

# VIDEO
vid_images = dataset.train_data[:16 * 48]
# Shaped B x C x T x H x W here; recent tensorboardX versions expect [N, T, C, H, W]
vid = vid_images.view(16, 1, 48, 28, 28)
writer.add_video('video', vid_tensor=vid)

writer.close()
Example no. 12
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=args.bs,
                                    shuffle=False,
                                    num_workers=args.workers,
                                    pin_memory=True)

        if args.ckpt:
            pass
        else:
            # save graph and clips_order samples
            for data in train_dataloader:
                #tuple_clips, tuple_orders, tuple_clips_random, tuple_orders_random,idx = data
                tuple_clips, tuple_orders, idx = data
                for i in range(args.tl):
                    writer.add_video('train/tuple_clips',
                                     tuple_clips[:, i, :, :, :, :],
                                     i,
                                     fps=8)
                    writer.add_text('train/tuple_orders',
                                    str(tuple_orders[:, i].tolist()), i)
                tuple_clips = tuple_clips.to(device)
                #writer.add_graph(tcg, tuple_clips)
                break
            # save init params at step 0
            for name, param in tcg.named_parameters():
                writer.add_histogram('params/{}'.format(name), param, 0)

        n_data = len(train_dataset)

        torch.backends.cudnn.benchmark = True

        ### loss function, optimizer and scheduler ###
Example no. 13
class Logger:
    def __init__(self, log_dir, n_logged_samples=10, summary_writer=None):
        self._log_dir = log_dir
        self._n_logged_samples = n_logged_samples
        if summary_writer is not None:
            self._summ_writer = summary_writer
        else:
            self._summ_writer = SummaryWriter(log_dir)

    def _loop_batch(self, fn, name, val, *argv, **kwargs):
        """Loops the logging function n times."""
        for log_idx in range(min(self._n_logged_samples, len(val))):
            name_i = os.path.join(name, "_%d" % log_idx)
            fn(name_i, val[log_idx], *argv, **kwargs)

    @staticmethod
    def _check_size(val, size):
        if isinstance(val, torch.Tensor) or isinstance(val, np.ndarray):
            assert len(
                val.shape
            ) == size, "Size of tensor does not fit required size, {} vs {}".format(
                len(val.shape), size)
        elif isinstance(val, list):
            assert len(
                val[0].shape
            ) == size - 1, "Size of list element does not fit required size, {} vs {}".format(
                len(val[0].shape), size - 1)
        else:
            raise NotImplementedError(
                "Input type {} not supported for dimensionality check!".format(
                    type(val)))
        if (val[0].shape[1] > 10000) or (val[0].shape[2] > 10000):
            raise ValueError("Image dimensions exceed 10000 px; too large to log")

    def log_scalar(self, scalar, name, step, phase):
        self._summ_writer.add_scalar('{}_{}'.format(name, phase), scalar, step)

    def log_scalars(self, scalar_dict, group_name, step, phase):
        """Will log all scalars in the same plot."""
        self._summ_writer.add_scalars('{}_{}'.format(group_name, phase),
                                      scalar_dict, step)

    def log_images(self, image, name, step, phase):
        self._check_size(image, 4)  # [N, C, H, W]
        self._loop_batch(self._summ_writer.add_image,
                         '{}_{}'.format(name, phase), image, step)

    def log_video(self, video_frames, name, step, phase):
        assert len(
            video_frames.shape
        ) == 4, "Need [T, C, H, W] input tensor for single video logging!"
        if not isinstance(video_frames, torch.Tensor):
            video_frames = torch.tensor(video_frames)
        video_frames = torch.transpose(video_frames, 0,
                                       1)  # tbX requires [C, T, H, W]
        video_frames = video_frames.unsqueeze(
            0)  # add an extra dimension to get grid of size 1
        self._summ_writer.add_video('{}_{}'.format(name, phase), video_frames,
                                    step)

    def log_videos(self, video_frames, name, step, phase, fps=3):
        assert len(
            video_frames.shape
        ) == 5, "Need [N, T, C, H, W] input tensor for video logging!"
        video_frames = video_frames.unsqueeze(
            1)  # add an extra dimension after batch to get grid of size 1
        self._loop_batch(self._summ_writer.add_video,
                         '{}_{}'.format(name, phase),
                         video_frames,
                         step,
                         fps=fps)

    def log_image(self, images, name, step, phase):
        self._summ_writer.add_image('{}_{}'.format(name, phase), images, step)

    def log_image_grid(self, images, name, step, phase, nrow=8):
        assert len(
            images.shape
        ) == 4, "Image grid logging requires input shape [batch, C, H, W]!"
        img_grid = torchvision.utils.make_grid(images, nrow=nrow)
        self.log_image(img_grid, name, step, phase)

    def log_video_grid(self, video_frames, name, step, phase, fps=3):
        assert len(
            video_frames.shape
        ) == 5, "Need [N, T, C, H, W] input tensor for video logging!"
        self._summ_writer.add_video('{}_{}'.format(name, phase),
                                    video_frames,
                                    step,
                                    fps=fps)

    def log_figures(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        assert figure.shape[
            0] > 0, "Figure logging requires input shape [batch x figures]!"
        self._loop_batch(self._summ_writer.add_figure,
                         '{}_{}'.format(name, phase), figure, step)

    def log_figure(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)

    def log_graph(self, array, name, step, phase):
        """array: data rendered to an image via plot_graph and logged."""
        im = plot_graph(array)
        self._summ_writer.add_image('{}_{}'.format(name, phase), im, step)

    def dump_scalars(self, log_path=None):
        log_path = os.path.join(
            self._log_dir,
            "scalar_data.json") if log_path is None else log_path
        self._summ_writer.export_scalars_to_json(log_path)
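
A quick smoke test of the two video helpers (hedged sketch; it assumes `torch` plus the imports the class already uses, with tensor values in [0, 1]):

import torch

vid_logger = Logger('./logs', n_logged_samples=2)
single = torch.rand(10, 3, 32, 32)  # [T, C, H, W] for log_video
vid_logger.log_video(single, 'rollout', step=0, phase='train')
batch = torch.rand(4, 10, 3, 32, 32)  # [N, T, C, H, W] for log_videos
vid_logger.log_videos(batch, 'rollouts', step=0, phase='train')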
Example no. 14
            seq_test, gt_seq_test = seq_test.to(device), gt_seq_test.to(device)

            # Feed into the model for inference
            test_output = model(seq_test, future=num_frame)

            # Compute the losses
            test_loss = loss_L1_L2(test_output[:, -num_frame:, :, :, :],
                                   gt_seq_test[:, -num_frame:, :, :, :])
            test_metric = loss_SSIM(test_output[:, -num_frame:, :, :, :],
                                    gt_seq_test[:, -num_frame:, :, :, :])

        step_time = time.time() - step_time

        # Store the useful information in TensorBoard
        if (step + 1) % print_freq == 0:
            writer.add_video('train_seq/feed_seq', seq,
                             epoch * train_lenth + step + 1)
            writer.add_video('train_seq/gt_seq', seq_target,
                             epoch * train_lenth + step + 1)
            writer.add_video('train_seq/pred_seq', layer_output,
                             epoch * train_lenth + step + 1)
            writer.add_video('test_seq/feed_seq', seq_test,
                             epoch * train_lenth + step + 1)
            writer.add_video('test_seq/gt_seq', gt_seq_test,
                             epoch * train_lenth + step + 1)
            writer.add_video('test_seq/pred_seq', test_output,
                             epoch * train_lenth + step + 1)
        writer.add_scalars(
            'loss/merge', {
                "train_loss": train_loss,
                "test_loss": test_loss,
                "train_metric": train_metric,
Example no. 15
def train_deepq(name,
                env,
                nb_actions,
                Q_network,
                preprocess_fn=None,
                batch_size=32,
                replay_start_size=50000,
                replay_memory_size=50000,
                agent_history_length=4,
                target_network_update_frequency=10000,
                discount_factor=0.99,
                learning_rate=1e-5,
                update_frequency=4,
                inital_exploration=1,
                final_exploration=0.1,
                final_exploration_step=int(1e6),
                nb_timesteps=int(1e7),
                tensorboard_freq=50,
                demo_tensorboard=False):

    #SAVE/LOAD MODEL
    DIRECTORY_MODELS = './models/'
    if not os.path.exists(DIRECTORY_MODELS):
        os.makedirs(DIRECTORY_MODELS)
    PATH_SAVE = DIRECTORY_MODELS + name + '_' + time.strftime('%Y%m%d-%H%M')

    #GPU/CPU
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    print('RUNNING ON', device)

    #TENSORBOARDX
    writer = SummaryWriter(comment=name)

    replay_memory = init_replay_memory(env, replay_memory_size,
                                       replay_start_size, preprocess_fn)

    print('#### TRAINING ####')
    print('see more details on tensorboard')

    done = True  #reset environment
    eps_schedule = ScheduleExploration(inital_exploration, final_exploration,
                                       final_exploration_step)
    Q_network = Q_network.to(device)
    Q_hat = copy.deepcopy(Q_network).to(device)
    loss = SmoothL1Loss()
    optimizer = RMSprop(Q_network.parameters(),
                        lr=learning_rate,
                        alpha=0.95,
                        eps=0.01,
                        centered=True)

    episode = 1
    rewards_episode, total_reward_per_episode = list(), list()
    for timestep in tqdm(range(nb_timesteps)):  #tqdm
        #if an episode is ended
        if done:
            total_reward_per_episode.append(np.sum(rewards_episode))
            rewards_episode = list()
            phi_t = env.reset()
            if preprocess_fn:
                phi_t = preprocess_fn(phi_t)

            if (episode % tensorboard_freq == 0):
                assert len(total_reward_per_episode) == tensorboard_freq
                #tensorboard
                writer.add_scalar('rewards/train_reward',
                                  np.mean(total_reward_per_episode), episode)
                total_reward_per_episode = list()
                writer.add_scalar('other/replay_memory_size',
                                  len(replay_memory), episode)
                writer.add_scalar('other/eps_exploration',
                                  eps_schedule.get_eps(), episode)
                if demo_tensorboard:
                    demos, demo_rewards = play(env,
                                               Q_network,
                                               preprocess_fn,
                                               nb_episodes=1,
                                               eps=eps_schedule.get_eps())
                    writer.add_scalar('rewards/demo_reward',
                                      np.mean(demo_rewards), episode)
                    for demo in demos:
                        demo = demo.permute([3, 0, 1, 2]).unsqueeze(0)
                        writer.add_video(name, demo, episode, fps=25)

                #save model
                torch.save(Q_network.state_dict(), PATH_SAVE)

            episode += 1

        a_t = get_action(phi_t, env, Q_network, eps_schedule)

        phi_t_1, r_t, done, info = env.step(a_t)
        rewards_episode.append(r_t)
        if preprocess_fn:
            phi_t_1 = preprocess_fn(phi_t_1)
        replay_memory.push([phi_t, a_t, r_t, phi_t_1, done])
        phi_t = phi_t_1

        #training
        if timestep % update_frequency == 0:
            #get training data
            phi_t_training, actions_training, y = get_training_data(
                Q_hat, replay_memory, batch_size, discount_factor)

            #forward
            phi_t_training = phi_t_training.to(device)
            Q_values = Q_network(phi_t_training)
            mask = torch.zeros([batch_size, nb_actions]).to(device)
            for j in range(len(actions_training)):
                mask[j, actions_training[j]] = 1
            Q_values = Q_values * mask
            Q_values = torch.sum(Q_values, dim=1)
            output = loss(Q_values, y)

            #backward and gradient descent
            optimizer.zero_grad()
            output.backward()
            optimizer.step()

        if timestep % target_network_update_frequency == 0:
            Q_hat = copy.deepcopy(Q_network).to(device)
Example no. 16
class Logger(object):
    def __init__(self, log_folder, tensorboard_dir, log_interval):
        log_file = str(log_folder / 'log')

        self.logger = logzero.setup_logger(
                          name='main',
                          logfile=log_file,
                          level=20,
                          fileLoglevel=10,
                          formatter=None,
                      )

        self.metrics = {
            "epoch": 0,
            "iteration": 1,
            "loss_gen": 0.,
            "loss_idis": 0.,
            "loss_vdis": 0.,
            "elapsed_time": 0,
        }
        
        self.log_interval = log_interval
        
        self.writer = SummaryWriter(str(tensorboard_dir))
        
        self.start_time = time.time()
        self.display_metric_names()

    def display_metric_names(self):
        log_string = ""
        for name in self.metrics.keys():
            log_string += "{:>12} ".format(name)
        self.logger.info(log_string)

    def init(self):
        targets = ["loss_gen", "loss_idis", "loss_vdis"]
        for name in targets:
            self.metrics[name] = 0.

    def update(self, name, value):
        self.metrics[name] += value

    def log(self):
        # display and save logs
        self.metrics["elapsed_time"] = time.time() - self.start_time

        metric_strings = []
        for name, value in self.metrics.items():
            if name in ["epoch", "iteration"]:
                s = "{}".format(value)
            elif name in ["loss_gen", "loss_idis", "loss_vdis"]:
                s = "{:0.3f}".format(value/self.log_interval)
            elif name in ["elapsed_time"]:
                value = int(value)
                s = "{:02d}:{:02d}:{:02d}".format(value//3600, value//60, value%60)
            else:
                raise Exception("Unsupported mertic is added")

            metric_strings.append(s)
        
        log_string = ""
        for s in metric_strings:
            log_string += "{:>12} ".format(s)
        self.logger.info(log_string)

    def tf_log(self):
        step = self.metrics["iteration"]
        for name in ["loss_gen", "loss_idis", "loss_vdis"]:
            value = self.metrics[name]/self.log_interval
            self.writer.add_scalar(name, value, step)

    def tf_log_video(self, name, videos, step):
        self.writer.add_video(name, videos, fps=8, global_step=step)

    def tf_log_histgram(self, var, tag, step):
        var = var.clone().cpu().data.numpy()
        self.writer.add_histogram(tag, var, step)
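
A hedged driver for this logger (it assumes `logzero` is installed and that the log folder exists before `logzero` opens its file; paths are illustrative):

from pathlib import Path

log_dir = Path('./exp')
log_dir.mkdir(exist_ok=True)
train_logger = Logger(log_dir, log_dir / 'tb', log_interval=100)
for _ in range(100):
    train_logger.update('loss_gen', 0.5)
train_logger.log()     # prints the averaged metrics
train_logger.tf_log()  # writes them to TensorBoard
train_logger.init()    # resets the loss accumulators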
Example no. 17
class FPTrainer():
    def __init__(self, opt):
        self.opt = opt
        self.save_dir = opt.log_dir
        self.dataset = opt.dataset
        self.batch_size = opt.batch_size
        self.patch_size = opt.patch_size

        self.dtype = torch.cuda.FloatTensor

        self.start_epoch = 0
        self.eta = 1.0
        self.total_iter = 0
        num_iters = int(opt.total_epoch * opt.epoch_size / 2)
        self.delta = float(1) / num_iters

        self.seq_len = opt.seq_len
        self.pre_len = opt.pre_len
        self.eval_len = opt.eval_len

        self.input_nc = opt.input_nc
        self.output_nc = opt.output_nc

        self.epoch_size = opt.epoch_size

        self.shape = [
            int(opt.image_width / opt.patch_size),
            int(opt.image_height / opt.patch_size)
        ]

        self.rnn_size = opt.rnn_size
        self.rnn_nlayer = opt.rnn_nlayer

        self.filter_size = opt.filter_size

        ic = self.input_nc * opt.patch_size**2
        oc = self.output_nc * opt.patch_size**2

        # tensorboard
        # ---------------- visualization with tensorboardX ----------
        train_log_dir = os.path.join(self.save_dir, 'runs/train')
        if not os.path.exists(train_log_dir):
            os.makedirs(train_log_dir)
        self.writer_train = SummaryWriter(log_dir=train_log_dir)

        test_log_dir = os.path.join(self.save_dir, 'runs/test')
        if not os.path.exists(test_log_dir):
            os.makedirs(test_log_dir)
        self.writer_test = SummaryWriter(log_dir=test_log_dir)

        # setting dataset
        train_data, valid_data, test_data = utils.load_dataset(opt)

        self.train_loader = DataLoader(train_data,
                                       num_workers=opt.data_threads,
                                       batch_size=opt.batch_size,
                                       shuffle=True,
                                       drop_last=True,
                                       pin_memory=True)

        self.test_loader = DataLoader(test_data,
                                      num_workers=opt.data_threads,
                                      batch_size=opt.batch_size,
                                      shuffle=False,
                                      drop_last=False,
                                      pin_memory=True)

        self.valid_loader = DataLoader(valid_data,
                                       num_workers=opt.data_threads,
                                       batch_size=opt.batch_size,
                                       shuffle=False,
                                       drop_last=False,
                                       pin_memory=True)

        def get_training_batch():
            while True:
                for sequence in self.train_loader:
                    batch = utils.normalize_data(opt, self.dtype, sequence)
                    yield batch

        self.training_batch_generator = get_training_batch()

        def get_testing_batch():
            while True:
                for sequence in self.test_loader:
                    batch = utils.normalize_data(opt, self.dtype, sequence)
                    yield batch

        self.testing_batch_generator = get_testing_batch()

        # set model
        self.model = get_convrnn_model(opt.model,
                                       input_chans=ic,
                                       output_chans=oc,
                                       hidden_size=self.rnn_size,
                                       filter_size=self.filter_size,
                                       num_layers=self.rnn_nlayer,
                                       img_size=opt.image_height //
                                       opt.patch_size)
        self.model.cuda()

        # set optimizer
        if opt.optimizer == 'adam':
            optimizer = optim.Adam
        elif opt.optimizer == 'rmsprop':
            optimizer = optim.RMSprop
        elif opt.optimizer == 'sgd':
            optimizer = optim.SGD
        elif opt.optimizer == 'adamw':
            optimizer = optim.AdamW
        else:
            raise ValueError('Unknown optimizer: %s' % opt.optimizer)

        self.optimizer = optimizer(self.model.parameters(),
                                   lr=opt.lr,
                                   betas=(opt.beta1, 0.999))

        self.scheduler = utils.get_scheduler(
            self.optimizer, self.opt, (opt.total_epoch - self.start_epoch))

        # load model
        if opt.resume:
            if not os.path.isfile(opt.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    opt.resume))
            checkpoint = torch.load(opt.resume)
            self.start_epoch = checkpoint['epoch']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.eta = checkpoint['eta']
            self.total_iter = self.start_epoch * self.epoch_size
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opt.resume, checkpoint['epoch']))

        # criterion
        if opt.criterion == "L1":
            self.criterion = torch.nn.L1Loss(reduction='sum').cuda()
            print('The criterion type is L1!')
        elif opt.criterion == "BCE":
            self.criterion = torch.nn.BCELoss(reduction='sum').cuda()
            print('The criterion type is BCE!')
        elif opt.criterion == "MSE&L1":
            self.criterion = MSEL1Loss(size_average=False,
                                       reduce=True,
                                       alpha=1.0).cuda()
            print('The criterion type is MSE + L1!')
        else:
            self.criterion = torch.nn.MSELoss(reduction='sum').cuda()
            print('The criterion type is MSE!')

        # Print networks
        print('---------- Networks initialized -------------')
        utils.print_network(self.model, opt.model)

    def name(self):
        return 'Frame Prediction Trainer'

    def set_input(self, input):
        # X: list of seq_len tensors, each (batch_size, in_channels, H, W)
        if self.patch_size > 1:
            x = [utils.reshape_patch(img, self.patch_size) for img in input]
        else:
            x = input

        reverse_x = x[::-1]

        random_flip = np.random.random_sample(
            (self.pre_len - 1, self.batch_size))
        true_token = (random_flip < self.eta)
        one = torch.FloatTensor(1, x[0].size(1), x[0].size(2),
                                x[0].size(3)).fill_(1.0).cuda()
        zero = torch.FloatTensor(1, x[0].size(1), x[0].size(2),
                                 x[0].size(3)).fill_(0.0).cuda()

        masks = []
        for t in range(self.pre_len - 1):
            masks_b = []
            for i in range(self.batch_size):
                if true_token[t, i]:
                    masks_b.append(one)
                else:
                    masks_b.append(zero)
            mask = torch.cat(masks_b, 0)  # along batchsize
            masks.append(mask)
        return x, reverse_x, masks

    def forward(self, x, mask):
        gen_ims = []
        x_gen = self.model(x[0], init_hidden=True)

        for t in range(1, self.seq_len + self.pre_len - 1):
            if t < self.seq_len:
                inputs = x[t]
            else:
                inputs = mask[t - self.seq_len] * x[t] + (
                    1 - mask[t - self.seq_len]) * x_gen

            x_gen = self.model(inputs, init_hidden=False)

            gen_ims.append(x_gen)

        gen_ims = torch.stack(gen_ims, 1)
        images = torch.stack(x[2:], 1)

        loss = self.criterion(gen_ims, images)
        loss /= 2.0
        return loss

    def save_checkpoint(self, checkpoint, network_label, epoch_label):
        save_filename = '%s_%s_net_%s.pth.tar' % (self.dataset, network_label,
                                                  epoch_label)
        save_path = os.path.join(self.save_dir, save_filename)
        torch.save(checkpoint, save_path)

    def train_epoch(self, epoch):
        self.model.train()
        # epoch_rec = 0.0
        info_dict = {'loss': 0.0}
        describe = '[' + self.opt.model + ',' + self.opt.dataset + ']:' + 'Epoch ' + str(
            epoch)
        pbar = tqdm(total=self.epoch_size, desc=describe)

        for i in range(self.epoch_size):
            x = next(self.training_batch_generator)
            loss = self.optimize_parameters(x)
            # epoch_rec += loss

            with open(os.path.join(
                    self.save_dir, 'train_loss_%s_%s.txt' %
                (self.opt.model, self.opt.dataset)),
                      mode='a') as f:
                f.write('%0.8f \n' % (loss))

            self.total_iter += 1
            self.writer_train.add_scalar('Train/loss', loss, self.total_iter)
            self.writer_train.add_scalar('Train/Eta', self.eta,
                                         self.total_iter)

            self.eta -= self.delta
            self.eta = max(self.eta, 0.0)

            info_dict['loss'] = loss
            pbar.set_postfix(info_dict)
            pbar.update(1)
        pbar.close()

        # save epoch
        self.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'eta': self.eta,
            }, self.opt.model, 'last')

        self.scheduler.step()
        lr = self.scheduler.get_last_lr()[0]
        print('learning rate = %.7f' % lr)

    def test(self, epoch):
        self.model.eval()
        rec = 0

        result_path = os.path.join(self.save_dir, 'results', str(epoch))
        os.makedirs(result_path, exist_ok=True)

        psnr = np.zeros(self.eval_len)
        ssim = np.zeros(self.eval_len)
        mae = np.zeros(self.eval_len)
        sharp = np.zeros(self.eval_len)
        mse = np.zeros(self.eval_len)

        index = 0
        total_index = 0

        describe = '[Testing]:Epoch ' + str(epoch)
        pbar = tqdm(total=len(self.test_loader), desc=describe)

        for batch in self.test_loader:
            x = utils.normalize_data(self.opt, self.dtype, batch)
            rec += self.evaluation(x)
            index += 1
            total_index += x[0].size(0)  # bs

            gt = []
            pred = []
            for i in range(self.eval_len):
                x1 = x[i + self.seq_len].data.cpu().numpy()
                x2 = self.preds[i + self.seq_len].data.cpu().numpy()
                gt.append(x1)
                pred.append(x2)

            mse_, mae_, ssim_, psnr_, sharp_ = utils.eval_seq_batch(gt, pred)
            mse += mse_
            mae += mae_
            ssim += ssim_
            psnr += psnr_
            sharp += sharp_

            if index < 11:
                path = os.path.join(result_path, str(index))
                os.makedirs(path, exist_ok=True)
                for i in range(self.seq_len + self.eval_len):
                    name = 'gt' + str(i + 1) + '.png'
                    file_name = os.path.join(path, name)
                    img_gt = x[i][0].data.cpu()
                    img_gt = img_gt.transpose(0, 1).transpose(1, 2).numpy()
                    img_gt = np.uint8(img_gt * 255)

                    # 2-channel frames: write only the first channel
                    # (cv2.imwrite expects 1, 3 or 4 channels)
                    if 2 in img_gt.shape:
                        cv2.imwrite(file_name, img_gt[:, :, :1])
                        continue
                    cv2.imwrite(file_name, img_gt)

                for i in range(self.eval_len):
                    name = 'pd' + str(i + self.seq_len + 1) + '.png'
                    file_name = os.path.join(path, name)
                    img_pd = self.preds[i + self.seq_len][0].data.cpu()
                    img_pd = img_pd.transpose(0,
                                              1).transpose(1,
                                                           2).clamp(0,
                                                                    1).numpy()
                    img_pd = np.uint8(img_pd * 255)

                    # 2-channel frames: write only the first channel
                    if 2 in img_pd.shape:
                        cv2.imwrite(file_name, img_pd[:, :, :1])
                        continue

                    cv2.imwrite(file_name, img_pd)

            if index == 1:
                gt = torch.stack(x, dim=1)  # B, T, C, H, W
                pd = torch.stack(self.preds, dim=1)
                gif = torch.cat([gt, pd], dim=0)  # cat along batch
                gif = gif.data.cpu().clamp(0, 1)
                self.writer_test.add_video('Test/gt&pred', gif, epoch)

            pbar.update(1)
        pbar.close()

        rec = rec / index
        mse /= total_index
        mae /= total_index
        ssim /= total_index
        psnr /= total_index
        sharp /= total_index

        # ----------- log the frame-wise measurement
        with open(os.path.join(self.save_dir,
                               'test_result_%s.txt' % (self.dataset)),
                  mode='a') as f:
            f.write(
                '####################### frame-wise results at epoch: %04d ####################### \n'
                % (epoch))

            f.write('- mse: mean %.4f -' % (np.mean(mse)))
            for t in range(self.eval_len):
                f.write('[%d: %.4f] ' % (t + self.pre_len, mse[t]))
            f.write('\n')

            f.write('- mae: mean %.4f -' % (np.mean(mae)))
            for t in range(self.eval_len):
                f.write('[%d: %.4f] ' % (t + self.pre_len, mae[t]))
            f.write('\n')

            f.write('- ssim: mean %.4f -' % (np.mean(ssim)))
            for t in range(self.eval_len):
                f.write('[%d: %.4f] ' % (t + self.pre_len, ssim[t]))
            f.write('\n')

            f.write('- psnr: mean %.4f -' % (np.mean(psnr)))
            for t in range(self.eval_len):
                f.write('[%d: %.4f] ' % (t + self.pre_len, psnr[t]))
            f.write('\n')

            f.write('- sharp: mean %.4f -' % (np.mean(sharp)))
            for t in range(self.eval_len):
                f.write('[%d: %.4f] ' % (t + self.pre_len, sharp[t]))
            f.write('\n')

        self.writer_test.add_scalar('Test/rec_loss', rec, epoch)
        self.writer_test.add_scalar('Test/mse', np.mean(mse), epoch)
        self.writer_test.add_scalar('Test/mae', np.mean(mae), epoch)
        self.writer_test.add_scalar('Test/ssim', np.mean(ssim), epoch)
        self.writer_test.add_scalar('Test/psnr', np.mean(psnr), epoch)
        self.writer_test.add_scalar('Test/sharp', np.mean(sharp), epoch)

    def evaluation(self, x):
        # patch
        if self.patch_size > 1:
            x = [utils.reshape_patch(img, self.patch_size) for img in x]
        # --------------------------------------------------------------
        loss = 0.0
        self.preds = [x[0], x[1]]
        x_gen = self.model(x[0], init_hidden=True)
        gen_ims = []

        for t in range(1, self.seq_len + self.eval_len - 1):
            if t < self.seq_len:
                inputs = x[t]
            else:
                inputs = x_gen

            x_gen = self.model(inputs, init_hidden=False)
            gen_ims.append(x_gen)

            # -----------------------------------------------------------
            if t < self.seq_len - 1:
                self.preds.append(x[t + 1])
            else:
                self.preds.append(x_gen)

        # unpatch
        if self.patch_size > 1:
            self.preds = [
                utils.reshape_patch_back(img, self.patch_size)
                for img in self.preds
            ]

        gen_ims = torch.stack(gen_ims, 1)
        images = torch.stack(x[2:], 1)
        loss = self.criterion(gen_ims, images)
        loss /= 2.0  # for consistency with tensorflow

        return loss.detach().to("cpu").item() / (
            self.batch_size * (self.seq_len + self.eval_len - 2))

    def optimize_parameters(self, x):
        x, x_rev, mask = self.set_input(x)

        self.optimizer.zero_grad()
        self.loss_1 = self.forward(x, mask)

        self.loss_1.backward()
        self.optimizer.step()

        self.optimizer.zero_grad()
        self.loss_2 = self.forward(x_rev, mask)

        self.loss_2.backward()
        self.optimizer.step()

        loss = (self.loss_1 + self.loss_2) / 2
        return loss.detach().to("cpu").item()

    def finish(self):
        self.preds = []
        self.writer_train.export_scalars_to_json(
            os.path.join(self.save_dir, 'runs', 'train_all_scalars.json'))
        self.writer_train.close()
        self.writer_test.export_scalars_to_json(
            os.path.join(self.save_dir, 'runs', 'test_all_scalars.json'))
        self.writer_test.close()
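
The trainer above implements scheduled sampling: `eta` is the probability of feeding the ground-truth frame rather than the model's own prediction, and it decays by `delta` every iteration (see `train_epoch`). A minimal standalone sketch of the same rollout, assuming a `model(frame, init_hidden=...)` callable like the one above (the function name is illustrative):

import torch

def scheduled_sampling_rollout(model, frames, seq_len, eta):
    # frames: list of [B, C, H, W] tensors; eta: probability of ground truth
    x_gen = model(frames[0], init_hidden=True)
    outputs = []
    for t in range(1, len(frames) - 1):
        if t < seq_len:
            inputs = frames[t]  # conditioning phase: always feed ground truth
        else:
            # per-sample Bernoulli(eta) mask, broadcast over C, H, W
            use_gt = (torch.rand(frames[t].size(0), 1, 1, 1,
                                 device=frames[t].device) < eta).float()
            inputs = use_gt * frames[t] + (1.0 - use_gt) * x_gen
        x_gen = model(inputs, init_hidden=False)
        outputs.append(x_gen)
    return torch.stack(outputs, dim=1)  # [B, T-2, C, H, W] predictions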
Exemplo n.º 18
0
def main2():
    model = c3d.C3D(with_classifier=True, num_classes=101)

    start_epoch = 1
    # load 16 compressed video frames
    # the dataset gets its own name so it does not shadow the
    # `train_coviar` training function called in the epoch loop below
    train_coviar_data = CoviarData(
        '/data2/fb/project/pytorch-coviar-master/data/ucf101/mpeg4_videos',
        'ucf101',
        '/data2/fb/project/pytorch-coviar-master/data/datalists/ucf101_split1_train.txt',
        'residual', get_augmentation(), 4, 1, True)

    val_size = 400
    train_dataset, val_dataset = random_split(
        train_coviar_data, (len(train_coviar_data) - val_size, val_size))

    print("num_workers:{:d}".format(params['num_workers']))
    print("batch_size:{:d}".format(params['batch_size']))
    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'],
                              shuffle=True,
                              num_workers=params['num_workers'])
    val_loader = DataLoader(val_dataset,
                            batch_size=params['batch_size'],
                            shuffle=True,
                            num_workers=params['num_workers'])

    model = model.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=params['learning_rate'],
                          momentum=params['momentum'],
                          weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     min_lr=1e-5,
                                                     patience=20,
                                                     factor=0.1)

    model_save_dir = os.path.join(save_path, '_' + time.strftime('%m-%d-%H-%M'))
    writer = SummaryWriter(model_save_dir)

    for data in train_loader:
        clip, label = data
        writer.add_video('train/clips', clip, 0, fps=8)
        writer.add_text('train/idx', str(label.tolist()), 0)
        clip = clip.cuda()
        break
    for name, param in model.named_parameters():
        writer.add_histogram('params/{}'.format(name), param, 0)

    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    prev_best_val_loss = float('inf')
    prev_best_loss_model_path = None
    prev_best_acc_model_path = None
    best_acc = 0
    best_epoch = 0
    for epoch in tqdm(range(start_epoch, start_epoch + params['epoch_num'])):
        train_coviar(train_loader, model, criterion, optimizer, epoch,
                     params['learning_rate'])
        val_loss, top1_avg = validation(val_loader, model, criterion,
                                        optimizer, epoch)
        if top1_avg >= best_acc:
            best_acc = top1_avg
            best_epoch = epoch
            model_path = os.path.join(
                model_save_dir, 'best_acc_model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), model_path)

            prev_best_acc_model_path = model_path
        if val_loss < prev_best_val_loss:
            model_path = os.path.join(
                model_save_dir, 'best_loss_model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), model_path)
            prev_best_val_loss = val_loss

            prev_best_loss_model_path = model_path
        scheduler.step(val_loss)
        if epoch % 20 == 0:
            checkpoints = os.path.join(model_save_dir, str(epoch) + ".pth.tar")
            torch.save(model.state_dict(), checkpoints)
            print("save_to:", checkpoints)
    print("best is :", best_acc, best_epoch)
Exemplo n.º 19
0
writer.add_embedding(features, global_step=1, tag='noMetadata')
# run from the PyTorch_Tutorial/Code directory:
# dataset = datasets.MNIST(os.path.join("..", "..", "Data", "mnist"),
# run from the PyTorch_Tutorial directory:
dataset = datasets.MNIST(os.path.join(".", "Data", "mnist"),
                         train=True,
                         download=True)
images_train = dataset.train_data[:100].float()
labels_train = dataset.train_labels[:100]
features_train = images_train.view(100, 784)

all_features = torch.cat((features, features_train))
all_labels = torch.cat((label, labels_train))
all_images = torch.cat((images, images_train))
dataset_label = ['test'] * 100 + ['train'] * 100
all_labels = list(zip(all_labels, dataset_label))

writer.add_embedding(all_features,
                     metadata=all_labels,
                     label_img=all_images.unsqueeze(1),
                     metadata_header=['digit', 'dataset'],
                     global_step=2)

# VIDEO
vid_images = dataset.train_data[:16 * 48]
vid = vid_images.view(16, 1, 48, 28, 28)  # BxCxTxHxW

writer.add_video('video', vid_tensor=vid)
writer.add_video('video_1_fps', vid_tensor=vid, fps=1)

writer.close()
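
Note that the video layout expected by `add_video` changed across releases: the snippet above uses the older tensorboardX convention (B, C, T, H, W), while recent tensorboardX and `torch.utils.tensorboard` expect (N, T, C, H, W). A small sketch building both layouts from the same frames, assuming the `dataset` loaded above (check the installed release before relying on either):

frames = dataset.train_data[:16 * 48].float().div(255.0)  # [768, 28, 28]
vid_old = frames.view(16, 1, 48, 28, 28)     # (B, C, T, H, W): older tensorboardX
vid_new = vid_old.permute(0, 2, 1, 3, 4)     # (N, T, C, H, W): current API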
Exemplo n.º 20
0
import torch
from tensorboardX import SummaryWriter
import torchvision

logger = SummaryWriter(comment='feature_vis')
feature = torch.load('pev_feature_bpi3d_rgb_result.pt',
                     map_location=lambda storage, loc: storage)
for k, v in feature.items():
    # min-max normalize each feature map into [0, 1] for visualization
    v_min, v_max = v.min(), v.max()
    v = (v - v_min) / (v_max - v_min + 1e-5)

    imgs = v.permute(1, 0, 2, 3).reshape(-1, 1, 14, 14)

    logger.add_image('feature/img', torchvision.utils.make_grid(imgs, nrow=8),
                     k.item())

    v = v.view(64, 8, 1, 14, 14)
    logger.add_video('feature/video', v, k.item())
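
The per-tensor normalization in the loop above is a common prerequisite for image and video summaries, which expect float values in [0, 1]. A reusable helper with the same behavior:

import torch

def minmax_normalize(t: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
    # scale a tensor into [0, 1]; eps guards against constant tensors
    t_min, t_max = t.min(), t.max()
    return (t - t_min) / (t_max - t_min + eps)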
Exemplo n.º 21
0
        # create video every 100 episodes
        if ((i_episode % 100) == 0):
            policy_net.eval()
            env.reset()
            current_screen = get_screen()
            state = current_screen

            episode_video_frames = []
            for t in count():
                action = select_action(state, update_step=False)
                _, _, done = env.step(action.item())
                obs = get_screen()
                episode_video_frames.append(obs.cpu().numpy())
                if (done or t > 3000):
                    break
            # stack per-step (C, H, W) screens into (T, C, H, W), then move
            # the channel axis first: (C, T, H, W)
            stacked_frames = np.stack(episode_video_frames).transpose(
                1, 0, 2, 3)
            stacked_frames = np.expand_dims(stacked_frames, 0)
            # video takes B, C, T, H, W
            writer.add_video('video/episode', stacked_frames, i_episode)

        if (total_frame_count > NUM_FRAMES):
            torch.save(policy_net, MODEL_PATH)
            break

        print('Reward this episode:', total_reward)

    print('Complete')
Exemplo n.º 22
0
class TBVisualizer:
    def __init__(self, opt):
        self._opt = opt
        self._save_path = os.path.join(opt["dirs"]["exp_dir"],
                                       opt["dirs"]["events"])
        self._log_path = os.path.join(self._save_path, 'loss_log.txt')
        self._tb_path = os.path.join(self._save_path, 'summary.json')

        # create summary writers
        self._writer_full = SummaryWriter(self._save_path,
                                          filename_suffix="_full")

        # init log file with header
        self._init_log_file()

    def __del__(self):
        self._writer_full.close()

    def _init_log_file(self):
        with open(self._log_path, "a") as log_file:
            now = time.strftime("%c")
            log_file.write(
                '================ Training Loss (%s) ================\n' % now)

    def display_current_results(self,
                                visuals,
                                it,
                                is_train,
                                save_visuals=False):
        # add visuals to events file
        for label, image_numpy in visuals.items():
            sum_name = '{}/{}'.format('Train' if is_train else 'Val', label)
            if image_numpy.ndim == 3:
                self._writer_full.add_image(sum_name, image_numpy, it)
            else:
                self._writer_full.add_video(sum_name, image_numpy, it)

            # save image to file
            if save_visuals:
                util.save_image(
                    image_numpy,
                    # opt is dict-like (see __init__), so index it rather
                    # than using attribute access
                    os.path.join(self._opt["checkpoints_dir"],
                                 self._opt["name"], 'event_imgs', sum_name,
                                 '%08d.png' % it))

        # force write
        self._writer_full.file_writer.flush()

    def plot_scalars(self, scalars, it, is_train, is_mean=False):
        for label, scalar in scalars.items():
            # set labels
            if is_mean:
                label = f"M_{label}"
            sum_name = '{}/{}'.format('Train' if is_train else 'Val', label)

            # add scalars to events file
            self._writer_full.add_scalar(sum_name, scalar, it)

    def plot_histograms(self, histograms, it, is_train, is_mean=False):
        for label, scalar in histograms.items():
            # set labels
            if is_mean:
                label = f"M_{label}"
            sum_name = '{}/{}'.format('Train' if is_train else 'Val', label)

            # add hist to events file
            self._writer_full.add_histogram(sum_name, scalar, it)

        # force write
        self._writer_full.file_writer.flush()

    def plot_time(self, read_time, train_time, it):
        # add scalars to events file
        self._writer_full.add_scalar("read_time", read_time, it)
        self._writer_full.add_scalar("train_time", train_time, it)

    def print_current_train_errors(self, epoch, i, iters_per_epoch, errors,
                                   iter_read_time, iter_procs_time,
                                   visuals_were_stored):
        # set label
        log_time = time.strftime("[%d/%m %H:%M:%S]")
        visuals_info = "v" if visuals_were_stored else ""
        message = '%s (T%s, epoch: %d, it: %d/%d, s/smpl: %.3fr %.3fp) ' % (
            log_time, visuals_info, epoch, i, iters_per_epoch, iter_read_time,
            iter_procs_time)
        # print in terminal and store in log file
        self._print_and_store_errors(errors, message)

    def print_current_validate_errors(self, epoch, errors, t):
        # set label
        log_time = time.strftime("[%d/%m/%Y %H:%M:%S]")
        message = '%s (V, epoch: %d, time_to_val: %ds) ' % (log_time, epoch, t)

        # print in terminal and store in log file
        self._print_and_store_errors(errors, message)

    def print_epoch_avg_errors(self, epoch, errors, is_train):
        # set label
        label = "MT" if is_train else "MV"
        log_time = time.strftime("[%d/%m/%Y %H:%M:%S]")
        message = '%s (%s, epoch: %d) ' % (log_time, label, epoch)

        # print in terminal and store in log file
        self._print_and_store_errors(errors, message)

    def _print_and_store_errors(self, errors, message):
        # set errors msg
        for k, v in errors.items():
            message += '%s:%.3f ' % (k, v)

        # print in terminal and store in log file
        print(message)
        self._save_log(message)

    def print_msg(self, message):
        # set label
        log_time = time.strftime("[%d/%m/%Y %H:%M:%S]")
        message = '%s %s' % (log_time, message)

        # print in terminal and store in log file
        print(message)
        self._save_log(message)

    def save_images(self, visuals):
        for label, image_numpy in visuals.items():
            image_name = '%s.png' % label
            save_path = os.path.join(self._save_path, "samples", image_name)
            util.save_image(image_numpy, save_path)

    def _save_log(self, msg):
        with open(self._log_path, "a") as log_file:
            log_file.write('%s\n' % msg)
Exemplo n.º 23
0
class ExperimentLogger(object):
    """Wraps a TensorBoard logger, a lightweight logger, and a standard-output
    logger, combining them behind one interface. The TensorBoard logger is
    useful for comparing runs across experiments.

    Performs checkpointing and resumes experiments that are not completed.
    """
    def __init__(self, log_dir, checkpoint_name="latest.tar", log_std_out=False, use_tensorboard=True):
        self.log_dir = log_dir
        self.logger = Logger(log_dir=log_dir)
        self.checkpoint_name = os.path.join(self.log_dir, checkpoint_name)

        self.use_tensorboard = use_tensorboard
        if use_tensorboard:
            self.tensorboard_writer = SummaryWriter(log_dir=self.log_dir)

        self.log_std_out = log_std_out
        if log_std_out:
            std_out_file = os.path.join(log_dir, "std_out.txt")
            self.std_out_logger = set_print_logger("marl", std_out_file)

    def info(self, msg):
        """ mimic logging.logger """
        if self.log_std_out:
            self.std_out_logger.info(msg)
        else:
            print(msg)

    def update_tensorboard_writer(self, epoch):
        self.tensorboard_writer = SummaryWriter(log_dir=self.log_dir, purge_step=epoch)

    def checkpoint_exists(self):
        return os.path.isfile(self.checkpoint_name)

    def load_checkpoint(self):
        return torch.load(self.checkpoint_name)

    def log_hyperparams(self, hyp_dict, file_name="hyperparams.json"):
        with open(os.path.join(self.log_dir, file_name), 'w') as fp:
            json.dump(hyp_dict, fp)

    def add_scalar(self, name, val, iter):
        self.logger.log(name, val, iter)
        if self.use_tensorboard:
            self.tensorboard_writer.add_scalar(name, val, iter)

    def add_histogram(self, name, val_array, iter):
        if self.use_tensorboard:
            self.tensorboard_writer.add_histogram(name, val_array, iter)

    def add_histogram_dict(self, val_dict, iter):
        """ take in dict of named parameters (e.g. network weights) """
        if self.use_tensorboard:
            for k, v in val_dict.items():
                self.add_histogram(k, v, iter)

    def add_images(self, name, image_tensor, iter):
        """ images: (N,C,H,W) """
        if self.use_tensorboard:
            self.tensorboard_writer.add_images(name, image_tensor, iter)

    def add_video(self, name, video_tensor, iter):
        """ videos: (N, T, C, H, W) """
        if self.use_tensorboard:
            self.tensorboard_writer.add_video(name, video_tensor, iter, fps=4)

    def log_video(self, name, video, fps=20):
        """ video: rgb arrays 
        reference: https://imageio.readthedocs.io/en/stable/format_gif-pil.html
        """
        vid_kwargs = {
            'fps': fps  # frames per second of the saved GIF
        }
        vid_name = '{}/{}'.format(self.log_dir, name)
        mkdirs(os.path.dirname(vid_name))   # often is "videos/"
        imageio.mimsave(vid_name, video, **vid_kwargs)

    def log_epoch(self, epoch, state, epoch_stats):
        assert 'epoch' in state
        assert 'model' in state
        assert 'optimizer' in state

        torch.save(state, self.checkpoint_name)
        for k, v in epoch_stats.items():
            self.add_scalar(k, v, epoch)
    
    def export_scalars_to_json(self, summary_path="summary.json"):
        self.tensorboard_writer.export_scalars_to_json(self.log_dir + "/" + summary_path)

    def close(self):
        if self.use_tensorboard:
            self.tensorboard_writer.close()
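
A usage sketch for the logger above, assuming the class and its `Logger`/`mkdirs` dependencies are importable (the directory and tag names are illustrative):

import numpy as np

exp_logger = ExperimentLogger(log_dir="runs/exp0", use_tensorboard=True)
frames = [np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
          for _ in range(40)]
exp_logger.log_video("videos/rollout.gif", frames, fps=20)  # saved via imageio
exp_logger.add_scalar("reward/train", 1.23, 0)
exp_logger.close()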
Exemplo n.º 24
0
class TBXLoggerCallback(TrainingCallback):
    """Logs Train results in TensorboardX format.

    Args:
        logdir (Optional[str]): Path to directory where the results file
            should be. If None, will be set by the Trainer.
        worker_to_log: Worker index to log. By default, will log the
            worker with index 0.
    """

    VALID_SUMMARY_TYPES: Tuple[type, ...] = (
        int,
        float,
        np.float32,
        np.float64,
        np.int32,
        np.int64,
    )
    IGNORE_KEYS: Set[str] = {PID, TIMESTAMP, TIME_TOTAL_S}

    def __init__(self,
                 logdir: Optional[str] = None,
                 worker_to_log: int = 0) -> None:
        warnings.warn(
            _deprecation_msg,
            DeprecationWarning,
        )
        self._logdir_manager = _TrainCallbackLogdirManager(logdir=logdir)

        results_preprocessors = [
            IndexedResultsPreprocessor(indices=worker_to_log),
            ExcludedKeysResultsPreprocessor(excluded_keys=self.IGNORE_KEYS),
        ]
        self.results_preprocessor = SequentialResultsPreprocessor(
            results_preprocessors)

    def start_training(self, logdir: str, **info):
        self._logdir_manager.setup_logdir(default_logdir=logdir)

        try:
            from tensorboardX import SummaryWriter
        except ImportError:
            if log_once("tbx-install"):
                warnings.warn(
                    "pip install 'tensorboardX' to see TensorBoard files.")
            raise

        self._file_writer = SummaryWriter(str(self.logdir), flush_secs=30)

    def handle_result(self, results: List[Dict], **info):
        result = results[0]
        # Use TRAINING_ITERATION for step but remove it so it is not logged.
        step = result.pop(TRAINING_ITERATION)
        flat_result = flatten_dict(result, delimiter="/")
        path = ["ray", "train"]

        # same logic as in ray.tune.logger.TBXLogger
        for attr, value in flat_result.items():
            full_attr = "/".join(path + [attr])
            if isinstance(value,
                          self.VALID_SUMMARY_TYPES) and not np.isnan(value):
                self._file_writer.add_scalar(full_attr,
                                             value,
                                             global_step=step)
            elif (isinstance(value, list)
                  and len(value) > 0) or (isinstance(value, np.ndarray)
                                          and value.size > 0):

                # Must be video
                if isinstance(value, np.ndarray) and value.ndim == 5:
                    self._file_writer.add_video(full_attr,
                                                value,
                                                global_step=step,
                                                fps=20)
                    continue

                try:
                    self._file_writer.add_histogram(full_attr,
                                                    value,
                                                    global_step=step)
                # In case TensorboardX still doesn't think it's a valid value
                # (e.g. `[[]]`), warn and move on.
                except (ValueError, TypeError):
                    if log_once("invalid_tbx_value"):
                        warnings.warn(
                            "You are trying to log an invalid value ({}={}) "
                            "via {}!".format(full_attr, value,
                                             type(self).__name__))
        self._file_writer.flush()

    def finish_training(self, error: bool = False, **info):
        self._file_writer.close()

    @property
    def logdir(self) -> Path:
        return self._logdir_manager.logdir_path
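
For context, `flatten_dict` turns nested result dicts into slash-delimited keys, so nested metrics land under `ray/train/...` tags. A standalone re-implementation of that flattening (not Ray's own helper, shown only to illustrate the tag structure):

def flatten_dict(d, delimiter="/", prefix=""):
    # recursively flatten nested dicts into delimiter-joined keys
    out = {}
    for k, v in d.items():
        key = prefix + delimiter + str(k) if prefix else str(k)
        if isinstance(v, dict):
            out.update(flatten_dict(v, delimiter, key))
        else:
            out[key] = v
    return out

print(flatten_dict({"loss": {"policy": 0.1, "value": 0.4}, "lr": 3e-4}))
# {'loss/policy': 0.1, 'loss/value': 0.4, 'lr': 0.0003}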
Exemplo n.º 25
0
def main():
    base = c3d.C3D(with_classifier=False)
    model = ssl_net.SSLNET(base, with_classifier=True, num_classes=12)

    start_epoch = 1
    # pretrain_weight = loadcontinur_weights(pretrain_path)

    # model.load_state_dict(pretrain_weight, strict=False)
    # train
    train_dataset = UntrimmedVideoDataset(params['root'], mode="train")
    if params['data'] == 'UCF-101':
        val_size = 800
    elif params['data'] == 'hmdb':
        val_size = 400
    elif params['data'] == 'Thumos14':
        val_size = 400
    train_dataset, val_dataset = random_split(
        train_dataset, (len(train_dataset) - val_size, val_size))

    print("num_works:{:d}".format(params['num_workers']))
    print("batch_size:{:d}".format(params['batch_size']))
    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'],
                              shuffle=True,
                              num_workers=params['num_workers'])
    val_loader = DataLoader(val_dataset,
                            batch_size=params['batch_size'],
                            shuffle=True,
                            num_workers=params['num_workers'])
    model = nn.DataParallel(model)  #multi-gpu
    model = model.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=params['learning_rate'],
                          momentum=params['momentum'],
                          weight_decay=params['weight_decay'])
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     min_lr=1e-5,
                                                     patience=50,
                                                     factor=0.1)

    #pretrain_model = pretrain_path.split('/')[-1].split('.')[0] + 'pth'

    model_save_dir = os.path.join(save_path,
                                  '_' + time.strftime('%m-%d-%H-%M'))
    writer = SummaryWriter(model_save_dir)

    for data in train_loader:
        clip, label = data
        writer.add_video('train/clips', clip, 0, fps=8)
        writer.add_text('train/idx', str(label.tolist()), 0)
        clip = clip.cuda()
        #writer.add_graph(model, (clip, clip));
        break
    for name, param in model.named_parameters():
        writer.add_histogram('params/{}'.format(name), param, 0)

    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    prev_best_val_loss = float('inf')
    prev_best_loss_model_path = None
    prev_best_acc_model_path = None
    best_acc = 0
    best_epoch = 0
    for epoch in tqdm(range(start_epoch, start_epoch + params['epoch_num'])):
        train(train_loader, model, criterion, optimizer, epoch, writer)
        val_loss, top1_avg = validation(val_loader, model, criterion,
                                        optimizer, epoch)
        if top1_avg >= best_acc:
            best_acc = top1_avg
            best_epoch = epoch
            model_path = os.path.join(
                model_save_dir, 'best_acc_model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), model_path)

            prev_best_acc_model_path = model_path
        if val_loss < prev_best_val_loss:
            model_path = os.path.join(
                model_save_dir, 'best_loss_model_{}.pth.tar'.format(epoch))
            torch.save(model.state_dict(), model_path)
            prev_best_val_loss = val_loss

            prev_best_loss_model_path = model_path
        scheduler.step(val_loss)
        if epoch % 20 == 0:
            checkpoints = os.path.join(model_save_dir, str(epoch) + ".pth.tar")
            torch.save(model.state_dict(), checkpoints)
            print("save_to:", checkpoints)
    print("best is :", best_acc, best_epoch)
Exemplo n.º 26
0
        ## PLAY GAME
        metrics['epsilon'] = eps.get(step)
        game = utils.play_game(env, agent = dqn_epsilon_agent, th = metrics['epsilon'], memory = memory)
        metrics['run_reward'], metrics['run_episode_steps'] = game['cum_reward'], game['steps']
        step += metrics['run_episode_steps']

        ## TRAIN
        for _ in range(metrics['run_episode_steps']//param['batch_size']):
            metrics['run_loss'] = train_batch(param)
            
        if metrics['episode'] % 500 == 0:
            target_dqn.load_state_dict(dqn.state_dict())

        # Test agent:
        if metrics['episode'] % 100 == 0:
            game = utils.play_game(env, agent = dqn_epsilon_agent, th = 0.02, memory = memory)
            metrics['test_reward'], metrics['test_episode_steps'] = game['cum_reward'], game['steps']
            checkpoint.save(dqn, step = step, step_loss = -metrics['test_reward'])


        # REPORTING
        if metrics['episode'] % 100 == 0:
            for key, val in metrics.items():
                writer.add_scalar(key, val, global_step = step)
                
        # Animate agent:
        if metrics['episode'] % 2500 == 0:
            print("episode: {}, step: {}, reward: {}".format(metrics['episode'], step, metrics['run_reward']))
            game = utils.play_game(env, agent = dqn_epsilon_agent, th = 0.02, render = True, memory = memory)
            writer.add_video("test_game", game['frames'], global_step = step)
Exemplo n.º 27
0
                  clip_grad_value=10)

    print(model)

    for itr in tqdm(range(args.gradient_steps)):
        try:
            batch = next(train_loader_iterator)
        except StopIteration:
            train_loader_iterator = iter(train_loader)
            batch = next(train_loader_iterator)
        batch = batch.to(device)
        batch_size, seq_len, *_ = batch.size()
        batch = batch.view(batch_size, seq_len, -1)
        batch = batch.transpose(0, 1)

        loss = model.train({"x": batch})
        writer.add_scalar('train_loss', loss, itr)

        with torch.no_grad():
            if itr % log_interval_num == 0:
                test_pred = model.pred(test_batch)
                # batch is already (seq_len, batch_size, -1) after the
                # transpose above, so it can be passed directly
                test_loss = model.test({"x": batch})

                writer.add_scalar('test_loss', test_loss, itr)
                writer.add_video('test_pred', test_pred.transpose(0, 1), itr)
                writer.add_video('test_ground_truth',
                                 test_batch.transpose(0, 1), itr)

    writer.close()
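
The try/except above is one way to cycle a finite DataLoader for a fixed number of gradient steps; a small generator (the same pattern as `get_training_batch` earlier on this page) achieves the same without re-creating the iterator by hand:

def infinite_batches(loader):
    # endlessly re-iterate a DataLoader
    while True:
        for batch in loader:
            yield batch

# usage inside the loop: batch = next(batches)
# batches = infinite_batches(train_loader)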
Exemplo n.º 28
0
class Logger:
    def __init__(self, log_dir, n_logged_samples=10, summary_writer=None):
        self._log_dir = log_dir
        print('########################')
        print('logging outputs to ', log_dir)
        print('########################')
        self._n_logged_samples = n_logged_samples
        self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1)

    def log_scalar(self, scalar, name, step_):
        self._summ_writer.add_scalar('{}'.format(name), scalar, step_)

    def log_scalars(self, scalar_dict, group_name, step, phase):
        """Will log all scalars in the same plot."""
        self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step)

    def log_image(self, image, name, step):
        assert(len(image.shape) == 3)  # [C, H, W]
        self._summ_writer.add_image('{}'.format(name), image, step)

    def log_video(self, video_frames, name, step, fps=10):
        assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!"
        self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps)

    def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'):
        print('***************logging video********************')
        # reshape the rollouts
        videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths]

        # max rollout length
        max_videos_to_save = np.min([max_videos_to_save, len(videos)])
        max_length = videos[0].shape[0]
        for i in range(max_videos_to_save):
            if videos[i].shape[0]>max_length:
                max_length = videos[i].shape[0]

        # pad rollouts to all be same length
        for i in range(max_videos_to_save):
            if videos[i].shape[0]<max_length:
                padding = np.tile([videos[i][-1]], (max_length-videos[i].shape[0],1,1,1))
                videos[i] = np.concatenate([videos[i], padding], 0)

        # log videos to tensorboard event file
        videos = np.stack(videos[:max_videos_to_save], 0)
        self.log_video(videos, video_title, step, fps=fps)

    def log_figures(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        assert figure.shape[0] > 0, "Figure logging requires input shape [batch x figures]!"
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)

    def log_figure(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)

    def log_graph(self, array, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        im = plot_graph(array)
        self._summ_writer.add_image('{}_{}'.format(name, phase), im, step)

    def dump_scalars(self, log_path=None):
        log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path
        self._summ_writer.export_scalars_to_json(log_path)

    def flush(self):
        self._summ_writer.flush()
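
A usage sketch for `log_paths_as_videos` above, assuming each rollout stores `image_obs` as a (T, H, W, C) uint8 array (dummy data below; rollouts of different lengths are padded to the longest one):

import numpy as np

logger = Logger("runs/videos")
paths = [{"image_obs": np.random.randint(0, 255, (t, 64, 64, 3), dtype=np.uint8)}
         for t in (30, 45)]
logger.log_paths_as_videos(paths, step=0, max_videos_to_save=2, fps=10)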
Exemplo n.º 29
0
class Logger:
    def __init__(self, log_dir, n_logged_samples=10, summary_writer=None):
        self._log_dir = log_dir
        print('########################')
        print('logging outputs to ', log_dir)
        print('########################')
        self._n_logged_samples = n_logged_samples
        self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1)

    def log_scalar(self, scalar, name, step_):
        self._summ_writer.add_scalar('{}'.format(name), scalar, step_)

    def log_scalars(self, scalar_dict, group_name, step, phase):
        """Will log all scalars in the same plot."""
        self._summ_writer.add_scalars('{}_{}'.format(group_name, phase),
                                      scalar_dict, step)

    def log_image(self, image, name, step):
        assert (len(image.shape) == 3)  # [C, H, W]
        self._summ_writer.add_image('{}'.format(name), image, step)

    def log_video(self, video_frames, name, step, fps=10):
        assert len(
            video_frames.shape
        ) == 5, "Need [N, T, C, H, W] input tensor for video logging!"
        self._summ_writer.add_video('{}'.format(name),
                                    video_frames,
                                    step,
                                    fps=fps)

    def log_paths_as_videos(self,
                            paths,
                            step,
                            max_videos_to_save=2,
                            fps=10,
                            video_title='video'):

        # reshape the rollouts
        videos = [p['image_obs'] for p in paths]

        # max rollout length
        max_videos_to_save = np.min([max_videos_to_save, len(videos)])
        max_length = videos[0].shape[0]
        for i in range(max_videos_to_save):
            if videos[i].shape[0] > max_length:
                max_length = videos[i].shape[0]

        # pad rollouts to all be same length
        for i in range(max_videos_to_save):
            if videos[i].shape[0] < max_length:
                padding = np.tile([videos[i][-1]],
                                  (max_length - videos[i].shape[0], 1, 1, 1))
                videos[i] = np.concatenate([videos[i], padding], 0)

            clip = mpy.ImageSequenceClip(list(videos[i]), fps=fps)
            txt_clip = (mpy.TextClip(video_title, fontsize=30,
                                     color='white').set_position(
                                         'top', 'center').set_duration(10))

            video = mpy.CompositeVideoClip([clip, txt_clip])
            new_video_title = video_title + '{}_{}'.format(step, i) + '.mp4'
            filename = os.path.join(self._log_dir, new_video_title)
            video.write_videofile(filename, fps=fps)

    def log_figures(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        assert figure.shape[
            0] > 0, "Figure logging requires input shape [batch x figures]!"
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)

    def log_figure(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)

    def log_graph(self, array, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        im = plot_graph(array)
        self._summ_writer.add_image('{}_{}'.format(name, phase), im, step)

    def dump_scalars(self, log_path=None):
        log_path = os.path.join(
            self._log_dir,
            "scalar_data.json") if log_path is None else log_path
        self._summ_writer.export_scalars_to_json(log_path)

    def flush(self):
        self._summ_writer.flush()
Exemplo n.º 30
0
    elif args.dataset == 'K400':
        train_dataset = K400Dataset_train('data/K400', args.cl, args.split, True, train_transforms)
        val_dataset = K400Dataset_val('data/K400', args.cl, args.split, True, train_transforms)

    # split val for 800 videos
    #train_dataset, val_dataset = random_split(train_dataset, (len(train_dataset)-val_size, val_size))
    print('TRAIN video number: {}, VAL video number: {}.'.format(len(train_dataset), len(val_dataset)))
    train_dataloader = DataLoader(train_dataset, batch_size=args.bs, shuffle=True,
                                num_workers=args.workers, pin_memory=True)
    val_dataloader = DataLoader(val_dataset, batch_size=args.bs, shuffle=False,
                                num_workers=args.workers, pin_memory=True)

    # save graph and clips_order samples
    for data in train_dataloader:
        clips, idxs = data
        writer.add_video('train/clips', clips, 0, fps=8)
        writer.add_text('train/idxs', str(idxs.tolist()), 0)
        clips = clips.to(device)
        #writer.add_graph(model, clips)
        break
    # save init params at step 0
    for name, param in model.named_parameters():
        writer.add_histogram('params/{}'.format(name), param, 0)

    ### loss function, optimizer and scheduler ###
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD([
        {'params': [param for name, param in model.named_parameters() if 'linear' not in name and 'conv5' not in name and 'conv4' not in name]},
        {'params': [param for name, param in model.named_parameters() if 'linear' in name or 'conv5' in name or 'conv4' in name], 'lr': args.ft_lr}],
        lr=args.lr, momentum=args.momentum, weight_decay=args.wd)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', min_lr=1e-5, patience=50, factor=0.1)
Exemplo n.º 31
0
class Logger:
    def __init__(self, log_dir, n_logged_samples=3, summary_writer=None):
        self._log_dir = log_dir
        self._n_logged_samples = n_logged_samples
        if summary_writer is not None:
            self._summ_writer = summary_writer
        else:
            self._summ_writer = SummaryWriter(log_dir)

    def log_scalar(self, scalar, name, step, phase):
        self._summ_writer.add_scalar('{}_{}'.format(name, phase), scalar, step)

    def log_scalars(self, scalar_dict, group_name, step, phase):
        """Will log all scalars in the same plot."""
        self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step)

    def log_images(self, image, name, step, phase):
        image = self._format_input(image)
        self._check_size(image, 4)   # [N, C, H, W]
        self._loop_batch(self._summ_writer.add_image, '{}_{}'.format(name, phase), image, step)

    def log_gif(self, gif_frames, name, step, phase):
        if isinstance(gif_frames, list): gif_frames = np.concatenate(gif_frames)
        gif_frames = self._format_input(gif_frames)
        assert len(gif_frames.shape) == 4, "Need [T, C, H, W] input tensor for single video logging!"
        gif_frames = gif_frames.unsqueeze(0)    # add an extra dimension to get grid of size 1
        self._summ_writer.add_video('{}_{}'.format(name, phase), gif_frames, step, fps=10)
        
    def log_graph(self, array, name, step, phase):
        """array gets plotted with plt.plot"""
        im = torch.tensor(plot_graph(array).transpose(2, 0, 1))
        self._summ_writer.add_image('{}_{}'.format(name, phase), im, step)

    def dump_scalars(self, log_path=None):
        log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path
        self._summ_writer.export_scalars_to_json(log_path)

    def _loop_batch(self, fn, name, val, *argv, **kwargs):
        """Loops the logging function n times."""
        for log_idx in range(min(self._n_logged_samples, len(val))):
            name_i = os.path.join(name, "_%d" % log_idx)
            fn(name_i, val[log_idx], *argv, **kwargs)

    def visualize(self, *args, **kwargs):
        """Subclasses can implement this method to visualize training results."""
        pass

    @staticmethod
    def _check_size(val, size):
        if isinstance(val, torch.Tensor) or isinstance(val, np.ndarray):
            assert len(val.shape) == size, "Size of tensor does not fit required size, {} vs {}".format(len(val.shape),
                                                                                                        size)
        elif isinstance(val, list):
            assert len(val[0].shape) == size - 1, "Size of list element does not fit required size, {} vs {}".format(
                len(val[0].shape), size - 1)
        else:
            raise NotImplementedError("Input type {} not supported for dimensionality check!".format(type(val)))
        if (val[0].shape[1] > 10000) or (val[0].shape[2] > 10000):
            raise ValueError("Image too large to log: {}px on the longest side.".format(
                max(val[0].shape[1], val[0].shape[2])))

    @staticmethod
    def _format_input(arr):
        if not isinstance(arr, torch.Tensor): arr = torch.tensor(arr)
        if not (arr.shape[1] == 3 or arr.shape[1] == 1): arr = arr.permute(0, 3, 1, 2)
        arr = arr.float()
        return arr

    def __del__(self):
        self._summ_writer.close()
        print("Closed summary writer.")