Example #1
def write_weight_statistics(writer: torch.utils.tensorboard.SummaryWriter,
                            module: torch.nn.Module, epoch: int):
    # Log the norm, mean absolute value and a histogram for every parameter of the model
    for weight_name, w in module.state_dict().items():
        norm = w.norm().item()
        writer.add_scalar(f'Norm/{weight_name}', norm, epoch)
        avg = w.abs().mean().item()
        writer.add_scalar(f'avg/{weight_name}', avg, epoch)
        writer.add_histogram(f'hist/{weight_name}', w, epoch)
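A minimal usage sketch for the helper above; the model, log directory and epoch count are illustrative assumptions rather than part of the original snippet:

import torch
from torch.utils.tensorboard import SummaryWriter

# Hypothetical setup: any nn.Module and any log directory will do.
model = torch.nn.Linear(10, 2)
writer = SummaryWriter(log_dir='runs/weight_stats_demo')

for epoch in range(3):
    # ... a real training step would go here ...
    write_weight_statistics(writer, model, epoch)

writer.close()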
Example #2
def log_results(writer: torch.utils.tensorboard.SummaryWriter,
                accuracy_train: float, loss_train: float,
                accuracy_validation: float, loss_validation: float,
                epoch: int):
    """ Log accuracies and losses for training and validation in given epoch.

    Arguments
    ----------
    writer : torch.utils.tensorboard.SummaryWriter
         Entry to log data for consumption and visualization by TensorBoard
    accuracy_train : float
        Training accuracy
    loss_train : float
        Training loss
    accuracy_validation : float
        Validation accuracy
    loss_validation : float
        Validation loss
    epoch : int
        Number of epochs, where above results were obtained
    """
    writer.add_scalar('Train/Accuracy', accuracy_train, epoch)
    writer.add_scalar('Train/Loss', loss_train, epoch)
    writer.add_scalar('Validation/Accuracy', accuracy_validation, epoch)
    writer.add_scalar('Validation/Loss', loss_validation, epoch)
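TensorBoard can also overlay the training and validation curves on a single chart via add_scalars(); a minimal alternative sketch (the wrapper name and main tags are illustrative, not part of the original):

def log_results_grouped(writer: torch.utils.tensorboard.SummaryWriter,
                        accuracy_train: float, loss_train: float,
                        accuracy_validation: float, loss_validation: float,
                        epoch: int):
    # add_scalars() draws both series on one chart under a shared main tag.
    writer.add_scalars('Accuracy', {'train': accuracy_train,
                                    'validation': accuracy_validation}, epoch)
    writer.add_scalars('Loss', {'train': loss_train,
                                'validation': loss_validation}, epoch)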
Example #3
def train_pcoders(net: torch.nn.Module,
                  optimizer: torch.optim.Optimizer,
                  loss_function: Callable,
                  epoch: int,
                  train_loader: torch.utils.data.DataLoader,
                  device: str,
                  writer: torch.utils.tensorboard.SummaryWriter = None):
    r"""
    Trains the feedback modules of PCoders using a distance between the prediction of a PCoder and the
    representation of the PCoder below.

    Args:
        net (torch.nn.Module): Predified network including all the PCoders
        optimizer (torch.optim.Optimizer): PyTorch-compatible optimizer object
        loss_function (Callable): A callable function that receives two tensors and returns the distance between them
        epoch (int): Training epoch number
        train_loader (torch.utils.data.DataLoader): DataLoader for training samples
        device (str): Training device (e.g. 'cpu', 'cuda:0')
        writer (torch.utils.tensorboard.SummaryWriter, optional): TensorBoard summary writer to track training history. Default: None
    """
    
    net.train()
    net.backbone.eval()

    nb_trained_samples = 0
    for batch_index, (images, _) in enumerate(train_loader):
        net.reset()
        images = images.to(device)
        optimizer.zero_grad()
        outputs = net(images)
        for i in range(net.number_of_pcoders):
            if i == 0:
                a = loss_function(net.pcoder1.prd, images)
                loss = a
            else:
                pcoder_pre = getattr(net, f"pcoder{i}")
                pcoder_curr = getattr(net, f"pcoder{i+1}")
                a = loss_function(pcoder_curr.prd, pcoder_pre.rep)
                loss += a
            if writer is not None:
                writer.add_scalar(f"MSE Train/PCoder{i+1}", a.item(), (epoch-1) * len(train_loader) + batch_index)
        
        nb_trained_samples += images.shape[0]

        loss.backward()
        optimizer.step()

        print(f'Training Epoch: {epoch} '
              f'[{nb_trained_samples}/{len(train_loader.dataset)}]'
              f'\tLoss: {loss.item():0.4f}')
        if writer is not None:
            writer.add_scalar(f"MSE Train/Sum", loss.item(), (epoch-1) * len(train_loader) + batch_index)
Example #4
def eval_pcoders(net: torch.nn.Module,
                 loss_function: Callable,
                 epoch: int,
                 eval_loader: torch.utils.data.DataLoader,
                 device: str,
                 writer: torch.utils.tensorboard.SummaryWriter = None):
    r"""
    Evaluates the feedback modules of PCoders using a distance between the prediction of a PCoder and the
    representation of the PCoder below.

    Args:
        net (torch.nn.Module): Predified network including all the PCoders
        loss_function (Callable): A callable function that receives two tensors and returns the distance between them
        epoch (int): Evaluation epoch number
        eval_loader (torch.utils.data.DataLoader): DataLoader for evaluation samples
        device (str): Evaluation device (e.g. 'cpu', 'cuda:0')
        writer (torch.utils.tensorboard.SummaryWriter, optional): TensorBoard summary writer to track evaluation history. Default: None
    """

    net.eval()

    final_loss = [0 for i in range(net.number_of_pcoders)]
    for batch_index, (images, _) in enumerate(eval_loader):
        net.reset()
        images = images.to(device)
        with torch.no_grad():
            outputs = net(images)
        for i in range(net.number_of_pcoders):
            if i == 0:
                final_loss[i] += loss_function(net.pcoder1.prd, images).item()
            else:
                pcoder_pre = getattr(net, f"pcoder{i}")
                pcoder_curr = getattr(net, f"pcoder{i+1}")
                final_loss[i] += loss_function(pcoder_curr.prd, pcoder_pre.rep).item()
    
    loss_sum = 0
    for i in range(net.number_of_pcoders):
        final_loss[i] /= len(eval_loader)
        loss_sum += final_loss[i]
        if writer is not None:
            writer.add_scalar(f"MSE Eval/PCoder{i+1}", final_loss[i], epoch-1)
            
            
    print(f'Evaluation Epoch: {epoch} '
          f'[{len(eval_loader.dataset)}/{len(eval_loader.dataset)}]'
          f'\tLoss: {loss_sum:0.4f}')
    if writer is not None:
        writer.add_scalar(f"MSE Eval/Sum", loss_sum, epoch-1)
Example #5
def run_environment(args: argparse.Namespace,
                    device: str = 'cpu',
                    logger: torch.utils.tensorboard.SummaryWriter = None):

    # ==
    # Set up environment
    env = gym.make(args.env_name)
    env = MiniGridFlatWrapper(env,
                              use_tensor=False,
                              scale_observation=True,
                              scale_min=0,
                              scale_max=10)

    # ==
    # Set up agent
    agent = init_agent(args, env, device=device)

    # ==
    # Start training
    print(f'Starting training, {args.num_episode} episodes')
    for episode_idx in range(args.num_episode):
        # Reset environment and agent
        observation = env.reset()
        action = agent.begin_episode(observation)

        # Counters
        cumu_reward = 0.0
        timestep = 0

        # (optional) Record video
        video = None
        max_vid_len = 200
        if args.video_freq is not None:
            if episode_idx % int(args.video_freq) == 0:
                # Render first frame and insert to video array
                frame = env.render()
                video = np.zeros(shape=((max_vid_len, ) + frame.shape),
                                 dtype=np.uint8)  # (max_vid_len, C, W, H)
                video[0] = frame

        while True:
            # ==
            # Interact with environment
            observation, reward, done, info = env.step(action)
            action = agent.step(observation, reward, done)

            # ==
            # Counters
            cumu_reward += reward
            timestep += 1

            # ==
            # Optional video
            if video is not None:
                if timestep < max_vid_len:
                    video[timestep] = env.render()

            # ==
            # Episode done
            if done:
                # Logging
                if logger is not None:
                    # Add reward
                    logger.add_scalar('Reward',
                                      cumu_reward,
                                      global_step=episode_idx)
                    # Optionally add video
                    if video is not None:
                        # Determine last frame
                        last_frame_idx = timestep + 2
                        if last_frame_idx > max_vid_len:
                            last_frame_idx = max_vid_len

                        # Change to tensor
                        vid_tensor = torch.tensor(
                            video[:last_frame_idx, :, :, :], dtype=torch.uint8)
                        vid_tensor = vid_tensor.unsqueeze(0)

                        # Add to tensorboard
                        logger.add_video('Run_Video',
                                         vid_tensor,
                                         global_step=episode_idx,
                                         fps=8)

                    # Occasional print
                    if episode_idx % 100 == 0:
                        print(
                            f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}'
                        )

                else:
                    print(
                        f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}'
                    )

                # Agent logging TODO: not sure if this is the best practice
                agent.report(logger=logger, episode_idx=episode_idx)
                break

            # TODO: have some debugging print-out (e.g. every 100 episode) to make sure times and
            # things are good and training is happening

    env.close()
    if logger is not None:
        logger.close()
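A minimal driver sketch for the function above, assuming the attribute names it reads from args (env_name, num_episode, video_freq, log_dir); the values are illustrative and init_agent may require further fields:

import argparse
from torch.utils.tensorboard import SummaryWriter

# Illustrative arguments; only the attributes read directly by run_environment are set.
args = argparse.Namespace(
    env_name='MiniGrid-Empty-5x5-v0',   # any MiniGrid environment id
    num_episode=500,
    video_freq=100,                     # record a video every 100 episodes
    log_dir='runs/minigrid_demo',
)

logger = SummaryWriter(log_dir=args.log_dir) if args.log_dir is not None else None
run_environment(args, device='cpu', logger=logger)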
Example #6
def run_environment(args: argparse.Namespace,
                    logger: torch.utils.tensorboard.SummaryWriter = None):
    # ==
    # Initialize environment and agent
    env, agent = _init_env_agent(args)

    # ==
    # Save the transition matrix for later comparison
    env_trans = env.get_transition_matrix()

    # ==
    # Start training
    print(f'Start training for {args.num_episode} episodes')
    for episode_idx in range(args.num_episode):
        # Reset counter variables
        cumulative_reward = 0.0
        steps = 0

        # Reset environment and agent
        observation = env.reset()
        action = agent.begin_episode(observation)

        # ==
        # Run episode
        while True:
            # Interaction
            observation, reward, done, info = env.step(action)
            action = agent.step(observation, reward, done)

            # Counter variables
            cumulative_reward += reward
            steps += 1

            # TODO: need some way of recording the *recent* state occupancy
            # to evaluate the agent behaviour

            # ==
            # If done
            if done or steps >= args.max_steps:
                # ==
                # Compute the transition estimation error only periodically
                # (it can be expensive); log it only when freshly computed
                t_err = None
                if episode_idx % 100 == 0:
                    t_err = get_transition_estimation_error(env, agent)

                # ==
                # Log
                if logger is None:
                    print(episode_idx, steps, cumulative_reward)
                else:
                    logger.add_scalar('Cumulative_reward', cumulative_reward,
                                      global_step=episode_idx)
                    logger.add_scalar('Steps', steps,
                                      global_step=episode_idx)
                    if t_err is not None:
                        logger.add_scalar('Trans_l2_error', t_err,
                                          global_step=episode_idx)

                    if episode_idx % 100 == 0:
                        print(episode_idx, steps, cumulative_reward)

                # Agent self-report
                agent.report(logger=logger, episode_idx=episode_idx)

                break

    env.close()
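A matching driver sketch for this variant; only num_episode and max_steps are read directly by the loop above, while _init_env_agent (not shown) determines any further required fields, so the namespace below is illustrative:

import argparse
from torch.utils.tensorboard import SummaryWriter

# Illustrative arguments; _init_env_agent may require additional fields.
args = argparse.Namespace(num_episode=1000, max_steps=200)

logger = SummaryWriter(log_dir='runs/transition_agent_demo')
run_environment(args, logger=logger)
logger.close()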
Example #7
    def forward(self,
                x,
                opt: optim.Optimizer,
                step,
                summary_writer: torch.utils.tensorboard.SummaryWriter = None,
                sample_gpu=None):
        """
        train inside forward
        """
        opt.zero_grad()
        batch_size, num_pts = x.shape[:2]
        z_mu, z_sigma = self.encoder(x)
        # Compute Q(z|X) and entropy H{Q(z|X)}
        if self.use_deterministic_encoder:
            z = z_mu + 0 * z_sigma  # ? why, the original code added this 0 multiplier
            entropy = torch.zeros(batch_size).to(z)
        else:
            z = self.reparametrized_gaussian(z_mu, z_sigma)
            entropy = self.gaussian_entropy(z_sigma)

        # Compute prior P(z)
        if self.use_latent_flow:
            w, dlog_pw = self.latentCNF(z, None,
                                        torch.zeros(batch_size, 1).to(z))
            log_pw = standard_normal_logp(w).view(batch_size,
                                                  -1).sum(dim=1, keepdim=True)
            dlog_pw = dlog_pw.view(batch_size, 1).to(z)
            log_pz = log_pw - dlog_pw
        else:
            log_pz = torch.zeros(batch_size, 1).to(z)

        # Compute recon. P(X|z)
        z_new = z.view(z.shape) + (log_pz * 0.).mean()  # ? why
        y, dlog_py = self.pointCNF(x, z_new,
                                   torch.zeros(batch_size, num_pts, 1).to(x))
        log_py = standard_normal_logp(y).view(batch_size, -1).sum(dim=1,
                                                                  keepdim=True)
        dlog_py = dlog_py.view(batch_size, num_pts, 1).to(x)
        log_px = log_py - dlog_py

        # Loss
        entropy_loss = -entropy.mean() * self.entropy_w
        recon_loss = -log_px.mean() * self.recon_w
        prior_loss = -log_pz.mean() * self.prior_w
        loss = entropy_loss + recon_loss + prior_loss
        loss.backward()
        opt.step()

        # Write logs
        if self.distributed:
            raise NotImplementedError("Distributed training not implemented!")
        else:
            entropy_log = entropy.mean()
            recon_log = -log_px.mean()
            prior_log = -log_pz.mean()

        recon_nats = recon_log / float(x.size(1) * x.size(2))
        prior_nats = prior_log / float(self.fz)

        # reconstruct to save
        with torch.no_grad():
            recon_pc = self.reconstruct(x, truncate_std=True)
            recon_im = visualize(recon_pc,
                                 path='/home/tmp/screenshot.png',
                                 samples=1)

        # sample to save
        if self.use_latent_flow:
            with torch.no_grad():
                sample_pc = self.sample(1, 1024, gpu=sample_gpu)
                sample_im = visualize(sample_pc,
                                      samples=1,
                                      path='/home/tmp/screenshot.png')

        record_dict = {
            'train/entropy': (entropy_log.cpu().detach().item()
                              if not isinstance(entropy_log, float)
                              else entropy_log),
            'train/prior': prior_log,
            'train/recon': recon_log,
            'train/recon-nats': recon_nats,
            'train/prior-nats': prior_nats,
            # 'train/sample-reconstructed': recon_pc
        }

        if summary_writer is not None:
            for key, value in record_dict.items():
                summary_writer.add_scalar(key, value, step)

        record_dict['train/sample-reconstructed'] = recon_im
        if summary_writer is not None:
            summary_writer.add_images('train/sample-reconstructed',
                                      recon_im,
                                      step,
                                      dataformats='NHWC')
        # sample_im only exists when the latent flow was used to draw samples
        if self.use_latent_flow:
            record_dict['train/sample-sampled'] = sample_im
            if summary_writer is not None:
                summary_writer.add_images('train/sample-sampled',
                                          sample_im,
                                          step,
                                          dataformats='NHWC')
        return record_dict
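A minimal sketch of a training loop driving the forward() above, assuming model is an instance of the surrounding (not shown) class and loader yields point-cloud batches of shape (batch, num_points, 3); every name below is illustrative:

import torch
from torch.utils.tensorboard import SummaryWriter

# Hypothetical loop: `model` defines the forward() above, `loader` yields point clouds.
opt = torch.optim.Adam(model.parameters(), lr=2e-3)
writer = SummaryWriter(log_dir='runs/pointflow_demo')

global_step = 0
for epoch in range(10):
    for x in loader:
        record = model(x, opt, global_step, summary_writer=writer)
        global_step += 1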
Example #8
def run_environment(config: configparser.ConfigParser,
                    device: str = 'cpu',
                    logger: torch.utils.tensorboard.SummaryWriter = None):
    # =========
    # Set up environment
    config_env_name = config['Training']['env_name']
    config_seed = config['Training'].getint('seed')
    env = gym.make(config_env_name)
    env = MiniGridFlatWrapper(env,
                              use_tensor=False,
                              scale_observation=False,
                              scale_min=0,
                              scale_max=10)
    env.seed(config_seed)

    # =========
    # Set up agent
    agent = init_agent(config, env, device=device)

    # =========
    # Start training

    # Extract training variables
    config_num_episodes = config['Training'].getint('num_episode')
    config_record_video = config['Video'].getboolean('record')
    config_video_freq = config['Video'].getint('frequency')
    config_video_maxlen = config['Video'].getint('max_length')
    config_video_fps = config['Video'].getint('fps')

    # Train
    print(f'Starting training, {config_num_episodes} episodes')
    for episode_idx in range(config_num_episodes):
        # ==
        # Reset environment and agent
        observation = env.reset()
        action = agent.begin_episode(observation)

        # Counters
        cumu_reward = 0.0
        timestep = 0

        # ==
        # (optional) Record video
        video = None
        if config_record_video:
            if episode_idx % int(config_video_freq) == 0:
                # Render first frame and insert to video array
                frame = env.render()
                video = np.zeros(shape=((config_video_maxlen, ) + frame.shape),
                                 dtype=np.uint8)  # (max_vid_len, C, W, H)
                video[0] = frame

        # ==
        # Run episode
        while True:
            # ==
            # Interact with environment
            observation, reward, done, info = env.step(action)
            action = agent.step(observation, reward, done)

            # ==
            # Counters
            cumu_reward += reward
            timestep += 1

            # ==
            # Optional video
            if video is not None:
                if timestep < config_video_maxlen:
                    video[timestep] = env.render()

            # ==
            # Episode done
            if done:
                # Logging
                if logger is not None:
                    # Add reward
                    logger.add_scalar('Reward',
                                      cumu_reward,
                                      global_step=episode_idx)
                    # Optionally add video
                    if video is not None:
                        # Determine last frame
                        last_frame_idx = timestep + 2
                        if last_frame_idx > config_video_maxlen:
                            last_frame_idx = config_video_maxlen

                        # Change to tensor
                        vid_tensor = torch.tensor(
                            video[:last_frame_idx, :, :, :], dtype=torch.uint8)
                        vid_tensor = vid_tensor.unsqueeze(0)

                        # Add to tensorboard
                        logger.add_video('Run_Video',
                                         vid_tensor,
                                         global_step=episode_idx,
                                         fps=config_video_fps)

                    # Occasional print
                    if episode_idx % 100 == 0:
                        print(
                            f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}'
                        )

                else:
                    print(
                        f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}'
                    )

                # Agent logging TODO: not sure if this is the best practice
                agent.report(logger=logger, episode_idx=episode_idx)
                break

            # TODO: have some debugging print-out (e.g. every 100 episode) to make sure times and
            # things are good and training is happening

    env.close()
    if logger is not None:
        logger.close()
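The configparser-based variant above expects [Training] and [Video] sections; a minimal sketch of a matching configuration built in code (values are illustrative, and init_agent may read keys not listed here):

import configparser
from torch.utils.tensorboard import SummaryWriter

# Illustrative configuration covering the keys read directly by run_environment.
config = configparser.ConfigParser()
config['Training'] = {'env_name': 'MiniGrid-Empty-5x5-v0',
                      'seed': '42',
                      'num_episode': '500'}
config['Video'] = {'record': 'yes',
                   'frequency': '100',
                   'max_length': '200',
                   'fps': '8'}

logger = SummaryWriter(log_dir='runs/minigrid_config_demo')
run_environment(config, device='cpu', logger=logger)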