Example #1
    def after_epoch(self):
        self.learner.final_record = self.log[1:].copy()
        self.values.append(self.learner.final_record)

        self.log.append(format_time(time.time() - self.start_epoch))
        self.logger(self.log)
        self.iters.append(self.smooth_loss.count)
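The examples here all rely on fastprogress's format_time helper to render the epoch duration. A minimal sketch of its behavior (this mirrors the fastprogress.core implementation: seconds in, "(h):mm:ss" out):

 def format_time(t):
     "Format `t` (in seconds) to (h):mm:ss, e.g. 73.4 -> '01:13'."
     t = int(t)
     h, m, s = t // 3600, (t // 60) % 60, t % 60
     return f'{h}:{m:02d}:{s:02d}' if h != 0 else f'{m:02d}:{s:02d}'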
Example #2
 def after_epoch(self):
     stats = [str(self.epoch)]
     for o in [self.train_metrics, self.valid_metrics]:
         for m in o:
             stats += [f"{m.value:.6f}"]
     stats += [format_time(time.time() - self.start_time)]
     self.logger(stats)
Example #3
 def after_epoch(self):
     stats = [str(self.epoch)]
     for o in [self.train_stats, self.valid_stats]:
         # store the stats in a list, then pass them to the logger
         stats += [f'{v:.6f}' for v in o.avg_stats]
     stats += [format_time(time.time() - self.start_time)]
     self.logger(stats)
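Here train_stats and valid_stats are accumulator objects exposing an avg_stats property, in the style of the fastai course's AvgStats. A minimal sketch of such an accumulator (the method names and signature are assumptions, not the original class):

 class AvgStats:
     "Accumulate loss and metric totals over an epoch; expose per-item means."
     def __init__(self, metrics): self.metrics = metrics
     def reset(self): self.tot_loss, self.count, self.tot_mets = 0., 0, [0.] * len(self.metrics)
     def accumulate(self, loss, mets, bs):
         # weight each batch's contribution by its size `bs`
         self.tot_loss += loss * bs; self.count += bs
         self.tot_mets = [t + m * bs for t, m in zip(self.tot_mets, mets)]
     @property
     def avg_stats(self): return [self.tot_loss / self.count] + [t / self.count for t in self.tot_mets]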
Example #4
 def format_stats(self, stats:TensorOrNumList)->None:
     "Format stats before printing."
     str_stats = []
     for name,stat in zip(self.names,stats):
         str_stats.append('#na#' if stat is None else str(stat) if isinstance(stat, int) else f'{stat:.6f}')
     if self.add_time: str_stats.append(format_time(time() - self.start_epoch))
     if not self.silent: self.pbar.write(str_stats, table=True)
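The nested conditional above renders ints (like epoch numbers) verbatim, missing values as '#na#', and everything else to six decimal places; a quick illustration:

 fmt = lambda s: '#na#' if s is None else str(s) if isinstance(s, int) else f'{s:.6f}'
 fmt(3), fmt(None), fmt(0.123456789)  # -> ('3', '#na#', '0.123457')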
Example #5
 def after_epoch(self):
     "Store and log the loss/metric values"
     self.learn.final_record = self.log[1:].copy()
     self.values.append(self.learn.final_record)
     if self.add_time: self.log.append(format_time(time.time() - self.start_epoch))
     self.logger(self.log)
     self.iters.append(self.smooth_loss.count)
Example #6
 def after_epoch(self):
     "Store and log the loss/metric values"
     self.learn.final_record = self.log[1:].copy()  # first element is epoch
     self.values.append(self.learn.final_record)
     if self.show_time:
         self.log.append(format_time(time.time() - self.epoch_start_time))
     self.logger(self.log)
     self.iters.append(self.train_loss.count)
Example #7
 def on_epoch_end(self, epoch: int, smooth_loss: Tensor, last_metrics: MetricsList, **kwargs: Any) -> bool:
     "Add a line with `epoch` number, `smooth_loss` and `last_metrics`."
     last_metrics = ifnone(last_metrics, [])
     stats = [str(stat) if isinstance(stat, int) else '#na#' if stat is None else f'{stat:.6f}'
              for name, stat in zip(self.learn.recorder.names, [epoch, smooth_loss] + last_metrics)]
     if self.add_time: stats.append(format_time(time() - self.start_epoch))
     str_stats = ','.join(stats)
     self.file.write(str_stats + '\n')
Example #8
 def _format_stats(self, stats: TensorOrNumList) -> None:
     """Format stats before printing. Note, this does the same thing as Recorder's"""
     str_stats = []
     for name, stat in zip(self.names, stats):
         str_stats.append("#na#" if stat is None else str(stat)
                          if isinstance(stat, int) else f"{stat:.6f}")
     str_stats.append(format_time(time() - self.start_epoch))
     self.pbar.write(str_stats, table=True)
Example #9
 def after_epoch(self, e: Event):
     #print(self.train_stats)
     #print(self.valid_stats)
     stats = [str(e.learn.epoch)]
     for o in [self.train_stats, self.valid_stats]:
         stats += [f'{v:.6f}' for v in o.avg_stats]
     stats += [format_time(time.time() - self.start_time)]
     e.learn.logger(stats)
Example #10
    def fit(self, num_epochs, args, device='cuda:0'):
        """
        Fit the PyTorch model
        :param num_epochs: number of epochs to train (int)
        :param args: arguments forwarded to `self.optimizer` to build the optimizer and scheduler
        :param device: str (defaults to 'cuda:0')
        """
        optimizer, scheduler, step_scheduler_on_batch = self.optimizer(args)
        self.model = self.model.to(device)
        pbar = master_bar(range(num_epochs))
        headers = [
            'Train_Loss', 'Val_Loss', 'F1-Macro', 'F1-Micro', 'JS', 'Time'
        ]
        pbar.write(headers, table=True)
        for epoch in pbar:
            epoch += 1
            start_time = time.time()
            self.model.train()
            overall_training_loss = 0.0
            for step, batch in enumerate(
                    progress_bar(self.train_data_loader, parent=pbar)):
                loss, num_rows, _, _ = self.model(batch, device)
                overall_training_loss += loss.item() * num_rows

                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=1.0)
                optimizer.step()
                if step_scheduler_on_batch:
                    scheduler.step()
                optimizer.zero_grad()

            if not step_scheduler_on_batch:
                scheduler.step()

            overall_training_loss = overall_training_loss / len(
                self.train_data_loader.dataset)
            overall_val_loss, pred_dict = self.predict(device, pbar)
            y_true, y_pred = pred_dict['y_true'], pred_dict['y_pred']

            str_stats = []
            stats = [
                overall_training_loss, overall_val_loss,
                f1_score(y_true, y_pred, average="macro"),
                f1_score(y_true, y_pred, average="micro"),
                jaccard_score(y_true, y_pred, average="samples")
            ]

            for stat in stats:
                str_stats.append('NA' if stat is None else str(stat)
                                 if isinstance(stat, int) else f'{stat:.4f}')
            str_stats.append(format_time(time.time() - start_time))
            print('epoch#: ', epoch)
            pbar.write(str_stats, table=True)
            self.early_stop(overall_val_loss, self.model)
            if self.early_stop.early_stop:
                print("Early stopping")
                break
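The early_stop callable used above follows the common early-stopping-helper pattern. A sketch under assumed names (patience, counter), not the project's actual class:

 class EarlyStopping:
     "Set `early_stop` once validation loss stops improving for `patience` epochs."
     def __init__(self, patience=3):
         self.patience, self.counter, self.best, self.early_stop = patience, 0, None, False
     def __call__(self, val_loss, model):
         # `model` is accepted so callers can checkpoint the best weights here
         if self.best is None or val_loss < self.best:
             self.best, self.counter = val_loss, 0  # improved: reset the counter
         else:
             self.counter += 1
             if self.counter >= self.patience:
                 self.early_stop = True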
Example #11
 def on_epoch_end(self, epoch: int, smooth_loss: Tensor, last_metrics: MetricsList, **kwargs: Any) -> bool:
     "Add a line with `epoch` number, `smooth_loss` and `last_metrics`."
     last_metrics = ifnone(last_metrics, [])
     stats = [str(stat) if isinstance(stat, int) else '#na#' if stat is None else f'{stat:.6f}'
              for name, stat in zip(self.learn.recorder.names, [epoch, smooth_loss] + last_metrics)]
     if self.add_time: stats.append(format_time(time() - self.start_epoch))
     str_stats = ','.join(stats)
     if not self.gpu: self.file.write(str_stats + '\n')
     self.file.flush()
     os.fsync(self.file.fileno())
Example #12
 def after_epoch(self):
     stats = [str(self.epoch)] # Begin with the epoch's number.
     for o in [self.train_stats, self.valid_stats]:
         # And nicely format all other metrics to be displayed.
         stats += [f'{v:.6f}' for v in o.avg_stats]
     # Also ensure the total duration of the epoch is displayed.
     stats += [format_time(time.time() - self.start_time)]
     # Use Learner's self.logger function to display the metrics.
     # print is the default but we will pass fastprogress'
     # write() function to self.logger.
     self.logger(stats)
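Concretely, routing the table to fastprogress instead of print amounts to swapping the logger, e.g. (a sketch; learn and mbar are assumed to already exist):

 from functools import partial
 learn.logger = partial(mbar.write, table=True)  # stats rows now render as table rows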
Example #13
 def after_epoch(self):
     stats = [str(self.epoch)]
     for o in [self.train_stats, self.valid_stats]:
         stats += [f"{v:.6f}" for v in o.avg_stats]
     stats += [format_time(time.time() - self.start_time)]
     writer.add_scalar("Loss/train", float(stats[1]), int(self.epoch))
     writer.add_scalar("Accuracy/train", float(stats[2]), int(self.epoch))
     writer.add_scalar("Loss/valid", float(stats[3]), int(self.epoch))
     writer.add_scalar("Accuracy/train", float(stats[4]), int(self.epoch))
     writer.add_scalar(
         "Time/epoch", float(self.time_seconds(str(stats[5]))), int(self.epoch)
     )
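This variant assumes a module-level TensorBoard writer; the standard torch.utils.tensorboard setup would be:

 from torch.utils.tensorboard import SummaryWriter
 writer = SummaryWriter(log_dir='runs/exp1')  # the log_dir value here is an assumption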
Example #14
 def on_epoch_end(self, epoch, smooth_loss, last_metrics, **kwargs):
     "Add a line with `epoch` number, `smooth_loss` and `last_metrics`."
     last_metrics = last_metrics if last_metrics is not None else []
     metrics = zip(self.learn.recorder.names,
                   [epoch, smooth_loss] + last_metrics)
     stats = [
         str(stat) if isinstance(stat, int) else
         '#na#' if stat is None else f'{stat:.6f}' for name, stat in metrics
     ]
     if self.add_time:
         stats.append(format_time(time.time() - self.start_epoch))
     str_stats = ','.join(stats)
     with self.path.open('a') as f:
         f.write(str_stats + '\n')
Example #15
    def after_epoch(self):
        try:
            rec = self.learn.recorder

            names = [str(m) for m in rec.metric_names]
            values = [rec.epoch] + [f"{r:.3f}" for r in rec.final_record] + [
                format_time(time() - self.start_epoch_time)
            ]
            if len(names) == len(values) + 1:
                names = names[:1] + names[2:]  # remove train_loss when validating only
            msg = '\n'.join(
                [f"{name}: {value}" for name, value in zip(names, values)])
            msg = self.pre_msg_txt + '\n\n' + msg + '\n-------------\n'

            self.async_send_message(msg)
        except Exception as e:
            warn("Could not deliver message. Error: " + str(e), RuntimeWarning)
Example #16
    def predict(self, device='cuda:0', pbar=None):
        """
        Evaluate the model on a validation set
        :param device: str (defaults to 'cuda:0')
        :param pbar: fast_progress progress bar (defaults to None)
        :returns: None
        """
        self.model.to(device).load_state_dict(torch.load(self.model_path))
        self.model.eval()
        current_size = len(self.test_data_loader.dataset)
        preds_dict = {
            'y_true': np.zeros([current_size, 11]),
            'y_pred': np.zeros([current_size, 11])
        }
        start_time = time.time()
        with torch.no_grad():
            index_dict = 0
            for step, batch in enumerate(
                    progress_bar(self.test_data_loader,
                                 parent=pbar,
                                 leave=(pbar is not None))):
                _, num_rows, y_pred, targets = self.model(batch, device)
                current_index = index_dict
                preds_dict['y_true'][current_index:current_index + num_rows, :] = targets
                preds_dict['y_pred'][current_index:current_index + num_rows, :] = y_pred
                index_dict += num_rows

        y_true, y_pred = preds_dict['y_true'], preds_dict['y_pred']
        str_stats = []
        stats = [
            f1_score(y_true, y_pred, average="macro"),
            f1_score(y_true, y_pred, average="micro"),
            jaccard_score(y_true, y_pred, average="samples")
        ]

        for stat in stats:
            str_stats.append('NA' if stat is None else str(stat) if isinstance(
                stat, int) else f'{stat:.4f}')
        str_stats.append(format_time(time.time() - start_time))
        headers = ['F1-Macro', 'F1-Micro', 'JS', 'Time']
        print(' '.join('{}: {}'.format(*k) for k in zip(headers, str_stats)))
Example #17
    def on_epoch_end(self, epoch: int, smooth_loss: Tensor,
                     last_metrics: MetricsList, **kwargs) -> bool:
        "Add a line with `epoch` number, `smooth_loss` and `last_metrics`."
        msg = ','.join(
            self.learn.recorder.names[:(None if self.add_time else -1)]) + '\n'
        last_metrics = ifnone(last_metrics, [])
        stats = [
            str(stat) if isinstance(stat, int) else
            '#na#' if stat is None else f'{stat:.6f}' for name, stat in zip(
                self.learn.recorder.names, [epoch, smooth_loss] + last_metrics)
        ]
        if self.add_time: stats.append(format_time(time() - self.start_epoch))
        str_stats = ','.join(stats)
        msg = msg + str_stats + '\n'

        try:
            self.bot.send_message(chat_id=self.chat_id, text=msg)
        except Exception as e:
            warn("Could not deliver message. Error: " + str(e), RuntimeWarning)
Example #18
    def after_epoch(self):
        if not getattr(self.learn,'inner_xla',False):
            return # skip if not spawned

        if 'recorder' not in self.learn.cbs.attrgot('name'):
            all_metrics = {
                'train_mets': L([]),
                'valid_mets': L([]),
            }
        else:
            all_metrics = {
                'train_mets': self.recorder._train_mets,
                'valid_mets': self.recorder._valid_mets,
            }
        # send metrics data to sync ranks across spawned processes
        device = self.learn.xla_training.pdevice
        packed_metrics = pack_metrics(all_metrics, device) # convert metrics to tensor list on TPU
        reduced_metrics = xm.all_reduce(xm.REDUCE_SUM, packed_metrics)
        xm.mark_step()
        if xm.is_master_ordinal():
            all_metrics = restore_metrics(reduced_metrics, all_metrics) # convert list to metric objects
            for m in self.recorder._train_mets:
                self.sync_log += _maybe_item(m)

            for m in self.recorder._valid_mets:
                self.sync_log += _maybe_item(m)

            self.learn.final_record = self.sync_log[1:].copy()
            del self.recorder.values[-1] # remove last entry added by recorder
            self.recorder.values.append(self.learn.final_record) # add updated metrics
            if self.recorder.add_time:
                updated_time = (time.time() - self.recorder.start_epoch)
                self.sync_log.append(format_time(updated_time))
            self.recorder.log = self.sync_log
            self._sync_stats_log(self.sync_log) # write_stats to output
            self.learn.logger = self.orig_logger # restore orig logger after skipping recorder.logger(log)
Example #19
 def after_epoch(self):
     "Store and log the loss/metric values"
     self.values.append(self.log[1:].copy())
     if self.add_time: self.log.append(format_time(time.time() - self.start_epoch))
     self.logger(self.log)
Example #20
 def after_epoch(self):
     stats = [str(self.epoch)]
     for o in [self.train_stats, self.valid_stats]:
         stats += [f'{v:.6f}' for v in o.avg_stats]
     stats += [format_time(time.time() - self.start_time)]
     self.logger(stats)
Example #21
    def fit(self,
            num_epochs,
            scheduler,
            step_scheduler_on_batch,
            loss_function,
            optimizer,
            device='cuda:0',
            best_model=False):
        """
        Fit the PyTorch model

        Parameters
        ----------
        num_epochs: int
            number of epochs to train
        scheduler: torch.optim.lr_scheduler
            PyTorch learning rate scheduler
        step_scheduler_on_batch: bool
            flag of whether to step scheduler on batch (if True) or on epoch (if False)
        loss_function: function
            function to calculate loss with in model
        optimizer: torch.optim
            PyTorch optimizer
        device: str (defaults to 'cuda:0')
            device to run calculations on
        best_model: bool (defaults to `False`)
            flag to save best model from a single `fit` training run based on validation loss
            The default is `False`, which will keep the final model from the training run.
            `True` will keep the best model from the training run instead of the model
            from the final epoch of the training cycle.
        """
        self.model = self.model.to(device)

        current_best_loss = np.iinfo(np.intp).max

        pbar = master_bar(range(num_epochs))
        headers = ['train_loss', 'val_loss']
        for task in self.tasks:
            headers.append(f'{task}_train_loss')
            headers.append(f'{task}_val_loss')
            headers.append(f'{task}_acc')
        headers.append('time')
        pbar.write(headers, table=True)

        for epoch in pbar:
            start_time = time.time()
            self.model.train()

            training_loss_dict = {task: 0.0 for task in self.tasks}

            overall_training_loss = 0.0

            for step, batch in enumerate(
                    progress_bar(self.train_dataloader, parent=pbar)):
                task_type, (x, y) = batch
                x = self._return_input_on_device(x, device)
                y = y.type(torch.LongTensor)
                y = y.to(device)
                num_rows = self._get_num_rows(x)

                output = self.model(x)

                current_loss = loss_function(output[task_type], y)

                scaled_loss = current_loss.item() * num_rows

                training_loss_dict[task_type] += scaled_loss

                overall_training_loss += scaled_loss

                optimizer.zero_grad()
                current_loss.backward()
                optimizer.step()
                if step_scheduler_on_batch:
                    scheduler.step()

            if not step_scheduler_on_batch:
                scheduler.step()

            overall_training_loss = overall_training_loss / self.train_dataloader.total_samples

            for task in self.tasks:
                training_loss_dict[task] = (
                    training_loss_dict[task] /
                    len(self.train_dataloader.loader_dict[task].dataset))

            overall_val_loss, val_loss_dict, accuracies = self.validate(
                loss_function, device, pbar)

            str_stats = []
            stats = [overall_training_loss, overall_val_loss]
            for stat in stats:
                str_stats.append('NA' if stat is None else str(stat)
                                 if isinstance(stat, int) else f'{stat:.6f}')

            for task in self.tasks:
                str_stats.append(f'{training_loss_dict[task]:.6f}')
                str_stats.append(f'{val_loss_dict[task]:.6f}')
                str_stats.append(f"{accuracies[task]['accuracy']:.6f}")

            str_stats.append(format_time(time.time() - start_time))

            pbar.write(str_stats, table=True)

            if best_model and overall_val_loss < current_best_loss:
                current_best_loss = overall_val_loss
                best_model_wts = copy.deepcopy(self.model.state_dict())
                best_model_epoch = epoch

        if best_model:
            self.model.load_state_dict(best_model_wts)
            print(
                f'Epoch {best_model_epoch} best model saved with loss of {current_best_loss}'
            )
Example #22
    def fit(
        self,
        num_epochs,
        scheduler,
        step_scheduler_on_batch,
        optimizer,
        device='cuda:0',
        best_model=False,
        smooth_loss_alpha=0.2
    ):
        """
        Fit the PyTorch model

        Parameters
        ----------
        num_epochs: int
            number of epochs to train
        scheduler: torch.optim.lr_scheduler
            PyTorch learning rate scheduler
        step_scheduler_on_batch: bool
            flag of whether to step scheduler on batch (if True) or on epoch (if False)
        optimizer: torch.optim
            PyTorch optimizer
        device: str (defaults to 'cuda:0')
            device to run calculations on
        best_model: bool (defaults to `False`)
            flag to save best model from a single `fit` training run based on validation loss
            The default is `False`, which will keep the final model from the training run.
            `True` will keep the best model from the training run instead of the model
            from the final epoch of the training cycle.
        smooth_loss_alpha: float
            Training loss values displayed during fitting and at the end of each epoch are
            exponentially weighted moving averages over batches. Using an exponentially weighted
            average over batches is a compromise between reporting the value from the most recent
            batch, which is highly relevant but noisy, and reporting a simple average over batches,
            which is more stable but reflects the value of the loss at the beginning of the epoch
            as much as at the end. `smooth_loss_alpha` controls how much weight is given to the
            current batch. It must be in the `(0, 1]` interval. Higher values are more like
            reporting only the most recent batch, while lower values are more like giving all
            batches equal weight, so this value controls the tradeoff between stability and
            relevance.
        """
        if not 0 < smooth_loss_alpha <= 1:
            raise ValueError("`smooth_loss_alpha` must be in (0, 1]")

        self.model = self.model.to(device)

        current_best_loss = np.iinfo(np.intp).max

        pbar = master_bar(range(num_epochs))
        headers = ['train_loss', 'val_loss']
        for task in self.tasks:
            headers.append(f'{task}_train_loss')
            headers.append(f'{task}_val_loss')
            headers.append(f'{task}_{self.metric_function_dict[task].__name__}')
        headers.append('time')
        pbar.write(headers, table=True)

        self.smooth_training_loss_dict = {}
        for epoch in pbar:
            start_time = time.time()
            self.model.train()

            overall_training_loss = 0.0

            subpbar = progress_bar(self.train_dataloader, parent=pbar)
            for _, batch in enumerate(subpbar):
                task_type, (x, y) = batch
                x = self._return_input_on_device(x, device)
                y = y.to(device)

                num_rows = self._get_num_rows(x)

                if num_rows == 1:
                    # skip batches of size 1
                    continue

                output = self.model(x)

                current_loss = self.loss_function_dict[task_type](output[task_type], y)

                optimizer.zero_grad()
                current_loss.backward()
                optimizer.step()
                if step_scheduler_on_batch:
                    scheduler.step()

                self._update_smooth_training_loss_dict(
                    task_type, current_loss.item(), smooth_loss_alpha
                )
                subpbar.comment = self._report_smooth_training_loss()

            overall_val_loss, val_loss_dict, metrics_scores = self.validate(
                device,
                pbar
            )

            if not step_scheduler_on_batch:
                if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                    scheduler.step(overall_val_loss)
                else:
                    scheduler.step()

            overall_training_loss = self._calculate_overall_loss()

            stats = [overall_training_loss, overall_val_loss]
            str_stats = []
            for stat in stats:
                str_stats.append(
                    'NA' if stat is None else str(stat) if isinstance(stat, int) else f'{stat:.6f}'
                )

            for task in self.tasks:
                str_stats.append(f'{self.smooth_training_loss_dict[task]:.6f}')
                str_stats.append(f'{val_loss_dict[task]:.6f}')
                str_stats.append(
                    f"{metrics_scores[task][self.metric_function_dict[task].__name__]:.6f}"
                )

            str_stats.append(format_time(time.time() - start_time))

            pbar.write(str_stats, table=True)

            if best_model and overall_val_loss < current_best_loss:
                current_best_loss = overall_val_loss
                best_model_wts = copy.deepcopy(self.model.state_dict())
                best_model_epoch = epoch

        if best_model:
            self.model.load_state_dict(best_model_wts)
            print(f'Epoch {best_model_epoch} best model saved with loss of {current_best_loss}')
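The smoothed loss described in the docstring is a standard exponentially weighted moving average, so _update_smooth_training_loss_dict presumably performs an update along these lines (a sketch, not the project's actual helper):

 def _update_smooth_training_loss_dict(self, task, batch_loss, alpha):
     # EWMA: new = alpha * current batch loss + (1 - alpha) * running average
     prev = self.smooth_training_loss_dict.get(task)
     self.smooth_training_loss_dict[task] = (
         batch_loss if prev is None else alpha * batch_loss + (1 - alpha) * prev
     )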
Example #23
 def after_epoch(self):
     stats = [str(self.epoch)]
     for o in [self.train_stats, self.valid_stats]: stats += [f'{v:.6f}' for v in o.avg_stats]
     stats += [format_time(time.time() - self.start_time)]
     self.mbar.write(stats, table=True)