Example #1
import logging
import warnings
from typing import Dict, Iterable, Optional

import numpy as np
from tensorflow.keras.callbacks import Callback

# mae, accuracy, Scaler and DummyScaler come from the surrounding package; the
# megnet-style import paths below are an assumption. _count and _print_mae are
# module-level helpers defined elsewhere in the same file.
from megnet.utils.metrics import accuracy, mae
from megnet.utils.preprocessing import DummyScaler, Scaler

logger = logging.getLogger(__name__)


class ModelCheckpointMAE(Callback):
    """
    Save the best MAE model with target scaler

    Args:
        filepath (string): path template for the saved model file. For example,
            `weights.{epoch:02d}-{val_mae:.6f}.hdf5` embeds the epoch and the
            val_mae value in the filename
        monitor (string): quantity to monitor, default to "val_mae"
        verbose (int): 0 for no training log, 1 for only epoch-level log and 2 for batch-level log
        save_best_only (bool): whether to save only the best model
        save_weights_only (bool): whether to save the weights only excluding model structure
        val_gen (generator): validation generator
        steps_per_val (int): steps per epoch for validation generator
        target_scaler (object): object exposing an inverse_transform method, used to
            rescale the output back to its original units
        period (int): interval (number of epochs) between invocations of this callback
        mode (string): one of "min", "max" or "auto"
    """
    def __init__(
            self,
            filepath: str = './callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5',
            monitor: str = 'val_mae',
            verbose: int = 0,
            save_best_only: bool = True,
            save_weights_only: bool = False,
            val_gen: Optional[Iterable] = None,
            steps_per_val: Optional[int] = None,
            target_scaler: Optional[Scaler] = None,
            period: int = 1,
            mode: str = 'auto'):
        super().__init__()
        if val_gen is None:
            raise ValueError('No validation data is provided!')
        self.verbose = verbose
        if self.verbose > 0:
            logging.basicConfig(level=logging.INFO)
        self.filepath = filepath
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period
        self.epochs_since_last_save = 0
        self.val_gen = val_gen
        self.steps_per_val = steps_per_val
        self.target_scaler = target_scaler
        if self.target_scaler is None:
            self.target_scaler = DummyScaler()

        if monitor == 'val_mae':
            self.metric = mae
            self.monitor = 'val_mae'
        elif monitor == 'val_acc':
            self.metric = accuracy
            self.filepath = self.filepath.replace('val_mae', 'val_acc')
            self.monitor = 'val_acc'
        else:
            # fail early; otherwise self.metric/self.monitor would be undefined
            raise ValueError('monitor must be "val_mae" or "val_acc"')

        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.inf
        else:
            if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
                self.monitor_op = np.greater
                self.best = -np.inf
            else:
                self.monitor_op = np.less
                self.best = np.inf

    def on_epoch_end(self, epoch: int, logs: Dict = None) -> None:
        """
        Codes called by the callback at the end of epoch
        Args:
            epoch (int): epoch id
            logs (dict): logs of training

        Returns:
            None
        """
        self.epochs_since_last_save += 1
        if self.epochs_since_last_save >= self.period:
            self.epochs_since_last_save = 0
            val_pred = []
            val_y = []
            for i in range(self.steps_per_val):
                val_data = self.val_gen[i]
                nb_atom = _count(np.array(val_data[0][-2]))
                # model.predict can reset stop_training; preserve and restore it
                stop_training = self.model.stop_training
                pred_ = self.model.predict(val_data[0])
                self.model.stop_training = stop_training
                val_pred.append(
                    self.target_scaler.inverse_transform(
                        pred_[0, :, :], nb_atom[:, None]))
                val_y.append(
                    self.target_scaler.inverse_transform(
                        val_data[1][0, :, :], nb_atom[:, None]))
            current = self.metric(np.concatenate(val_y, axis=0),
                                  np.concatenate(val_pred, axis=0))
            filepath = self.filepath.format(**{
                "epoch": epoch + 1,
                self.monitor: current
            })

            if self.save_best_only:
                if current is None:
                    warnings.warn(
                        'Can save best model only with %s available, '
                        'skipping.' % self.monitor, RuntimeWarning)
                else:
                    if self.monitor_op(current, self.best):
                        logger.info(
                            '\nEpoch %05d: %s improved from %0.5f to %0.5f,'
                            ' saving model to %s' %
                            (epoch + 1, self.monitor, self.best, current,
                             filepath))
                        self.best = current
                        if self.save_weights_only:
                            self.model.save_weights(filepath, overwrite=True)
                        else:
                            self.model.save(filepath, overwrite=True)
                    else:
                        if self.verbose > 0:
                            logger.info(
                                '\nEpoch %05d: %s did not improve from %0.5f' %
                                (epoch + 1, self.monitor, self.best))
            else:
                logger.info('\nEpoch %05d: saving model to %s' %
                            (epoch + 1, filepath))
                if self.save_weights_only:
                    self.model.save_weights(filepath, overwrite=True)
                else:
                    self.model.save(filepath, overwrite=True)
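
A minimal usage sketch (hypothetical wiring, not shown on this page: `model` is a
compiled Keras model and `train_gen`/`val_gen` are keras.utils.Sequence-style
generators yielding (inputs, targets) batches):

# Checkpoint the best model by rescaled validation MAE, checked every epoch.
checkpoint = ModelCheckpointMAE(
    filepath='./callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5',
    monitor='val_mae',
    save_best_only=True,
    val_gen=val_gen,             # hypothetical validation Sequence
    steps_per_val=len(val_gen),
    target_scaler=None,          # falls back to the identity DummyScaler
    mode='min')
model.fit(train_gen, epochs=100, callbacks=[checkpoint])
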
Example #2
class GeneratorLog(Callback):
    """
    This callback logs (via logger.info) the MAE for train_gen and val_gen every n_every
    epochs. The default Keras training log has no mechanism to rescale the results back to
    their original units, so it is not physically intuitive.

    Args:
        train_gen: (generator) yields (x, y) pairs for training
        steps_per_train: (int) number of generator steps per training epoch
        val_gen: (generator) yields (x, y) pairs for validation
        steps_per_val: (int) number of generator steps per epoch for validation data
        y_scaler: (object) y_scaler.inverse_transform is used to convert the predicted
            values to their original scale
        n_every: (int) epoch interval for showing the log
        val_names: (list of string) variable names
        val_units: (list of string) variable units
        is_pa: (bool) whether it is a per-atom quantity
    """
    def __init__(self,
                 train_gen,
                 steps_per_train=None,
                 val_gen=None,
                 steps_per_val=None,
                 y_scaler=None,
                 n_every=5,
                 val_names=None,
                 val_units=None,
                 is_pa=False):
        super().__init__()
        self.train_gen = train_gen
        self.val_gen = val_gen
        self.steps_per_train = steps_per_train
        self.steps_per_val = steps_per_val
        self.yscaler = y_scaler
        self.epochs = []
        self.total_epoch = 0
        self.n_every = n_every
        self.val_names = val_names
        self.val_units = val_units
        self.is_pa = is_pa
        if self.yscaler is None:
            self.yscaler = DummyScaler()

    def on_epoch_end(self, epoch, logs=None):
        """
        Standard Keras callback hook, executed at the end of each epoch
        """
        self.total_epoch += 1
        if self.total_epoch % self.n_every == 0:
            train_pred = []
            train_y = []
            for i in range(self.steps_per_train):
                train_data = self.train_gen[i]
                nb_atom = _count(np.array(train_data[0][-2]))
                if not self.is_pa:
                    nb_atom = np.ones_like(nb_atom)
                pred_ = self.model.predict(train_data[0])
                train_pred.append(
                    self.yscaler.inverse_transform(pred_[0, :, :]) *
                    nb_atom[:, None])
                train_y.append(
                    self.yscaler.inverse_transform(train_data[1][0, :, :]) *
                    nb_atom[:, None])
            train_mae = np.mean(np.abs(
                np.concatenate(train_pred, axis=0) -
                np.concatenate(train_y, axis=0)),
                                axis=0)
            logger.info("Train MAE")
            _print_mae(self.val_names, train_mae, self.val_units)
            val_pred = []
            val_y = []
            for i in range(self.steps_per_val):
                val_data = self.val_gen[i]
                nb_atom = _count(np.array(val_data[0][-2]))
                if not self.is_pa:
                    nb_atom = np.ones_like(nb_atom)
                pred_ = self.model.predict(val_data[0])
                val_pred.append(
                    self.yscaler.inverse_transform(pred_[0, :, :]) *
                    nb_atom[:, None])
                val_y.append(
                    self.yscaler.inverse_transform(val_data[1][0, :, :]) *
                    nb_atom[:, None])
            val_mae = np.mean(np.abs(
                np.concatenate(val_pred, axis=0) -
                np.concatenate(val_y, axis=0)),
                              axis=0)
            logger.info("Test MAE")
            _print_mae(self.val_names, val_mae, self.val_units)
            self.model.history.history.setdefault("train_mae",
                                                  []).append(train_mae)
            self.model.history.history.setdefault("val_mae",
                                                  []).append(val_mae)
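
A similar minimal usage sketch (again with hypothetical names; `val_names` and
`val_units` describe the target columns and are assumptions, not values taken
from the example):

# Log rescaled train/validation MAE every 5 epochs for a per-atom target.
gen_log = GeneratorLog(
    train_gen=train_gen,
    steps_per_train=len(train_gen),
    val_gen=val_gen,
    steps_per_val=len(val_gen),
    y_scaler=y_scaler,               # e.g. a fitted scaler with inverse_transform
    n_every=5,
    val_names=['formation_energy'],  # hypothetical target name
    val_units=['eV/atom'],           # hypothetical unit
    is_pa=True)
model.fit(train_gen, epochs=100, callbacks=[gen_log])
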