Example #1
def load_folder(folder=None):
    """
    Load an astroNN model object from a folder

    :param folder: [optional] provide the folder name if you are outside the folder; do not specify it when you are already inside the folder
    :type folder: str
    :return: astroNN Neural Network instance
    :rtype: astroNN.nn.NeuralNetMaster.NeuralNetMaster
    :History: 2017-Dec-29 - Written - Henry Leung (University of Toronto)
    """
    currentdir = os.getcwd()

    if folder is not None:
        fullfilepath = os.path.join(currentdir, folder)
    else:
        fullfilepath = currentdir

    astronn_model_obj = None

    if folder is not None and os.path.isfile(
            os.path.join(folder, 'astroNN_model_parameter.json')):
        with open(os.path.join(folder, 'astroNN_model_parameter.json')) as f:
            parameter = json.load(f)
    elif os.path.isfile('astroNN_model_parameter.json'):
        with open('astroNN_model_parameter.json') as f:
            parameter = json.load(f)
    elif folder is not None and not os.path.exists(folder):
        raise IOError('Folder does not exist: ' +
                      str(os.path.join(currentdir, folder)))
    else:
        raise FileNotFoundError(
            'Are you sure this is an astroNN generated folder?')

    identifier = parameter['id']
    unknown_model_message = f'Unknown model identifier -> {identifier}!'

    # need to point to the actual neural network class if it lives in a non-trivial location
    if identifier == 'Galaxy10CNN':
        astronn_model_obj = Galaxy10CNN()
    else:
        # otherwise try to import it the standard way
        try:
            astronn_model_obj = getattr(
                importlib.import_module("astroNN.models"), identifier)()
        except ImportError:
            # try to load custom model from CUSTOM_MODEL_PATH if none are working
            CUSTOM_MODEL_PATH = custom_model_path_reader()
            # try the current folder and see if there is any .py on top of CUSTOM_MODEL_PATH
            list_py_files = [
                os.path.join(fullfilepath, f) for f in os.listdir(fullfilepath)
                if f.endswith(".py")
            ]
            if CUSTOM_MODEL_PATH is None and not list_py_files:
                print("\n")
                raise TypeError(unknown_model_message)
            else:
                for path_list in (
                        path_list
                        for path_list in [CUSTOM_MODEL_PATH, list_py_files]
                        if path_list is not None):
                    for path in path_list:
                        head, tail = os.path.split(path)
                        sys.path.insert(0, head)
                        try:
                            model = getattr(
                                importlib.import_module(
                                    os.path.splitext(tail)[0]),
                                str(identifier))
                            astronn_model_obj = model()
                        except AttributeError:
                            pass

        if astronn_model_obj is None:
            print("\n")
            raise TypeError(unknown_model_message)

    astronn_model_obj.currentdir = currentdir
    astronn_model_obj.fullfilepath = fullfilepath
    astronn_model_obj.folder_name = folder if folder is not None else os.path.basename(
        os.path.normpath(currentdir))

    # Parameters that must always be present
    astronn_model_obj._input_shape = parameter['input']
    astronn_model_obj._labels_shape = parameter['labels']
    astronn_model_obj.num_hidden = parameter['hidden']
    astronn_model_obj.input_norm_mode = parameter['input_norm_mode']
    astronn_model_obj.labels_norm_mode = parameter['labels_norm_mode']
    astronn_model_obj.input_mean = np.array(parameter['input_mean'])
    astronn_model_obj.labels_mean = np.array(parameter['labels_mean'])
    astronn_model_obj.input_std = np.array(parameter['input_std'])
    astronn_model_obj.labels_std = np.array(parameter['labels_std'])
    astronn_model_obj.batch_size = parameter['batch_size']
    astronn_model_obj.targetname = parameter['targetname']
    astronn_model_obj.val_size = parameter['valsize']

    # create normalizer and set correct mean and std
    astronn_model_obj.input_normalizer = Normalizer(
        mode=astronn_model_obj.input_norm_mode)
    astronn_model_obj.labels_normalizer = Normalizer(
        mode=astronn_model_obj.labels_norm_mode)
    astronn_model_obj.input_normalizer.mean_labels = astronn_model_obj.input_mean
    astronn_model_obj.input_normalizer.std_labels = astronn_model_obj.input_std
    astronn_model_obj.labels_normalizer.mean_labels = astronn_model_obj.labels_mean
    astronn_model_obj.labels_normalizer.std_labels = astronn_model_obj.labels_std

    # Conditional parameters that depend on the neural net architecture
    try:
        astronn_model_obj.num_filters = parameter['filternum']
    except KeyError:
        pass
    try:
        astronn_model_obj.filter_len = parameter['filterlen']
    except KeyError:
        pass
    try:
        pool_length = parameter['pool_length']
        if pool_length is not None:
            if isinstance(pool_length, int):  # 1D case
                astronn_model_obj.pool_length = parameter['pool_length']
            else:  # multi-dimensional case
                astronn_model_obj.pool_length = list(parameter['pool_length'])
    except (KeyError, TypeError):
        pass
    try:
        # need to convert to int because Keras does not want an array or list
        astronn_model_obj.latent_dim = int(parameter['latent'])
    except KeyError:
        pass
    try:
        astronn_model_obj.task = parameter['task']
    except KeyError:
        pass
    try:
        astronn_model_obj.dropout_rate = parameter['dropout_rate']
    except KeyError:
        pass
    try:
        # if inverse model precision exists, so does length_scale
        astronn_model_obj.inv_model_precision = parameter['inv_tau']
        astronn_model_obj.length_scale = parameter['length_scale']
    except KeyError:
        pass
    try:
        astronn_model_obj.l1 = parameter['l1']
    except KeyError:
        pass
    try:
        astronn_model_obj.l2 = parameter['l2']
    except KeyError:
        pass
    try:
        astronn_model_obj.maxnorm = parameter['maxnorm']
    except KeyError:
        pass
    try:
        astronn_model_obj._last_layer_activation = parameter[
            'last_layer_activation']
    except KeyError:
        pass
    try:
        astronn_model_obj.activation = parameter['activation']
    except KeyError:
        pass
    global _GRAPH_COUTNER
    global _GRAPH_STORAGE
    global _SESSION_STORAGE
    _GRAPH_COUTNER += 1
    _GRAPH_STORAGE.append(get_default_graph())

    # only two session sources are expected: an existing default session or the Keras backend session
    if get_default_session() is not None:
        session = get_default_session()
    elif keras.backend.get_session() is not None:
        session = keras.backend.get_session()
    else:
        session = None
    _SESSION_STORAGE.append(session)

    with h5py.File(os.path.join(astronn_model_obj.fullfilepath,
                                'model_weights.h5'),
                   mode='r') as f:
        training_config = f.attrs.get('training_config')
        training_config = json.loads(training_config.decode('utf-8'))
        optimizer_config = training_config['optimizer_config']
        optimizer = optimizers.deserialize(optimizer_config)

        # Recover loss functions and metrics.
        losses_raw = convert_custom_objects(training_config['loss'])
        try:
            try:
                loss = [
                    losses_lookup(losses_raw[_loss]) for _loss in losses_raw
                ]
            except TypeError:
                loss = losses_lookup(losses_raw)
        except Exception:
            pass

        metrics_raw = convert_custom_objects(training_config['metrics'])
        # oddly, Keras needs metrics[metric][0] instead of metrics[metric] like losses
        try:
            try:
                metrics = [
                    losses_lookup(metrics_raw[_metric][0])
                    for _metric in metrics_raw
                ]
            except TypeError:
                metrics = [losses_lookup(metrics_raw[0])]
        except Exception:
            pass

        sample_weight_mode = training_config['sample_weight_mode']
        loss_weights = training_config['loss_weights']
        weighted_metrics = None

        # compile the model
        astronn_model_obj.compile(optimizer=optimizer,
                                  loss=loss,
                                  metrics=metrics,
                                  weighted_metrics=weighted_metrics,
                                  loss_weights=loss_weights,
                                  sample_weight_mode=sample_weight_mode)

        # set weights
        astronn_model_obj.keras_model.load_weights(
            os.path.join(astronn_model_obj.fullfilepath, 'model_weights.h5'))

        # Build train function (to get weight updates), need to consider Sequential model too
        astronn_model_obj.keras_model._make_train_function()
        optimizer_weights_group = f['optimizer_weights']
        optimizer_weight_names = [
            n.decode('utf8')
            for n in optimizer_weights_group.attrs['weight_names']
        ]
        optimizer_weight_values = [
            optimizer_weights_group[n] for n in optimizer_weight_names
        ]
        astronn_model_obj.keras_model.optimizer.set_weights(
            optimizer_weight_values)

    astronn_model_obj.graph = _GRAPH_STORAGE[
        _GRAPH_COUTNER - 1]  # the graph associated with the model
    astronn_model_obj.session = _SESSION_STORAGE[
        _GRAPH_COUTNER - 1]  # the session associated with the model

    print("========================================================")
    print(
        f"Loaded astroNN model, model type: {astronn_model_obj.name} -> {identifier}"
    )
    print("========================================================")
    return astronn_model_obj
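
A minimal usage sketch for the loader above. It assumes astroNN is installed and that a folder such as 'astroNN_0101_run001' (a placeholder name) was produced by an earlier astroNN training run:

    from astroNN.models import load_folder

    # load from a named subfolder of the current working directory
    model = load_folder('astroNN_0101_run001')

    # or, when the working directory is already inside an astroNN-generated folder:
    # model = load_folder()

    print(model.name)        # e.g. 'Convolutional Neural Network'
    print(model.targetname)  # names of the labels the model was trained on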
Example #2
    def train_on_batch(self,
                       input_data,
                       labels,
                       inputs_err=None,
                       labels_err=None):
        """
        Train a Bayesian neural network by running a single gradient update on all of your data, suitable for fine-tuning

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :return: None
        :rtype: NoneType
        :History:
            | 2018-Aug-25 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        # check if normalizers already exist (they do when fine-tuning, so no need to recalculate mean/std)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to worry about the magic number as the loss function only looks for it in y_true
        norm_input_err = inputs_err / self.input_std
        norm_labels_err = labels_err / self.labels_std

        start_time = time.time()

        fit_generator = BayesianCNNDataGenerator(
            batch_size=input_data.shape[0],
            shuffle=False,
            steps_per_epoch=1,
            data=[norm_data, norm_labels, norm_input_err, norm_labels_err])

        score = self.keras_model.fit_generator(
            fit_generator,
            epochs=1,
            verbose=self.verbose,
            workers=os.cpu_count(),
            use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training on Batch, {(time.time() - start_time):.{2}f}s in total'
        )

        return None
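
A hedged sketch of using the train_on_batch method above for fine-tuning. The variable `model` is assumed to be an already-trained Bayesian model (for example, returned by load_folder), and the array shapes are purely illustrative:

    import numpy as np

    # hypothetical batch: 64 spectra with 7514 pixels and 22 labels
    x_batch = np.random.normal(size=(64, 7514))
    y_batch = np.random.normal(size=(64, 22))
    x_err = np.abs(np.random.normal(scale=0.01, size=x_batch.shape))

    # single gradient update over the whole batch, reusing the stored mean/std
    model.train_on_batch(x_batch, y_batch, inputs_err=x_err)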
Example #3
    def evaluate(self, input_data, labels, inputs_err=None, labels_err=None):
        """
        Evaluate the neural network with the provided input data and labels, and get back a metrics score

        :param input_data: Data to be evaluated with neural network
        :type input_data: ndarray
        :param labels: Labels to be evaluated with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :return: metrics score dictionary
        :rtype: dict
        :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        # check if normalizers already exist (they do when fine-tuning, so no need to recalculate mean/std)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to worry about the magic number as the loss function only looks for it in y_true
        norm_input_err = inputs_err / self.input_std
        norm_labels_err = labels_err / self.labels_std

        eval_batchsize = self.batch_size if input_data.shape[
            0] > self.batch_size else input_data.shape[0]
        steps = input_data.shape[0] // self.batch_size if input_data.shape[
            0] > self.batch_size else 1

        start_time = time.time()
        print("Starting Evaluation")

        evaluate_generator = BayesianCNNDataGenerator(
            batch_size=eval_batchsize,
            shuffle=False,
            steps_per_epoch=steps,
            data=[norm_data, norm_labels, norm_input_err, norm_labels_err])

        scores = self.keras_model.evaluate_generator(evaluate_generator)
        if isinstance(scores, float):  # make sure scores is iterable
            scores = [scores]
        outputname = self.keras_model.output_names
        funcname = []
        if isinstance(self.keras_model.metrics, dict):
            func_list = self.keras_model.metrics[outputname[0]]
        else:
            func_list = self.keras_model.metrics
        for func in func_list:
            if hasattr(func, '__name__'):
                funcname.append(func.__name__)
            else:
                funcname.append(func.__class__.__name__)
        # funcname = [func.__name__ for func in self.keras_model.metrics[outputname[0]]]
        loss_outputname = ['loss_' + name for name in outputname]
        output_funcname = [outputname[0] + '_' + name for name in funcname]
        list_names = ['loss', *loss_outputname, *output_funcname]

        print(
            f'Completed Evaluation, {(time.time() - start_time):.{2}f}s elapsed'
        )

        return {name: score for name, score in zip(list_names, scores)}
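
A hedged sketch of calling the evaluate method above on held-out data. The names `model`, `x_test`, `y_test` and `x_test_err` are assumptions standing in for a trained Bayesian model and properly shaped ndarrays:

    scores = model.evaluate(x_test, y_test, inputs_err=x_test_err)

    # scores is a dict; it typically contains 'loss', per-output losses such as
    # 'loss_output', and per-output metrics such as 'output_mean_absolute_error'
    for name, value in scores.items():
        print(f'{name}: {value}')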
Example #4
class CNNBase(NeuralNetMaster, ABC):
    """Top-level class for a convolutional neural network"""
    def __init__(self):
        """
        NAME:
            __init__
        PURPOSE:
            To define astroNN convolutional neural network
        HISTORY:
            2018-Jan-06 - Written - Henry Leung (University of Toronto)
        """
        super().__init__()
        self.name = 'Convolutional Neural Network'
        self._model_type = 'CNN'
        self._model_identifier = None
        self.initializer = None
        self.activation = None
        self._last_layer_activation = None
        self.num_filters = None
        self.filter_len = None
        self.pool_length = None
        self.num_hidden = None
        self.reduce_lr_epsilon = None
        self.reduce_lr_min = None
        self.reduce_lr_patience = None
        self.l1 = None
        self.l2 = None
        self.maxnorm = None
        self.dropout_rate = 0.0
        self.val_size = 0.1
        self.early_stopping_min_delta = 0.0001
        self.early_stopping_patience = 4

        self.input_norm_mode = 1
        self.labels_norm_mode = 2

    def compile(self,
                optimizer=None,
                loss=None,
                metrics=None,
                weighted_metrics=None,
                loss_weights=None,
                sample_weight_mode=None):
        if optimizer is not None:
            self.optimizer = optimizer
        elif self.optimizer is None or self.optimizer == 'adam':
            self.optimizer = Adam(lr=self.lr,
                                  beta_1=self.beta_1,
                                  beta_2=self.beta_2,
                                  epsilon=self.optimizer_epsilon,
                                  decay=0.0)

        # prefer explicitly passed metrics, then existing self.metrics, then the defaults
        if self.task == 'regression':
            self._last_layer_activation = 'linear'
            loss_func = mean_squared_error if not loss else loss
            self.metrics = (metrics or self.metrics
                            or [mean_absolute_error, mean_error])
        elif self.task == 'classification':
            self._last_layer_activation = 'softmax'
            loss_func = categorical_crossentropy if not loss else loss
            self.metrics = metrics or self.metrics or [categorical_accuracy]
        elif self.task == 'binary_classification':
            self._last_layer_activation = 'sigmoid'
            loss_func = binary_crossentropy if not loss else loss
            self.metrics = (metrics or self.metrics
                            or [binary_accuracy(from_logits=False)])
        else:
            raise RuntimeError(
                'Only "regression", "classification" and "binary_classification" are supported'
            )

        self.keras_model = self.model()

        self.keras_model.compile(loss=loss_func,
                                 optimizer=self.optimizer,
                                 metrics=self.metrics,
                                 weighted_metrics=weighted_metrics,
                                 loss_weights=loss_weights,
                                 sample_weight_mode=sample_weight_mode)

        return None

    def pre_training_checklist_child(self, input_data, labels):
        self.pre_training_checklist_master(input_data, labels)

        # check if normalizers already exist (they do when fine-tuning, so no need to recalculate mean/std)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        if self.keras_model is None:  # only compile if there is no keras_model yet, e.g. fine-tuning does not require it
            self.compile()

        self.train_idx, self.val_idx = train_test_split(
            np.arange(self.num_train + self.val_num), test_size=self.val_size)

        self.training_generator = CNNDataGenerator(
            batch_size=self.batch_size,
            shuffle=True,
            steps_per_epoch=self.num_train // self.batch_size,
            data=[norm_data[self.train_idx], norm_labels[self.train_idx]],
            manual_reset=False)

        val_batchsize = self.batch_size if len(
            self.val_idx) > self.batch_size else len(self.val_idx)
        self.validation_generator = CNNDataGenerator(
            batch_size=val_batchsize,
            shuffle=False,
            steps_per_epoch=max(self.val_num // self.batch_size, 1),
            data=[norm_data[self.val_idx], norm_labels[self.val_idx]],
            manual_reset=True)

        return input_data, labels

    def train(self, input_data, labels):
        """
        Train a Convolutional neural network

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :return: None
        :rtype: NoneType
        :History: 2017-Dec-06 - Written - Henry Leung (University of Toronto)
        """
        # Call the checklist to create astroNN folder and save parameters
        self.pre_training_checklist_child(input_data, labels)

        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.5,
                                      min_delta=self.reduce_lr_epsilon,
                                      patience=self.reduce_lr_patience,
                                      min_lr=self.reduce_lr_min,
                                      mode='min',
                                      verbose=2)

        early_stopping = EarlyStopping(monitor='val_loss',
                                       min_delta=self.early_stopping_min_delta,
                                       patience=self.early_stopping_patience,
                                       verbose=2,
                                       mode='min')

        self.virtual_cvslogger = VirutalCSVLogger()

        self.__callbacks = [reduce_lr, self.virtual_cvslogger
                            ]  # default callbacks that must always be present

        if self.callbacks is not None:
            if isinstance(self.callbacks, list):
                self.__callbacks.extend(self.callbacks)
            else:
                self.__callbacks.append(self.callbacks)

        start_time = time.time()

        self.history = self.keras_model.fit_generator(
            generator=self.training_generator,
            validation_data=self.validation_generator,
            epochs=self.max_epochs,
            verbose=self.verbose,
            workers=os.cpu_count(),
            callbacks=self.__callbacks,
            use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training, {(time.time() - start_time):.{2}f}s in total'
        )

        if self.autosave is True:
            # Call the post training checklist to save parameters
            self.save()

        return None

    def train_on_batch(self, input_data, labels):
        """
        Train a neural network by running a single gradient update on all of your data, suitable for fine-tuning

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :return: None
        :rtype: NoneType
        :History: 2018-Aug-22 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        # check if normalizers already exist (they do when fine-tuning, so no need to recalculate mean/std)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        start_time = time.time()

        fit_generator = CNNDataGenerator(batch_size=input_data.shape[0],
                                         shuffle=False,
                                         steps_per_epoch=1,
                                         data=[norm_data, norm_labels])

        scores = self.keras_model.fit_generator(
            generator=fit_generator,
            epochs=1,
            verbose=self.verbose,
            workers=os.cpu_count(),
            use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training on Batch, {(time.time() - start_time):.{2}f}s in total'
        )

        return None

    def post_training_checklist_child(self):
        self.keras_model.save(self.fullfilepath + _astroNN_MODEL_NAME)
        print(_astroNN_MODEL_NAME +
              f' saved to {(self.fullfilepath + _astroNN_MODEL_NAME)}')

        self.hyper_txt.write(f"Dropout Rate: {self.dropout_rate} \n")
        self.hyper_txt.flush()
        self.hyper_txt.close()

        data = {
            'id': (self.__class__.__name__
                   if self._model_identifier is None else self._model_identifier),
            'pool_length': self.pool_length,
            'filterlen': self.filter_len,
            'filternum': self.num_filters,
            'hidden': self.num_hidden,
            'input': self._input_shape,
            'labels': self._labels_shape,
            'task': self.task,
            'last_layer_activation': self._last_layer_activation,
            'activation': self.activation,
            'input_mean': self.input_mean.tolist(),
            'labels_mean': self.labels_mean.tolist(),
            'input_std': self.input_std.tolist(),
            'labels_std': self.labels_std.tolist(),
            'valsize': self.val_size,
            'targetname': self.targetname,
            'dropout_rate': self.dropout_rate,
            'l1': self.l1,
            'l2': self.l2,
            'maxnorm': self.maxnorm,
            'input_norm_mode': self.input_norm_mode,
            'labels_norm_mode': self.labels_norm_mode,
            'batch_size': self.batch_size
        }

        with open(self.fullfilepath + '/astroNN_model_parameter.json',
                  'w') as f:
            json.dump(data, f, indent=4, sort_keys=True)

    def test(self, input_data):
        """
        Use the neural network to do inference

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :return: prediction and prediction uncertainty
        :rtype: ndarray
        :History: 2017-Dec-06 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        self.pre_testing_checklist_master()

        input_data = np.atleast_2d(input_data)

        if self.input_normalizer is not None:
            input_array = self.input_normalizer.normalize(input_data,
                                                          calc=False)
        else:
            # Prevent shallow copy issue
            input_array = np.array(input_data)
            input_array -= self.input_mean
            input_array /= self.input_std

        total_test_num = input_data.shape[0]  # Number of testing data

        # for a number of test data points smaller than batch_size
        if input_data.shape[0] < self.batch_size:
            self.batch_size = input_data.shape[0]

        # Due to the nature of how generator works, no overlapped prediction
        data_gen_shape = (total_test_num // self.batch_size) * self.batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        predictions = np.zeros((total_test_num, self._labels_shape))

        start_time = time.time()
        print("Starting Inference")

        # Data Generator for prediction
        prediction_generator = CNNPredDataGenerator(
            batch_size=self.batch_size,
            shuffle=False,
            steps_per_epoch=input_array.shape[0] // self.batch_size,
            data=[input_array[:data_gen_shape]])
        predictions[:data_gen_shape] = np.asarray(
            self.keras_model.predict_generator(prediction_generator))

        if remainder_shape != 0:
            remainder_data = input_array[data_gen_shape:]
            # assume it's caused by monochrome images, so need to expand dims by 1
            if len(input_array[0].shape) != len(self._input_shape):
                remainder_data = np.expand_dims(remainder_data, axis=-1)
            result = self.keras_model.predict(remainder_data)
            predictions[data_gen_shape:] = result.reshape(
                (remainder_shape, self._labels_shape))

        if self.labels_normalizer is not None:
            predictions = self.labels_normalizer.denormalize(predictions)
        else:
            predictions *= self.labels_std
            predictions += self.labels_mean

        print(
            f'Completed Inference, {(time.time() - start_time):.{2}f}s elapsed'
        )

        return predictions

    def evaluate(self, input_data, labels):
        """
        Evaluate the neural network with the provided input data and labels, and get back a metrics score

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :param labels: labels
        :type labels: ndarray
        :return: metrics score dictionary
        :rtype: dict
        :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        # check if normalizers already exist (they do when fine-tuning, so no need to recalculate mean/std)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        eval_batchsize = self.batch_size if input_data.shape[
            0] > self.batch_size else input_data.shape[0]
        steps = input_data.shape[0] // self.batch_size if input_data.shape[
            0] > self.batch_size else 1

        start_time = time.time()
        print("Starting Evaluation")

        evaluate_generator = CNNDataGenerator(batch_size=eval_batchsize,
                                              shuffle=False,
                                              steps_per_epoch=steps,
                                              data=[norm_data, norm_labels])

        scores = self.keras_model.evaluate_generator(evaluate_generator)
        if isinstance(scores, float):  # make sure scores is iterable
            scores = [scores]
        outputname = self.keras_model.output_names
        funcname = []
        if isinstance(self.keras_model.metrics, dict):
            func_list = self.keras_model.metrics[outputname[0]]
        else:
            func_list = self.keras_model.metrics
        for func in func_list:
            if hasattr(func, '__name__'):
                funcname.append(func.__name__)
            else:
                funcname.append(func.__class__.__name__)
        # funcname = [func.__name__ for func in self.keras_model.metrics]
        output_funcname = [outputname[0] + '_' + name for name in funcname]
        list_names = ['loss', *output_funcname]

        print(
            f'Completed Evaluation, {(time.time() - start_time):.{2}f}s elapsed'
        )

        return {name: score for name, score in zip(list_names, scores)}
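
A hedged end-to-end sketch of the CNNBase training/inference lifecycle using ApogeeCNN, one of the concrete subclasses shipped with astroNN. The data below are random placeholders with APOGEE-like shapes, not real spectra:

    import numpy as np
    from astroNN.models import ApogeeCNN

    x_train = np.random.normal(size=(1000, 7514))  # fake spectra
    y_train = np.random.normal(size=(1000, 22))    # fake labels

    net = ApogeeCNN()
    net.max_epochs = 5                        # keep the sketch short
    net.train(x_train, y_train)               # compiles, normalizes and fits
    predictions = net.test(x_train[:10])      # denormalized predictions
    metrics = net.evaluate(x_train, y_train)  # dict of loss/metric scores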
Example #5
class BayesianCNNBase(NeuralNetMaster, ABC):
    """
    Top-level class for a Bayesian convolutional neural network

    :History: 2018-Jan-06 - Written - Henry Leung (University of Toronto)
    """
    def __init__(self):
        super().__init__()
        self.name = 'Bayesian Convolutional Neural Network'
        self._model_type = 'BCNN'
        self.initializer = None
        self.activation = None
        self._last_layer_activation = None
        self.num_filters = None
        self.filter_len = None
        self.pool_length = None
        self.num_hidden = None
        self.reduce_lr_epsilon = None
        self.reduce_lr_min = None
        self.reduce_lr_patience = None
        self.l1 = None
        self.l2 = None
        self.maxnorm = None
        self.inv_model_precision = None  # inverse model precision
        self.dropout_rate = 0.2
        self.length_scale = 3  # prior length scale
        self.mc_num = 100  # increased to 100 due to high performance VI on GPU implemented on 14 April 2018 (Henry)
        self.val_size = 0.1
        self.disable_dropout = False

        self.input_norm_mode = 1
        self.labels_norm_mode = 2

        self.keras_model_predict = None

    def pre_training_checklist_child(self, input_data, labels, input_err,
                                     labels_err):
        self.pre_training_checklist_master(input_data, labels)

        if isinstance(input_data, H5Loader):
            self.targetname = input_data.target
            input_data, labels = input_data.load()

        # check if normalizers already exist (they do when fine-tuning, so no need to recalculate mean/std)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to worry about the magic number as the loss function only looks for it in y_true
        norm_input_err = input_err / self.input_std
        norm_labels_err = labels_err / self.labels_std

        if self.keras_model is None:  # only compile if there is no keras_model yet, e.g. fine-tuning does not require it
            self.compile()

        self.train_idx, self.val_idx = train_test_split(
            np.arange(self.num_train + self.val_num), test_size=self.val_size)

        self.inv_model_precision = (2 * self.num_train *
                                    self.l2) / (self.length_scale**2 *
                                                (1 - self.dropout_rate))

        self.training_generator = BayesianCNNDataGenerator(
            batch_size=self.batch_size,
            shuffle=True,
            steps_per_epoch=self.num_train // self.batch_size,
            data=[
                norm_data[self.train_idx], norm_labels[self.train_idx],
                norm_input_err[self.train_idx], norm_labels_err[self.train_idx]
            ],
            manual_reset=False)

        val_batchsize = self.batch_size if len(
            self.val_idx) > self.batch_size else len(self.val_idx)
        self.validation_generator = BayesianCNNDataGenerator(
            batch_size=val_batchsize,
            shuffle=False,
            steps_per_epoch=max(self.val_num // self.batch_size, 1),
            data=[
                norm_data[self.val_idx], norm_labels[self.val_idx],
                norm_input_err[self.val_idx], norm_labels_err[self.val_idx]
            ],
            manual_reset=True)

        return norm_data, norm_labels, norm_input_err, norm_labels_err

    def compile(self,
                optimizer=None,
                loss=None,
                metrics=None,
                weighted_metrics=None,
                loss_weights=None,
                sample_weight_mode=None):
        if optimizer is not None:
            self.optimizer = optimizer
        elif self.optimizer is None or self.optimizer == 'adam':
            self.optimizer = Adam(lr=self.lr,
                                  beta_1=self.beta_1,
                                  beta_2=self.beta_2,
                                  epsilon=self.optimizer_epsilon,
                                  decay=0.0)
        if metrics is not None:
            self.metrics = metrics
        if self.task == 'regression':
            if self._last_layer_activation is None:
                self._last_layer_activation = 'linear'
        elif self.task == 'classification':
            if self._last_layer_activation is None:
                self._last_layer_activation = 'softmax'
        elif self.task == 'binary_classification':
            if self._last_layer_activation is None:
                self._last_layer_activation = 'sigmoid'
        else:
            raise RuntimeError(
                'Only "regression", "classification" and "binary_classification" are supported'
            )

        self.keras_model, self.keras_model_predict, output_loss, variance_loss = self.model(
        )

        if self.task == 'regression':
            self.metrics = [mean_absolute_error, mean_error
                            ] if not self.metrics else self.metrics
            self.keras_model.compile(loss={
                'output': output_loss,
                'variance_output': variance_loss
            },
                                     optimizer=self.optimizer,
                                     metrics={'output': self.metrics},
                                     weighted_metrics=weighted_metrics,
                                     loss_weights={
                                         'output': .5,
                                         'variance_output': .5
                                     } if not loss_weights else loss_weights,
                                     sample_weight_mode=sample_weight_mode)
        elif self.task == 'classification':
            self.metrics = [categorical_accuracy
                            ] if not self.metrics else self.metrics
            self.keras_model.compile(loss={
                'output': output_loss,
                'variance_output': variance_loss
            },
                                     optimizer=self.optimizer,
                                     metrics={'output': self.metrics},
                                     weighted_metrics=weighted_metrics,
                                     loss_weights={
                                         'output': .5,
                                         'variance_output': .5
                                     } if not loss_weights else loss_weights,
                                     sample_weight_mode=sample_weight_mode)
        elif self.task == 'binary_classification':
            self.metrics = [binary_accuracy
                            ] if not self.metrics else self.metrics
            self.keras_model.compile(loss={
                'output': output_loss,
                'variance_output': variance_loss
            },
                                     optimizer=self.optimizer,
                                     metrics={'output': self.metrics},
                                     weighted_metrics=weighted_metrics,
                                     loss_weights={
                                         'output': .5,
                                         'variance_output': .5
                                     } if not loss_weights else loss_weights,
                                     sample_weight_mode=sample_weight_mode)
        return None

    def train(self, input_data, labels, inputs_err=None, labels_err=None):
        """
        Train a Bayesian neural network

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :return: None
        :rtype: NoneType
        :History:
            | 2018-Jan-06 - Written - Henry Leung (University of Toronto)
            | 2018-Apr-12 - Updated - Henry Leung (University of Toronto)
        """
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        # Call the checklist to create astroNN folder and save parameters
        self.pre_training_checklist_child(input_data, labels, inputs_err,
                                          labels_err)

        reduce_lr = ReduceLROnPlateau(monitor='val_output_loss',
                                      factor=0.5,
                                      min_delta=self.reduce_lr_epsilon,
                                      patience=self.reduce_lr_patience,
                                      min_lr=self.reduce_lr_min,
                                      mode='min',
                                      verbose=2)

        self.virtual_cvslogger = VirutalCSVLogger()

        self.__callbacks = [reduce_lr, self.virtual_cvslogger
                            ]  # default callbacks that must always be present

        if self.callbacks is not None:
            if isinstance(self.callbacks, list):
                self.__callbacks.extend(self.callbacks)
            else:
                self.__callbacks.append(self.callbacks)

        start_time = time.time()

        self.history = self.keras_model.fit_generator(
            generator=self.training_generator,
            validation_data=self.validation_generator,
            epochs=self.max_epochs,
            verbose=self.verbose,
            workers=os.cpu_count(),
            callbacks=self.__callbacks,
            use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training, {(time.time() - start_time):.{2}f}s in total'
        )

        if self.autosave is True:
            # Call the post training checklist to save parameters
            self.save()

        return None

    def train_on_batch(self,
                       input_data,
                       labels,
                       inputs_err=None,
                       labels_err=None):
        """
        Train a Bayesian neural network by running a single gradient update on all of your data, suitable for fine-tuning

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :return: None
        :rtype: NoneType
        :History:
            | 2018-Aug-25 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        # check if normalizers already exist (they do when fine-tuning, so no need to recalculate mean/std)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to worry about the magic number as the loss function only looks for it in y_true
        norm_input_err = inputs_err / self.input_std
        norm_labels_err = labels_err / self.labels_std

        start_time = time.time()

        fit_generator = BayesianCNNDataGenerator(
            batch_size=input_data.shape[0],
            shuffle=False,
            steps_per_epoch=1,
            data=[norm_data, norm_labels, norm_input_err, norm_labels_err])

        score = self.keras_model.fit_generator(
            fit_generator,
            epochs=1,
            verbose=self.verbose,
            workers=os.cpu_count(),
            use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training on Batch, {(time.time() - start_time):.{2}f}s in total'
        )

        return None

    def post_training_checklist_child(self):
        self.keras_model.save(self.fullfilepath + _astroNN_MODEL_NAME)
        print(_astroNN_MODEL_NAME +
              f' saved to {(self.fullfilepath + _astroNN_MODEL_NAME)}')

        self.hyper_txt.write(f"Dropout Rate: {self.dropout_rate} \n")
        self.hyper_txt.flush()
        self.hyper_txt.close()

        data = {
            'id': (self.__class__.__name__
                   if self._model_identifier is None else self._model_identifier),
            'pool_length': self.pool_length,
            'filterlen': self.filter_len,
            'filternum': self.num_filters,
            'hidden': self.num_hidden,
            'input': self._input_shape,
            'labels': self._labels_shape,
            'task': self.task,
            'last_layer_activation': self._last_layer_activation,
            'activation': self.activation,
            'input_mean': self.input_mean.tolist(),
            'inv_tau': self.inv_model_precision,
            'length_scale': self.length_scale,
            'labels_mean': self.labels_mean.tolist(),
            'input_std': self.input_std.tolist(),
            'labels_std': self.labels_std.tolist(),
            'valsize': self.val_size,
            'targetname': self.targetname,
            'dropout_rate': self.dropout_rate,
            'l1': self.l1,
            'l2': self.l2,
            'maxnorm': self.maxnorm,
            'input_norm_mode': self.input_norm_mode,
            'labels_norm_mode': self.labels_norm_mode,
            'batch_size': self.batch_size
        }

        with open(self.fullfilepath + '/astroNN_model_parameter.json',
                  'w') as f:
            json.dump(data, f, indent=4, sort_keys=True)

    def test(self, input_data, inputs_err=None):
        """
        Test model; high-performance version designed for fast variational inference on GPU

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :param inputs_err: Error for input_data, same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :return: prediction and prediction uncertainty
        :History:
            | 2018-Jan-06 - Written - Henry Leung (University of Toronto)
            | 2018-Apr-12 - Updated - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        if not gpu_availability() and self.mc_num > 25:
            warnings.warn(
                f'You are using the CPU version of TensorFlow, doing {self.mc_num} times Monte Carlo Inference can '
                f'potentially be very slow! \n '
                f'A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n'
                f'This is just a warning and will not be shown if mc_num < 25 on CPU'
            )
        if self.mc_num < 2:
            raise AttributeError("mc_num cannot be smaller than 2")
        self.pre_testing_checklist_master()

        input_data = np.atleast_2d(input_data)

        if self.input_normalizer is not None:
            input_array = self.input_normalizer.normalize(input_data,
                                                          calc=False)
        else:
            # Prevent shallow copy issue
            input_array = np.array(input_data)
            input_array -= self.input_mean
            input_array /= self.input_std

        # if no error array then just zeros
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)
        else:
            inputs_err = np.atleast_2d(inputs_err)
            inputs_err /= self.input_std

        total_test_num = input_data.shape[0]  # Number of testing data

        # for a number of test data points smaller than batch_size
        if total_test_num < self.batch_size:
            batch_size = total_test_num
        else:
            batch_size = self.batch_size

        # Due to the nature of how generator works, no overlapped prediction
        data_gen_shape = (total_test_num // batch_size) * batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        start_time = time.time()
        print("Starting Dropout Variational Inference")

        # Data Generator for prediction
        prediction_generator = BayesianCNNPredDataGenerator(
            batch_size=batch_size,
            shuffle=False,
            steps_per_epoch=data_gen_shape // batch_size,
            data=[input_array[:data_gen_shape], inputs_err[:data_gen_shape]])

        new = FastMCInference(self.mc_num)(self.keras_model_predict)

        result = np.asarray(new.predict_generator(prediction_generator))

        if remainder_shape != 0:  # deal with remainder
            remainder_generator = BayesianCNNPredDataGenerator(
                batch_size=remainder_shape,
                shuffle=False,
                steps_per_epoch=1,
                data=[
                    input_array[data_gen_shape:], inputs_err[data_gen_shape:]
                ])
            remainder_result = np.asarray(
                new.predict_generator(remainder_generator))
            if remainder_shape == 1:
                remainder_result = np.expand_dims(remainder_result, axis=0)
            result = np.concatenate((result, remainder_result))

        # in case there is only 1 test data point, we need to add a dimension
        if result.ndim < 3 and batch_size == 1:
            result = np.expand_dims(result, axis=0)

        half_first_dim = result.shape[
            1] // 2  # result.shape[1] is guaranteed to be even, otherwise something is wrong

        predictions = result[:, :half_first_dim, 0]  # mean prediction
        mc_dropout_uncertainty = result[:, :half_first_dim, 1] * (
            self.labels_std**2)  # model uncertainty
        predictions_var = np.exp(result[:, half_first_dim:, 0]) * (
            self.labels_std**2)  # predictive uncertainty

        print(
            f'Completed Dropout Variational Inference with {self.mc_num} forward passes, '
            f'{(time.time() - start_time):.{2}f}s elapsed')

        if self.labels_normalizer is not None:
            predictions = self.labels_normalizer.denormalize(predictions)
        else:
            predictions *= self.labels_std
            predictions += self.labels_mean

        if self.task == 'regression':
            # Predictive variance
            pred_var = predictions_var + mc_dropout_uncertainty  # epistemic plus aleatoric uncertainty
            pred_uncertainty = np.sqrt(pred_var)  # Convert back to std error

            # final correction from variance to standard deviation
            mc_dropout_uncertainty = np.sqrt(mc_dropout_uncertainty)
            predictive_uncertainty = np.sqrt(predictions_var)

        elif self.task == 'classification':
            # we want entropy for classification uncertainty
            predicted_class = np.argmax(predictions, axis=1)
            mc_dropout_uncertainty = np.ones_like(predicted_class, dtype=float)
            predictive_uncertainty = np.ones_like(predicted_class, dtype=float)

            # center variance
            predictions_var -= 1.
            for i in range(predicted_class.shape[0]):
                all_prediction = np.array(predictions[i, :])
                mc_dropout_uncertainty[i] = -np.sum(
                    all_prediction * np.log(all_prediction))
                predictive_uncertainty[i] = predictions_var[i,
                                                            predicted_class[i]]

            pred_uncertainty = mc_dropout_uncertainty + predictive_uncertainty
            # We only want the predicted class back
            predictions = predicted_class

        elif self.task == 'binary_classification':
            # we want entropy for classification uncertainty, so we need the prediction in logits space
            mc_dropout_uncertainty = -np.sum(predictions * np.log(predictions),
                                             axis=0)
            # need to activate before rounding to int so that the prediction is always 0 or 1
            predictions = np.rint(sigmoid(predictions))
            predictive_uncertainty = predictions_var
            pred_uncertainty = mc_dropout_uncertainty + predictions_var

        else:
            raise AttributeError('Unknown Task')

        return predictions, {
            'total': pred_uncertainty,
            'model': mc_dropout_uncertainty,
            'predictive': predictive_uncertainty
        }

    @deprecated
    def test_old(self, input_data, inputs_err=None):
        """
        Tests the model; it is recommended to use the new test() instead of this deprecated method

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :param inputs_err: Error for input_data, same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :return: prediction and prediction uncertainty
        :History: 2018-Jan-06 - Written - Henry Leung (University of Toronto)
        """
        self.pre_testing_checklist_master()

        if self.input_normalizer is not None:
            input_array = self.input_normalizer.normalize(input_data,
                                                          calc=False)
        else:
            # Prevent shallow copy issue
            input_array = np.array(input_data)
            input_array -= self.input_mean
            input_array /= self.input_std

        # if no error array then just zeros
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)
        else:
            inputs_err /= self.input_std

        total_test_num = input_data.shape[0]  # Number of testing data

        # for number of training data smaller than batch_size
        if total_test_num < self.batch_size:
            self.batch_size = total_test_num

        predictions = np.zeros(
            (self.mc_num, total_test_num, self._labels_shape))
        predictions_var = np.zeros(
            (self.mc_num, total_test_num, self._labels_shape))

        # Due to the nature of how generator works, no overlapped prediction
        data_gen_shape = (total_test_num // self.batch_size) * self.batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        start_time = time.time()
        print("Starting Dropout Variational Inference")
        for i in range(self.mc_num):
            if i % 5 == 0:
                print(
                    f'Completed {i} of {self.mc_num} Monte Carlo Dropout, {(time.time() - start_time):.{2}f}s '
                    f'elapsed')

            # Data Generator for prediction
            prediction_generator = BayesianCNNPredDataGenerator(
                batch_size=self.batch_size,
                shuffle=False,
                steps_per_epoch=data_gen_shape // self.batch_size,
                data=[
                    input_array[:data_gen_shape], inputs_err[:data_gen_shape]
                ])

            result = np.asarray(
                self.keras_model_predict.predict_generator(
                    prediction_generator))

            if result.ndim < 2:  # in case only 1 test data point, in such case we need to add a dimension
                result = np.expand_dims(result, axis=0)

            half_first_dim = result.shape[1] // 2  # result.shape[1] is guaranteed to be an even number, otherwise something is wrong

            predictions[
                i, :data_gen_shape] = result[:, :half_first_dim].reshape(
                    (data_gen_shape, self._labels_shape))
            predictions_var[
                i, :data_gen_shape] = result[:, half_first_dim:].reshape(
                    (data_gen_shape, self._labels_shape))

            if remainder_shape != 0:
                remainder_data = input_array[data_gen_shape:]
                remainder_data_err = inputs_err[data_gen_shape:]
                # assume it's caused by monochrome images, so we need to expand the dimension by 1
                if len(input_array[0].shape) != len(self._input_shape):
                    remainder_data = np.expand_dims(remainder_data, axis=-1)
                    remainder_data_err = np.expand_dims(remainder_data_err,
                                                        axis=-1)
                result = self.keras_model_predict.predict({
                    'input':
                    remainder_data,
                    'input_err':
                    remainder_data_err
                })
                predictions[
                    i, data_gen_shape:] = result[:, :half_first_dim].reshape(
                        (remainder_shape, self._labels_shape))
                predictions_var[
                    i, data_gen_shape:] = result[:, half_first_dim:].reshape(
                        (remainder_shape, self._labels_shape))

        print(
            f'Completed Dropout Variational Inference, {(time.time() - start_time):.{2}f}s in total'
        )

        if self.labels_normalizer is not None:
            predictions = self.labels_normalizer.denormalize(predictions)
        else:
            predictions *= self.labels_std
            predictions += self.labels_mean

        pred = np.mean(predictions, axis=0)

        if self.task == 'regression':
            # Predictive variance
            mc_dropout_uncertainty = np.var(predictions, axis=0)  # var
            predictive_uncertainty = np.mean(np.exp(predictions_var) *
                                             (np.array(self.labels_std)**2),
                                             axis=0)
            pred_var = predictive_uncertainty + mc_dropout_uncertainty  # epistemic plus aleatoric uncertainty
            pred_uncertainty = np.sqrt(pred_var)  # Convert back to std error

            # final correction from variance to standard deviation
            mc_dropout_uncertainty = np.sqrt(mc_dropout_uncertainty)
            predictive_uncertainty = np.sqrt(predictive_uncertainty)

        elif self.task == 'classification':
            # we want entropy for classification uncertainty
            pred_shape = pred.shape[0]
            pred = np.argmax(pred, axis=1)
            predictions_var = np.mean(predictions_var, axis=0)
            mc_dropout_uncertainty = np.ones_like(pred, dtype=float)
            predictive_uncertainty = np.ones_like(pred, dtype=float)
            for i in range(pred_shape):
                all_prediction = np.array(predictions[:, i, pred[i]])
                mc_dropout_uncertainty[i] = -np.sum(
                    all_prediction * np.log(all_prediction))
                predictive_uncertainty[i] = np.array(predictions_var[i,
                                                                     pred[i]])

            pred_uncertainty = mc_dropout_uncertainty + predictive_uncertainty

        elif self.task == 'binary_classification':
            # we want entropy for classification uncertainty
            mc_dropout_uncertainty = -np.sum(
                pred * np.log(pred),
                axis=0)  # need to use raw prediction for uncertainty
            pred = np.rint(pred)
            predictive_uncertainty = np.mean(predictions_var, axis=0)
            pred_uncertainty = mc_dropout_uncertainty + predictive_uncertainty
        else:
            raise AttributeError('Unknown Task')

        return pred, {
            'total': pred_uncertainty,
            'model': mc_dropout_uncertainty,
            'predictive': predictive_uncertainty
        }

    def evaluate(self, input_data, labels, inputs_err=None, labels_err=None):
        """
        Evaluate the neural network with the provided input data and labels and get back a metrics score

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :return: metrics score dictionary
        :rtype: dict
        :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        # check if it exists (existing means fine-tuning, so we do not need to calculate mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to care about Magic number as loss function looks for magic num in y_true only
        norm_input_err = inputs_err / self.input_std
        norm_labels_err = labels_err / self.labels_std

        eval_batchsize = self.batch_size if input_data.shape[0] > self.batch_size else input_data.shape[0]
        steps = input_data.shape[0] // self.batch_size if input_data.shape[0] > self.batch_size else 1

        start_time = time.time()
        print("Starting Evaluation")

        evaluate_generator = BayesianCNNDataGenerator(
            batch_size=eval_batchsize,
            shuffle=False,
            steps_per_epoch=steps,
            data=[norm_data, norm_labels, norm_input_err, norm_labels_err])

        scores = self.keras_model.evaluate_generator(evaluate_generator)
        if isinstance(scores, float):  # make sure scores is iterable
            scores = [scores]
        outputname = self.keras_model.output_names
        funcname = []
        if isinstance(self.keras_model.metrics, dict):
            func_list = self.keras_model.metrics[outputname[0]]
        else:
            func_list = self.keras_model.metrics
        for func in func_list:
            if hasattr(func, '__name__'):
                funcname.append(func.__name__)
            else:
                funcname.append(func.__class__.__name__)
        # funcname = [func.__name__ for func in self.keras_model.metrics[outputname[0]]]
        loss_outputname = ['loss_' + name for name in outputname]
        output_funcname = [outputname[0] + '_' + name for name in funcname]
        list_names = ['loss', *loss_outputname, *output_funcname]

        print(
            f'Completed Evaluation, {(time.time() - start_time):.{2}f}s elapsed'
        )

        return {name: score for name, score in zip(list_names, scores)}
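        # --- Hedged usage sketch (illustrative, not part of the original snippet) ---
        # Assuming `net` is a trained instance of a model exposing this evaluate(),
        # and x_test / y_test (plus optional same-shaped error arrays) are ndarrays:
        #
        #   scores = net.evaluate(x_test, y_test,
        #                         inputs_err=x_test_err,
        #                         labels_err=y_test_err)
        #   # scores is a dict with keys like 'loss', 'loss_output', 'output_<metric>'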
Example no. 6
0
    def evaluate(self, input_data, labels, inputs_err=None, labels_err=None):
        """
        Evaluate the neural network with the provided input data and labels and get back a metrics score

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :return: metrics score dictionary
        :rtype: dict
        :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()

        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        input_data = {"input": input_data}
        labels = {"output": labels}

        # check if exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need to calculate mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to care about Magic number as loss function looks for magic num in y_true only
        norm_input_err = inputs_err / self.input_std['input']
        norm_labels_err = labels_err / self.labels_std['output']

        norm_data.update({
            "input_err": norm_input_err,
            "labels_err": norm_labels_err
        })
        norm_labels.update({"variance_output": norm_labels["output"]})

        total_num = input_data['input'].shape[0]
        eval_batchsize = self.batch_size if total_num > self.batch_size else total_num
        steps = total_num // self.batch_size if total_num > self.batch_size else 1

        start_time = time.time()
        print("Starting Evaluation")

        evaluate_generator = BayesianCNNDataGenerator(
            batch_size=eval_batchsize,
            shuffle=False,
            steps_per_epoch=steps,
            data=[norm_data, norm_labels])

        scores = self.keras_model.evaluate(evaluate_generator)
        if isinstance(scores, float):  # make sure scores is iterable
            scores = [scores]
        outputname = self.keras_model.output_names
        funcname = self.keras_model.metrics_names

        print(
            f'Completed Evaluation, {(time.time() - start_time):.{2}f}s elapsed'
        )

        return list_to_dict(funcname, scores)
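        # --- Hedged sketch (an assumption about list_to_dict, not from the original snippet) ---
        # list_to_dict() is assumed to simply pair metric names with scores, roughly:
        #
        #   def list_to_dict(names, values):
        #       return {name: value for name, value in zip(names, values)}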
Example no. 7
0
def load_folder(folder=None):
    """
    To load astroNN model object from folder

    :param folder: [optional] you should provide the folder name if outside the folder, do not specify when you are inside the folder
    :type folder: str
    :return: astroNN Neural Network instance
    :rtype: astroNN.nn.NeuralNetMaster.NeuralNetMaster
    :History: 2017-Dec-29 - Written - Henry Leung (University of Toronto)
    """
    currentdir = os.getcwd()

    if folder is not None:
        fullfilepath = os.path.join(currentdir, folder)
    else:
        fullfilepath = currentdir

    astronn_model_obj = None

    if folder is not None and os.path.isfile(os.path.join(folder, 'astroNN_model_parameter.json')) is True:
        with open(os.path.join(folder, 'astroNN_model_parameter.json')) as f:
            parameter = json.load(f)
            f.close()
    elif os.path.isfile('astroNN_model_parameter.json') is True:
        with open('astroNN_model_parameter.json') as f:
            parameter = json.load(f)
            f.close()
    elif folder is not None and not os.path.exists(folder):
        raise IOError('Folder does not exist: ' + str(currentdir + os.sep + folder))
    else:
        raise FileNotFoundError('Are you sure this is an astroNN generated folder? Or is it a folder trained by an old '
                                'astroNN version?')

    identifier = parameter['id']

    if identifier == 'ApogeeCNN':
        astronn_model_obj = ApogeeCNN()
    elif identifier == 'ApogeeBCNN':
        astronn_model_obj = ApogeeBCNN()
    elif identifier == 'ApogeeBCNNCensored':
        astronn_model_obj = ApogeeBCNNCensored()
    elif identifier == 'ApogeeCVAE':
        astronn_model_obj = ApogeeCVAE()
    elif identifier == 'Cifar10CNN':
        astronn_model_obj = Cifar10CNN()
    elif identifier == 'MNIST_BCNN':
        astronn_model_obj = MNIST_BCNN()
    elif identifier == 'Galaxy10CNN':
        astronn_model_obj = Galaxy10CNN()
    elif identifier == 'StarNet2017':
        astronn_model_obj = StarNet2017()
    elif identifier == 'GalaxyGAN2017':
        astronn_model_obj = GalaxyGAN2017()
    elif identifier == 'Galaxy10GAN':
        astronn_model_obj = Galaxy10GAN()
    else:
        unknown_model_message = f'Unknown model identifier -> {identifier}!'
        # try to load custom model from CUSTOM_MODEL_PATH
        CUSTOM_MODEL_PATH = custom_model_path_reader()
        # try the current folder and see if there is any .py on top of CUSTOM_MODEL_PATH
        list_py_files = [os.path.join(fullfilepath, f) for f in os.listdir(fullfilepath) if f.endswith(".py")]
        if CUSTOM_MODEL_PATH is None and not list_py_files:
            print("\n")
            raise TypeError(unknown_model_message)
        else:
            import sys
            from importlib import import_module
            for path_list in (path_list for path_list in [CUSTOM_MODEL_PATH, list_py_files] if path_list is not None):
                for path in path_list:
                    head, tail = os.path.split(path)
                    sys.path.insert(0, head)
                    try:
                        # use splitext for the module name; str.strip('.py') would also drop leading/trailing '.', 'p', 'y' characters
                        model = getattr(import_module(os.path.splitext(tail)[0]), str(identifier))
                        astronn_model_obj = model()
                    except AttributeError:
                        pass

        if astronn_model_obj is None:
            print("\n")
            raise TypeError(unknown_model_message)

    astronn_model_obj.currentdir = currentdir
    astronn_model_obj.fullfilepath = fullfilepath
    astronn_model_obj.folder_name = folder if folder is not None else os.path.basename(os.path.normpath(currentdir))

    # Must have parameter
    astronn_model_obj._input_shape = parameter['input']
    astronn_model_obj._labels_shape = parameter['labels']
    astronn_model_obj.num_hidden = parameter['hidden']
    astronn_model_obj.input_norm_mode = parameter['input_norm_mode']
    astronn_model_obj.labels_norm_mode = parameter['labels_norm_mode']
    astronn_model_obj.input_mean = np.array(parameter['input_mean'])
    astronn_model_obj.labels_mean = np.array(parameter['labels_mean'])
    astronn_model_obj.input_std = np.array(parameter['input_std'])
    astronn_model_obj.labels_std = np.array(parameter['labels_std'])
    astronn_model_obj.batch_size = parameter['batch_size']
    astronn_model_obj.targetname = parameter['targetname']
    astronn_model_obj.val_size = parameter['valsize']

    # create normalizer and set correct mean and std
    astronn_model_obj.input_normalizer = Normalizer(mode=astronn_model_obj.input_norm_mode)
    astronn_model_obj.labels_normalizer = Normalizer(mode=astronn_model_obj.labels_norm_mode)
    astronn_model_obj.input_normalizer.mean_labels = astronn_model_obj.input_mean
    astronn_model_obj.input_normalizer.std_labels = astronn_model_obj.input_std
    astronn_model_obj.labels_normalizer.mean_labels = astronn_model_obj.labels_mean
    astronn_model_obj.labels_normalizer.std_labels = astronn_model_obj.labels_std

    # Conditional parameter depends on neural net architecture
    try:
        astronn_model_obj.num_filters = parameter['filternum']
    except KeyError:
        pass
    try:
        astronn_model_obj.filter_len = parameter['filterlen']
    except KeyError:
        pass
    try:
        pool_length = parameter['pool_length']
        if isinstance(pool_length, int):  # 1D case, a single integer
            astronn_model_obj.pool_length = parameter['pool_length']
        else:  # multi-dimensional case, a list of pool lengths
            astronn_model_obj.pool_length = list(parameter['pool_length'])
    except KeyError:
        pass
    try:
        # need to convert to int because Keras does not accept an array or list here
        astronn_model_obj.latent_dim = int(parameter['latent'])
    except KeyError:
        pass
    try:
        astronn_model_obj.task = parameter['task']
    except KeyError:
        pass
    try:
        astronn_model_obj.dropout_rate = parameter['dropout_rate']
    except KeyError:
        pass
    try:
        # if inverse model precision exists, so does length_scale
        astronn_model_obj.inv_model_precision = parameter['inv_tau']
        astronn_model_obj.length_scale = parameter['length_scale']
    except KeyError:
        pass
    try:
        astronn_model_obj.l1 = parameter['l1']
    except KeyError:
        pass
    try:
        astronn_model_obj.l2 = parameter['l2']
    except KeyError:
        pass
    try:
        astronn_model_obj.maxnorm = parameter['maxnorm']
    except KeyError:
        pass
    with h5py.File(os.path.join(astronn_model_obj.fullfilepath, 'model_weights.h5'), mode='r') as f:
        training_config = f.attrs.get('training_config')
        training_config = json.loads(training_config.decode('utf-8'))
        optimizer_config = training_config['optimizer_config']
        optimizer = optimizers.deserialize(optimizer_config)

        # Recover loss functions and metrics.
        losses = convert_custom_objects(training_config['loss'])
        try:
            try:
                [losses_lookup(losses[loss]) for loss in losses]
            except TypeError:
                losses_lookup(losses)
        except Exception:
            pass

        metrics = convert_custom_objects(training_config['metrics'])
        # it's weird that Keras needs metrics[metric][0] instead of metrics[metric] like losses, needs attention
        try:
            try:
                [losses_lookup(metrics[metric][0]) for metric in metrics]
            except TypeError:
                losses_lookup(metrics[0])
        except Exception:
            pass

        sample_weight_mode = training_config['sample_weight_mode']
        loss_weights = training_config['loss_weights']

        # compile the model
        astronn_model_obj.compile(optimizer=optimizer)

        # set weights
        astronn_model_obj.keras_model.load_weights(os.path.join(astronn_model_obj.fullfilepath, 'model_weights.h5'))

        # Build train function (to get weight updates); need to consider the Sequential model case too
        if isinstance(astronn_model_obj.keras_model, Sequential):
            astronn_model_obj.keras_model.model._make_train_function()
        else:
            astronn_model_obj.keras_model._make_train_function()
        optimizer_weights_group = f['optimizer_weights']
        optimizer_weight_names = [n.decode('utf8') for n in optimizer_weights_group.attrs['weight_names']]
        optimizer_weight_values = [optimizer_weights_group[n] for n in optimizer_weight_names]
        astronn_model_obj.keras_model.optimizer.set_weights(optimizer_weight_values)

    print("========================================================")
    print(f"Loaded astroNN model, model type: {astronn_model_obj.name} -> {identifier}")
    print("========================================================")
    return astronn_model_obj
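# --- Hedged usage sketch (illustrative, not part of the original snippet) ---
# Assuming an astroNN-generated folder (containing astroNN_model_parameter.json and
# model_weights.h5) exists under the current working directory:
#
#   net = load_folder('astroNN_0001_run001')   # folder name is illustrative
#   # or, when already inside the model folder:
#   net = load_folder()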
Example no. 8
0
    def evaluate(self, input_data, labels):
        """
        Evaluate the neural network with the provided input data and labels and get back a metrics score

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :param labels: labels
        :type labels: ndarray
        :return: metrics score dictionary
        :rtype: dict
        :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        # check if exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need to calculate mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        eval_batchsize = self.batch_size if input_data.shape[0] > self.batch_size else input_data.shape[0]
        steps = input_data.shape[0] // self.batch_size if input_data.shape[0] > self.batch_size else 1

        start_time = time.time()
        print("Starting Evaluation")

        evaluate_generator = CNNDataGenerator(batch_size=eval_batchsize,
                                              shuffle=False,
                                              steps_per_epoch=steps,
                                              data=[norm_data, norm_labels])

        scores = self.keras_model.evaluate_generator(evaluate_generator)
        if isinstance(scores, float):  # make sure scores is iterable
            scores = [scores]
        outputname = self.keras_model.output_names
        funcname = []
        if isinstance(self.keras_model.metrics, dict):
            func_list = self.keras_model.metrics[outputname[0]]
        else:
            func_list = self.keras_model.metrics
        for func in func_list:
            if hasattr(func, '__name__'):
                funcname.append(func.__name__)
            else:
                funcname.append(func.__class__.__name__)
        # funcname = [func.__name__ for func in self.keras_model.metrics]
        output_funcname = [outputname[0] + '_' + name for name in funcname]
        list_names = ['loss', *output_funcname]

        print(
            f'Completed Evaluation, {(time.time() - start_time):.{2}f}s elapsed'
        )

        return {name: score for name, score in zip(list_names, scores)}
Example no. 9
0
    def pre_training_checklist_child(self, input_data, labels):
        input_data, labels = self.pre_training_checklist_master(
            input_data, labels)

        # check if exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need to calculate mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to care about Magic number as loss function looks for magic num in y_true only
        norm_data.update({
            "input_err": (input_data['input_err'] / self.input_std['input']),
            "labels_err":
            input_data['labels_err'] / self.labels_std['output']
        })
        norm_labels.update({"variance_output": norm_labels['output']})

        if self.keras_model is None:  # only compile if there is no keras_model, e.g. fine-tuning does not require it
            self.compile()

        self.train_idx, self.val_idx = train_test_split(
            np.arange(self.num_train + self.val_num), test_size=self.val_size)

        norm_data_training = {}
        norm_data_val = {}
        norm_labels_training = {}
        norm_labels_val = {}
        for name in norm_data.keys():
            norm_data_training.update({name: norm_data[name][self.train_idx]})
            norm_data_val.update({name: norm_data[name][self.val_idx]})
        for name in norm_labels.keys():
            norm_labels_training.update(
                {name: norm_labels[name][self.train_idx]})
            norm_labels_val.update({name: norm_labels[name][self.val_idx]})

        self.inv_model_precision = (2 * self.num_train *
                                    self.l2) / (self.length_scale**2 *
                                                (1 - self.dropout_rate))
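        # Hedged worked example of the inverse model precision above (illustrative numbers,
        # not from the original code): with num_train=10000, l2=1e-7, length_scale=3 and
        # dropout_rate=0.3, inv_model_precision = (2 * 10000 * 1e-7) / (3**2 * (1 - 0.3)) ≈ 3.17e-4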

        self.training_generator = BayesianCNNDataGenerator(
            batch_size=self.batch_size,
            shuffle=True,
            steps_per_epoch=self.num_train // self.batch_size,
            data=[norm_data_training, norm_labels_training],
            manual_reset=False)

        val_batchsize = self.batch_size if len(
            self.val_idx) > self.batch_size else len(self.val_idx)
        self.validation_generator = BayesianCNNDataGenerator(
            batch_size=val_batchsize,
            shuffle=False,
            steps_per_epoch=max(self.val_num // self.batch_size, 1),
            data=[norm_data_val, norm_labels_val],
            manual_reset=True)

        return norm_data, norm_labels
Example no. 10
0
class BayesianCNNBase(NeuralNetMaster, ABC):
    """
    Top-level class for a Bayesian convolutional neural network

    :History: 2018-Jan-06 - Written - Henry Leung (University of Toronto)
    """
    def __init__(self):
        super().__init__()
        self.name = 'Bayesian Convolutional Neural Network'
        self._model_type = 'BCNN'
        self.initializer = None
        self.activation = None
        self._last_layer_activation = None
        self.num_filters = None
        self.filter_len = None
        self.pool_length = None
        self.num_hidden = None
        self.reduce_lr_epsilon = None
        self.reduce_lr_min = None
        self.reduce_lr_patience = None
        self.l1 = None
        self.l2 = None
        self.maxnorm = None
        self.inv_model_precision = None  # inverse model precision
        self.dropout_rate = 0.2
        self.length_scale = 3  # prior length scale
        self.mc_num = 100  # increased to 100 due to high performance VI on GPU implemented on 14 April 2018 (Henry)
        self.val_size = 0.1
        self.disable_dropout = False

        self.input_norm_mode = 1
        self.labels_norm_mode = 2

        self.keras_model_predict = None

    def pre_training_checklist_child(self, input_data, labels, sample_weights):
        input_data, labels = self.pre_training_checklist_master(
            input_data, labels)

        # check if exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need to calculate mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to care about Magic number as loss function looks for magic num in y_true only
        norm_data.update({
            "input_err": (input_data['input_err'] / self.input_std['input']),
            "labels_err":
            input_data['labels_err'] / self.labels_std['output']
        })
        norm_labels.update({"variance_output": norm_labels['output']})

        if self.keras_model is None:  # only compile if there is no keras_model, e.g. fine-tuning does not require it
            self.compile()

        self.train_idx, self.val_idx = train_test_split(
            np.arange(self.num_train + self.val_num), test_size=self.val_size)

        norm_data_training = {}
        norm_data_val = {}
        norm_labels_training = {}
        norm_labels_val = {}
        for name in norm_data.keys():
            norm_data_training.update({name: norm_data[name][self.train_idx]})
            norm_data_val.update({name: norm_data[name][self.val_idx]})
        for name in norm_labels.keys():
            norm_labels_training.update(
                {name: norm_labels[name][self.train_idx]})
            norm_labels_val.update({name: norm_labels[name][self.val_idx]})

        if sample_weights is not None:
            sample_weights_training = sample_weights[self.train_idx]
            sample_weights_val = sample_weights[self.val_idx]
        else:
            sample_weights_training = None
            sample_weights_val = None

        self.inv_model_precision = (2 * self.num_train *
                                    self.l2) / (self.length_scale**2 *
                                                (1 - self.dropout_rate))

        self.training_generator = BayesianCNNDataGenerator(
            batch_size=self.batch_size,
            shuffle=True,
            steps_per_epoch=self.num_train // self.batch_size,
            data=[norm_data_training, norm_labels_training],
            manual_reset=False,
            sample_weights=sample_weights_training)

        val_batchsize = self.batch_size if len(
            self.val_idx) > self.batch_size else len(self.val_idx)
        self.validation_generator = BayesianCNNDataGenerator(
            batch_size=val_batchsize,
            shuffle=False,
            steps_per_epoch=max(self.val_num // self.batch_size, 1),
            data=[norm_data_val, norm_labels_val],
            manual_reset=True,
            sample_weights=sample_weights_val)

        return norm_data_training, norm_data_val, norm_labels_training, norm_labels_val, sample_weights_training, sample_weights_val

    def compile(self,
                optimizer=None,
                loss=None,
                metrics=None,
                weighted_metrics=None,
                loss_weights=None,
                sample_weight_mode=None):
        if optimizer is not None:
            self.optimizer = optimizer
        elif self.optimizer is None or self.optimizer == 'adam':
            self.optimizer = Adam(learning_rate=self.lr,
                                  beta_1=self.beta_1,
                                  beta_2=self.beta_2,
                                  epsilon=self.optimizer_epsilon,
                                  decay=0.0)
        if metrics is not None:
            self.metrics = metrics
        if self.task == 'regression':
            if self._last_layer_activation is None:
                self._last_layer_activation = 'linear'
        elif self.task == 'classification':
            if self._last_layer_activation is None:
                self._last_layer_activation = 'softmax'
        elif self.task == 'binary_classification':
            if self._last_layer_activation is None:
                self._last_layer_activation = 'sigmoid'
        else:
            raise RuntimeError(
                'Only "regression", "classification" and "binary_classification" are supported'
            )

        self.keras_model, self.keras_model_predict, self.output_loss, self.variance_loss = self.model()

        if self.task == 'regression':
            self._output_loss = lambda predictive, labelerr: mse_lin_wrapper(
                predictive, labelerr)
        elif self.task == 'classification':
            self._output_loss = lambda predictive, labelerr: bayesian_categorical_crossentropy_wrapper(
                predictive)
        elif self.task == 'binary_classification':
            self._output_loss = lambda predictive, labelerr: bayesian_binary_crossentropy_wrapper(
                predictive)
        else:
            raise RuntimeError(
                'Only "regression", "classification" and "binary_classification" are supported'
            )

        # all-zero loss as dummy loss
        if self.task == 'regression':
            self.metrics = [mean_absolute_error, mean_error
                            ] if not self.metrics else self.metrics
            self.keras_model.compile(optimizer=self.optimizer,
                                     loss=zeros_loss,
                                     metrics=self.metrics,
                                     weighted_metrics=weighted_metrics,
                                     sample_weight_mode=sample_weight_mode)
        elif self.task == 'classification':
            self.metrics = [categorical_accuracy
                            ] if not self.metrics else self.metrics
            self.keras_model.compile(optimizer=self.optimizer,
                                     loss=zeros_loss,
                                     metrics={'output': self.metrics},
                                     weighted_metrics=weighted_metrics,
                                     sample_weight_mode=sample_weight_mode)
        elif self.task == 'binary_classification':
            self.metrics = [binary_accuracy
                            ] if not self.metrics else self.metrics
            self.keras_model.compile(optimizer=self.optimizer,
                                     loss=zeros_loss,
                                     metrics={'output': self.metrics},
                                     weighted_metrics=weighted_metrics,
                                     sample_weight_mode=sample_weight_mode)

        # inject custom training step if needed
        try:
            self.custom_train_step()
        except NotImplementedError:
            pass
        except TypeError:
            self.keras_model.train_step = self.custom_train_step

        # inject custom testing step if needed
        try:
            self.custom_test_step()
        except NotImplementedError:
            pass
        except TypeError:
            self.keras_model.test_step = self.custom_test_step

        return None
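        # --- Hedged usage sketch (illustrative, not part of the original snippet) ---
        # compile() is normally invoked by pre_training_checklist_child() when no keras_model
        # exists yet, but a concrete subclass can also be compiled directly, e.g.:
        #
        #   net = ApogeeBCNN()        # any concrete BayesianCNNBase subclass
        #   net.task = 'regression'   # 'classification' / 'binary_classification' also supported
        #   net.compile()             # builds keras_model and keras_model_predict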

    def custom_train_step(self, data):
        """
        Custom training logic

        :param data:
        :return:
        """
        data = data_adapter.expand_1d(data)
        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)

        # Run forward pass.
        with tf.GradientTape() as tape:
            y_pred = self.keras_model(x, training=True)
            self.keras_model.compiled_loss._losses = self._output_loss(
                y_pred[1], x['labels_err'])
            self.keras_model.compiled_loss._losses = nest.map_structure(
                self.keras_model.compiled_loss._get_loss_object,
                self.keras_model.compiled_loss._losses)
            self.keras_model.compiled_loss._losses = nest.flatten(
                self.keras_model.compiled_loss._losses)
            loss = self.keras_model.compiled_loss(
                y,
                y_pred,
                sample_weight,
                regularization_losses=self.keras_model.losses)

        # Run backwards pass.
        self.keras_model.optimizer.minimize(
            loss, self.keras_model.trainable_variables, tape=tape)
        self.keras_model.compiled_metrics.update_state(y, y_pred,
                                                       sample_weight)
        # Collect metrics to return
        return_metrics = {}
        for metric in self.keras_model.metrics:
            result = metric.result()
            if isinstance(result, dict):
                return_metrics.update(result)
            else:
                return_metrics[metric.name] = result
        return return_metrics

    def custom_test_step(self, data):
        data = data_adapter.expand_1d(data)
        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)

        y_pred = self.keras_model(x, training=False)
        # Updates stateful loss metrics.
        temploss = self._output_loss(y_pred[1], x['labels_err'])
        self.keras_model.compiled_loss._losses = temploss
        self.keras_model.compiled_loss._losses = nest.map_structure(
            self.keras_model.compiled_loss._get_loss_object,
            self.keras_model.compiled_loss._losses)
        self.keras_model.compiled_loss._losses = nest.flatten(
            self.keras_model.compiled_loss._losses)

        self.keras_model.compiled_loss(
            y,
            y_pred,
            sample_weight,
            regularization_losses=self.keras_model.losses)

        self.keras_model.compiled_metrics.update_state(y, y_pred,
                                                       sample_weight)
        # Collect metrics to return
        return_metrics = {}

        for metric in self.keras_model.metrics:
            result = metric.result()
            if isinstance(result, dict):
                return_metrics.update(result)
            else:
                return_metrics[metric.name] = result
        return return_metrics

    def fit(self,
            input_data,
            labels,
            inputs_err=None,
            labels_err=None,
            sample_weights=None,
            experimental=False):
        """
        Train a Bayesian neural network

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :param sample_weights: Sample weights (if any)
        :type sample_weights: Union([NoneType, ndarray])
        :return: None
        :rtype: NoneType
        :History:
            | 2018-Jan-06 - Written - Henry Leung (University of Toronto)
            | 2018-Apr-12 - Updated - Henry Leung (University of Toronto)
        """
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        # TODO: allow named inputs too??
        input_data = {
            "input": input_data,
            "input_err": inputs_err,
            "labels_err": labels_err
        }
        labels = {"output": labels, "variance_output": labels}

        # Call the checklist to create astroNN folder and save parameters
        norm_data_training, norm_data_val, norm_labels_training, norm_labels_val, sample_weights_training, sample_weights_val = self.pre_training_checklist_child(
            input_data, labels, sample_weights)

        # norm_data_training['labels_err'] = norm_data_training['labels_err'].filled(MAGIC_NUMBER).astype(np.float32)

        # TODO: fix the monitor name
        reduce_lr = ReduceLROnPlateau(monitor='val_output_mean_absolute_error',
                                      factor=0.5,
                                      min_delta=self.reduce_lr_epsilon,
                                      patience=self.reduce_lr_patience,
                                      min_lr=self.reduce_lr_min,
                                      mode='min',
                                      verbose=2)

        self.virtual_cvslogger = VirutalCSVLogger()

        self.__callbacks = [reduce_lr, self.virtual_cvslogger]  # default, unchangeable callbacks that must always be present

        if self.callbacks is not None:
            if isinstance(self.callbacks, list):
                self.__callbacks.extend(self.callbacks)
            else:
                self.__callbacks.append(self.callbacks)

        start_time = time.time()

        if experimental:
            dataset = tf.data.Dataset.from_tensor_slices(
                (norm_data_training, norm_labels_training,
                 sample_weights_training)).batch(self.batch_size).shuffle(
                     5000,
                     reshuffle_each_iteration=True).prefetch(tf.data.AUTOTUNE)
            val_dataset = tf.data.Dataset.from_tensor_slices(
                (norm_data_val, norm_labels_val, sample_weights_val)).batch(
                    self.batch_size).prefetch(tf.data.AUTOTUNE)

            self.history = self.keras_model.fit(
                dataset,
                validation_data=val_dataset,
                epochs=self.max_epochs,
                verbose=self.verbose,
                workers=os.cpu_count() // 2,
                callbacks=self.__callbacks,
                use_multiprocessing=MULTIPROCESS_FLAG)
        else:
            self.history = self.keras_model.fit(
                self.training_generator,
                validation_data=self.validation_generator,
                epochs=self.max_epochs,
                verbose=self.verbose,
                workers=os.cpu_count() // 2,
                callbacks=self.__callbacks,
                use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training, {(time.time() - start_time):.{2}f}s in total'
        )
        if self.autosave is True:
            # Call the post training checklist to save parameters
            self.save()

        return None
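        # --- Hedged usage sketch (illustrative, not part of the original snippet) ---
        #   net.fit(x_train, y_train,
        #           inputs_err=x_train_err,   # optional, same shape as x_train
        #           labels_err=y_train_err)   # optional, same shape as y_train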

    def fit_on_batch(self,
                     input_data,
                     labels,
                     inputs_err=None,
                     labels_err=None,
                     sample_weights=None):
        """
        Train a Bayesian neural network by running a single gradient update on all of your data, suitable for fine-tuning

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :param sample_weights: Sample weights (if any)
        :type sample_weights: Union([NoneType, ndarray])
        :return: None
        :rtype: NoneType
        :History:
            | 2018-Aug-25 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()

        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        input_data = {
            "input": input_data,
            "input_err": inputs_err,
            "labels_err": labels_err
        }
        labels = {"output": labels, "variance_output": labels}

        # check if exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need to calculate mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to care about Magic number as loss function looks for magic num in y_true only
        norm_data.update({
            "input_err": (input_data['input_err'] / self.input_std['input']),
            "labels_err":
            input_data['labels_err'] / self.labels_std['output']
        })
        norm_labels.update({"variance_output": norm_labels['output']})

        start_time = time.time()

        fit_generator = BayesianCNNDataGenerator(
            batch_size=input_data['input'].shape[0],
            shuffle=False,
            steps_per_epoch=1,
            data=[norm_data, norm_labels],
            sample_weights=sample_weights)

        score = self.keras_model.fit(fit_generator,
                                     epochs=1,
                                     verbose=self.verbose,
                                     workers=os.cpu_count(),
                                     use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training on Batch, {(time.time() - start_time):.{2}f}s in total'
        )

        return None
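        # --- Hedged usage sketch (illustrative, not part of the original snippet) ---
        #   net.fit_on_batch(x_new, y_new)   # single gradient update on all given data, e.g. for fine-tuning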

    def post_training_checklist_child(self):
        self.keras_model.save(self.fullfilepath + _astroNN_MODEL_NAME)
        print(_astroNN_MODEL_NAME +
              f' saved to {(self.fullfilepath + _astroNN_MODEL_NAME)}')

        self.hyper_txt.write(f"Dropout Rate: {self.dropout_rate} \n")
        self.hyper_txt.flush()
        self.hyper_txt.close()

        data = {
            'id': self.__class__.__name__ if self._model_identifier is None else self._model_identifier,
            'pool_length': self.pool_length,
            'filterlen': self.filter_len,
            'filternum': self.num_filters,
            'hidden': self.num_hidden,
            'input': self._input_shape,
            'labels': self._labels_shape,
            'task': self.task,
            'last_layer_activation': self._last_layer_activation,
            'activation': self.activation,
            'input_mean': dict_np_to_dict_list(self.input_mean),
            'inv_tau': self.inv_model_precision,
            'length_scale': self.length_scale,
            'labels_mean': dict_np_to_dict_list(self.labels_mean),
            'input_std': dict_np_to_dict_list(self.input_std),
            'labels_std': dict_np_to_dict_list(self.labels_std),
            'valsize': self.val_size,
            'targetname': self.targetname,
            'dropout_rate': self.dropout_rate,
            'l1': self.l1,
            'l2': self.l2,
            'maxnorm': self.maxnorm,
            'input_norm_mode': self.input_normalizer.normalization_mode,
            'labels_norm_mode': self.labels_normalizer.normalization_mode,
            'input_names': self.input_names,
            'output_names': self.output_names,
            'batch_size': self.batch_size
        }

        with open(self.fullfilepath + '/astroNN_model_parameter.json',
                  'w') as f:
            json.dump(data, f, indent=4, sort_keys=True)
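        # --- Hedged sketch of the saved astroNN_model_parameter.json (values illustrative) ---
        #   {
        #       "batch_size": 64,
        #       "dropout_rate": 0.3,
        #       "id": "ApogeeBCNN",
        #       "task": "regression",
        #       ...
        #   }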

    def predict(self, input_data, inputs_err=None):
        """
        Test the model; high-performance version designed for fast variational inference on GPU

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :param inputs_err: Error for input_data, same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :return: prediction and prediction uncertainty
        :History:
            | 2018-Jan-06 - Written - Henry Leung (University of Toronto)
            | 2018-Apr-12 - Updated - Henry Leung (University of Toronto)
        """
        self.has_model_check()

        if gpu_availability() is False and self.mc_num > 25:
            warnings.warn(
                f'You are using CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can '
                f'potentially be very slow! \n '
                f'A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n'
                f'This is just a warning, and will not be shown if mc_num < 25 on CPU'
            )
        if self.mc_num < 2:
            raise AttributeError("mc_num cannot be smaller than 2")

        # if no error array then just zeros
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)
        else:
            inputs_err = np.atleast_2d(inputs_err)
            inputs_err /= self.input_std['input']

        input_data = {"input": input_data, "input_err": inputs_err}
        input_data = self.pre_testing_checklist_master(input_data)

        if self.input_normalizer is not None:
            input_array = self.input_normalizer.normalize(input_data,
                                                          calc=False)
        else:
            # Prevent shallow copy issue
            input_array = np.array(input_data)
            input_array -= self.input_mean['input']
            input_array /= self.input_std['input']

        total_test_num = input_data['input'].shape[0]  # Number of testing data

        # for number of training data smaller than batch_size
        if total_test_num < self.batch_size:
            batch_size = total_test_num
        else:
            batch_size = self.batch_size

        # Due to the nature of how generator works, no overlapped prediction
        data_gen_shape = (total_test_num // batch_size) * batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        norm_data_main = {}
        norm_data_remainder = {}
        for name in input_array.keys():
            norm_data_main.update({name: input_array[name][:data_gen_shape]})
            norm_data_remainder.update(
                {name: input_array[name][data_gen_shape:]})

        # Data Generator for prediction
        with tqdm(total=total_test_num, unit="sample") as pbar:
            pbar.set_postfix({'Monte-Carlo': self.mc_num})
            # suppress pfor warning from TF
            old_level = tf.get_logger().level
            tf.get_logger().setLevel('ERROR')

            prediction_generator = BayesianCNNPredDataGenerator(
                batch_size=batch_size,
                shuffle=False,
                steps_per_epoch=data_gen_shape // batch_size,
                data=[norm_data_main],
                pbar=pbar)

            new = FastMCInference(self.mc_num)(self.keras_model_predict)

            result = np.asarray(new.predict(prediction_generator))

            if remainder_shape != 0:  # deal with remainder
                remainder_generator = BayesianCNNPredDataGenerator(
                    batch_size=remainder_shape,
                    shuffle=False,
                    steps_per_epoch=1,
                    data=[norm_data_remainder],
                    pbar=pbar)
                remainder_result = np.asarray(new.predict(remainder_generator))
                if remainder_shape == 1:
                    remainder_result = np.expand_dims(remainder_result, axis=0)
                result = np.concatenate((result, remainder_result))

            tf.get_logger().setLevel(old_level)

        # in case only 1 test data point, in such case we need to add a dimension
        if result.ndim < 3 and batch_size == 1:
            result = np.expand_dims(result, axis=0)

        half_first_dim = result.shape[1] // 2  # result.shape[1] is guaranteed to be an even number, otherwise something is wrong

        predictions = result[:, :half_first_dim, 0]  # mean prediction
        mc_dropout_uncertainty = result[:, :half_first_dim, 1] * (
            self.labels_std['output']**2)  # model uncertainty
        predictions_var = np.exp(result[:, half_first_dim:, 0]) * (
            self.labels_std['output']**2)  # predictive uncertainty

        if self.labels_normalizer is not None:
            predictions = self.labels_normalizer.denormalize(
                list_to_dict([self.keras_model.output_names[0]], predictions))
            predictions = predictions['output']
        else:
            predictions *= self.labels_std['output']
            predictions += self.labels_mean['output']

        if self.task == 'regression':
            # Predictive variance
            pred_var = predictions_var + mc_dropout_uncertainty  # epistemic plus aleatoric uncertainty
            pred_uncertainty = np.sqrt(pred_var)  # Convert back to std error

            # final correction from variance to standard deviation
            mc_dropout_uncertainty = np.sqrt(mc_dropout_uncertainty)
            predictive_uncertainty = np.sqrt(predictions_var)

        elif self.task == 'classification':
            # we want entropy for classification uncertainty
            predicted_class = np.argmax(predictions, axis=1)
            mc_dropout_uncertainty = np.ones_like(predicted_class, dtype=float)
            predictive_uncertainty = np.ones_like(predicted_class, dtype=float)

            # center variance
            predictions_var -= 1.
            for i in range(predicted_class.shape[0]):
                all_prediction = np.array(predictions[i, :])
                mc_dropout_uncertainty[i] = -np.sum(
                    all_prediction * np.log(all_prediction))
                predictive_uncertainty[i] = predictions_var[i,
                                                            predicted_class[i]]

            pred_uncertainty = mc_dropout_uncertainty + predictive_uncertainty
            # We only want the predicted class back
            predictions = predicted_class

        elif self.task == 'binary_classification':
            # we want entropy for classification uncertainty, so we need the prediction in logits space
            mc_dropout_uncertainty = -np.sum(predictions * np.log(predictions),
                                             axis=0)
            # need to activate (sigmoid) before rounding to int so that the prediction is always 0 or 1
            predictions = np.rint(sigmoid(predictions))
            predictive_uncertainty = predictions_var
            pred_uncertainty = mc_dropout_uncertainty + predictions_var

        else:
            raise AttributeError('Unknown Task')

        return predictions, {
            'total': pred_uncertainty,
            'model': mc_dropout_uncertainty,
            'predictive': predictive_uncertainty
        }
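
    # A minimal usage sketch (hypothetical names, not from the source): assuming `bnn`
    # is an already trained astroNN Bayesian network instance and `x_test` is an
    # ndarray of test inputs, the prediction and uncertainty dictionary can be read as:
    #
    #     pred, uncert = bnn.predict(x_test)
    #     total_std = uncert['total']        # sqrt(epistemic + aleatoric variance)
    #     model_std = uncert['model']        # MC-dropout (epistemic) standard deviation
    #     data_std = uncert['predictive']    # predictive (aleatoric) standard deviation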

    def predict_dataset(self, file):
        class BayesianCNNPredDataGeneratorV2(GeneratorMaster):
            def __init__(self,
                         batch_size,
                         shuffle,
                         steps_per_epoch,
                         manual_reset=False,
                         pbar=None,
                         nn_model=None):
                super().__init__(batch_size=batch_size,
                                 shuffle=shuffle,
                                 steps_per_epoch=steps_per_epoch,
                                 data=None,
                                 manual_reset=manual_reset)
                self.pbar = pbar

                # initial idx
                self.idx_list = self._get_exploration_order(range(len(file)))
                self.current_idx = 0
                self.nn_model = nn_model

            def _data_generation(self, idx_list_temp):
                # Generate data
                inputs = self.nn_model.input_normalizer.normalize(
                    {
                        "input": file[idx_list_temp],
                        "input_err": np.zeros_like(file[idx_list_temp])
                    },
                    calc=False)
                x = self.input_d_checking(inputs,
                                          np.arange(len(idx_list_temp)))
                return x

            def __getitem__(self, index):
                x = self._data_generation(
                    self.idx_list[index * self.batch_size:(index + 1) *
                                  self.batch_size])
                if self.pbar: self.pbar.update(self.batch_size)
                return x

            def on_epoch_end(self):
                # shuffle the list when epoch ends for the next epoch
                self.idx_list = self._get_exploration_order(range(len(file)))

        self.has_model_check()

        if gpu_availability() is False and self.mc_num > 25:
            warnings.warn(
                f'You are using the CPU version of Tensorflow, doing Monte Carlo inference {self.mc_num} times can '
                f'potentially be very slow! \n '
                f'A possible fix is to decrease the mc_num parameter of the model to do fewer MC inferences. \n'
                f'This is just a warning and will not be shown if mc_num < 25 on CPU'
            )
        if self.mc_num < 2:
            raise AttributeError("mc_num cannot be smaller than 2")

        total_test_num = len(file)  # Number of testing data

        # for number of test data smaller than batch_size
        if total_test_num < self.batch_size:
            batch_size = total_test_num
        else:
            batch_size = self.batch_size

        # Due to the nature of how the generator works, there is no overlapping prediction
        data_gen_shape = (total_test_num // batch_size) * batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        # Data Generator for prediction
        with tqdm(total=total_test_num, unit="sample") as pbar:
            pbar.set_postfix({'Monte-Carlo': self.mc_num})
            # suppress pfor warning from TF
            old_level = tf.get_logger().level
            tf.get_logger().setLevel('ERROR')
            prediction_generator = BayesianCNNPredDataGeneratorV2(
                batch_size=batch_size,
                shuffle=False,
                steps_per_epoch=data_gen_shape // batch_size,
                pbar=pbar,
                nn_model=self)

            new = FastMCInference(self.mc_num)(self.keras_model_predict)

            result = np.asarray(new.predict(prediction_generator))

            if remainder_shape != 0:  # deal with remainder
                remainder_generator = BayesianCNNPredDataGeneratorV2(
                    batch_size=remainder_shape,
                    shuffle=False,
                    steps_per_epoch=1,
                    pbar=pbar,
                    nn_model=self)
                remainder_result = np.asarray(new.predict(remainder_generator))
                if remainder_shape == 1:
                    remainder_result = np.expand_dims(remainder_result, axis=0)
                result = np.concatenate((result, remainder_result))

            tf.get_logger().setLevel(old_level)

        # in case there is only 1 test data point, we need to add a dimension
        if result.ndim < 3 and batch_size == 1:
            result = np.expand_dims(result, axis=0)

        # result.shape[1] is guaranteed to be an even number, otherwise something is wrong
        half_first_dim = result.shape[1] // 2

        predictions = result[:, :half_first_dim, 0]  # mean prediction
        mc_dropout_uncertainty = result[:, :half_first_dim, 1] * (
            self.labels_std['output']**2)  # model uncertainty
        predictions_var = np.exp(result[:, half_first_dim:, 0]) * (
            self.labels_std['output']**2)  # predictive uncertainty

        if self.labels_normalizer is not None:
            predictions = self.labels_normalizer.denormalize(
                list_to_dict([self.keras_model.output_names[0]], predictions))
            predictions = predictions['output']
        else:
            predictions *= self.labels_std['output']
            predictions += self.labels_mean['output']

        if self.task == 'regression':
            # Predictive variance
            pred_var = predictions_var + mc_dropout_uncertainty  # epistemic plus aleatoric uncertainty
            pred_uncertainty = np.sqrt(pred_var)  # Convert back to std error

            # final correction from variance to standard deviation
            mc_dropout_uncertainty = np.sqrt(mc_dropout_uncertainty)
            predictive_uncertainty = np.sqrt(predictions_var)

        elif self.task == 'classification':
            # we want entropy for classification uncertainty
            predicted_class = np.argmax(predictions, axis=1)
            mc_dropout_uncertainty = np.ones_like(predicted_class, dtype=float)
            predictive_uncertainty = np.ones_like(predicted_class, dtype=float)

            # center variance
            predictions_var -= 1.
            for i in range(predicted_class.shape[0]):
                all_prediction = np.array(predictions[i, :])
                mc_dropout_uncertainty[i] = -np.sum(
                    all_prediction * np.log(all_prediction))
                predictive_uncertainty[i] = predictions_var[i,
                                                            predicted_class[i]]

            pred_uncertainty = mc_dropout_uncertainty + predictive_uncertainty
            # We only want the predicted class back
            predictions = predicted_class

        elif self.task == 'binary_classification':
            # we want entropy for classification uncertainty, so we need the prediction in logit space
            mc_dropout_uncertainty = -np.sum(predictions * np.log(predictions),
                                             axis=0)
            # need to apply the activation before rounding to int so that the prediction is always 0 or 1
            predictions = np.rint(sigmoid(predictions))
            predictive_uncertainty = predictions_var
            pred_uncertainty = mc_dropout_uncertainty + predictions_var

        else:
            raise AttributeError('Unknown Task')

        return predictions, {
            'total': pred_uncertainty,
            'model': mc_dropout_uncertainty,
            'predictive': predictive_uncertainty
        }
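
    # A small sketch (hypothetical names, not from the source): `predict_dataset`
    # expects a raw ndarray (here called `big_array`) and normalizes it batch by
    # batch with the statistics stored from training, keeping memory usage low:
    #
    #     pred, uncert = bnn.predict_dataset(big_array)
    #     # same uncertainty dictionary keys as predict(): 'total', 'model', 'predictive'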

    def evaluate(self, input_data, labels, inputs_err=None, labels_err=None):
        """
        Evaluate the neural network with the provided input data and labels and get back a metrics score

        :param input_data: Data to be evaluated with the neural network
        :type input_data: ndarray
        :param labels: Labels to be evaluated with the neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :return: metrics score dictionary
        :rtype: dict
        :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()

        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        input_data = {"input": input_data}
        labels = {"output": labels}

        # check if it exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need to calculate the mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to care about the magic number as the loss function looks for the magic number in y_true only
        norm_input_err = inputs_err / self.input_std['input']
        norm_labels_err = labels_err / self.labels_std['output']

        norm_data.update({
            "input_err": norm_input_err,
            "labels_err": norm_labels_err
        })
        norm_labels.update({"variance_output": norm_labels["output"]})

        total_num = input_data['input'].shape[0]
        eval_batchsize = self.batch_size if total_num > self.batch_size else total_num
        steps = total_num // self.batch_size if total_num > self.batch_size else 1

        start_time = time.time()
        print("Starting Evaluation")

        # suppress pfor warning from TF
        old_level = tf.get_logger().level
        tf.get_logger().setLevel('ERROR')

        evaluate_generator = BayesianCNNDataGenerator(
            batch_size=eval_batchsize,
            shuffle=False,
            steps_per_epoch=steps,
            data=[norm_data, norm_labels])

        scores = self.keras_model.evaluate(evaluate_generator)

        tf.get_logger().setLevel(old_level)

        if isinstance(scores, float):  # make sure scores is iterable
            scores = [scores]
        outputname = self.keras_model.output_names
        funcname = self.keras_model.metrics_names

        print(
            f'Completed Evaluation, {(time.time() - start_time):.{2}f}s elapsed'
        )

        return list_to_dict(funcname, scores)
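
    # A brief usage sketch (assumed names, not from the source): evaluation with
    # optional per-sample errors, returning a dict keyed by the Keras metric names:
    #
    #     scores = bnn.evaluate(x_test, y_test,
    #                           inputs_err=x_test_err,    # same shape as x_test
    #                           labels_err=y_test_err)    # same shape as y_test
    #     print(scores['loss'])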

    @deprecated_copy_signature(fit)
    def train(self, *args, **kwargs):
        return self.fit(*args, **kwargs)

    @deprecated_copy_signature(fit_on_batch)
    def train_on_batch(self, *args, **kwargs):
        return self.fit_on_batch(*args, **kwargs)

    @deprecated_copy_signature(predict)
    def test(self, *args, **kwargs):
        return self.predict(*args, **kwargs)
Example no. 11
class ConvVAEBase(NeuralNetMaster, ABC):
    """Top-level class for a Convolutional Variational Autoencoder"""
    def __init__(self):
        """
        NAME:
            __init__
        PURPOSE:
            To define astroNN Convolutional Variational Autoencoder
        HISTORY:
            2018-Jan-06 - Written - Henry Leung (University of Toronto)
        """
        super().__init__()
        self.name = 'Convolutional Variational Autoencoder'
        self._model_type = 'CVAE'
        self.initializer = None
        self.activation = None
        self._last_layer_activation = None
        self.num_filters = None
        self.filter_len = None
        self.pool_length = None
        self.num_hidden = None
        self.reduce_lr_epsilon = None
        self.reduce_lr_min = None
        self.reduce_lr_patience = None
        self.l2 = None
        self.latent_dim = None
        self.val_size = 0.1
        self.dropout_rate = 0.0

        self.keras_vae = None
        self.keras_encoder = None
        self.keras_decoder = None
        self.loss = None

        self.input_shape = None

        self.input_norm_mode = 255
        self.labels_norm_mode = 255
        self.input_mean = None
        self.input_std = None
        self.labels_mean = None
        self.labels_std = None

    def compile(self,
                optimizer=None,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None):
        self.keras_model, self.keras_encoder, self.keras_decoder = self.model()

        if optimizer is not None:
            self.optimizer = optimizer
        elif self.optimizer is None or self.optimizer == 'adam':
            self.optimizer = Adam(learning_rate=self.lr,
                                  beta_1=self.beta_1,
                                  beta_2=self.beta_2,
                                  epsilon=self.optimizer_epsilon,
                                  decay=0.0)
        if self.loss is None:
            self.loss = mean_squared_error

        self.keras_model.compile(loss=self.loss, optimizer=self.optimizer)
        return None

    def pre_training_checklist_child(self, input_data, input_recon_target):
        if self.task == 'classification':
            raise RuntimeError(
                'astroNN VAE does not support classification task')
        elif self.task == 'binary_classification':
            raise RuntimeError(
                'astroNN VAE does not support binary classification task')

        self.pre_training_checklist_master(input_data, input_recon_target)

        if isinstance(input_data, H5Loader):
            self.targetname = input_data.target
            input_data, input_recon_target = input_data.load()

        # check if it exists (existing means fine-tuning, so we do not need to calculate the mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(input_recon_target)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(input_recon_target,
                                                           calc=False)

        if self.keras_model is None:  # only compile if there is no keras_model, e.g. fine-tuning does not require it
            self.compile()

        self.train_idx, self.val_idx = train_test_split(
            np.arange(self.num_train), test_size=self.val_size)

        self.training_generator = CVAEDataGenerator(self.batch_size).generate(
            norm_data[self.train_idx], norm_labels[self.train_idx])
        self.validation_generator = CVAEDataGenerator(
            self.batch_size).generate(norm_data[self.val_idx],
                                      norm_labels[self.val_idx])

        return input_data, input_recon_target

    def train(self, input_data, input_recon_target):
        # Call the checklist to create astroNN folder and save parameters
        self.pre_training_checklist_child(input_data, input_recon_target)

        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.5,
                                      min_delta=self.reduce_lr_epsilon,
                                      patience=self.reduce_lr_patience,
                                      min_lr=self.reduce_lr_min,
                                      mode='min',
                                      verbose=2)

        self.virtual_cvslogger = VirutalCSVLogger()

        # default callbacks that must always be present
        self.__callbacks = [reduce_lr, self.virtual_cvslogger]

        if self.callbacks is not None:
            if isinstance(self.callbacks, list):
                self.__callbacks.extend(self.callbacks)
            else:
                self.__callbacks.append(self.callbacks)

        start_time = time.time()

        self.keras_model.fit_generator(
            generator=self.training_generator,
            steps_per_epoch=self.num_train // self.batch_size,
            validation_data=self.validation_generator,
            validation_steps=self.val_num // self.batch_size,
            epochs=self.max_epochs,
            verbose=self.verbose,
            workers=os.cpu_count(),
            callbacks=self.__callbacks,
            use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training, {(time.time() - start_time):.{2}f}s in total'
        )

        if self.autosave is True:
            # Call the post training checklist to save parameters
            self.save()

        return None

    def post_training_checklist_child(self):
        astronn_model = 'model_weights.h5'
        self.keras_model.save(self.fullfilepath + astronn_model)
        print(astronn_model +
              f' saved to {(self.fullfilepath + astronn_model)}')

        self.hyper_txt.write(f"Dropout Rate: {self.dropout_rate} \n")
        self.hyper_txt.flush()
        self.hyper_txt.close()

        data = {
            'id': self.__class__.__name__,
            'pool_length': self.pool_length,
            'filterlen': self.filter_len,
            'filternum': self.num_filters,
            'hidden': self.num_hidden,
            'input': self.input_shape,
            'labels': self.labels_shape,
            'task': self.task,
            'input_mean': self.input_mean.tolist(),
            'labels_mean': self.labels_mean.tolist(),
            'input_std': self.input_std.tolist(),
            'labels_std': self.labels_std.tolist(),
            'valsize': self.val_size,
            'targetname': self.targetname,
            'dropout_rate': self.dropout_rate,
            'l2': self.l2,
            'input_norm_mode': self.input_norm_mode,
            'labels_norm_mode': self.labels_norm_mode,
            'batch_size': self.batch_size,
            'latent': self.latent_dim
        }

        with open(self.fullfilepath + '/astroNN_model_parameter.json',
                  'w') as f:
            json.dump(data, f, indent=4, sort_keys=True)

    def test(self, input_data):
        self.pre_testing_checklist_master()

        input_data = np.atleast_2d(input_data)

        if self.input_normalizer is not None:
            input_array = self.input_normalizer.normalize(input_data,
                                                          calc=False)
        else:
            # Prevent shallow copy issue
            input_array = np.array(input_data)
            input_array -= self.input_mean
            input_array /= self.input_std

        total_test_num = input_data.shape[0]  # Number of testing data

        # for number of test data smaller than batch_size
        if input_data.shape[0] < self.batch_size:
            self.batch_size = input_data.shape[0]

        # Due to the nature of how the generator works, there is no overlapping prediction
        data_gen_shape = (total_test_num // self.batch_size) * self.batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        predictions = np.zeros((total_test_num, self.labels_shape, 1))

        # Data Generator for prediction
        prediction_generator = CVAEPredDataGenerator(self.batch_size).generate(
            input_array[:data_gen_shape])
        predictions[:data_gen_shape] = np.asarray(
            self.keras_model.predict_generator(prediction_generator,
                                               steps=input_array.shape[0] //
                                               self.batch_size))

        if remainder_shape != 0:
            remainder_data = input_array[data_gen_shape:]
            # assume it's caused by mono images, so we need to expand the dim by 1
            if len(input_array[0].shape) != len(self.input_shape):
                remainder_data = np.expand_dims(remainder_data, axis=-1)
            result = self.keras_model.predict(remainder_data)
            predictions[data_gen_shape:] = result

        if self.labels_normalizer is not None:
            predictions[:, :, 0] = self.labels_normalizer.denormalize(
                predictions[:, :, 0])
        else:
            predictions[:, :, 0] *= self.labels_std
            predictions[:, :, 0] += self.labels_mean

        return predictions

    def test_encoder(self, input_data):
        self.pre_testing_checklist_master()
        # Prevent shallow copy issue
        if self.input_normalizer is not None:
            input_array = self.input_normalizer.normalize(input_data,
                                                          calc=False)
        else:
            # Prevent shallow copy issue
            input_array = np.array(input_data)
            input_array -= self.input_mean
            input_array /= self.input_std

        total_test_num = input_data.shape[0]  # Number of testing data

        # for number of test data smaller than batch_size
        if input_data.shape[0] < self.batch_size:
            self.batch_size = input_data.shape[0]

        # Due to the nature of how the generator works, there is no overlapping prediction
        data_gen_shape = (total_test_num // self.batch_size) * self.batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        encoding = np.zeros((total_test_num, self.latent_dim))

        # Data Generator for prediction
        prediction_generator = CVAEPredDataGenerator(self.batch_size).generate(
            input_array[:data_gen_shape])
        encoding[:data_gen_shape] = np.asarray(
            self.keras_encoder.predict_generator(prediction_generator,
                                                 steps=input_array.shape[0] //
                                                 self.batch_size))

        if remainder_shape != 0:
            remainder_data = input_array[data_gen_shape:]
            # assume it's caused by mono images, so we need to expand the dim by 1
            if len(input_array[0].shape) != len(self.input_shape):
                remainder_data = np.expand_dims(remainder_data, axis=-1)
            result = self.keras_encoder.predict(remainder_data)
            encoding[data_gen_shape:] = result

        return encoding
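
    # A usage sketch for this class (hypothetical subclass and data names, not from
    # the source): a concrete CVAE subclass is trained on images, then reconstructions
    # and latent vectors are obtained separately:
    #
    #     cvae = MyCVAE()                      # a subclass that implements model()
    #     cvae.train(images, images)           # reconstruct the inputs themselves
    #     recon = cvae.test(images)            # decoder output, denormalized
    #     latent = cvae.test_encoder(images)   # shape (n_samples, latent_dim)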
Example no. 12
    def fit_on_batch(self,
                     input_data,
                     labels,
                     inputs_err=None,
                     labels_err=None,
                     sample_weights=None):
        """
        Train a Bayesian neural network by running a single gradient update on all of your data, suitable for fine-tuning

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape as input_data.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any)
        :type labels_err: Union([NoneType, ndarray])
        :param sample_weights: Sample weights (if any)
        :type sample_weights: Union([NoneType, ndarray])
        :return: None
        :rtype: NoneType
        :History:
            | 2018-Aug-25 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()

        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        input_data = {
            "input": input_data,
            "input_err": inputs_err,
            "labels_err": labels_err
        }
        labels = {"output": labels, "variance_output": labels}

        # check if it exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need to calculate the mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to care about the magic number as the loss function looks for the magic number in y_true only
        norm_data.update({
            "input_err": (input_data['input_err'] / self.input_std['input']),
            "labels_err":
            input_data['labels_err'] / self.labels_std['output']
        })
        norm_labels.update({"variance_output": norm_labels['output']})

        norm_data = self._tensor_dict_sanitize(norm_data,
                                               self.keras_model.input_names)
        norm_labels = self._tensor_dict_sanitize(norm_labels,
                                                 self.keras_model.output_names)

        start_time = time.time()

        fit_generator = BayesianCNNDataGenerator(
            batch_size=input_data['input'].shape[0],
            shuffle=False,
            steps_per_epoch=1,
            data=[norm_data, norm_labels],
            sample_weights=sample_weights)

        score = self.keras_model.fit(fit_generator,
                                     epochs=1,
                                     verbose=self.verbose,
                                     workers=os.cpu_count(),
                                     use_multiprocessing=MULTIPROCESS_FLAG)

        print(
            f'Completed Training on Batch, {(time.time() - start_time):.{2}f}s in total'
        )

        return None
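
    # A fine-tuning sketch (assumed names, not from the source): after a model has
    # been trained or loaded, a single gradient update over a small batch can be run:
    #
    #     bnn.fit_on_batch(x_new, y_new,
    #                      inputs_err=x_new_err,
    #                      labels_err=y_new_err)   # one epoch, one step over all rows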
Example no. 13
    def pre_training_checklist_child(self, input_data, labels, sample_weights):
        # on top of checklist, convert input_data/labels to dict
        input_data, labels = self.pre_training_checklist_master(input_data, labels)

        # check if exists (existing means the model has already been trained (e.g. fine-tuning)
        # so we do not need to calculate the mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)
            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)
        if self.keras_model is None:  # only compile if there is no keras_model, e.g. fine-tuning does not require it
            self.compile()
        
        norm_data = self._tensor_dict_sanitize(norm_data, self.keras_model.input_names)
        norm_labels = self._tensor_dict_sanitize(norm_labels, self.keras_model.output_names)

        self.train_idx, self.val_idx = train_test_split(np.arange(self.num_train + self.val_num),
                                                        test_size=self.val_size)

        norm_data_training = {}
        norm_data_val = {}
        norm_labels_training = {}
        norm_labels_val = {}
        for name in norm_data.keys():
            norm_data_training.update({name: norm_data[name][self.train_idx]})
            norm_data_val.update({name: norm_data[name][self.val_idx]})
        for name in norm_labels.keys():
            norm_labels_training.update({name: norm_labels[name][self.train_idx]})
            norm_labels_val.update({name: norm_labels[name][self.val_idx]})

        if sample_weights is not None:        
            sample_weights_training = sample_weights[self.train_idx]
            sample_weights_val = sample_weights[self.val_idx]
        else:
            sample_weights_training = None
            sample_weights_val = None

        self.training_generator = CNNDataGenerator(
            batch_size=self.batch_size,
            shuffle=True,
            steps_per_epoch=self.num_train // self.batch_size,
            data=[norm_data_training, norm_labels_training],
            manual_reset=False, 
            sample_weights=sample_weights_training)

        val_batchsize = self.batch_size if len(self.val_idx) > self.batch_size else len(self.val_idx)
        self.validation_generator = CNNDataGenerator(
            batch_size=val_batchsize,
            shuffle=False,
            steps_per_epoch=max(self.val_num // self.batch_size, 1),
            data=[norm_data_val, norm_labels_val],
            manual_reset=True, 
            sample_weights=sample_weights_val)

        return input_data, labels
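
    # The split above works on index arrays, so each entry of the normalized data
    # dict is sliced the same way for training and validation. A standalone sketch
    # of the same idea (toy shapes, not from the source):
    #
    #     from sklearn.model_selection import train_test_split
    #     import numpy as np
    #     idx_train, idx_val = train_test_split(np.arange(100), test_size=0.1)
    #     data = {'input': np.random.rand(100, 7), 'input_err': np.zeros((100, 7))}
    #     data_train = {k: v[idx_train] for k, v in data.items()}
    #     data_val = {k: v[idx_val] for k, v in data.items()}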
Example no. 14
class CNNBase(NeuralNetMaster, ABC):
    """Top-level class for a convolutional neural network"""

    def __init__(self):
        """
        NAME:
            __init__
        PURPOSE:
            To define astroNN convolutional neural network
        HISTORY:
            2018-Jan-06 - Written - Henry Leung (University of Toronto)
        """
        super().__init__()
        self.name = 'Convolutional Neural Network'
        self._model_type = 'CNN'
        self._model_identifier = None
        self.initializer = None
        self.activation = None
        self._last_layer_activation = None
        self.num_filters = None
        self.filter_len = None
        self.pool_length = None
        self.num_hidden = None
        self.reduce_lr_epsilon = None
        self.reduce_lr_min = None
        self.reduce_lr_patience = None
        self.l1 = None
        self.l2 = None
        self.maxnorm = None
        self.dropout_rate = 0.0
        self.val_size = 0.1
        self.early_stopping_min_delta = 0.0001
        self.early_stopping_patience = 4

        self.input_norm_mode = 1
        self.labels_norm_mode = 2

    def compile(self, optimizer=None,
                loss=None,
                metrics=None,
                weighted_metrics=None,
                loss_weights=None,
                sample_weight_mode=None):

        if optimizer is not None:
            self.optimizer = optimizer
        elif self.optimizer is None or self.optimizer == 'adam':
            self.optimizer = Adam(learning_rate=self.lr, beta_1=self.beta_1, beta_2=self.beta_2, epsilon=self.optimizer_epsilon,
                                  decay=0.0)
        if metrics is not None:
            self.metrics = metrics

        if self.task == 'regression':
            self._last_layer_activation = 'linear'
            loss_func = mean_squared_error if not loss else loss
            self.metrics = [mean_absolute_error, mean_error] if not self.metrics else self.metrics
        elif self.task == 'classification':
            self._last_layer_activation = 'softmax'
            loss_func = categorical_crossentropy if not loss else loss
            self.metrics = [categorical_accuracy] if not self.metrics else self.metrics
        elif self.task == 'binary_classification':
            self._last_layer_activation = 'sigmoid'
            loss_func = binary_crossentropy if not loss else loss
            self.metrics = [binary_accuracy] if not self.metrics else self.metrics
        else:
            raise RuntimeError('Only "regression", "classification" and "binary_classification" are supported')

        self.keras_model = self.model()

        self.keras_model.compile(loss=loss_func,
                                 optimizer=self.optimizer,
                                 metrics=self.metrics,
                                 weighted_metrics=weighted_metrics,
                                 loss_weights=loss_weights,
                                 sample_weight_mode=sample_weight_mode)

        # inject custom training step if needed
        try:
            self.custom_train_step()
        except NotImplementedError:
            pass
        except TypeError:
            self.keras_model.train_step = self.custom_train_step

        return None
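
    # A small sketch (hypothetical subclass name, not from the source): the `task`
    # attribute set before compile() picks the last-layer activation, default loss
    # and default metrics, e.g. softmax + categorical cross-entropy for classification:
    #
    #     cnn = MyCNN()                  # a subclass that implements model()
    #     cnn.task = 'classification'
    #     cnn.compile()                  # or pass loss=..., metrics=[...] to override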

    def pre_training_checklist_child(self, input_data, labels, sample_weights):
        # on top of checklist, convert input_data/labels to dict
        input_data, labels = self.pre_training_checklist_master(input_data, labels)

        # check if exists (existing means the model has already been trained (e.g. fine-tuning)
        # so we do not need to calculate the mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)
            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)
        if self.keras_model is None:  # only compile if there is no keras_model, e.g. fine-tuning does not require it
            self.compile()
        
        norm_data = self._tensor_dict_sanitize(norm_data, self.keras_model.input_names)
        norm_labels = self._tensor_dict_sanitize(norm_labels, self.keras_model.output_names)

        self.train_idx, self.val_idx = train_test_split(np.arange(self.num_train + self.val_num),
                                                        test_size=self.val_size)

        norm_data_training = {}
        norm_data_val = {}
        norm_labels_training = {}
        norm_labels_val = {}
        for name in norm_data.keys():
            norm_data_training.update({name: norm_data[name][self.train_idx]})
            norm_data_val.update({name: norm_data[name][self.val_idx]})
        for name in norm_labels.keys():
            norm_labels_training.update({name: norm_labels[name][self.train_idx]})
            norm_labels_val.update({name: norm_labels[name][self.val_idx]})

        if sample_weights is not None:        
            sample_weights_training = sample_weights[self.train_idx]
            sample_weights_val = sample_weights[self.val_idx]
        else:
            sample_weights_training = None
            sample_weights_val = None

        self.training_generator = CNNDataGenerator(
            batch_size=self.batch_size,
            shuffle=True,
            steps_per_epoch=self.num_train // self.batch_size,
            data=[norm_data_training, norm_labels_training],
            manual_reset=False, 
            sample_weights=sample_weights_training)

        val_batchsize = self.batch_size if len(self.val_idx) > self.batch_size else len(self.val_idx)
        self.validation_generator = CNNDataGenerator(
            batch_size=val_batchsize,
            shuffle=False,
            steps_per_epoch=max(self.val_num // self.batch_size, 1),
            data=[norm_data_val, norm_labels_val],
            manual_reset=True, 
            sample_weights=sample_weights_val)

        return input_data, labels

    def fit(self, input_data, labels, sample_weights=None):
        """
        Train a Convolutional neural network

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param sample_weights: Sample weights (if any)
        :type sample_weights: Union([NoneType, ndarray])
        :return: None
        :rtype: NoneType
        :History: 2017-Dec-06 - Written - Henry Leung (University of Toronto)
        """
        # Call the checklist to create astroNN folder and save parameters
        self.pre_training_checklist_child(input_data, labels, sample_weights)

        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                      min_delta=self.reduce_lr_epsilon,
                                      patience=self.reduce_lr_patience, min_lr=self.reduce_lr_min, mode='min',
                                      verbose=2)

        early_stopping = EarlyStopping(monitor='val_loss', min_delta=self.early_stopping_min_delta,
                                       patience=self.early_stopping_patience, verbose=2, mode='min')

        self.virtual_cvslogger = VirutalCSVLogger()

        self.__callbacks = [reduce_lr, self.virtual_cvslogger]  # default callbacks that must always be present

        if self.callbacks is not None:
            if isinstance(self.callbacks, list):
                self.__callbacks.extend(self.callbacks)
            else:
                self.__callbacks.append(self.callbacks)

        start_time = time.time()

        self.history = self.keras_model.fit(x=self.training_generator,
                                            validation_data=self.validation_generator,
                                            epochs=self.max_epochs, verbose=self.verbose,
                                            workers=os.cpu_count(),
                                            callbacks=self.__callbacks,
                                            use_multiprocessing=MULTIPROCESS_FLAG)

        print(f'Completed Training, {(time.time() - start_time):.{2}f}s in total')

        if self.autosave is True:
            # Call the post training checklist to save parameters
            self.save()

        return None

    def fit_on_batch(self, input_data, labels, sample_weights=None):
        """
        Train a neural network by running a single gradient update on all of your data, suitable for fine-tuning

        :param input_data: Data to be trained with neural network
        :type input_data: ndarray
        :param labels: Labels to be trained with neural network
        :type labels: ndarray
        :param sample_weights: Sample weights (if any)
        :type sample_weights: Union([NoneType, ndarray])
        :return: None
        :rtype: NoneType
        :History: 2018-Aug-22 - Written - Henry Leung (University of Toronto)
        """

        input_data, labels = self.pre_training_checklist_master(input_data, labels)

        # check if exists (existing means the model has already been trained (e.g. fine-tuning),
        # so we do not need to calculate the mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        start_time = time.time()

        fit_generator = CNNDataGenerator(batch_size=input_data['input'].shape[0],
                                         shuffle=False,
                                         steps_per_epoch=1,
                                         data=[norm_data, norm_labels], 
                                         sample_weights=sample_weights)

        scores = self.keras_model.fit(x=fit_generator,
                                      epochs=1,
                                      verbose=self.verbose,
                                      workers=os.cpu_count(),
                                      use_multiprocessing=MULTIPROCESS_FLAG)

        print(f'Completed Training on Batch, {(time.time() - start_time):.{2}f}s in total')

        return None

    def post_training_checklist_child(self):
        self.keras_model.save(self.fullfilepath + _astroNN_MODEL_NAME)
        print(_astroNN_MODEL_NAME + f' saved to {(self.fullfilepath + _astroNN_MODEL_NAME)}')

        self.hyper_txt.write(f"Dropout Rate: {self.dropout_rate} \n")
        self.hyper_txt.flush()
        self.hyper_txt.close()

        data = {'id': self.__class__.__name__ if self._model_identifier is None else self._model_identifier,
                'pool_length': self.pool_length,
                'filterlen': self.filter_len,
                'filternum': self.num_filters,
                'hidden': self.num_hidden,
                'input': self._input_shape,
                'labels': self._labels_shape,
                'task': self.task,
                'last_layer_activation': self._last_layer_activation,
                'activation': self.activation,
                'input_mean': dict_np_to_dict_list(self.input_mean),
                'labels_mean': dict_np_to_dict_list(self.labels_mean),
                'input_std': dict_np_to_dict_list(self.input_std),
                'labels_std': dict_np_to_dict_list(self.labels_std),
                'valsize': self.val_size,
                'targetname': self.targetname,
                'dropout_rate': self.dropout_rate,
                'l1': self.l1,
                'l2': self.l2,
                'maxnorm': self.maxnorm,
                'input_norm_mode': self.input_normalizer.normalization_mode,
                'labels_norm_mode': self.labels_normalizer.normalization_mode,
                'input_names': self.input_names,
                'output_names': self.output_names,
                'batch_size': self.batch_size}

        with open(self.fullfilepath + '/astroNN_model_parameter.json', 'w') as f:
            json.dump(data, f, indent=4, sort_keys=True)

    def predict(self, input_data):
        """
        Use the neural network to do inference

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :return: prediction
        :rtype: ndarray
        :History: 2017-Dec-06 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        input_data = self.pre_testing_checklist_master(input_data)

        input_array = self.input_normalizer.normalize(input_data, calc=False)
        total_test_num = input_data['input'].shape[0]  # Number of testing data

        # for number of test data smaller than batch_size
        if total_test_num < self.batch_size:
            self.batch_size = total_test_num

        # Due to the nature of how the generator works, there is no overlapping prediction
        data_gen_shape = (total_test_num // self.batch_size) * self.batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        # TODO: named output????
        predictions = np.zeros((total_test_num, self._labels_shape['output']))

        norm_data_main = {}
        norm_data_remainder = {}
        for name in input_array.keys():
            norm_data_main.update({name: input_array[name][:data_gen_shape]})
            norm_data_remainder.update({name: input_array[name][data_gen_shape:]})
            
        norm_data_main = self._tensor_dict_sanitize(norm_data_main, self.keras_model.input_names)
        norm_data_remainder = self._tensor_dict_sanitize(norm_data_remainder, self.keras_model.input_names)

        # Data Generator for prediction
        with tqdm(total=total_test_num, unit="sample") as pbar:
            prediction_generator = CNNPredDataGenerator(batch_size=self.batch_size,
                                                        shuffle=False,
                                                        steps_per_epoch=total_test_num // self.batch_size,
                                                        data=[norm_data_main],
                                                        pbar=pbar)
            predictions[:data_gen_shape] = np.asarray(self.keras_model.predict(prediction_generator))

            if remainder_shape != 0:
                remainder_generator = CNNPredDataGenerator(batch_size=remainder_shape,
                                                           shuffle=False,
                                                           steps_per_epoch=1,
                                                           data=[norm_data_remainder], 
                                                           pbar=pbar)
                predictions[data_gen_shape:] = np.asarray(self.keras_model.predict(remainder_generator))

        if self.labels_normalizer is not None:
            predictions = self.labels_normalizer.denormalize(list_to_dict(self.keras_model.output_names, predictions))
        else:
            predictions *= self.labels_std
            predictions += self.labels_mean

        return predictions['output']

    def evaluate(self, input_data, labels):
        """
        Evaluate the neural network with the provided input data and labels and get back a metrics score

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :param labels: labels
        :type labels: ndarray
        :return: metrics score dictionary
        :rtype: dict
        :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()
        input_data = list_to_dict(self.keras_model.input_names, input_data)
        labels = list_to_dict(self.keras_model.output_names, labels)

        # check if it exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need to calculate the mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        norm_data = self._tensor_dict_sanitize(norm_data, self.keras_model.input_names)
        norm_labels = self._tensor_dict_sanitize(norm_labels, self.keras_model.output_names)

        total_num = input_data['input'].shape[0]
        eval_batchsize = self.batch_size if total_num > self.batch_size else total_num
        steps = total_num // self.batch_size if total_num > self.batch_size else 1

        start_time = time.time()
        print("Starting Evaluation")

        evaluate_generator = CNNDataGenerator(batch_size=eval_batchsize,
                                              shuffle=False,
                                              steps_per_epoch=steps,
                                              data=[norm_data, norm_labels])

        scores = self.keras_model.evaluate(evaluate_generator)
        if isinstance(scores, float):  # make sure scores is iterable
            scores = [scores]
        outputname = self.keras_model.output_names
        funcname = self.keras_model.metrics_names

        print(f'Completed Evaluation, {(time.time() - start_time):.{2}f}s elapsed')

        return list_to_dict(funcname, scores)

    @deprecated_copy_signature(fit)
    def train(self, *args, **kwargs):
        return self.fit(*args, **kwargs)

    @deprecated_copy_signature(fit_on_batch)
    def train_on_batch(self, *args, **kwargs):
        return self.fit_on_batch(*args, **kwargs)
    
    @deprecated_copy_signature(predict)
    def test(self, *args, **kwargs):
        return self.predict(*args, **kwargs)
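
    # An end-to-end sketch for CNNBase subclasses (all names hypothetical, not from
    # the source): fit on labelled data, evaluate, predict; the deprecated
    # train()/test() aliases simply forward to fit()/predict():
    #
    #     cnn = MyCNN()                        # a subclass that implements model()
    #     cnn.max_epochs = 10
    #     cnn.fit(x_train, y_train)            # normalizes, splits train/val, trains
    #     scores = cnn.evaluate(x_test, y_test)
    #     y_pred = cnn.predict(x_test)         # denormalized ndarray of predictions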