Example #1
    def setUp(self):
        config = {
            'input_features': [{
                'name': 'x1',
                'type': 'numeric',
            }, {
                'name': 'x2',
                'type': 'numeric',
            }],
            'output_features': [{
                'name': 'y',
                'type': 'categorical',
            }],
            'data_source': {
                'cache_transformed_data': True
            }
        }
        config = predictor_config_schema.validate(config)
        n_points = 100
        data = {
            'x1': [i for i in range(n_points)],
            'x2': [random.randint(i, i + 20) for i in range(n_points)]
        }
        nums = [data['x1'][i] * data['x2'][i] for i in range(n_points)]

        data['y'] = ['low' if i < 50 else 'high' for i in nums]

        df = pd.DataFrame(data)

        self.config = config
        self.df = df
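
For context, a test built on this fixture would construct a predictor from the validated config and fit it on the generated frame. A minimal sketch, assuming a top-level `Predictor` class as used in the later examples (the import path and the `test_learn` name are illustrative, not from the original):

    def test_learn(self):
        from lightwood import Predictor  # assumed import path

        predictor = Predictor(config=self.config)
        predictor.learn(from_data=self.df)
        # train_accuracy is set at the end of learn() (see Example #3)
        self.assertIsNotNone(predictor.train_accuracy)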
Example #2
    def __init__(self, config=None, output=None, load_from_path=None):
        """
        :param config: dict
        :param output: list, the columns you want to predict; lightwood will try to generate a config
        :param load_from_path: str, the path to load the predictor from
        """
        if load_from_path is not None:
            with open(load_from_path, 'rb') as pickle_in:
                self_dict = torch.load(pickle_in)
            self.__dict__ = self_dict
            self.convert_to_device()
            return

        if output is None and config is None:
            raise ValueError('You must provide either `output` or `config`')

        if config is not None and output is None:
            try:
                self.config = predictor_config_schema.validate(config)
            except Exception:
                error = traceback.format_exc(1)
                raise ValueError('[BAD DEFINITION] argument has errors: {err}'.format(err=error))
        else:
            # no explicit config was given; one is generated automatically in learn()
            self.config = None

        # whether we need to automatically generate a configuration in learn()
        self._generate_config = output is not None or self.config is None

        self._output_columns = output
        self._input_columns = None
        self.train_accuracy = None

        self._mixer = None
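
To make the constructor's three entry points concrete, here is a hedged usage sketch (the column names and file path are illustrative only):

    # 1) explicit config, validated by predictor_config_schema on construction
    predictor = Predictor(config={
        'input_features': [{'name': 'x1', 'type': 'numeric'}],
        'output_features': [{'name': 'y', 'type': 'categorical'}],
    })

    # 2) output columns only: the config is generated automatically in learn()
    predictor = Predictor(output=['y'])

    # 3) restore a saved predictor; its __dict__ is loaded via torch.load
    predictor = Predictor(load_from_path='predictor.pkl')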
Example #3
    def learn(self, from_data, test_data=None):
        """
        Train and save a model (you can use this to retrain the model from data).

        :param from_data: DataFrame or DataSource
            The data to learn from

        :param test_data: DataFrame or DataSource
            The data to test accuracy and learn_error from
        """
        device, _available_devices = get_devices()
        log.info(f'Computing device used: {device}')
        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [col for col in from_data if col not in self._output_columns]
            self.config = {
                'input_features': [{'name': col, 'type': self._type_map(from_data, col)} for col in self._input_columns],
                'output_features': [{'name': col, 'type': self._type_map(from_data, col)} for col in self._output_columns]
            }
            self.config = predictor_config_schema.validate(self.config)
            log.info('Automatically generated a configuration')
            log.info(self.config)
        else:
            self._output_columns = [col['name'] for col in self.config['output_features']]
            self._input_columns = [col['name'] for col in self.config['input_features']]

        if isinstance(from_data, pandas.DataFrame):
            train_ds = DataSource(from_data, self.config)
        elif isinstance(from_data, DataSource):
            train_ds = from_data
        else:
            raise TypeError(':from_data: must be either DataFrame or DataSource')

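        # use k-fold-style subsets only when there is enough data (see Example #8)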
        nr_subsets = 3 if len(train_ds) > 100 else 1

        if test_data is None:
            test_ds = train_ds.subset(0.1)
        elif isinstance(test_data, pandas.DataFrame):
            test_ds = train_ds.make_child(test_data)
        elif isinstance(test_data, DataSource):
            test_ds = test_data
        else:
            raise TypeError(':test_data: must be either DataFrame or DataSource')

        train_ds.create_subsets(nr_subsets)
        test_ds.create_subsets(nr_subsets)

        train_ds.train()
        test_ds.train()

        mixer_class = self.config['mixer']['class']
        mixer_kwargs = self.config['mixer']['kwargs']
        self._mixer = mixer_class(**mixer_kwargs)
        self._mixer.fit(train_ds=train_ds, test_ds=test_ds)
        self.train_accuracy = self._mixer.calculate_accuracy(test_ds)

        return self
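
An end-to-end sketch of driving this method, assuming the output-only constructor from Example #2 (the DataFrame contents are illustrative):

    import pandas

    df = pandas.DataFrame({
        'x1': list(range(200)),
        'x2': list(range(200)),
        'y': ['low' if i < 100 else 'high' for i in range(200)],
    })

    # output-only construction takes the auto-generated config branch above
    predictor = Predictor(output=['y']).learn(from_data=df)
    print(predictor.train_accuracy)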
Example #4
    def __init__(self, config=None, output=None, load_from_path=None):
        """
        Start a predictor pass the

        :param config: a predictor definition object (can be a dictionary or a PredictorDefinition object)
        :param output: the columns you want to predict, ludwig will try to generate a config
        :param load_from_path: The path to load the predictor from
        :type config: dictionary
        """

        if load_from_path is not None:
            with open(load_from_path, 'rb') as pickle_in:
                self_dict = dill.load(pickle_in)
            self.__dict__ = self_dict
            self.convert_to_device()
            return

        if output is None and config is None:
            raise ValueError('You must provide either `output` or `config`')

        if config is not None and output is None:
            try:
                config = predictor_config_schema.validate(config)
            except Exception:
                error = traceback.format_exc(1)
                raise ValueError(
                    '[BAD DEFINITION] argument has errors: {err}'.format(
                        err=error))

        self.config = config

        # whether we need to automatically generate a configuration in learn()
        self._generate_config = output is not None or self.config is None

        self._output_columns = output
        self._input_columns = None

        self._mixer = None

        self.train_accuracy = None
        self.overall_certainty = None
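
The load branch above restores a predictor by assigning a dill-pickled dict to `self.__dict__`. Its save-side counterpart is not shown in this example, but a minimal sketch of what it would have to do looks like this (the `save_predictor` helper is hypothetical; the real library's save method may differ):

    import dill

    def save_predictor(predictor, path):
        # hypothetical inverse of the load branch: dump __dict__ with dill
        # so that Predictor(load_from_path=path) can restore it
        with open(path, 'wb') as pickle_out:
            dill.dump(predictor.__dict__, pickle_out)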
Example #5
    def __init__(self, config=None, output=None, load_from_path=None):
        """
        Start a predictor pass the

        :param config: a predictor definition object (can be a dictionary or a PredictorDefinition object)
        :param output: the columns you want to predict, ludwig will try to generate a config
        :param load_from_path: The path to load the predictor from
        :type config: dictionary
        """
        try:
            from lightwood.mixers.boost.boost import BoostMixer
            self.has_boosting_mixer = True
        except Exception as e:
            self.has_boosting_mixer = False
            logging.info(f"Boosting mixer can't be loaded due to error: {e}")
            print(f"Boosting mixer can't be loaded due to error: {e}")

        if load_from_path is not None:
            with open(load_from_path, 'rb') as pickle_in:
                self_dict = dill.load(pickle_in)
            self.__dict__ = self_dict
            self.convert_to_device()
            return

        if output is None and config is None:
            raise ValueError('You must provide either `output` or `config`')

        if config is not None and output is None:
            try:
                self.config = predictor_config_schema.validate(config)
            except Exception:
                error = traceback.format_exc(1)
                raise ValueError(
                    '[BAD DEFINITION] argument has errors: {err}'.format(
                        err=error))
        else:
            # no explicit config was given; one is generated automatically in learn()
            self.config = None

        # whether we need to automatically generate a configuration in learn()
        self._generate_config = output is not None or self.config is None

        self._output_columns = output
        self._input_columns = None
        self.train_accuracy = None

        self._mixer = None
        self._helper_mixers = None
Example #6
    def test_fit_and_predict(self):
        config = {
            'input_features': [
                {
                    'name': 'x',
                    'type': 'numeric'
                },
                {
                    'name': 'y',
                    'type': 'numeric'
                }
            ],

            'output_features': [
                {
                    'name': 'z',
                    'type': 'numeric'
                },
                {
                    'name': 'z`',
                    'type': 'categorical'
                }
            ]
        }
        config = predictor_config_schema.validate(config)

        N = 100

        data = {'x': [i for i in range(N)], 'y': [random.randint(i, i + 20) for i in range(N)]}
        nums = [data['x'][i] * data['y'][i] for i in range(N)]

        data['z'] = [i + 0.5 for i in range(N)]
        data['z`'] = ['low' if i < 50 else 'high' for i in nums]

        data_frame = pandas.DataFrame(data)
        train_ds = DataSource(data_frame, config)
        train_ds.create_subsets(1)

        mixer = NnMixer(stop_training_after_seconds=50)
        mixer.fit(train_ds, train_ds)

        test_ds = train_ds.make_child(data_frame[['x', 'y']])
        predictions = mixer.predict(test_ds)
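
A test like this would usually finish by asserting something about the result. A hedged follow-up (the exact structure of `predictions` is not shown in the source, so the per-column check is an assumption):

        self.assertIsNotNone(predictions)
        # assumed: predictions is keyed by output column name
        for output_name in ['z', 'z`']:
            self.assertIn(output_name, predictions)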
Example #7
    config = {
        'input_features': [{
            'name': 'x',
            'type': 'numeric'
        }, {
            'name': 'y',
            'type': 'numeric'
        }],
        'output_features': [{
            'name': 'z',
            'type': 'categorical'
        }]
    }

    config = predictor_config_schema.validate(config)
    # For Classification
    data = {
        'x': [i for i in range(10)],
        'y': [random.randint(i, i + 20) for i in range(10)]
    }
    nums = [data['x'][i] * data['y'][i] for i in range(10)]

    data['z'] = ['low' if i < 50 else 'high' for i in nums]

    data_frame = pandas.DataFrame(data)

    ds = DataSource(data_frame, config)
    ds.prepare_encoders()
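
Once `prepare_encoders()` has run, indexing the data source yields encoded input/output pairs; Example #8 relies on this when it computes `input_size = len(from_data_ds[0][0])`. A hedged sketch:

    # ds[0] is an (encoded_input, encoded_output) pair after prepare_encoders()
    encoded_input = ds[0][0]
    print('input vector width:', len(encoded_input))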
Example #8
    def learn(self,
              from_data,
              test_data=None,
              callback_on_iter=None,
              eval_every_x_epochs=20,
              stop_training_after_seconds=None,
              stop_model_building_after_seconds=None):
        """
        Train and save a model (you can use this to retrain model from data)

        :param from_data: (Pandas DataFrame) The data to learn from
        :param test_data: (Pandas DataFrame) The data to test accuracy and learn_error from
        :param callback_on_iter: This is function that can be called on every X evaluation cycle
        :param eval_every_x_epochs: This is every how many epochs we want to calculate the test error and accuracy

        :return: None
        """

        # This helper function auto-determines, roughly, the data type of each column
        # NOTE: this assumes the data is clean and will only return types for 'CATEGORICAL', 'NUMERIC' and 'TEXT'
        def type_map(col_name):
            col_pd_type = from_data[col_name].dtype
            col_pd_type = str(col_pd_type)

            if col_pd_type in ['int64', 'float64', 'timedelta']:
                return COLUMN_DATA_TYPES.NUMERIC
            elif col_pd_type in ['bool', 'category']:
                return COLUMN_DATA_TYPES.CATEGORICAL
            else:
                # if the number of unique values is less than 100, or less
                # than 10% of the total number of rows, keep it as categorical
                unique = from_data[col_name].nunique()
                if unique < 100 or unique < len(from_data[col_name]) / 10:
                    return COLUMN_DATA_TYPES.CATEGORICAL
                # otherwise assume it's text
                return COLUMN_DATA_TYPES.TEXT
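            # Worked examples of this mapping (illustrative): an int64 column
            # maps to NUMERIC; a bool column to CATEGORICAL; an object column
            # with 5000 unique values across 10000 rows fails both the
            # "< 100 uniques" and "< 10% of rows" checks, so it maps to TEXT.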

        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [
                col for col in from_data if col not in self._output_columns
            ]
            self.config = {
                'input_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._input_columns],
                'output_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._output_columns]
            }
            self.config = predictor_config_schema.validate(self.config)
            logging.info('Automatically generated a configuration')
            logging.info(self.config)
        else:
            self._output_columns = [
                col['name'] for col in self.config['output_features']
            ]
            self._input_columns = [
                col['name'] for col in self.config['input_features']
            ]

        if stop_training_after_seconds is None:
            stop_training_after_seconds = round(from_data.shape[0] *
                                                from_data.shape[1] / 5)
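        # Worked example of this heuristic (illustrative): a 1000-row,
        # 10-column frame gets round(1000 * 10 / 5) = 2000 seconds of budget.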

        if stop_model_building_after_seconds is None:
            stop_model_building_after_seconds = stop_training_after_seconds * 3

        from_data_ds = DataSource(from_data, self.config)

        if test_data is not None:
            test_data_ds = DataSource(test_data, self.config)
        else:
            test_data_ds = from_data_ds.extractRandomSubset(0.1)

        from_data_ds.training = True

        mixer_class = NnMixer
        mixer_params = {}

        if 'mixer' in self.config:
            if 'class' in self.config['mixer']:
                mixer_class = self.config['mixer']['class']
            if 'attrs' in self.config['mixer']:
                mixer_params = self.config['mixer']['attrs']

        # Initialize data sources
        if len(from_data_ds) > 100:
            nr_subsets = 3
        else:
            # Don't use k-fold cross validation for very small input sizes
            nr_subsets = 1

        from_data_ds.prepare_encoders()
        from_data_ds.create_subsets(nr_subsets)
        try:
            mixer_class({}).fit_data_source(from_data_ds)
        except Exception:
            # Not all mixers require fitting to the data source
            pass

        input_size = len(from_data_ds[0][0])
        training_data_length = len(from_data_ds)

        test_data_ds.transformer = from_data_ds.transformer
        test_data_ds.encoders = from_data_ds.encoders
        test_data_ds.output_weights = from_data_ds.output_weights
        test_data_ds.create_subsets(nr_subsets)

        if 'optimizer' in self.config:
            optimizer = self.config['optimizer']()

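            # Shrink the trial budget until each trial gets enough wall-clock
            # time relative to the input size and data volume, but never drop
            # below 8 trials.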
            while True:
                training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

                # Some heuristics...
                if training_time_per_iteration > input_size:
                    if training_time_per_iteration > min(
                        (training_data_length /
                         (4 * input_size)), 16 * input_size):
                        break

                optimizer.total_trials = optimizer.total_trials - 1
                if optimizer.total_trials < 8:
                    optimizer.total_trials = 8
                    break

            training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

            best_parameters = optimizer.evaluate(
                lambda dynamic_parameters: Predictor.evaluate_mixer(
                    self.config,
                    mixer_class,
                    mixer_params,
                    from_data_ds,
                    test_data_ds,
                    dynamic_parameters,
                    max_training_time=training_time_per_iteration,
                    max_epochs=None))

            logging.info(f'Using hyperparameter set: {best_parameters}')
        else:
            best_parameters = {}

        self._mixer = mixer_class(best_parameters, self.config)

        for param in mixer_params:
            if hasattr(self._mixer, param):
                setattr(self._mixer, param, mixer_params[param])
            else:
                logging.warning(
                    'trying to set mixer param {param} but mixerclass {mixerclass} does not have such parameter'
                    .format(param=param, mixerclass=str(type(self._mixer))))

        def callback_on_iter_w_acc(epoch, training_error, test_error,
                                   delta_mean):
            callback_on_iter(epoch, training_error, test_error, delta_mean,
                             self.calculate_accuracy(test_data_ds))

        self._mixer.fit(
            train_ds=from_data_ds,
            test_ds=test_data_ds,
            callback=callback_on_iter_w_acc,
            stop_training_after_seconds=stop_training_after_seconds,
            eval_every_x_epochs=eval_every_x_epochs)
        self.train_accuracy = self.calculate_accuracy(test_data_ds)

        # Train some alternative mixers
        if CONFIG.HELPER_MIXERS and self.has_boosting_mixer and (
                CONFIG.FORCE_HELPER_MIXERS
                or len(from_data_ds) < 12 * pow(10, 3)):
            try:
                self._helper_mixers = self.train_helper_mixers(
                    from_data_ds, test_data_ds,
                    self._mixer.quantiles[self._mixer.quantiles_pair[0] +
                                          1:self._mixer.quantiles_pair[1] + 1])
            except Exception as e:
                logging.warning(
                    f'Failed to train helper mixers with error: {e}')

        return self
Example #9
    def learn(self,
              from_data,
              test_data=None,
              callback_on_iter=None,
              eval_every_x_epochs=20,
              stop_training_after_seconds=None,
              stop_model_building_after_seconds=None):
        """
        Train and save a model (you can use this to retrain model from data)

        :param from_data: (Pandas DataFrame) The data to learn from
        :param test_data: (Pandas DataFrame) The data to test accuracy and learn_error from
        :param callback_on_iter: This is function that can be called on every X evaluation cycle
        :param eval_every_x_epochs: This is every how many epochs we want to calculate the test error and accuracy

        :return: None
        """

        # This helper function auto-determines, roughly, the data type of each column
        # NOTE: this assumes the data is clean and will only return types for 'CATEGORICAL', 'NUMERIC' and 'TEXT'
        def type_map(col_name):
            col_pd_type = from_data[col_name].dtype
            col_pd_type = str(col_pd_type)

            if col_pd_type in ['int64', 'float64', 'timedelta']:
                return COLUMN_DATA_TYPES.NUMERIC
            elif col_pd_type in ['bool', 'category']:
                return COLUMN_DATA_TYPES.CATEGORICAL
            else:
                # if the number of unique values is less than 100, or less
                # than 10% of the total number of rows, keep it as categorical
                unique = from_data[col_name].nunique()
                if unique < 100 or unique < len(from_data[col_name]) / 10:
                    return COLUMN_DATA_TYPES.CATEGORICAL
                # otherwise assume it's text
                return COLUMN_DATA_TYPES.TEXT

        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [
                col for col in from_data if col not in self._output_columns
            ]
            self.config = {
                'input_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._input_columns],
                'output_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._output_columns]
            }
            self.config = predictor_config_schema.validate(self.config)
            logging.info('Automatically generated a configuration')
            logging.info(self.config)
        else:
            self._output_columns = [
                col['name'] for col in self.config['output_features']
            ]
            self._input_columns = [
                col['name'] for col in self.config['input_features']
            ]

        if stop_training_after_seconds is None:
            stop_training_after_seconds = round(from_data.shape[0] *
                                                from_data.shape[1] / 5)

        if stop_model_building_after_seconds is None:
            stop_model_building_after_seconds = stop_training_after_seconds * 3

        from_data_ds = DataSource(from_data, self.config)

        if test_data is not None:
            test_data_ds = DataSource(test_data, self.config)
        else:
            test_data_ds = from_data_ds.extractRandomSubset(0.1)

        from_data_ds.training = True

        mixer_class = NnMixer
        mixer_params = {}

        if 'mixer' in self.config:
            if 'class' in self.config['mixer']:
                mixer_class = self.config['mixer']['class']
            if 'attrs' in self.config['mixer']:
                mixer_params = self.config['mixer']['attrs']

        # Initialize data sources
        nr_subsets = 3
        from_data_ds.prepare_encoders()
        from_data_ds.create_subsets(nr_subsets)
        try:
            mixer_class({}).fit_data_source(from_data_ds)
        except Exception:
            # Not all mixers require fitting to the data source
            pass

        input_size = len(from_data_ds[0][0])
        training_data_length = len(from_data_ds)

        test_data_ds.transformer = from_data_ds.transformer
        test_data_ds.encoders = from_data_ds.encoders
        test_data_ds.output_weights = from_data_ds.output_weights
        test_data_ds.create_subsets(nr_subsets)

        if 'optimizer' in self.config:
            optimizer = self.config['optimizer']()

            while True:
                training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

                # Some heuristics...
                if training_time_per_iteration > input_size:
                    if training_time_per_iteration > min(
                        (training_data_length /
                         (4 * input_size)), 16 * input_size):
                        break

                optimizer.total_trials = optimizer.total_trials - 1
                if optimizer.total_trials < 8:
                    optimizer.total_trials = 8
                    break

            training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

            best_parameters = optimizer.evaluate(
                lambda dynamic_parameters: Predictor.evaluate_mixer(
                    self.config,
                    mixer_class,
                    mixer_params,
                    from_data_ds,
                    test_data_ds,
                    dynamic_parameters,
                    max_training_time=training_time_per_iteration,
                    max_epochs=None))

            logging.info(f'Using hyperparameter set: {best_parameters}')
        else:
            best_parameters = {}

        if CONFIG.HELPER_MIXERS and self.has_boosting_mixer and (
                CONFIG.FORCE_HELPER_MIXERS
                or len(from_data_ds) < 12 * pow(10, 3)):
            try:
                self._helper_mixers = self.train_helper_mixers(
                    from_data_ds, test_data_ds)
            except Exception as e:
                logging.warning(
                    f'Failed to train helper mixers with error: {e}')

        mixer = mixer_class(best_parameters, self.config)
        self._mixer = mixer

        for param in mixer_params:
            if hasattr(mixer, param):
                setattr(mixer, param, mixer_params[param])
            else:
                logging.warning(
                    'trying to set mixer param {param} but mixerclass {mixerclass} does not have such parameter'
                    .format(param=param, mixerclass=str(type(mixer))))

        started = time.time()
        log_reasure = time.time()
        first_run = True
        stop_training = False

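        # Two passes over the subsets: pass 1 only primes the model on each
        # subset (note the early break below when subset_iteration == 1);
        # pass 2 does the full training, enabling dropout and selfaware
        # training once the training error is low enough.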
        for subset_iteration in [1, 2]:
            if stop_training:
                break
            subset_id_arr = [*from_data_ds.subsets.keys()]
            for subset_id in subset_id_arr:
                started_subset = time.time()
                if stop_training:
                    break

                subset_train_ds = from_data_ds.subsets[subset_id]
                subset_test_ds = test_data_ds.subsets[subset_id]

                lowest_error = None
                last_test_error = None
                last_subset_test_error = None
                test_error_delta_buff = []
                subset_test_error_delta_buff = []
                best_model = None
                best_selfaware_model = None

                # iterate over iter_fit and track the epoch and mixer training error
                for epoch, training_error in enumerate(
                        mixer.iter_fit(subset_train_ds,
                                       initialize=first_run,
                                       subset_id=subset_id)):
                    first_run = False

                    # Log this every now and then so that the user knows it's running
                    if (int(time.time()) - log_reasure) > 30:
                        log_reasure = time.time()
                        logging.info(
                            f'Lightwood training, iteration {epoch}, training error {training_error}'
                        )

                    # Prime the model on each subset for a bit
                    if subset_iteration == 1:
                        break

                    # Once the training error gets small enough, enable dropout to teach the network to predict without certain features
                    if subset_iteration > 1 and training_error < 0.4 and not from_data_ds.enable_dropout:
                        eval_every_x_epochs = max(1,
                                                  int(eval_every_x_epochs / 2))
                        logging.info('Enabled dropout!')
                        from_data_ds.enable_dropout = True
                        lowest_error = None
                        last_test_error = None
                        last_subset_test_error = None
                        test_error_delta_buff = []
                        subset_test_error_delta_buff = []
                        continue

                    # If the selfaware network isn't able to train, go back to the original network
                    if subset_iteration > 1 and (
                            np.isnan(training_error)
                            or np.isinf(training_error) or training_error >
                            pow(10, 5)) and not mixer.stop_selfaware_training:
                        mixer.start_selfaware_training = False
                        mixer.stop_selfaware_training = True
                        lowest_error = None
                        last_test_error = None
                        last_subset_test_error = None
                        test_error_delta_buff = []
                        subset_test_error_delta_buff = []
                        continue

                    # Once we are past the priming/warmup period, start training the selfaware network

                    if (subset_iteration > 1 and not mixer.is_selfaware
                            and self.config['mixer']['selfaware']
                            and not mixer.stop_selfaware_training
                            and training_error < 0.35):
                        logging.info('Started selfaware training!')
                        mixer.start_selfaware_training = True
                        lowest_error = None
                        last_test_error = None
                        last_subset_test_error = None
                        test_error_delta_buff = []
                        subset_test_error_delta_buff = []
                        continue

                    if epoch % eval_every_x_epochs == 0:
                        test_error = mixer.error(test_data_ds)
                        subset_test_error = mixer.error(subset_test_ds,
                                                        subset_id=subset_id)
                        logging.info(
                            f'Subset test error: {subset_test_error} on subset {subset_id}, overall test error: {test_error}'
                        )

                        if lowest_error is None or test_error < lowest_error:
                            lowest_error = test_error
                            if mixer.is_selfaware:
                                best_selfaware_model = mixer.get_model_copy()
                            else:
                                best_model = mixer.get_model_copy()

                        if last_subset_test_error is None:
                            pass
                        else:
                            subset_test_error_delta_buff.append(
                                last_subset_test_error - subset_test_error)

                        last_subset_test_error = subset_test_error

                        if last_test_error is None:
                            pass
                        else:
                            test_error_delta_buff.append(last_test_error -
                                                         test_error)

                        last_test_error = test_error

                        delta_mean = np.mean(test_error_delta_buff[-5:])
                        subset_delta_mean = np.mean(
                            subset_test_error_delta_buff[-5:])

                        if callback_on_iter is not None:
                            callback_on_iter(
                                epoch, training_error, test_error, delta_mean,
                                self.calculate_accuracy(test_data_ds))

                        ## Stop if the model is overfitting
                        #if delta_mean <= 0 and len(test_error_delta_buff) > 4:
                        #    stop_training = True

                        # Stop if we're past the time limit allocated for training
                        if (time.time() -
                                started) > stop_training_after_seconds:
                            stop_training = True

                        # If the training subset is overfitting on its associated testing subset
                        if (subset_delta_mean <= 0
                                and len(subset_test_error_delta_buff) > 4
                            ) or (time.time() - started_subset
                                  ) > stop_training_after_seconds / len(
                                      from_data_ds.subsets.keys()):
                            logging.info(
                                'Finished fitting on subset {subset_id} of {no_subsets} subsets'
                                .format(subset_id=subset_id,
                                        no_subsets=len(
                                            from_data_ds.subsets.keys())))

                            if mixer.is_selfaware:
                                if best_selfaware_model is not None:
                                    mixer.update_model(best_selfaware_model)
                            else:
                                mixer.update_model(best_model)

                            if subset_id == subset_id_arr[-1]:
                                stop_training = True
                            elif not stop_training:
                                break

                        if stop_training:
                            if mixer.is_selfaware:
                                mixer.update_model(best_selfaware_model)
                            else:
                                mixer.update_model(best_model)
                            self._mixer = mixer
                            self.train_accuracy = self.calculate_accuracy(
                                test_data_ds)
                            self.overall_certainty = mixer.overall_certainty()
                            logging.info('Finished training model!')
                            break

        self._mixer.build_confidence_normalization_data(test_data_ds)
        self._mixer.encoders = from_data_ds.encoders
        return self