Пример #1
0
    def learn(self, from_data, test_data=None):
        """
        Train and save a model (you can use this to retrain model from data).

        :param from_data: DataFrame or DataSource
            The data to learn from

        :param test_data: DataFrame or DataSource
            The data to test accuracy and learn_error from
        """
        device, _available_devices = get_devices()
        log.info(f'Computing device used: {device}')
        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [col for col in from_data if col not in self._output_columns]
            self.config = {
                'input_features': [{'name': col, 'type': self._type_map(from_data, col)} for col in self._input_columns],
                'output_features': [{'name': col, 'type': self._type_map(from_data, col)} for col in self._output_columns]
            }
            self.config = predictor_config_schema.validate(self.config)
            log.info('Automatically generated a configuration')
            log.info(self.config)
        else:
            self._output_columns = [col['name'] for col in self.config['output_features']]
            self._input_columns = [col['name'] for col in self.config['input_features']]

        if isinstance(from_data, pandas.DataFrame):
            train_ds = DataSource(from_data, self.config)
        elif isinstance(from_data, DataSource):
            train_ds = from_data
        else:
            raise TypeError(':from_data: must be either DataFrame or DataSource')

        nr_subsets = 3 if len(train_ds) > 100 else 1

        if test_data is None:
            test_ds = train_ds.subset(0.1)
        elif isinstance(test_data, pandas.DataFrame):
            test_ds = train_ds.make_child(test_data)
        elif isinstance(test_data, DataSource):
            test_ds = test_data
        else:
            raise TypeError(':test_data: must be either DataFrame or DataSource')

        train_ds.create_subsets(nr_subsets)
        test_ds.create_subsets(nr_subsets)

        train_ds.train()
        test_ds.train()

        mixer_class = self.config['mixer']['class']
        mixer_kwargs = self.config['mixer']['kwargs']
        self._mixer = mixer_class(**mixer_kwargs)
        self._mixer.fit(train_ds=train_ds, test_ds=test_ds)
        self.train_accuracy = self._mixer.calculate_accuracy(test_ds)

        return self
Пример #2
0
    def test_fit_and_predict(self):
        config = {
            'input_features': [{
                'name': 'x',
                'type': 'numeric'
            }, {
                'name': 'y',
                'type': 'numeric'
            }],
            'output_features': [{
                'name': 'z',
                'type': 'numeric'
            }, {
                'name': 'z`',
                'type': 'categorical'
            }]
        }
        config = predictor_config_schema.validate(config)

        N = 100

        data = {
            'x': [i for i in range(N)],
            'y': [random.randint(i, i + 20) for i in range(N)]
        }
        nums = [data['x'][i] * data['y'][i] for i in range(N)]

        data['z'] = [i + 0.5 for i in range(N)]
        data['z`'] = ['low' if i < 50 else 'high' for i in nums]

        data_frame = pandas.DataFrame(data)
        train_ds = DataSource(data_frame, config)
        test_ds = train_ds.subset(0.25)

        mixer = BoostMixer()
        mixer.fit(train_ds, test_ds)

        predictions = mixer.predict(train_ds.make_child(data_frame[['x',
                                                                    'y']]))