def learn(self, from_data, test_data=None):
    """
    Fit the configured mixer on ``from_data`` and record its accuracy.

    When ``self._generate_config`` is ``True`` a configuration is built from
    the columns of ``from_data`` (everything not listed in
    ``self._output_columns`` becomes an input feature) and validated with
    ``predictor_config_schema``. Otherwise the input/output column names are
    read from the existing ``self.config``.

    :param from_data: DataFrame or DataSource
        The data to learn from
    :param test_data: DataFrame or DataSource
        The data used as the test set; when omitted, ``train_ds.subset(0.1)``
        is used instead
    :return: this predictor instance
    :raises TypeError: if ``from_data`` or ``test_data`` is neither a
        DataFrame nor a DataSource
    """
    device, _ = get_devices()
    log.info(f'Computing device used: {device}')

    # Work out the configuration and fix the order of input/output columns.
    if self._generate_config is True:
        def describe(column):
            # One feature entry of the generated configuration.
            return {'name': column, 'type': self._type_map(from_data, column)}

        inferred_inputs = []
        for column in from_data:
            if column not in self._output_columns:
                inferred_inputs.append(column)
        self._input_columns = inferred_inputs

        input_features = [describe(column) for column in self._input_columns]
        output_features = [describe(column) for column in self._output_columns]
        # The raw dict is stored first, then replaced by its validated form.
        self.config = {
            'input_features': input_features,
            'output_features': output_features
        }
        self.config = predictor_config_schema.validate(self.config)

        log.info('Automatically generated a configuration')
        log.info(self.config)
    else:
        self._output_columns = [
            feature['name'] for feature in self.config['output_features']
        ]
        self._input_columns = [
            feature['name'] for feature in self.config['input_features']
        ]

    # Wrap the training data in a DataSource if it is not one already.
    if isinstance(from_data, pandas.DataFrame):
        train_ds = DataSource(from_data, self.config)
    elif isinstance(from_data, DataSource):
        train_ds = from_data
    else:
        raise TypeError(':from_data: must be either DataFrame or DataSource')

    # More than 100 training rows -> 3 subsets, otherwise a single one.
    if len(train_ds) > 100:
        nr_subsets = 3
    else:
        nr_subsets = 1

    # Resolve the test set: explicit data wins, else a subset of train_ds.
    if test_data is None:
        test_ds = train_ds.subset(0.1)
    elif isinstance(test_data, pandas.DataFrame):
        test_ds = train_ds.make_child(test_data)
    elif isinstance(test_data, DataSource):
        test_ds = test_data
    else:
        raise TypeError(':test_data: must be either DataFrame or DataSource')

    train_ds.create_subsets(nr_subsets)
    test_ds.create_subsets(nr_subsets)

    train_ds.train()
    test_ds.train()

    # Instantiate the mixer named in the configuration and fit it.
    mixer_spec = self.config['mixer']
    build_mixer = mixer_spec['class']
    build_kwargs = mixer_spec['kwargs']
    self._mixer = build_mixer(**build_kwargs)
    self._mixer.fit(train_ds=train_ds, test_ds=test_ds)

    # Accuracy is computed on the test DataSource.
    self.train_accuracy = self._mixer.calculate_accuracy(test_ds)

    return self
def test_fit_and_predict(self):
    """
    Fit a BoostMixer on synthetic data and run a prediction.

    The data has two numeric inputs (``x``, ``y``), one numeric output
    (``z``) and one categorical output (``z```). The visible body only
    exercises ``fit`` and ``predict``; the predictions are not inspected.
    """
    raw_config = {
        'input_features': [
            {'name': 'x', 'type': 'numeric'},
            {'name': 'y', 'type': 'numeric'},
        ],
        'output_features': [
            {'name': 'z', 'type': 'numeric'},
            {'name': 'z`', 'type': 'categorical'},
        ],
    }
    config = predictor_config_schema.validate(raw_config)

    row_count = 100
    xs = list(range(row_count))
    # y is a random integer in [x, x + 20] for each row.
    ys = [random.randint(x, x + 20) for x in xs]
    products = [x * y for x, y in zip(xs, ys)]

    # Column order (x, y, z, z`) is kept as in the original construction.
    columns = {
        'x': xs,
        'y': ys,
        'z': [x + 0.5 for x in xs],
        'z`': ['low' if product < 50 else 'high' for product in products],
    }
    data_frame = pandas.DataFrame(columns)

    train_ds = DataSource(data_frame, config)
    test_ds = train_ds.subset(0.25)

    mixer = BoostMixer()
    mixer.fit(train_ds, test_ds)

    # Predict from the input columns only.
    inputs_only = data_frame[['x', 'y']]
    predictions = mixer.predict(train_ds.make_child(inputs_only))