def test_save_wrong(self, config):
    X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                                 'Pollutant',
                                                                                                 'Uncertainty',
                                                                                                 size=0.5,
                                                                                                 normalize=True)
    gp = SparseGaussianProcesses()
    gp.train(X_train_set, y_train_set, stats=stats)
    result, msg = gp.save_model(config)
    assert not result and isinstance(msg, str)
def test_update_stats(self):
    sparse_gp = SparseGaussianProcesses()
    X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                       'Pollutant',
                                                                                       'Uncertainty',
                                                                                       size=0.85,
                                                                                       normalize=True)
    sparse_gp.train(X_train_set, y_train_set, stats=stats)
    instances = sparse_gp.stats['n_instances_trained']
    dataset_stats = sparse_gp.stats['dataset_stats']
    assert X_train_set.shape[0] == instances
    assert stats == dataset_stats

    X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                       'Pollutant',
                                                                                       'Uncertainty',
                                                                                       size=0.5,
                                                                                       normalize=True)
    sparse_gp.train(X_train_set, y_train_set, stats)
    assert X_train_set.shape[0] + instances == sparse_gp.stats['n_instances_trained']
    assert len(sparse_gp.stats['dataset_stats'].keys()) == len(stats.keys()) == len(dataset_stats.keys())

    missing_data = X_train_set.drop(axis=1, columns='Temperature', inplace=False, errors='ignore')
    with pytest.raises(WrongNumberOfFeatures):
        sparse_gp.train(missing_data, y_train_set, stats)
def test_eval(self, error_func):
    X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                                 'Pollutant',
                                                                                                 'Uncertainty',
                                                                                                 size=0.5,
                                                                                                 normalize=True)
    gp = SparseGaussianProcesses()
    gp.train(X_train_set, y_train_set, stats=stats)
    result, predictions, y_test_set = gp.eval(X_test, y_test, error_func=error_func)
    predictions_size = len(predictions)
    assert predictions_size == len(X_test)
def test_load_saved_model(self):
    global sparse_gp
    copied_config = copy.copy(ConfigReader.CONFIG)
    copied_config['loadedModel'] = {
        'modelName': 'gp_sparse'
    }
    loaded = SparseGaussianProcesses()
    result, msg = loaded.load_model(copied_config)
    assert result and msg is None
    assert sparse_gp.stats == loaded.stats
    assert sparse_gp.kernel.to_dict() == loaded.kernel.to_dict()
    assert sparse_gp.model.param_array.tolist() == loaded.model.param_array.tolist()
def test_save(self, config):
    global sparse_gp
    X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                                 'Pollutant',
                                                                                                 'Uncertainty',
                                                                                                 size=0.5,
                                                                                                 normalize=True)
    gp = SparseGaussianProcesses()
    gp.train(X_train_set, y_train_set, stats=stats)
    result, msg = gp.save_model(config)
    sparse_gp = gp
    assert result and msg is None
def get_model_by_name(name):
    """
    Get a model from the database and reproduce it from the parameters saved there

    :param name: str - name of the model
    :return: (None, None, str) | (None, dict, str) | (BaseModel, dict, None) -
             str is the error message, dict is the model's record from the DB,
             BaseModel is the model instance, currently ConvolutionalNeuralNetwork,
             GaussianProcesses or SparseGaussianProcesses
    """
    model_record, err = DBManager.get_model_by_name(name)
    if model_record is None:
        return None, None, err

    if model_record.type == 'CNN':
        # err is None on success, otherwise the deserialization error from new_from_json
        cnn, err = ConvolutionalNeuralNetwork.new_from_json(model_record.model_params,
                                                            model_record.extra_params)
        return cnn, model_record, err
    elif model_record.type == 'FullGP':
        full_gp, err = GaussianProcesses.new_from_json(model_record.model_params,
                                                       model_record.extra_params)
        return full_gp, model_record, err
    elif model_record.type == 'SparseGP':
        sparse_gp, err = SparseGaussianProcesses.new_from_json(model_record.model_params,
                                                               model_record.extra_params)
        return sparse_gp, model_record, err

    # unknown model type stored in the DB
    return None, model_record, err
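# A minimal usage sketch (assumption, not part of the original module): shows how a
# caller might unpack the 3-tuple returned by get_model_by_name. The model name
# 'gp_sparse' and the predict() call mirror the SparseGaussianProcesses tests; the
# helper itself is hypothetical.
def _example_load_and_predict(X_test):
    model, record, err = get_model_by_name('gp_sparse')   # assumed example name
    if model is None:
        # either no record was found or deserialization failed; err holds the message
        return None, err
    # uncertainty=True asks for (mean, variance) pairs, as exercised in the tests
    return model.predict(X_test, uncertainty=True), None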
def test_train_and_test(self, uncertainty):
    global dataset
    data_transformer = MainTransformer(config=ConfigReader.CONFIG)
    data_transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
    data_transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)
    data_transformer.transform()
    dataset = data_transformer.get_dataset()
    complete_dataset = dataset.dropna(inplace=False)
    MainTransformer.periodic_f(complete_dataset)
    X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                                 'Pollutant',
                                                                                                 'Uncertainty',
                                                                                                 size=0.8,
                                                                                                 normalize=True)
    print(np.array(X_train_set))
    print(y_train_set)

    gp = SparseGaussianProcesses()
    gp.train(X_train_set, y_train_set, stats=stats)
    assert gp.stats['n_instances_trained'] == X_train_set.shape[0]
    assert gp.stats['dataset_stats'] == stats

    predictions = gp.predict(X_test, uncertainty=uncertainty)
    assert len(predictions) == X_test.shape[0]
    if uncertainty:
        values_without_uncertainty = list(filter(lambda x: len(x) != 2, predictions))
        assert len(values_without_uncertainty) == 0
    if not isinstance(uncertainty, bool):
        assert len(list(filter(lambda x: not isinstance(x, tuple), predictions))) == X_test.shape[0]
def test_retrain(self):
    X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                                 'Pollutant',
                                                                                                 'Uncertainty',
                                                                                                 size=0.5,
                                                                                                 normalize=True)
    gp = SparseGaussianProcesses()
    gp.train(X_train_set, y_train_set, stats=stats)
    instances = gp.stats['n_instances_trained']
    model_stats = gp.stats['dataset_stats']
    assert instances == X_train_set.shape[0]
    assert model_stats == stats

    X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                                 'Pollutant',
                                                                                                 'Uncertainty',
                                                                                                 size=1,
                                                                                                 normalize=True)
    gp.train(X_train_set, y_train_set, stats)
    assert instances + X_train_set.shape[0] == gp.stats['n_instances_trained']
    assert model_stats != gp.stats['dataset_stats'] != stats
def test_model_to_json_load_from_json(self):
    global sparse_gp
    model_params, extra_params = sparse_gp.model_to_json()
    model_params_dict = json.loads(model_params)
    extra_params_dict = json.loads(extra_params)
    assert model_params_dict['data']['kernel'] == sparse_gp.kernel.to_dict()
    assert model_params_dict['data']['params'] == sparse_gp.model.param_array.tolist()

    loaded_gp, msg = SparseGaussianProcesses.new_from_json(model_params_dict, extra_params_dict)
    assert msg is None
    assert sparse_gp.stats == loaded_gp.stats
    assert sparse_gp.kernel.to_dict() == loaded_gp.kernel.to_dict()
    assert sparse_gp.model.param_array.tolist() == loaded_gp.model.param_array.tolist()
def create_model(name, body):
    """
    Create a model that does not yet exist and train it on the given dataset.
    This should run in the background to avoid blocking Flask.

    :param name: unique name of the model
    :param body: dict with the following data:
        * type - type of model (CNN, FullGP, etc.)
        * range - dict with start and end fields, each storing a datetime in DATE_TIME_FORMAT
        * locations - list of lists, each nested list holding two entries: 0 - longitude, 1 - latitude
        * pollutant - name of the pollutant, e.g. PM10, PM2.5
        * data - dict with additional data stored as JSONB, e.g. weather
    :return: (bool, str | None) - whether the model was created, plus an error message if it was not
    """
    if body is None:
        return False, Errors.MISSING_BODY.value

    print('Getting dataset...')
    dataset = DatasetsApi.get_dataset(body, use_dataframe=True)
    print(dataset)
    if dataset is None:
        return False, Errors.NO_DATA.value

    model = None
    complete_dataset = dataset[dataset['Pollutant'].notnull()]
    X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(complete_dataset,
                                                                              'Pollutant',
                                                                              'Uncertainty',
                                                                              size=1,
                                                                              normalize=True)

    if 'type' not in body:
        return False, Errors.NO_MODEL_TYPE_GIVEN.value

    if body['type'] == 'CNN':
        model = ConvolutionalNeuralNetwork()
        model.train(X_train, y_train, stats=stats)
        resource = 'keras'
        model_params, extra_params = model.model_to_json()
        result = DBManager.upsert_model(name, body['type'], resource,
                                        model_params=model_params,
                                        extra_params=extra_params)
        return True, None
    elif body['type'] == 'FullGP':
        model = GaussianProcesses()
        model.train(X_train, y_train, stats=stats)
        resource = 'GPy'
        model_params, extra_params = model.model_to_json()
        result = DBManager.upsert_model(name, body['type'], resource,
                                        model_params=model_params,
                                        extra_params=extra_params)
        return True, None
    elif body['type'] == 'SparseGP':
        model = SparseGaussianProcesses()
        model.train(X_train, y_train, stats=stats)
        resource = 'GPy'
        model_params, extra_params = model.model_to_json()
        result = DBManager.upsert_model(name, body['type'], resource,
                                        model_params=model_params,
                                        extra_params=extra_params)
        return True, None

    return False, Errors.NO_SUCH_MODEL_TYPE.value
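# A minimal usage sketch (assumption, not part of the original module): illustrates the
# body layout create_model expects. The concrete dates, coordinates, datetime string and
# the 'weather' key are example values, not values taken from the project configuration.
def _example_create_sparse_gp_model():
    body = {
        'type': 'SparseGP',                      # one of CNN, FullGP, SparseGP
        'range': {
            'start': '2018-01-01 00:00:00',      # assumed to follow DATE_TIME_FORMAT
            'end': '2018-06-30 23:00:00'
        },
        'locations': [[-1.4704, 53.3811]],       # [longitude, latitude]
        'pollutant': 'PM10',
        'data': {'weather': True}                # extra JSONB payload
    }
    return create_model('sparse_gp_example', body)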