Example #1
def get_pollutants():
    """
    Function for getting all pollutants currently in the DB
    :return: list of str
    """
    pollutants = DBManager.get_pollutants()
    return pollutants
Example #2
    def get_model_by_name(name):
        """
        Get a model from database and reproduce it given the parameters saved
        :param name: str - name of the model
        :return: (model, model record, error) - model is a BaseModel instance (currently ConvolutionalNeuralNetwork,
        GaussianProcesses or SparseGaussianProcesses) or None, the model record holds the parameters saved in the DB,
        and error is an error message (str) or None
        """
        model_record, err = DBManager.get_model_by_name(name)
        if model_record is None:
            return None, None, err

        if model_record.type == 'CNN':
            cnn, err = ConvolutionalNeuralNetwork.new_from_json(
                model_record.model_params, model_record.extra_params)
            return cnn, model_record, err
        elif model_record.type == 'FullGP':
            full_gp, err = GaussianProcesses.new_from_json(
                model_record.model_params, model_record.extra_params)
            return full_gp, model_record, err
        elif model_record.type == 'SparseGP':
            sparse_gp, err = SparseGaussianProcesses.new_from_json(
                model_record.model_params, model_record.extra_params)
            return sparse_gp, model_record, err

        # Unknown model type
        return None, model_record, Errors.NO_SUCH_MODEL_TYPE.value
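
A minimal usage sketch, assuming get_model_by_name is exposed on ModelApi (as it is called in Example #12) and that 'my_cnn' is a hypothetical model name:

model, model_record, err = ModelApi.get_model_by_name('my_cnn')  # 'my_cnn' is an assumed name
if model is None:
    print('Could not load model:', err)
else:
    print('Loaded a', model_record.type, 'model')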
Example #3
def get_coordinates():
    """
    Function for getting all coordinate pairs from the DB
    :return: list of list of floats
    """
    coordinates = DBManager.get_all_coordinates()
    return coordinates
Example #4
    def insert_single_prediction(body):
        """
        Function for inserting a single prediction of pollution for a given date, time and location
        :param body: dict - requires several parameters:
          * type - of ML model (CNN, FullGP, etc.)
          * date_time - date and time of the measurement
          * longitude - float
          * latitude - float
          * pollutant - name of the pollutant, e.g. PM10, PM2.5
          * pollution_value - float
          * uncertainty - float - uncertainty of the prediction
          * data - dict with meteorological factors and their values, e.g. data['Temperature'] = 3.3
        :return: (True, None) | (False, str) - str is the error message
        """

        result, err = DatasetsApi.__are_params_valid(body)

        if not result:
            return result, err

        date_time = datetime.datetime.strptime(body['date_time'],
                                               DatasetsApi.DATE_TIME_FORMAT)

        is_successful, err = DBManager.insert_prediction(
            longitude=body['longitude'],
            latitude=body['latitude'],
            pollutant_name=body['pollutant'],
            predicted=True,
            pollution_value=body['pollution_value'],
            date_time=date_time,
            uncertainty=body['uncertainty'])

        return is_successful, err
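
A sketch of a request body for insert_single_prediction; all field values are illustrative, and the date string assumes DATE_TIME_FORMAT is '%d-%m-%Y %H:%M' (the Day-Month-Year 24H format described in Example #11):

# Hypothetical body; values are illustrative only
body = {
    'type': 'FullGP',
    'date_time': '01-01-2019 12:00',  # assumes '%d-%m-%Y %H:%M'
    'longitude': -1.463484,
    'latitude': 50.920265,
    'pollutant': 'PM10',
    'pollution_value': 21.5,
    'uncertainty': 1.2
}

is_successful, err = DatasetsApi.insert_single_prediction(body)
if not is_successful:
    print('Insert failed:', err)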
Example #5
    def get_model_params(name):
        """
        Get given model's parameters that are saved in the DB
        :param name: str - name of the model that is saved in the DB
        :return: (None, str) | (dict, None) - str is error message, dict contains model parameters
        """
        model, err = DBManager.get_model_by_name(name)

        if model is None:
            return None, err

        model_params = json.loads(model.model_params)
        # The model may store stringified JSON; if so, parse it into a dict
        if 'architecture' in model_params and isinstance(
                model_params['architecture'], str):
            model_params['architecture'] = json.loads(
                model_params['architecture'])

        # Do the same for weights
        if 'weights' in model_params and isinstance(model_params['weights'],
                                                    str):
            model_params['weights'] = json.loads(model_params['weights'])

        model_data = {
            'name': model.name,
            'type': model.type,
            'model_params': model_params,
            'extra_params': json.loads(model.extra_params)
        }

        return model_data, None
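
A usage sketch for get_model_params, again assuming it lives on ModelApi and that the model name is hypothetical:

model_data, err = ModelApi.get_model_params('my_cnn')  # 'my_cnn' is an assumed name
if model_data is None:
    print('Lookup failed:', err)
else:
    print(model_data['type'], sorted(model_data['model_params']))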
Example #6
def get_all_models():
    """
    Function for getting all models' names and types from the DB
    :return: list | None
    """
    try:
        return DBManager.get_all_models()
    except Exception:
        return None
Example #7
    def get_models_by_type(type):
        """
        Get all models that are of the given type (CNN, FullGP, SparseGP)
        :param type: str - 'CNN', 'FullGP' or 'SparseGP'; other input is invalid
        :return: (list, None) | (None, str) - list of model metadata, str is the error message
        """
        if not isinstance(type, str):
            return None, Errors.WRONG_PARAM.value

        models, msg = DBManager.get_models_metadata_by_type(type)
        return models, msg
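
A short sketch of the tuple unpacking this returns, with ModelApi assumed as the enclosing class:

models, msg = ModelApi.get_models_by_type('FullGP')
if models is None:
    print('Query failed:', msg)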
Example #8
    def insert_single_instance(body, predicted=False):
        """
        Function for inserting a single instance, optionally without a pollutant or pollution value
        :param body: dict - requires several parameters:
          * date_time - date and time of the measurement
          * longitude - float
          * latitude - float
          * pollutant - (optional) name of the pollutant, e.g. PM10, PM2.5
          * pollution_value - (optional) float
          * data - (optional) dict with meteorological factors and their values, e.g. data['Temperature'] = 3.3
        :param predicted: bool - whether the instance is a predicted or a measured one
        :return: (True, None) | (False, str) - str is the error message
        """

        result, err = DatasetsApi.__are_params_valid(body)

        if not result:
            return result, err

        data = body.get('data')
        pollutant_name = body.get('pollutant')
        pollution_value = body.get('pollution_value')

        date_time = datetime.datetime.strptime(body['date_time'],
                                               DatasetsApi.DATE_TIME_FORMAT)

        if predicted is None:
            predicted = False

        is_successful, err = DBManager.insert_instance(
            longitude=body['longitude'],
            latitude=body['latitude'],
            pollutant_name=pollutant_name,
            predicted=predicted,
            pollution_value=pollution_value,
            data=data,
            date_time=date_time)

        return is_successful, err
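
A sketch of a body for insert_single_instance; the values are made up, with the same DATE_TIME_FORMAT assumption as above:

# Hypothetical body for a measured instance without a pollution value
body = {
    'date_time': '01-01-2019 12:00',  # assumes '%d-%m-%Y %H:%M'
    'longitude': -1.395778,
    'latitude': 50.908140,
    'pollutant': 'PM10',
    'data': {'Temperature': 3.3}
}

is_successful, err = DatasetsApi.insert_single_instance(body, predicted=False)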
Example #9
    def insert_dataset(files):
        """
        Function for inserting a whole dataset in the database
        :param files: dict with FileStorage instances, holding datasets' files
        :return: (True, None) | (False, str) - string instance is the error message
        """
        # parameters required for basic data, such as which dataset is to be imported, what time formats are used, etc.
        BASE_PARAMS = ['Date', 'Time']

        # parameters required for getting specific columns from the given datasets, e.g. for Temperature use the tempC column
        DATASET_PARAMS = ['weatherFormat', 'pollutantFormat']

        dataset_metadata = json.load(files['metadata'])

        if not isinstance(dataset_metadata, dict):
            return False, Errors.WRONG_INSTANCE.value

        are_params_missing = Helpers.are_params_missing(
            dataset_metadata, BASE_PARAMS + DATASET_PARAMS)

        if are_params_missing:
            return False, Errors.MISSING_PARAM.value

        for x in DATASET_PARAMS:
            if not isinstance(dataset_metadata[x], dict):
                return False, Errors.WRONG_INSTANCE.value

        for key in files:
            dataset_metadata[key + 'Datasets'] = files[key]

        # Combine multiple datasets and get result
        main_transformer = MainTransformer(config=dataset_metadata)
        main_transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
        main_transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)
        main_transformer.transform()
        dataset = main_transformer.get_dataset()

        result, err = DBManager.insert_dataset(dataset, dataset_metadata)
        return result, err
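
A sketch of assembling the files argument outside a Flask request, using werkzeug's FileStorage; the file names and the 'weather'/'pollutant' keys are assumptions based on how insert_dataset builds the 'weatherDatasets' and 'pollutantDatasets' entries:

from werkzeug.datastructures import FileStorage

# Hypothetical files dict; file names and keys are assumptions
files = {
    'metadata': FileStorage(open('metadata.json', 'rb')),
    'weather': FileStorage(open('weather.csv', 'rb')),
    'pollutant': FileStorage(open('pollutant.csv', 'rb'))
}

result, err = DatasetsApi.insert_dataset(files)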
Example #10

with open('configTwo.json') as file:
    config_centre = json.load(file)

with open('configOne.json') as file:
    config_a33 = json.load(file)

data_transformer = MainTransformer(config=config_centre)
data_transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
data_transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)
data_transformer.transform()
dataset_centre = data_transformer.get_dataset()

data_transformer = MainTransformer(config=config_a33)
data_transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
data_transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)
data_transformer.transform()
dataset_a33 = data_transformer.get_dataset()

length_centre = dataset_centre.shape[0]
length_a33 = dataset_a33.shape[0]

dataset_centre['Longitude'] = -1.463484
dataset_centre['Latitude'] = 50.920265
dataset_a33['Longitude'] = -1.395778
dataset_a33['Latitude'] = 50.908140

print(dataset_centre)

DBManager.insert_dataset(dataset_centre, config_centre)
DBManager.insert_dataset(dataset_a33, config_a33)
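
The transformer boilerplate above could be folded into a small helper; a sketch under the same imports:

def build_dataset(config):
    # Combine the weather and pollutant sources described by one config
    transformer = MainTransformer(config=config)
    transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
    transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)
    transformer.transform()
    return transformer.get_dataset()

dataset_centre = build_dataset(config_centre)
dataset_a33 = build_dataset(config_a33)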
Example #11
    def get_dataset(body, use_dataframe=True):
        """
        Function for getting a dataset from database
        :param body: dict - requires several parameters:
          * type - of ML model (CNN, FullGP, etc.),
          * range - dict with start and end datetime strings in format Day-Month-Year H:M (24H format)
          * locations - list of [longitude, latitude] pairs
          * pollutant - name of the pollutant, e.g. PM10, PM2.5
          * data - dict with additional data such as weather data (data['weather'] is another dict)
        :param use_dataframe: bool - whether the returned dataset is a dataframe or a list
        :return: DataFrame | List | None
        """

        if not isinstance(body, dict):
            return None

        if 'range' not in body or 'locations' not in body or 'pollutant' not in body:
            return None

        if body['range'] is None or body['locations'] is None or body[
                'pollutant'] is None:
            return None


        # Params required for the DBManager, acts as a config of a given dataset
        config_params = {
            "Date": DatasetsApi.DATE_TIME_FORMAT.split(' ')[0],
            "Time": DatasetsApi.DATE_TIME_FORMAT.split(' ')[1],
            "pollutant": {
                "Pollutant": None
            },
            'weather': {}
        }

        start_date = None
        end_date = None
        uncertainty = False

        if 'start' in body['range']:
            start_date = datetime.datetime.strptime(
                body['range']['start'], DatasetsApi.DATE_TIME_FORMAT)

        if 'end' in body['range']:
            end_date = datetime.datetime.strptime(body['range']['end'],
                                                  DatasetsApi.DATE_TIME_FORMAT)

        if 'uncertainty' in body:
            uncertainty = True

        location_coordinates = []
        if isinstance(body['locations'], list):
            location_coordinates = list(
                map(lambda x: (x[0], x[1]), body['locations']))

        if isinstance(body['pollutant'], str):
            config_params['pollutant']['Pollutant'] = body['pollutant']

        if 'data' in body and isinstance(body['data'], dict):
            if 'weather' in body['data'] and isinstance(
                    body['data']['weather'], dict):
                config_params['weather'] = body['data']['weather']

        datasets = []

        for coordinates_pair in location_coordinates:
            dataset, err = DBManager.get_dataset(datetime_from=start_date,
                                                 datetime_to=end_date,
                                                 longitude=coordinates_pair[0],
                                                 latitude=coordinates_pair[1],
                                                 config=config_params,
                                                 use_dataframe=use_dataframe,
                                                 uncertainty=uncertainty)

            # Only inspect the dataset when the query succeeded
            if err is None:
                dataset_size = len(
                    dataset.index) if use_dataframe else len(dataset)
                if dataset_size != 0:
                    datasets.append(dataset)

        if len(datasets) == 0:
            # TODO - IT IS VERY IMPORTANT TO CHANGE ALL CONDITIONS TO CHECK IF df.shape[0] == 0 IN THE API
            return pandas.DataFrame() if use_dataframe else []

        if use_dataframe:
            complete_dataset = pandas.concat(datasets)
            MainTransformer.periodic_f(complete_dataset)
        else:
            complete_dataset = []
            for x in datasets:
                complete_dataset.extend(x)

        return complete_dataset
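
A sketch of a get_dataset call; the date range is illustrative, the coordinates are borrowed from Example #10, and the weather mapping assumes the tempC column convention mentioned in Example #9:

# Hypothetical query body; values are illustrative only
body = {
    'range': {'start': '01-01-2019 00:00', 'end': '31-01-2019 23:00'},
    'locations': [[-1.463484, 50.920265], [-1.395778, 50.908140]],
    'pollutant': 'PM10',
    'data': {'weather': {'Temperature': 'tempC'}}
}

df = DatasetsApi.get_dataset(body, use_dataframe=True)
if df is not None and df.shape[0] > 0:
    print(df.head())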
Example #12
    def train_model(model_name, body):
        """
        Function for further training a model provided that the model already exists in the DB
        :param model_name: str - name of the existing model
        :param body: dict - body of the request
        :return: (True, None) | (False, str) | (False, list)
        """
        print('Getting dataset...')
        model, model_record, err = ModelApi.get_model_by_name(model_name)

        if model is None:
            return False, err

        dataset = DatasetsApi.get_dataset(body, use_dataframe=True)
        if dataset is None:
            return False, Errors.NO_DATA.value

        complete_dataset = dataset[dataset['Pollutant'].notnull()]

        if 'n_instances_trained' in model.stats and 'dataset_stats' in model.stats:
            updated_stats, new_stats = MainTransformer.normalize_with_old_stats(
                model.stats['n_instances_trained'],
                model.stats['dataset_stats'], complete_dataset)
            MainTransformer.normalize(complete_dataset,
                                      stats=updated_stats,
                                      inplace=True)
        else:
            return False, []

        stats = new_stats

        X_train, y_train, _, _, _ = MainTransformer.get_training_and_test_set(
            complete_dataset,
            'Pollutant',
            'Uncertainty',
            size=1,
            normalize=False)

        training_dataset_stats = {}
        print('Verifying dataset...')
        if 'dataset_stats' in model.stats:
            training_dataset_stats = model.stats['dataset_stats']
            feature_names = set(training_dataset_stats.keys())
            dataset_features = set(X_train)
            dataset_features.discard('DateTime')

            print('Verifying dataset features')
            if feature_names != dataset_features:
                print('feature names', feature_names, training_dataset_stats,
                      training_dataset_stats.keys())
                print('dataset features', dataset_features)
                if feature_names.intersection(
                        dataset_features) == feature_names:
                    print('Dataset is in the expected shape')
                    print('difference')
                    difference = dataset_features.difference(feature_names)
                    print(difference)
                    MainTransformer.remove_features(X_train, difference)
                else:
                    print(feature_names)
                    print(dataset_features)
                    return False, []
        else:
            return False, []

        print('Starting to train model...')
        model.train(X_train, y_train, stats=stats)
        model_params, extra_params = model.model_to_json()
        result = DBManager.upsert_model(model_name,
                                        model_record.type,
                                        model_record.resource,
                                        model_params=model_params,
                                        extra_params=extra_params)
        print(result)
        return result
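
A sketch of a further-training call, assuming train_model is exposed on ModelApi; the model name and body are hypothetical:

body = {
    'range': {'start': '01-02-2019 00:00', 'end': '28-02-2019 23:00'},
    'locations': [[-1.463484, 50.920265]],
    'pollutant': 'PM10'
}

result, err = ModelApi.train_model('my_cnn', body)  # 'my_cnn' is an assumed name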
Example #13
    def create_model(name, body):
        """
        Function for creating a non-existing model and training it with a given dataset
        This should run in the background to avoid blocking Flask
        :param name: unique name of the model
        :param body: dict with following data:
        * type - type of model (CNN, FullGP, etc.)
        * range - dict with start and end fields, each storing datetime in DATE_TIME_FORMAT
        * locations - list of lists, nested list should have two entries 0 - longitude, 1 - latitude
        * pollutant - name of the pollutant, e.g. PM10, PM2.5
        * data - dict with additional data that would be stored as JSONB; it could have keys such as
        'weather'
        :return: (True, None) | (False, str) - whether the model was created; str is the error message
        """

        if body is None:
            return False, Errors.MISSING_BODY.value

        print('Getting dataset...')
        dataset = DatasetsApi.get_dataset(body, use_dataframe=True)
        print(dataset)

        if dataset is None:
            return False, Errors.NO_DATA.value

        complete_dataset = dataset[dataset['Pollutant'].notnull()]

        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            complete_dataset,
            'Pollutant',
            'Uncertainty',
            size=1,
            normalize=True)

        if 'type' not in body:
            return False, Errors.NO_MODEL_TYPE_GIVEN.value

        model_types = {
            'CNN': (ConvolutionalNeuralNetwork, 'keras'),
            'FullGP': (GaussianProcesses, 'GPy'),
            'SparseGP': (SparseGaussianProcesses, 'GPy')
        }

        if body['type'] not in model_types:
            return False, Errors.NO_SUCH_MODEL_TYPE.value

        # Instantiate, train and persist the requested model type
        model_class, resource = model_types[body['type']]
        model = model_class()
        model.train(X_train, y_train, stats=stats)
        model_params, extra_params = model.model_to_json()
        DBManager.upsert_model(name,
                               body['type'],
                               resource,
                               model_params=model_params,
                               extra_params=extra_params)
        return True, None
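
A sketch of creating and training a new model, assuming create_model is exposed on ModelApi; the name and body values are hypothetical:

body = {
    'type': 'SparseGP',
    'range': {'start': '01-01-2019 00:00', 'end': '31-01-2019 23:00'},
    'locations': [[-1.463484, 50.920265]],
    'pollutant': 'PM10'
}

created, err = ModelApi.create_model('my_sparse_gp', body)
if not created:
    print('Model creation failed:', err)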