Example #1
    def post_device_data(self, df, sensor_id, clean_na = 'drop'):
        '''
            POST data in the SmartCitizen API
            Parameters
            ----------
                df: pandas DataFrame
                    Contains data in a DataFrame format.
                    Data is posted using the sensor id provided,
                    regardless of the column name.
                    Data is posted in UTC, so the dataframe needs a
                    timezone-localised timestamp index
                sensor_id: int
                    The sensor id
                clean_na: string, optional
                    Default: 'drop'
                    How to clean NaNs before posting: 'drop' or 'fill'
            Returns
            -------
                True if the data was posted successfully
        '''
        if 'SC_ADMIN_BEARER' not in environ:
            std_out('Cannot post without Auth Bearer', 'ERROR')
            return False

        headers = {'Authorization':'Bearer ' + environ['SC_ADMIN_BEARER'], 'Content-type': 'application/json'}

        # Get sensor name
        sensor_name = list(df.columns)[0]
        # Clean df of nans
        df = clean(df, clean_na, how = 'all')

        # Process dataframe
        df['id'] = sensor_id
        df.index.name = 'recorded_at'
        df.rename(columns = {sensor_name: 'value'}, inplace = True)
        df.columns = MultiIndex.from_product([['sensors'], df.columns])
        j = (df.groupby('recorded_at', as_index = True)
                .apply(lambda x: x['sensors'][['value', 'id']].to_dict('records'))
        )

        # Prepare json post
        payload = {"data":[]}
        for item in j.index:
            payload["data"].append(
                {
                    "recorded_at": localise_date(item, 'UTC').strftime('%Y-%m-%dT%H:%M:%SZ'),
                    "sensors": j[item]
                }
            )

        payload_json = dumps(payload)

        response = post(f'https://api.smartcitizen.me/v0/devices/{self.id}/readings', data = payload_json, headers = headers)
        if response.status_code == 200 or response.status_code == 201:
            return True

        return False
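
A minimal usage sketch (assumptions: a valid admin token in the environment, an instantiated API device object here called `device`, and an illustrative sensor id; the column name does not matter, per the docstring):

    from os import environ
    import pandas as pd

    environ['SC_ADMIN_BEARER'] = '<admin-token>'  # hypothetical token

    # One column of readings with a timezone-localised index, as required
    df = pd.DataFrame(
        {'TEMP': [21.3, 21.5]},
        index=pd.to_datetime(['2021-01-01 10:00', '2021-01-01 10:01']).tz_localize('UTC'))

    posted = device.post_device_data(df, sensor_id=55)  # True on HTTP 200/201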
Example #2
    def get_device_data(self, start_date = None, end_date = None, frequency = '3Min', clean_na = None):

        if start_date is not None: days_ago = (to_datetime(date.today())-to_datetime(start_date)).days
        else: days_ago = 365 # One year of data

        std_out(f'Requesting data from MUV API')
        std_out(f'Device ID: {self.id}')
        self.get_device_location()
        self.get_device_sensors()        
        
        # Get devices
        try:
            if days_ago == -1: url = f'{self.API_BASE_URL}getSensorData?sensor_id={self.id}'            
            else: url = f'{self.API_BASE_URL}getSensorData?sensor_id={self.id}&days={days_ago}'
            df = DataFrame(get(url).json())
        except:
            print_exc()
            std_out('Failed sensor request. Probably no connection', 'ERROR')
            return None

        try:
            # Rename columns
            df.rename(columns = self.sensors, inplace = True)
            df = df.set_index('time')

            df.index = localise_date(df.index, self.location)
            df = df[~df.index.duplicated(keep='first')]
            # Drop unnecessary columns
            df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True)
            df.drop('id', axis=1, inplace=True)
            # Check for weird things in the data
            df = df.apply(to_numeric, errors='coerce')
            # Resample
            df = df.resample(frequency).mean()
            df = df.reindex(df.index.rename('Time'))

            df = clean(df, clean_na, how = 'all')
                
            self.data = df
                
        except:
            print_exc()
            std_out('Problem closing up the API dataframe', 'ERROR')
            return None

        std_out(f'Device {self.id} loaded successfully from API', 'SUCCESS')
        return self.data
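
A hedged usage sketch (`muv_device` stands in for whatever class defines the method above, i.e. the MUV API wrapper; the date is illustrative):

    data = muv_device.get_device_data(start_date='2021-01-01',
                                      frequency='3Min',
                                      clean_na='drop')
    if data is not None:
        print(data.head())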
Example #3
    def update_latest_postprocessing(self):
        # Sets latest postprocessing to latest reading

        if self.source == 'api':
            if self.api_device.get_device_postprocessing() is not None:
                std_out('Updating postprocessing')
                # Add latest postprocessing rounded up with
                # frequency so that we don't end up in
                # an endless loop processing only the latest data line
                # (minute vs. second precision of the readings)
                self.latest_postprocessing = localise_date(self.readings.index[-1]+\
                    to_timedelta(self.options['frequency']), 'UTC').strftime('%Y-%m-%dT%H:%M:%S')
                self.api_device.postprocessing[
                    'latest_postprocessing'] = self.latest_postprocessing
                std_out(
                    f"Updated latest_postprocessing to: {self.api_device.postprocessing['latest_postprocessing']}"
                )

                return True

        return False
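
Why the frequency is added before stamping: if the last reading is at 10:05:32 and the processing frequency is 1Min, stamping 10:05:32 would re-select that same line on the next run. A standalone sketch of the calculation (values are illustrative):

    from pandas import to_datetime, to_timedelta

    last_reading = to_datetime('2021-01-01 10:05:32').tz_localize('UTC')
    frequency = '1Min'
    latest = (last_reading + to_timedelta(frequency)).strftime('%Y-%m-%dT%H:%M:%S')
    print(latest)  # 2021-01-01T10:06:32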
Example #4
def alphasense_803_04(dataframe, **kwargs):
    """
    Calculates pollutant concentration based on 4 electrode sensor readings (mV)
    and calibration ID. It adds a configurable background concentration and correction
    based on AAN803-04
    Parameters
    ----------
        from_date: string, datetime object
            Date from which this calibration id is valid
        to_date: string, datetime object
            Date until which this calibration id is valid. None if currently valid
        id: string
            Alphasense sensor ID (must be in calibrations.yaml)
        we: string
            Name of working electrode found in dataframe (V)
        ae: string
            Name of auxiliary electrode found in dataframe (V)
        t: string
            Name of reference temperature
        use_alternative: boolean
            Default false
            Use alternative algorithm as shown in the AAN
        location: string
            Valid location for date localisation
    Returns
    -------
        calculation of pollutant in ppb
    """
    def alg_1(x, cal_data):
        return x['we_t'] - x['n_t'] * x['ae_t']

    def alg_2(x, cal_data):
        return x['we_t'] - x['k_t'] * (
            cal_data['we_sensor_zero_mv'] /
            cal_data['ae_sensor_zero_mv']) * x['ae_t']

    def alg_3(x, cal_data):
        return x['we_t'] - (
            cal_data['we_sensor_zero_mv'] -
            cal_data['ae_sensor_zero_mv']) / 1000.0 - x['kp_t'] * x['ae_t']

    def alg_4(x, cal_data):
        return x['we_t'] - cal_data['we_sensor_zero_mv'] / 1000.0 - x['kpp_t']

    def comp_t(x, comp_lut):
        # Below min temperature, we saturate
        if x['t'] < config._as_t_comp[0]: return comp_lut[0]

        # Over max temperature, we saturate
        if x['t'] > config._as_t_comp[-1]: return comp_lut[-1]

        # Otherwise, we calculate
        idx_2 = next(axis[0] for axis in enumerate(config._as_t_comp)
                     if axis[1] > x['t'])
        idx_1 = idx_2 - 1

        delta_y = comp_lut[idx_2] - comp_lut[idx_1]
        delta_x = config._as_t_comp[idx_2] - config._as_t_comp[idx_1]

        return comp_lut[idx_1] + (x['t'] -
                                  config._as_t_comp[idx_1]) * delta_y / delta_x

    def reverse_no2(x, cal_data):
        return x['NO2'] * cal_data['we_cross_sensitivity_no2_mv_ppb'] / 1000.0

    # Check inputs
    flag_error = False
    if 'we' not in kwargs: flag_error = True
    if 'ae' not in kwargs: flag_error = True
    if 'id' not in kwargs: flag_error = True
    if 't' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    # Get Sensor data
    if kwargs['id'] not in config.calibrations:
        std_out(f"Sensor {kwargs['id']} not in calibration data", 'ERROR')
        return None

    # Process input dates
    if 'from_date' not in kwargs: from_date = None
    else:
        if 'location' not in kwargs:
            std_out('Cannot localise date without location')
            return None
        from_date = localise_date(kwargs['from_date'], kwargs['location'])

    if 'to_date' not in kwargs: to_date = None
    else:
        if 'location' not in kwargs:
            std_out('Cannot localise date without location')
            return None
        to_date = localise_date(kwargs['to_date'], kwargs['location'])

    # Make copy
    df = dataframe.copy()
    # Trim data
    if from_date is not None: df = df[df.index > from_date]
    if to_date is not None: df = df[df.index < to_date]

    # Get sensor type
    as_type = config._as_sensor_codes[kwargs['id'][0:3]]

    # Use alternative method or not
    if 'use_alternative' not in kwargs: kwargs['use_alternative'] = False
    if kwargs['use_alternative']: algorithm_idx = 1
    else: algorithm_idx = 0

    # Get algorithm name
    algorithm = list(config._as_sensor_algs[as_type].keys())[algorithm_idx]
    comp_type = config._as_sensor_algs[as_type][algorithm][0]
    comp_lut = config._as_sensor_algs[as_type][algorithm][1]

    # Retrieve calibration data - verify it's all float
    cal_data = config.calibrations[kwargs['id']]
    for item in cal_data:
        cal_data[item] = float(cal_data[item])

    # Compensate electronic zero
    df['we_t'] = df[kwargs['we']] - (cal_data['we_electronic_zero_mv'] / 1000
                                     )  # in V
    df['ae_t'] = df[kwargs['ae']] - (cal_data['ae_electronic_zero_mv'] / 1000
                                     )  # in V
    # Get requested temperature
    df['t'] = df[kwargs['t']]

    # Temperature compensation
    df[comp_type] = df.apply(lambda x: comp_t(x, comp_lut),
                             axis=1)  # temperature correction factor

    # Algorithm selection
    if algorithm == 1:
        df['we_c'] = df.apply(lambda x: alg_1(x, cal_data), axis=1)  # in V
    elif algorithm == 2:
        df['we_c'] = df.apply(lambda x: alg_2(x, cal_data), axis=1)  # in V
    elif algorithm == 3:
        df['we_c'] = df.apply(lambda x: alg_3(x, cal_data), axis=1)  # in V
    elif algorithm == 4:
        df['we_c'] = df.apply(lambda x: alg_4(x, cal_data), axis=1)  # in V

    # Verify if it has NO2 cross-sensitivity
    if cal_data['we_cross_sensitivity_no2_mv_ppb'] != float(0):
        df['we_no2_eq'] = df.apply(lambda x: reverse_no2(x, cal_data),
                                   axis=1)  # in V
        df['we_c'] -= df['we_no2_eq']  # in V

    # Calculate sensor concentration
    df['conc'] = df['we_c'] / (cal_data['we_sensitivity_mv_ppb'] / 1000.0
                               )  # in ppb

    return df['conc']
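
A hedged usage sketch (column names and sensor id are illustrative; the id must exist in config.calibrations):

    kwargs = {'we': 'ADC_48_0',     # working electrode column (V)
              'ae': 'ADC_48_1',     # auxiliary electrode column (V)
              't': 'EXT_TEMP',      # reference temperature column
              'id': '212031234'}    # Alphasense sensor ID (assumed in calibrations)

    df['CO_CONC'] = alphasense_803_04(df, **kwargs)  # pollutant concentration in ppb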
Example #5
    def load(self,
             options=None,
             path='',
             convert_units=True,
             only_unprocessed=False,
             max_amount=None,
             follow_defaults=False):
        '''
        Loads the device with some options

        Parameters:
        -----------
        options: dict()
            Default: None
            options['min_date'] = date to load data from
                Default to device min_date (from blueprint or test)
            options['max_date'] = date to load data to
                Default to device max_date (from blueprint or test)
            options['clean_na'] = clean na (drop_na, fill_na or None)
                Default to device clean_na (from blueprint or test)
            options['frequency'] = frequency to load data at in pandas format
                Default to device frequency (from blueprint or test) or '1Min'
        path: String
            Default: ''
            Path where the csv file is, if any. Normally does not need to be provided; only for internal usage
        convert_units: bool
            Default: True
            Convert units for channels based on config._channel_lut
        only_unprocessed: bool
            Default: False
            Loads only unprocessed data
        max_amount: int
            Default: None
            Trim dataframe to this amount for processing and forwarding purposes
        follow_defaults: bool
            Default: False
            Use defaults from config._csv_defaults for loading
        Returns
        ----------
            True if loaded correctly
        '''

        # Add test overrides if we have them, otherwise set device defaults
        if options is not None: self.check_overrides(options)
        else: self.check_overrides()

        try:
            if self.source == 'csv':
                if follow_defaults:
                    index_name = config._csv_defaults['index_name']
                    sep = config._csv_defaults['sep']
                    skiprows = config._csv_defaults['skiprows']
                else:
                    index_name = self.sources[self.source]['index']
                    sep = self.sources[self.source]['sep']
                    skiprows = self.sources[self.source]['header_skip']

                # here we don't use tzaware because we only load preprocessed data
                self.readings = self.readings.combine_first(
                    read_csv_file(file_path=join(path,
                                                 self.processed_data_file),
                                  timezone=self.timezone,
                                  frequency=self.options['frequency'],
                                  clean_na=self.options['clean_na'],
                                  index_name=index_name,
                                  sep=sep,
                                  skiprows=skiprows))
                if self.readings is not None:
                    self.__convert_names__()

            elif 'api' in self.source:

                # Get device location
                # Location data should be standard for each new device
                self.api_device.get_device_lat_long()
                self.api_device.get_device_alt()

                self.location = {
                    'longitude': self.api_device.long,
                    'latitude': self.api_device.lat,
                    'altitude': self.api_device.alt
                }

                self.timezone = self.api_device.get_device_timezone()

                if path == '':
                    # Non-cached case
                    if only_unprocessed:

                        # Override dates for post-processing
                        if self.latest_postprocessing is not None:
                            hw_latest_postprocess = localise_date(
                                self.latest_postprocessing,
                                'UTC').strftime('%Y-%m-%dT%H:%M:%S')
                            # Override min loading date
                            self.options['min_date'] = hw_latest_postprocess

                    df = self.api_device.get_device_data(
                        self.options['min_date'], self.options['max_date'],
                        self.options['frequency'], self.options['clean_na'])

                    # API Device is not aware of other csv index data, so make it here
                    if 'csv' in self.sources and df is not None:
                        df = df.reindex(
                            df.index.rename(self.sources['csv']['index']))

                    # Combine it with readings if possible
                    if df is not None:
                        self.readings = self.readings.combine_first(df)

                else:
                    # Cached case
                    self.readings = self.readings.combine_first(
                        read_csv_file(join(path,
                                           str(self.id) + '.csv'),
                                      self.timezone, self.options['frequency'],
                                      self.options['clean_na'],
                                      self.sources['csv']['index']))

        except FileNotFoundError:
            # Handle error
            if 'api' in self.source:
                std_out(
                    f'No cached data file found for device {self.id} in {path}. Moving on',
                    'WARNING')
            elif 'csv' in self.source:
                std_out(f'File not found for device {self.id} in {path}',
                        'ERROR')

            self.loaded = False
        except:
            print_exc()
            self.loaded = False
        else:
            if self.readings is not None:
                self.__check_sensors__()
                if not self.readings.empty:
                    if max_amount is not None:
                        std_out(f'Trimming dataframe to {max_amount} rows')
                        self.readings = self.readings.dropna(
                            axis=0, how='all').head(max_amount)
                    # Only add metrics if there is something that can be potentially processed
                    self.__fill_metrics__()
                    self.loaded = True
                    if convert_units: self.__convert_units__()
                else:
                    std_out('Empty dataframe in readings', 'WARNING')
        finally:
            self.processed = False
            return self.loaded
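
A hedged usage sketch (`device` stands in for the class defining load(); option keys follow the docstring above):

    options = {'min_date': '2021-01-01',
               'max_date': None,
               'frequency': '1Min',
               'clean_na': None}

    if device.load(options=options, only_unprocessed=True, max_amount=1000):
        print(device.readings.head())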
Example #6
def read_csv_file(file_path,
                  timezone,
                  frequency,
                  clean_na=None,
                  index_name='',
                  skiprows=None,
                  sep=',',
                  encoding='utf-8',
                  tzaware=True):
    """
    Reads a csv file, applies cleaning, localisation and resampling, and puts it into a pandas dataframe
    Parameters
    ----------
        file_path: String
            File path for csv file
        timezone: String
            Time zone for the csv file
        frequency: String
            Resampling frequency in pandas offset format (e.g. '1Min')
        clean_na: String or None
            None
            Whether to perform clean_na or not. Either None, 'fill' or 'drop'
        index_name: String
            ''
            Name of the column to set an index in the dataframe
        skiprows: list or None
            None
            List of rows to skip (same as skiprows in pandas.read_csv)
        sep: String
            ','
            Separator (same as sep in pandas.read_csv)
        encoding: String
            'utf-8'
            Encoding of the csv file
        tzaware: bool
            True
            Whether to make the index timezone-aware when localising
    Returns
    -------
        Pandas dataframe
    """

    # Read pandas dataframe

    df = read_csv(file_path,
                  verbose=False,
                  skiprows=skiprows,
                  sep=sep,
                  encoding=encoding,
                  encoding_errors='ignore')

    flag_found = False
    if type(index_name) == str:
        # Single joint index
        for column in df.columns:
            if index_name in column:
                df = df.set_index(column)
                flag_found = True
                break
    elif type(index_name) == list:
        # Composite index (for instance, DATE and TIME in different columns)
        for iname in index_name:
            if iname not in df.columns:
                std_out(f'{iname} not found in columns', 'ERROR')
                return None
        joint_index_name = '_'.join(index_name)
        df[joint_index_name] = df[index_name].agg(' '.join, axis=1)
        df = df.set_index(joint_index_name)
        df.drop(index_name, axis=1, inplace=True)
        flag_found = True

    if not flag_found:
        std_out('Index not found. Cannot reindex', 'ERROR')
        return None

    # Set index
    df.index = localise_date(df.index, timezone, tzaware=tzaware)
    # Remove duplicates
    df = df[~df.index.duplicated(keep='first')]

    # Sort index
    df.sort_index(inplace=True)

    # Drop unnecessary columns
    df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True)

    # Check for weird things in the data
    # df = df.apply(to_numeric, errors='coerce')
    df = df.astype(float, errors='ignore')

    # Resample
    df = df.resample(frequency).mean()

    # Remove na
    df = clean(df, clean_na, how='all')

    return df
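
A hedged usage sketch of read_csv_file (path, timezone and index column are illustrative):

    df = read_csv_file(file_path='data/device_123.csv',
                       timezone='Europe/Madrid',
                       frequency='1Min',
                       clean_na='drop',
                       index_name='TIME')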
Example #7
    def get_device_data(self,
                        start_date=None,
                        end_date=None,
                        frequency='1Min',
                        clean_na=None):

        std_out(f'Requesting data from SC API')
        std_out(f'Device ID: {self.id}')

        rollup = self.convert_rollup(frequency)
        std_out(f'Using rollup: {rollup}')

        # Make sure we have everything we need beforehand
        self.get_device_sensors()
        self.get_device_location()
        self.get_device_last_reading()
        self.get_device_added_at()
        self.get_kit_ID()

        if self.location is None: return None

        # Check start date
        # if start_date is None and self.added_at is not None:
        #     start_date = localise_date(to_datetime(self.added_at, format = '%Y-%m-%dT%H:%M:%SZ'), self.location)
        #     # to_datetime(self.added_at, format = '%Y-%m-%dT%H:%M:%SZ')
        # elif start_date is not None:
        #     start_date = to_datetime(start_date, format = '%Y-%m-%dT%H:%M:%SZ')
        if start_date is not None:
            start_date = localise_date(
                to_datetime(start_date, format='%Y-%m-%dT%H:%M:%SZ'),
                self.location)

            # if start_date.tzinfo is None: start_date = start_date.tz_localize('UTC').tz_convert(self.location)
            std_out(f'Min Date: {start_date}')

        # # Check end date
        # if end_date is None and self.last_reading_at is not None:
        #     # end_date = to_datetime(self.last_reading_at, format = '%Y-%m-%dT%H:%M:%SZ')
        #     end_date = localise_date(to_datetime(self.last_reading_at, format = '%Y-%m-%dT%H:%M:%SZ'), self.location)
        # elif end_date is not None:
        #     end_date = to_datetime(end_date, format = '%Y-%m-%dT%H:%M:%SZ')
        if end_date is not None:
            end_date = localise_date(
                to_datetime(end_date, format='%Y-%m-%dT%H:%M:%SZ'),
                self.location)

            # if end_date.tzinfo is None: end_date = end_date.tz_localize('UTC').tz_convert(self.location)

            std_out(f'Max Date: {end_date}')

        # if start_date > end_date: std_out('Ignoring device dates. Probably SD card device', 'WARNING')

        # Print stuff
        std_out('Kit ID: {}'.format(self.kit_id))
        # if start_date < end_date: std_out(f'Dates: from: {start_date}, to: {end_date}')
        std_out(f'Device timezone: {self.location}')
        if not self.sensors.keys():
            std_out(f'Device is empty')
            return None
        else:
            std_out(f'Sensor IDs: {list(self.sensors.keys())}')

        df = DataFrame()

        # Get devices in the sensor first
        for sensor_id in self.sensors.keys():

            # Request sensor per ID
            request = self.API_BASE_URL + '{}/readings?'.format(self.id)

            if start_date is None:
                request += 'from=2001-01-01'
            elif end_date is not None:
                if start_date > end_date: request += 'from=2001-01-01'
                else:
                    request += f'from={start_date}'
                    request += f'&to={end_date}'

            request += f'&rollup={rollup}'
            request += f'&sensor_id={sensor_id}'
            request += '&function=avg'
            # if end_date is not None:
            #     if end_date > start_date: request += f'&to={end_date}'

            # Make request
            sensor_req = get(request)
            flag_error = False
            try:
                sensorjson = sensor_req.json()
            except:
                print_exc()
                std_out('Problem with json data from API', 'ERROR')
                flag_error = True
                continue

            if 'readings' not in sensorjson.keys():
                std_out(f'No readings key in request for sensor: {sensor_id}',
                        'ERROR')
                flag_error = True
                continue

            elif sensorjson['readings'] == []:
                std_out(f'No data in request for sensor: {sensor_id}',
                        'WARNING')
                flag_error = True
                continue

            if flag_error: continue

            # Put the sensor readings into a dataframe
            try:
                dfsensor = DataFrame(sensorjson['readings']).set_index(0)
                dfsensor.columns = [self.sensors[sensor_id]]
                # dfsensor.index = to_datetime(dfsensor.index).tz_localize('UTC').tz_convert(self.location)
                dfsensor.index = localise_date(dfsensor.index, self.location)
                dfsensor.sort_index(inplace=True)
                dfsensor = dfsensor[~dfsensor.index.duplicated(keep='first')]

                # Drop unnecessary columns
                dfsensor.drop([i for i in dfsensor.columns if 'Unnamed' in i],
                              axis=1,
                              inplace=True)
                # Check for weird things in the data
                dfsensor = dfsensor.apply(to_numeric, errors='coerce')
                # Resample
                dfsensor = dfsensor.resample(frequency).mean()

                df = df.combine_first(dfsensor)
            except:
                print_exc()
                std_out('Problem with sensor data from API', 'ERROR')
                flag_error = True
                continue

            try:
                df = df.reindex(df.index.rename('Time'))

                df = clean(df, clean_na, how='all')
                # if clean_na is not None:
                #     if clean_na == 'drop':
                #         # std_out('Cleaning na with drop')
                #         df.dropna(axis = 0, how='all', inplace=True)
                #     elif clean_na == 'fill':
                #         df = df.fillna(method='bfill').fillna(method='ffill')
                #         # std_out('Cleaning na with fill')
                self.data = df

            except:
                std_out('Problem closing up the API dataframe', 'ERROR')
                return None

        if not flag_error:
            std_out(f'Device {self.id} loaded successfully from API',
                    'SUCCESS')
        return self.data
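
A hedged usage sketch (`sc_device` stands in for the SC API device class defining the method above; dates are illustrative and follow the '%Y-%m-%dT%H:%M:%SZ' format that to_datetime expects above):

    data = sc_device.get_device_data(start_date='2021-01-01T00:00:00Z',
                                     end_date='2021-02-01T00:00:00Z',
                                     frequency='1Min',
                                     clean_na=None)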
Example #8
def alphasense_803_04(dataframe, **kwargs):
    """
    Calculates pollutant concentration based on 4 electrode sensor readings (mV)
    and calibration ID. It adds a configurable background concentration and correction
    based on AAN803-04
    Parameters
    ----------
        from_date: string, datetime object
            Date from which this calibration id is valid
        to_date: string, datetime object
            Date until which this calibration id is valid. None if currently valid
        alphasense_id: string
            Alphasense sensor ID (must be in calibrations.json)
        we: string
            Name of working electrode found in dataframe (V)
        ae: string
            Name of auxiliary electrode found in dataframe (V)
        t: string
            Name of reference temperature
        use_alternative: boolean
            Default false
            Use alternative algorithm as shown in the AAN
        timezone: string
            Valid timezone for date localisation
    Returns
    -------
        calculation of pollutant in ppb
    """
    def comp_t(x, comp_lut):
        if isnull(x['t']): return None

        # Below min temperature, we saturate
        if x['t'] < as_t_comp[0]: return comp_lut[0]

        # Over max temperature, we saturate
        if x['t'] > as_t_comp[-1]: return comp_lut[-1]

        # Otherwise, we calculate
        idx_2 = next(axis[0] for axis in enumerate(as_t_comp)
                     if axis[1] > x['t'])
        idx_1 = idx_2 - 1

        delta_y = comp_lut[idx_2] - comp_lut[idx_1]
        delta_x = as_t_comp[idx_2] - as_t_comp[idx_1]

        return comp_lut[idx_1] + (x['t'] -
                                  as_t_comp[idx_1]) * delta_y / delta_x

    # Check inputs
    flag_error = False
    if 'we' not in kwargs: flag_error = True
    if 'ae' not in kwargs: flag_error = True
    if 'alphasense_id' not in kwargs: flag_error = True
    if 't' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    if kwargs['alphasense_id'] is None:
        std_out(f"Empty ID. Ignoring", 'WARNING')
        return None

    # Get Sensor data
    if kwargs['alphasense_id'] not in config.calibrations:
        std_out(f"Sensor {kwargs['alphasense_id']} not in calibration data",
                'ERROR')
        return None

    # Process input dates
    if 'from_date' not in kwargs: from_date = None
    else:
        if 'timezone' not in kwargs:
            std_out('Cannot localise date without timezone')
            return None
        from_date = localise_date(kwargs['from_date'], kwargs['timezone'])

    if 'to_date' not in kwargs: to_date = None
    else:
        if 'timezone' not in kwargs:
            std_out('Cannot localise date without timezone')
            return None
        to_date = localise_date(kwargs['to_date'], kwargs['timezone'])

    # Make copy
    df = dataframe.copy()
    # Trim data
    if from_date is not None: df = df[df.index > from_date]
    if to_date is not None: df = df[df.index < to_date]

    # Get sensor type
    as_type = as_sensor_codes[kwargs['alphasense_id'][0:3]]

    # Use alternative method or not
    if 'use_alternative' not in kwargs: kwargs['use_alternative'] = False
    if 'avoid_negative_conc' not in kwargs: kwargs['avoid_negative_conc'] = False
    if kwargs['use_alternative']: algorithm_idx = 1
    else: algorithm_idx = 0

    # Get algorithm name
    algorithm = list(as_sensor_algs[as_type].keys())[algorithm_idx]
    comp_type = as_sensor_algs[as_type][algorithm][0]
    comp_lut = as_sensor_algs[as_type][algorithm][1]

    # Retrieve calibration data - verify it's all float
    cal_data = config.calibrations[kwargs['alphasense_id']]
    for item in cal_data:
        try:
            cal_data[item] = float(cal_data[item])
        except:
            std_out(
                f"Alphasense calibration data for {kwargs['alphasense_id']} is not correct",
                'ERROR')
            std_out(f'Error on {item}: \'{cal_data[item]}\'', 'ERROR')
            return

    # Remove spurious voltages (0V < electrode < 5V)
    for electrode in ['we', 'ae']:
        subkwargs = {
            'name': kwargs[electrode],
            'limits': (0, 5),  # In V
            'window_size': None
        }

        df[f'{electrode}_clean'] = clean_ts(df, **subkwargs)

    # Compensate electronic zero
    df['we_t'] = df['we_clean'] - (cal_data['we_electronic_zero_mv'] / 1000
                                   )  # in V
    df['ae_t'] = df['ae_clean'] - (cal_data['ae_electronic_zero_mv'] / 1000
                                   )  # in V
    # Get requested temperature
    df['t'] = df[kwargs['t']]

    # Temperature compensation - done line by line as it has special conditions
    df[comp_type] = df.apply(lambda x: comp_t(x, comp_lut),
                             axis=1)  # temperature correction factor

    # Algorithm selection (result in V)
    if algorithm == 1:
        df['we_c'] = df['we_t'] - df['n_t'] * df['ae_t']
    elif algorithm == 2:
        df['we_c'] = df['we_t'] - df['k_t'] * (
            cal_data['we_sensor_zero_mv'] /
            cal_data['ae_sensor_zero_mv']) * df['ae_t']
    elif algorithm == 3:
        df['we_c'] = df['we_t'] - (
            cal_data['we_sensor_zero_mv'] -
            cal_data['ae_sensor_zero_mv']) / 1000.0 - df['kp_t'] * df['ae_t']
    elif algorithm == 4:
        df['we_c'] = df['we_t'] - cal_data['we_sensor_zero_mv'] / 1000.0 - df[
            'kpp_t']
    # TODO - Check if df['we_c'] needs to always be positive and avoid spurious data

    # Verify if it has NO2 cross-sensitivity (in V)
    if cal_data['we_cross_sensitivity_no2_mv_ppb'] != float(0):
        df['we_no2_eq'] = df['NO2'] * cal_data[
            'we_cross_sensitivity_no2_mv_ppb'] / 1000.0
        df['we_c'] -= df['we_no2_eq']  # in V

    # Calculate sensor concentration
    df['conc'] = df['we_c'] / (cal_data['we_sensitivity_mv_ppb'] / 1000.0
                               )  # in ppb

    if kwargs['avoid_negative_conc']:
        df['conc'].clip(lower=0, inplace=True)

    return df['conc']
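
The comp_t helper above is a saturating linear interpolation over the temperature lookup table. A standalone sketch with made-up breakpoints and factors:

    as_t_comp = [-30, -20, -10, 0, 10, 20, 30, 40, 50]            # degC breakpoints (illustrative)
    comp_lut = [1.3, 1.3, 1.3, 1.2, 1.0, 0.0, -1.0, -2.5, -4.0]   # correction factors (illustrative)

    def interp_saturating(t):
        # Saturate outside the table range
        if t <= as_t_comp[0]: return comp_lut[0]
        if t >= as_t_comp[-1]: return comp_lut[-1]
        # Linear interpolation between the two surrounding breakpoints
        idx_2 = next(i for i, tc in enumerate(as_t_comp) if tc > t)
        idx_1 = idx_2 - 1
        slope = (comp_lut[idx_2] - comp_lut[idx_1]) / (as_t_comp[idx_2] - as_t_comp[idx_1])
        return comp_lut[idx_1] + (t - as_t_comp[idx_1]) * slope

    print(interp_saturating(15))  # halfway between 1.0 and 0.0 -> 0.5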
Example #9
def alphasense_pt1000(dataframe, **kwargs):
    """
    Calculates temperature in degC of a PT1000, given positive and negative voltage
    levels (in V), considering negative PT1000 value is grounded
    Parameters
    ----------
        from_date: string, datetime object
            Date from which this calibration id is valid
        to_date: string, datetime object
            Date until which this calibration id is valid. None if currently valid
        pt1000plus: string
            Name of PT1000+ found in dataframe (V)
        pt1000minus: string
            Name of PT1000- found in dataframe (V)
        timezone: string
            Valid timezone for date localisation
        afe_id: string
            Alphasense AFE ID (must be in calibrations.json)
    Returns
    -------
        Calculation of temperature in degC
    """
    # Check inputs
    flag_error = False
    if 'pt1000plus' not in kwargs: flag_error = True
    if 'pt1000minus' not in kwargs: flag_error = True
    if 'afe_id' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    if kwargs['afe_id'] is None:
        std_out(f"Empty ID. Ignoring", 'WARNING')
        return None

    # Get Sensor data
    if kwargs['afe_id'] not in config.calibrations:
        std_out(f"AFE {kwargs['afe_id']} not in calibration data", 'ERROR')
        return None

    # Process input dates
    if 'from_date' not in kwargs: from_date = None
    else:
        if 'timezone' not in kwargs:
            std_out('Cannot localise date without timezone', 'ERROR')
            return None
        from_date = localise_date(kwargs['from_date'], kwargs['timezone'])

    if 'to_date' not in kwargs: to_date = None
    else:
        if 'timezone' not in kwargs:
            std_out('Cannot localise date without timezone', 'ERROR')
            return None
        to_date = localise_date(kwargs['to_date'], kwargs['timezone'])

    # Retrieve calibration data - verify it's all float
    cal_data = config.calibrations[kwargs['afe_id']]
    for item in cal_data:
        try:
            cal_data[item] = float(cal_data[item])
        except:
            std_out(
                f"Alphasense calibration data for {kwargs['afe_id']} is not correct",
                'ERROR')
            std_out(f'Error on {item}: \'{cal_data[item]}\'', 'ERROR')
            return

    # Make copy
    df = dataframe.copy()
    # Trim data
    if from_date is not None: df = df[df.index > from_date]
    if to_date is not None: df = df[df.index < to_date]

    # Calculate temperature
    df['v20'] = cal_data['v20'] - (cal_data['t20'] - 20.0) / 1000.0
    df['temp'] = (df[kwargs['pt1000plus']] -
                  df['v20']) * 1000.0 + 20.0  # in degC

    return df['temp']
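
A hedged usage sketch (column names and AFE id are illustrative; the id must exist in config.calibrations):

    df['AFE_TEMP'] = alphasense_pt1000(df,
                                       pt1000plus='PT1000_POS',
                                       pt1000minus='PT1000_NEG',
                                       afe_id='AFE123456',
                                       timezone='Europe/Madrid')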
Example #10
def read_csv_file(file_path, location, frequency, clean_na = None, index_name = '', skiprows = None, sep = ',', encoding = 'utf-8'):
    """
    Reads a csv file, applies cleaning, localisation and resampling, and puts it into a pandas dataframe
    Parameters
    ----------
        file_path: String
            File path for csv file
        location: String
            Time zone for the csv file
        clean_na: String or None
            None
            Whether to perform clean_na or not. Either None, 'fill' or 'drop'
        index_name: String
            ''
            Name of the column to set an index in the dataframe
        skiprows: list or None
            None
            List of rows to skip (same as skiprows in pandas.read_csv)
        sep: String
            ','
            Separator (same as sep in pandas.read_csv)
        encoding: String
            'utf-8'
            Encoding of the csv file
    Returns
    -------
        Pandas dataframe
    """  

    # Read pandas dataframe
    df = read_csv(file_path, verbose = False, skiprows = skiprows, sep = sep, encoding = encoding)

    flag_found = False
    for column in df.columns:
        if index_name in column: 
            df = df.set_index(column)
            flag_found = True
            break

    if not flag_found:
        std_out('Index not found. Cannot reindex', 'ERROR')
        return None

    # Set index
    df.index = localise_date(df.index, location)
    # Remove duplicates
    df = df[~df.index.duplicated(keep='first')]
    
    # Sort index
    df.sort_index(inplace=True)
    
    # Drop unnecessary columns
    df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True)
    
    # Check for weird things in the data
    df = df.apply(to_numeric, errors='coerce')   
    
    # Resample
    df = df.resample(frequency).mean()

    # Remove na
    df = clean(df, clean_na, how = 'all')
    
    return df    
Example #11
    def process(self, only_new=False, lmetrics=None):
        '''
        Processes devices metrics, either added by the blueprint definition
        or the addition using Device.add_metric(). See help(Device.add_metric) for
        more information about the definition of the metrics to be added

        Parameters
        ----------
        only_new: boolean
            False
            If True, process only the metrics not yet present in
            Device.readings; existing channels are skipped
        lmetrics: list
            None
            List of metrics to process. If none, processes all
        Returns
        ----------
            boolean
            True if processed ok, False otherwise
        '''

        process_ok = True

        if 'metrics' not in vars(self):
            std_out(f'Device {self.id} has nothing to process. Skipping',
                    'WARNING')
            return process_ok

        std_out('---------------------------')
        std_out(f'Processing device {self.id}')

        if lmetrics is None: metrics = self.metrics
        else: metrics = dict([(key, self.metrics[key]) for key in lmetrics])

        for metric in metrics:
            std_out(f'Processing {metric}')

            if only_new and metric in self.readings:
                std_out(f'Skipping. Already in device')
                continue

            # Check if the metric contains a custom from_list
            if 'from_list' in metrics[metric]:
                lazy_name = metrics[metric]['from_list']
            else:
                lazy_name = f"scdata.device.process.{metrics[metric]['process']}"

            try:
                funct = LazyCallable(lazy_name)
            except ModuleNotFoundError:
                print_exc()
                std_out('Problem adding lazy callable to metrics list',
                        'ERROR')
                return False

            args, kwargs = list(), dict()
            if 'args' in metrics[metric]: args = metrics[metric]['args']
            if 'kwargs' in metrics[metric]: kwargs = metrics[metric]['kwargs']

            try:
                self.readings[metric] = funct(self.readings, *args, **kwargs)
            except KeyError:
                print_exc()
                std_out('Metric args not in dataframe', 'ERROR')

            process_ok &= metric in self.readings

        if process_ok:
            # Latest postprocessing to latest readings
            if self.api_device.get_postprocessing_info() is not None:
                latest_postprocessing = localise_date(
                    self.readings.index[-1],
                    self.location).strftime('%Y-%m-%dT%H:%M:%S')
                self.api_device.postprocessing_info[
                    'latest_postprocessing'] = latest_postprocessing

        return process_ok
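
A hedged sketch of a metric entry as consumed by the loop above (the process name resolves to a function in scdata.device.process; channel names and kwargs are illustrative, and help(Device.add_metric) holds the exact contract):

    metric = {'CO_CONC': {'process': 'alphasense_803_04',
                          'kwargs': {'we': 'ADC_48_0',
                                     'ae': 'ADC_48_1',
                                     't': 'EXT_TEMP',
                                     'alphasense_id': '212031234'}}}

    device.add_metric(metric)
    device.process(lmetrics=['CO_CONC'])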
Example #12
    def load(self, options=None, path=None, convert_units=True):
        '''
        Loads the device with some options

        Parameters:
        -----------
        options: dict()
            Default: None
            options['min_date'] = date to load data from
                Default to device min_date (from blueprint or test)
            options['max_date'] = date to load data to
                Default to device max_date (from blueprint or test)
            options['clean_na'] = clean na (drop_na, fill_na or None)
                Default to device clean_na (from blueprint or test)
            options['frequency'] = frequency to load data at in pandas format
                Default to device frequency (from blueprint or test) or '1Min'
        path: String
            Default: None
            Path where the csv file is, if any. Normally does not need to be provided; only for internal usage
        convert_units: bool
            Default: True
            Convert units for channels based on config._channel_lut

        Returns
        ----------
            True if loaded correctly
        '''

        # Add test overrides if we have them, otherwise set device defaults
        if options is not None: self.check_overrides(options)
        else: self.check_overrides()

        try:
            if self.source == 'csv':
                self.readings = self.readings.combine_first(
                    read_csv_file(join(path, self.processed_data_file),
                                  self.location, self.options['frequency'],
                                  self.options['clean_na'],
                                  self.sources[self.source]['index']))
                if self.readings is not None:
                    self.__convert_names__()

            elif 'api' in self.source:

                # Get device location
                self.location = self.api_device.get_device_location()

                if path is None:
                    if self.load_postprocessing_info():
                        # Override dates for post-processing
                        if self.latest_postprocessing is not None:
                            hw_latest_post = localise_date(
                                self.latest_postprocessing, self.location)
                            # Override min processing date
                            self.options['min_date'] = hw_latest_post

                    df = self.api_device.get_device_data(
                        self.options['min_date'], self.options['max_date'],
                        self.options['frequency'], self.options['clean_na'])

                    # API Device is not aware of other csv index data, so make it here
                    if 'csv' in self.sources and df is not None:
                        df = df.reindex(
                            df.index.rename(self.sources['csv']['index']))

                    # Combine it with readings if possible
                    if df is not None:
                        self.readings = self.readings.combine_first(df)

                else:
                    # Cached case
                    self.readings = self.readings.combine_first(
                        read_csv_file(join(path,
                                           str(self.id) + '.csv'),
                                      self.location, self.options['frequency'],
                                      self.options['clean_na'],
                                      self.sources['csv']['index']))

        except FileNotFoundError:
            # Handle error
            if 'api' in self.source:
                std_out(
                    f'No cached data file found for device {self.id} in {path}. Moving on',
                    'WARNING')
            elif 'csv' in self.source:
                std_out(f'File not found for device {self.id} in {path}',
                        'ERROR')

            self.loaded = False
        except:
            print_exc()
            self.loaded = False
        else:
            if self.readings is not None:
                self.__check_sensors__()

                if self.load_postprocessing_info() is not None:
                    self.__fill_metrics__()

                if not self.readings.empty:
                    self.loaded = True
                    if convert_units: self.__convert_units__()
        finally:
            return self.loaded
Example #13
def dispersion_analysis(self,
                        devices=None,
                        min_date=None,
                        max_date=None,
                        timezone='Europe/Madrid',
                        smooth_window=5):
    '''
        Creates channels on a new dataframe for each device/channel combination,
        and computes the point-by-point average and standard deviation of each

        Parameters:
        -----------
        devices: list
            Default: None
            If list of devices is None, then it will use all devices in self.devices
        min_date: String
            Default: None
            Minimum date from which to perform the analysis

        max_date: String
            Default: None
            Maximum date until which to perform the analysis

        timezone: String
            Default: 'Europe/Madrid'
            Timezone for date localisation

        smooth_window: int
            Default: 5
            If not None, performs smoothing of the channels with rolling average.

        Returns:
        ---------
            If devices is None, stores self.dispersion_df and returns
            self.dispersion_summary; otherwise returns a per-channel
            dict with the mean STD for the given device group
    '''
    dispersion_df = DataFrame()

    # Get common channels for this group
    if devices is not None:
        common_ch = self.get_common_channels(devices=devices)
    else:
        if len(self.common_channels) == 0: self.get_common_channels()
        common_ch = self.common_channels
        devices = list(self.devices.keys())

    # Localise dates
    min_date = localise_date(min_date, timezone)
    max_date = localise_date(max_date, timezone)

    # Calculate the dispersion for the sensors present in the dataset
    warning = False

    for channel in common_ch:
        columns = list()

        if channel in config._dispersion['ignore_channels']: continue

        for device in devices:
            if channel in self.devices[device].readings.columns and len(
                    self.devices[device].readings.loc[:, channel]) > 0:
                # Important to resample and bfill for unmatching measures
                if smooth_window is not None:
                    # channel_new = self.devices[device].readings[channel].resample('1Min').bfill().rolling(window=smooth_window).mean()
                    channel_new = self.devices[device].readings[channel].bfill(
                    ).rolling(window=smooth_window).mean()
                    dispersion_df[channel + '-' +
                                  device] = channel_new[channel_new > 0]
                else:
                    dispersion_df[channel + '-' + device] = self.devices[
                        device].readings[channel].resample('1Min').bfill()

                columns.append(channel + '-' + device)
            else:
                std_out(f'Device {device} does not contain {channel}',
                        'WARNING')
                warning = True

        dispersion_df.index = localise_date(dispersion_df.index, timezone)

        # Trim dataset to min and max dates (normally these tests are carried out with _minutes_ of differences)
        if min_date is not None:
            dispersion_df = dispersion_df[dispersion_df.index > min_date]
        if max_date is not None:
            dispersion_df = dispersion_df[dispersion_df.index < max_date]

        # Calculate Metrics
        dispersion_df[channel + '_AVG'] = dispersion_df.loc[:, columns].mean(
            skipna=True, axis=1)
        dispersion_df[channel + '_STD'] = dispersion_df.loc[:, columns].std(
            skipna=True, axis=1)

    if not warning:
        std_out(f'All devices have the provided channels list recorded')
    else:
        std_out(f'Missing channels, review data', 'WARNING')

    if devices is None:
        self.dispersion_df = dispersion_df

        return self.dispersion_summary

    group_dispersion_summary = dict()

    for channel in common_ch:
        if channel in config._dispersion['ignore_channels']: continue
        # Calculate
        group_dispersion_summary[channel] = dispersion_df[channel +
                                                          '_STD'].mean()

    return group_dispersion_summary
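
A hedged usage sketch (`test` stands in for the object defining dispersion_analysis; device names and dates are illustrative):

    summary = test.dispersion_analysis(devices=['14602', '14603'],
                                       min_date='2021-01-01',
                                       max_date='2021-01-08',
                                       timezone='Europe/Madrid',
                                       smooth_window=5)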