def post_device_data(self, df, sensor_id, clean_na = 'drop'):
    '''
        POST data in the SmartCitizen API
        Parameters
        ----------
            df: pandas DataFrame
                Contains data in a DataFrame format.
                Data is posted regardless of the column name; it uses the sensor id
                provided, not the name.
                Data is posted in UTC TZ so the dataframe needs to have a localised timestamp
            sensor_id: int
                The sensor id
            clean_na: string, optional
                Default: 'drop'
                Whether to 'drop' or 'fill' NaNs before posting
        Returns
        -------
            True if the data was posted successfully, False otherwise
    '''
    if 'SC_ADMIN_BEARER' not in environ:
        std_out('Cannot post without Auth Bearer', 'ERROR')
        return False

    headers = {'Authorization':'Bearer ' + environ['SC_ADMIN_BEARER'],
               'Content-type': 'application/json'}

    # Get sensor name
    sensor_name = list(df.columns)[0]
    # Clean df of nans
    df = clean(df, clean_na, how = 'all')

    # Process dataframe
    df['id'] = sensor_id
    df.index.name = 'recorded_at'
    df.rename(columns = {sensor_name: 'value'}, inplace = True)
    df.columns = MultiIndex.from_product([['sensors'], df.columns])

    j = (df.groupby('recorded_at', as_index = True)
           .apply(lambda x: x['sensors'][['value', 'id']].to_dict('records')))

    # Prepare json post
    payload = {"data":[]}
    for item in j.index:
        payload["data"].append(
            {
                "recorded_at": localise_date(item, 'UTC').strftime('%Y-%m-%dT%H:%M:%SZ'),
                "sensors": j[item]
            }
        )

    payload_json = dumps(payload)

    response = post(f'https://api.smartcitizen.me/v0/devices/{self.id}/readings',
                    data = payload_json, headers = headers)

    if response.status_code == 200 or response.status_code == 201:
        return True
    return False
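# Illustration only (not part of the API client): a minimal sketch of the JSON payload
# that post_device_data assembles above, using a toy single-column DataFrame. The sensor
# id (55) and the column name ('noise') are made-up values.
def _example_post_payload():
    from json import dumps
    from pandas import DataFrame, to_datetime

    toy = DataFrame({'noise': [45.2, 47.8]},
                    index=to_datetime(['2021-01-01 10:00:00', '2021-01-01 10:01:00'], utc=True))
    payload = {"data": [
        {"recorded_at": ts.strftime('%Y-%m-%dT%H:%M:%SZ'),
         "sensors": [{"value": float(value), "id": 55}]}
        for ts, value in toy['noise'].items()]}
    return dumps(payload)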
def get_device_data(self, start_date = None, end_date = None, frequency = '3Min', clean_na = None):

    if start_date is not None: days_ago = (to_datetime(date.today())-to_datetime(start_date)).days
    else: days_ago = 365 # One year of data

    std_out(f'Requesting data from MUV API')
    std_out(f'Device ID: {self.id}')
    self.get_device_location()
    self.get_device_sensors()

    # Get devices
    try:
        if days_ago == -1: url = f'{self.API_BASE_URL}getSensorData?sensor_id={self.id}'
        else: url = f'{self.API_BASE_URL}getSensorData?sensor_id={self.id}&days={days_ago}'
        df = DataFrame(get(url).json())
    except:
        print_exc()
        std_out('Failed sensor request. Probably no connection', 'ERROR')
        return None

    try:
        # Rename columns
        df.rename(columns = self.sensors, inplace = True)
        df = df.set_index('time')

        df.index = localise_date(df.index, self.location)
        df = df[~df.index.duplicated(keep='first')]

        # Drop unnecessary columns
        df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True)
        df.drop('id', axis=1, inplace=True)
        # Check for weird things in the data
        df = df.apply(to_numeric, errors='coerce')
        # Resample
        df = df.resample(frequency).mean()
        df = df.reindex(df.index.rename('Time'))

        df = clean(df, clean_na, how = 'all')

        self.data = df
    except:
        print_exc()
        std_out('Problem closing up the API dataframe', 'ERROR')
        return None

    std_out(f'Device {self.id} loaded successfully from API', 'SUCCESS')
    return self.data
def update_latest_postprocessing(self):
    # Sets latest postprocessing to latest reading
    if self.source == 'api':
        if self.api_device.get_device_postprocessing() is not None:
            std_out('Updating postprocessing')
            # Add latest postprocessing rounded up with the frequency, so that we
            # don't end up in an endless loop processing only the latest data line
            # (minute vs. second precision of the readings)
            self.latest_postprocessing = localise_date(self.readings.index[-1] + \
                to_timedelta(self.options['frequency']), 'UTC').strftime('%Y-%m-%dT%H:%M:%S')
            self.api_device.postprocessing['latest_postprocessing'] = self.latest_postprocessing
            std_out(f"Updated latest_postprocessing to: {self.api_device.postprocessing['latest_postprocessing']}")

            return True

    return False
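# Illustration only: how the last reading timestamp is pushed forward by one frequency
# step above, so that the next run does not reprocess the same data line. '1Min' here
# stands in for whatever self.options['frequency'] holds.
def _example_latest_postprocessing():
    from pandas import Timestamp, to_timedelta

    last_reading = Timestamp('2021-06-01 12:59:30', tz='UTC')
    rounded = last_reading + to_timedelta('1Min')
    return rounded.strftime('%Y-%m-%dT%H:%M:%S')  # '2021-06-01T13:00:30'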
def alphasense_803_04(dataframe, **kwargs):
    """
    Calculates pollutant concentration based on 4 electrode sensor readings (mV)
    and calibration ID. It adds a configurable background concentration and a
    correction based on AAN 803-04
    Parameters
    ----------
        from_date: string, datetime object
            Date from which this calibration id is valid from
        to_date: string, datetime object
            Date until which this calibration id is valid to. None if current
        id: string
            Alphasense sensor ID (must be in calibrations.yaml)
        we: string
            Name of working electrode found in dataframe (V)
        ae: string
            Name of auxiliary electrode found in dataframe (V)
        t: string
            Name of reference temperature
        use_alternative: boolean
            Default: False
            Use alternative algorithm as shown in the AAN
        location: string
            Valid location for date localisation
    Returns
    -------
        calculation of pollutant in ppb
    """
    def alg_1(x, cal_data):
        return x['we_t'] - x['n_t'] * x['ae_t']

    def alg_2(x, cal_data):
        return x['we_t'] - x['k_t'] * (cal_data['we_sensor_zero_mv'] / cal_data['ae_sensor_zero_mv']) * x['ae_t']

    def alg_3(x, cal_data):
        return x['we_t'] - (cal_data['we_sensor_zero_mv'] - cal_data['ae_sensor_zero_mv']) / 1000.0 - x['kp_t'] * x['ae_t']

    def alg_4(x, cal_data):
        return x['we_t'] - cal_data['we_sensor_zero_mv'] / 1000.0 - x['kpp_t']

    def comp_t(x, comp_lut):
        # Below min temperature, we saturate
        if x['t'] < config._as_t_comp[0]: return comp_lut[0]
        # Over max temperature, we saturate
        if x['t'] > config._as_t_comp[-1]: return comp_lut[-1]

        # Otherwise, we interpolate between the two nearest points of the LUT
        idx_2 = next(item[0] for item in enumerate(config._as_t_comp) if item[1] > x['t'])
        idx_1 = idx_2 - 1

        delta_y = comp_lut[idx_2] - comp_lut[idx_1]
        delta_x = config._as_t_comp[idx_2] - config._as_t_comp[idx_1]

        return comp_lut[idx_1] + (x['t'] - config._as_t_comp[idx_1]) * delta_y / delta_x

    def reverse_no2(x, cal_data):
        return x['NO2'] * cal_data['we_cross_sensitivity_no2_mv_ppb'] / 1000.0

    # Check inputs
    flag_error = False
    if 'we' not in kwargs: flag_error = True
    if 'ae' not in kwargs: flag_error = True
    if 'id' not in kwargs: flag_error = True
    if 't' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    # Get Sensor data
    if kwargs['id'] not in config.calibrations:
        std_out(f"Sensor {kwargs['id']} not in calibration data", 'ERROR')
        return None

    # Process input dates
    if 'from_date' not in kwargs: from_date = None
    else:
        if 'location' not in kwargs:
            std_out('Cannot localise date without location')
            return None
        from_date = localise_date(kwargs['from_date'], kwargs['location'])

    if 'to_date' not in kwargs: to_date = None
    else:
        if 'location' not in kwargs:
            std_out('Cannot localise date without location')
            return None
        to_date = localise_date(kwargs['to_date'], kwargs['location'])

    # Make copy
    df = dataframe.copy()
    # Trim data
    if from_date is not None: df = df[df.index > from_date]
    if to_date is not None: df = df[df.index < to_date]

    # Get sensor type
    as_type = config._as_sensor_codes[kwargs['id'][0:3]]

    # Use alternative method or not
    if 'use_alternative' not in kwargs: kwargs['use_alternative'] = False
    if kwargs['use_alternative']: algorithm_idx = 1
    else: algorithm_idx = 0

    # Get algorithm name
    algorithm = list(config._as_sensor_algs[as_type].keys())[algorithm_idx]
    comp_type = config._as_sensor_algs[as_type][algorithm][0]
    comp_lut = config._as_sensor_algs[as_type][algorithm][1]

    # Retrieve calibration data - verify it's all float
    cal_data = config.calibrations[kwargs['id']]
    for item in cal_data: cal_data[item] = float(cal_data[item])

    # Compensate electronic zero
    df['we_t'] = df[kwargs['we']] - (cal_data['we_electronic_zero_mv'] / 1000) # in V
    df['ae_t'] = df[kwargs['ae']] - (cal_data['ae_electronic_zero_mv'] / 1000) # in V

    # Get requested temperature
    df['t'] = df[kwargs['t']]

    # Temperature compensation
    df[comp_type] = df.apply(lambda x: comp_t(x, comp_lut), axis=1) # temperature correction factor

    # Algorithm selection
    if algorithm == 1:
        df['we_c'] = df.apply(lambda x: alg_1(x, cal_data), axis=1) # in V
    elif algorithm == 2:
        df['we_c'] = df.apply(lambda x: alg_2(x, cal_data), axis=1) # in V
    elif algorithm == 3:
        df['we_c'] = df.apply(lambda x: alg_3(x, cal_data), axis=1) # in V
    elif algorithm == 4:
        df['we_c'] = df.apply(lambda x: alg_4(x, cal_data), axis=1) # in V

    # Verify if it has NO2 cross-sensitivity
    if cal_data['we_cross_sensitivity_no2_mv_ppb'] != float(0):
        df['we_no2_eq'] = df.apply(lambda x: reverse_no2(x, cal_data), axis=1) # in V
        df['we_c'] -= df['we_no2_eq'] # in V

    # Calculate sensor concentration
    df['conc'] = df['we_c'] / (cal_data['we_sensitivity_mv_ppb'] / 1000.0) # in ppb

    return df['conc']
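# Worked example (illustrative numbers, not a real calibration) of the simplest
# AAN 803-04 correction used above: algorithm 1 subtracts the temperature-scaled
# auxiliary electrode from the working electrode, and the result is divided by the
# sensitivity to obtain ppb.
def _example_aan_803_04_alg_1():
    we_t = 0.015   # working electrode minus electronic zero, in V
    ae_t = 0.004   # auxiliary electrode minus electronic zero, in V
    n_t = 1.1      # temperature correction factor from the LUT
    we_sensitivity_mv_ppb = 0.25

    we_c = we_t - n_t * ae_t                        # corrected WE, in V
    conc = we_c / (we_sensitivity_mv_ppb / 1000.0)  # in ppb
    return conc                                     # ~42.4 ppb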
def load(self, options=None, path='', convert_units=True, only_unprocessed=False,
         max_amount=None, follow_defaults=False):
    '''
    Loads the device with some options

    Parameters:
    -----------
    options: dict()
        Default: None
        options['min_date'] = date to load data from
            Default to device min_date (from blueprint or test)
        options['max_date'] = date to load data to
            Default to device max_date (from blueprint or test)
        options['clean_na'] = clean na (drop_na, fill_na or None)
            Default to device clean_na (from blueprint or test)
        options['frequency'] = frequency to load data at in pandas format
            Default to device frequency (from blueprint or test) or '1Min'
    path: String
        Default: ''
        Path where the csv file is, if any. Normally not needed to be provided,
        only for internal usage
    convert_units: bool
        Default: True
        Convert units for channels based on config._channel_lut
    only_unprocessed: bool
        Default: False
        Loads only unprocessed data
    max_amount: int
        Default: None
        Trim dataframe to this amount for processing and forwarding purposes
    follow_defaults: bool
        Default: False
        Use defaults from config._csv_defaults for loading

    Returns
    ----------
        True if loaded correctly
    '''
    # Add test overrides if we have them, otherwise set device defaults
    if options is not None: self.check_overrides(options)
    else: self.check_overrides()

    try:
        if self.source == 'csv':
            if follow_defaults:
                index_name = config._csv_defaults['index_name']
                sep = config._csv_defaults['sep']
                skiprows = config._csv_defaults['skiprows']
            else:
                index_name = self.sources[self.source]['index']
                sep = self.sources[self.source]['sep']
                skiprows = self.sources[self.source]['header_skip']

            # Here we don't use tzaware because we only load preprocessed data
            self.readings = self.readings.combine_first(
                read_csv_file(file_path=join(path, self.processed_data_file),
                              timezone=self.timezone,
                              frequency=self.options['frequency'],
                              clean_na=self.options['clean_na'],
                              index_name=index_name,
                              sep=sep,
                              skiprows=skiprows))

            if self.readings is not None:
                self.__convert_names__()

        elif 'api' in self.source:
            # Get device location
            # Location data should be standard for each new device
            self.api_device.get_device_lat_long()
            self.api_device.get_device_alt()

            self.location = {
                'longitude': self.api_device.long,
                'latitude': self.api_device.lat,
                'altitude': self.api_device.alt
            }

            self.timezone = self.api_device.get_device_timezone()

            if path == '':
                # Not cached case
                if only_unprocessed:
                    # Override dates for post-processing
                    if self.latest_postprocessing is not None:
                        hw_latest_postprocess = localise_date(
                            self.latest_postprocessing, 'UTC').strftime('%Y-%m-%dT%H:%M:%S')
                        # Override min loading date
                        self.options['min_date'] = hw_latest_postprocess

                df = self.api_device.get_device_data(
                    self.options['min_date'], self.options['max_date'],
                    self.options['frequency'], self.options['clean_na'])

                # API Device is not aware of other csv index data, so make it here
                if 'csv' in self.sources and df is not None:
                    df = df.reindex(df.index.rename(self.sources['csv']['index']))

                # Combine it with readings if possible
                if df is not None:
                    self.readings = self.readings.combine_first(df)

            else:
                # Cached case
                self.readings = self.readings.combine_first(
                    read_csv_file(join(path, str(self.id) + '.csv'),
                                  self.timezone, self.options['frequency'],
                                  self.options['clean_na'],
                                  self.sources['csv']['index']))

    except FileNotFoundError:
        # Handle error
        if 'api' in self.source:
            std_out(f'No cached data file found for device {self.id} in {path}. Moving on', 'WARNING')
        elif 'csv' in self.source:
            std_out(f'File not found for device {self.id} in {path}', 'ERROR')

        self.loaded = False
    except:
        print_exc()
        self.loaded = False
    else:
        if self.readings is not None:
            self.__check_sensors__()
            if not self.readings.empty:
                if max_amount is not None:
                    std_out(f'Trimming dataframe to {max_amount} rows')
                    self.readings = self.readings.dropna(axis=0, how='all').head(max_amount)
                # Only add metrics if there is something that can be potentially processed
                self.__fill_metrics__()
                self.loaded = True
                if convert_units: self.__convert_units__()
            else:
                std_out('Empty dataframe in readings', 'WARNING')
    finally:
        self.processed = False
        return self.loaded
def read_csv_file(file_path, timezone, frequency, clean_na=None, index_name='',
                  skiprows=None, sep=',', encoding='utf-8', tzaware=True):
    """
    Reads a csv file and adds cleaning, localisation and resampling and
    puts it into a pandas dataframe

    Parameters
    ----------
        file_path: String
            File path for csv file
        timezone: String
            Time zone for the csv file
        frequency: String
            Frequency in pandas format to resample the data to
        clean_na: String or None
            None
            Whether to perform clean_na or not. Either None, 'fill' or 'drop'
        index_name: String
            ''
            Name of the column to set an index in the dataframe
        skiprows: list or None
            None
            List of rows to skip (same as skiprows in pandas.read_csv)
        sep: String
            ','
            Separator (same as sep in pandas.read_csv)
        encoding: String
            'utf-8'
            Encoding of the csv file
        tzaware: bool
            True
            Whether the resulting index should be timezone-aware

    Returns
    -------
        Pandas dataframe
    """
    # Read pandas dataframe
    df = read_csv(file_path, verbose=False, skiprows=skiprows, sep=sep,
                  encoding=encoding, encoding_errors='ignore')

    flag_found = False
    if type(index_name) == str:
        # Single joint index
        for column in df.columns:
            if index_name in column:
                df = df.set_index(column)
                flag_found = True
                break
    elif type(index_name) == list:
        # Composite index (for instance, DATE and TIME in different columns)
        for iname in index_name:
            if iname not in df.columns:
                std_out(f'{iname} not found in columns', 'ERROR')
                return None
        joint_index_name = '_'.join(index_name)
        df[joint_index_name] = df[index_name].agg(' '.join, axis=1)
        df = df.set_index(joint_index_name)
        df.drop(index_name, axis=1, inplace=True)
        flag_found = True

    if not flag_found:
        std_out('Index not found. Cannot reindex', 'ERROR')
        return None

    # Set index
    df.index = localise_date(df.index, timezone, tzaware=tzaware)
    # Remove duplicates
    df = df[~df.index.duplicated(keep='first')]
    # Sort index
    df.sort_index(inplace=True)
    # Drop unnecessary columns
    df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True)
    # Check for weird things in the data
    # df = df.apply(to_numeric, errors='coerce')
    df = df.astype(float, errors='ignore')
    # Resample
    df = df.resample(frequency).mean()
    # Remove na
    df = clean(df, clean_na, how='all')

    return df
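# Illustration only: how the composite index branch above joins two columns (here the
# hypothetical 'DATE' and 'TIME') into a single index before localisation.
def _example_composite_index():
    from pandas import DataFrame

    df = DataFrame({'DATE': ['2021-01-01', '2021-01-01'],
                    'TIME': ['10:00:00', '10:01:00'],
                    'NOISE_A': [45.2, 47.8]})
    index_name = ['DATE', 'TIME']
    joint_index_name = '_'.join(index_name)
    df[joint_index_name] = df[index_name].agg(' '.join, axis=1)
    df = df.set_index(joint_index_name)
    df.drop(index_name, axis=1, inplace=True)
    return df  # indexed by 'DATE_TIME', e.g. '2021-01-01 10:00:00'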
def get_device_data(self, start_date=None, end_date=None, frequency='1Min', clean_na=None):

    std_out(f'Requesting data from SC API')
    std_out(f'Device ID: {self.id}')

    rollup = self.convert_rollup(frequency)
    std_out(f'Using rollup: {rollup}')

    # Make sure we have everything we need beforehand
    self.get_device_sensors()
    self.get_device_location()
    self.get_device_last_reading()
    self.get_device_added_at()
    self.get_kit_ID()

    if self.location is None: return None

    # Check start date
    if start_date is not None:
        start_date = localise_date(
            to_datetime(start_date, format='%Y-%m-%dT%H:%M:%SZ'), self.location)
        std_out(f'Min Date: {start_date}')

    # Check end date
    if end_date is not None:
        end_date = localise_date(
            to_datetime(end_date, format='%Y-%m-%dT%H:%M:%SZ'), self.location)
        std_out(f'Max Date: {end_date}')

    # Print stuff
    std_out('Kit ID: {}'.format(self.kit_id))
    std_out(f'Device timezone: {self.location}')

    if not self.sensors.keys():
        std_out(f'Device is empty')
        return None
    else:
        std_out(f'Sensor IDs: {list(self.sensors.keys())}')

    df = DataFrame()

    # Get the readings for each sensor
    for sensor_id in self.sensors.keys():

        # Request sensor per ID
        request = self.API_BASE_URL + '{}/readings?'.format(self.id)

        if start_date is None: request += 'from=2001-01-01'
        elif end_date is not None:
            if start_date > end_date: request += 'from=2001-01-01'
            else:
                request += f'from={start_date}'
                request += f'&to={end_date}'

        request += f'&rollup={rollup}'
        request += f'&sensor_id={sensor_id}'
        request += '&function=avg'

        # Make request
        sensor_req = get(request)
        flag_error = False
        try:
            sensorjson = sensor_req.json()
        except:
            print_exc()
            std_out('Problem with json data from API', 'ERROR')
            flag_error = True
            continue

        if 'readings' not in sensorjson.keys():
            std_out(f'No readings key in request for sensor: {sensor_id}', 'ERROR')
            flag_error = True
            continue
        elif sensorjson['readings'] == []:
            std_out(f'No data in request for sensor: {sensor_id}', 'WARNING')
            flag_error = True
            continue

        if flag_error: continue

        # Put the sensor data into the main dataframe
        try:
            dfsensor = DataFrame(sensorjson['readings']).set_index(0)
            dfsensor.columns = [self.sensors[sensor_id]]
            dfsensor.index = localise_date(dfsensor.index, self.location)
            dfsensor.sort_index(inplace=True)
            dfsensor = dfsensor[~dfsensor.index.duplicated(keep='first')]

            # Drop unnecessary columns
            dfsensor.drop([i for i in dfsensor.columns if 'Unnamed' in i], axis=1, inplace=True)
            # Check for weird things in the data
            dfsensor = dfsensor.apply(to_numeric, errors='coerce')
            # Resample
            dfsensor = dfsensor.resample(frequency).mean()

            df = df.combine_first(dfsensor)
        except:
            print_exc()
            std_out('Problem with sensor data from API', 'ERROR')
            flag_error = True
            continue

    try:
        df = df.reindex(df.index.rename('Time'))
        df = clean(df, clean_na, how='all')
        self.data = df
    except:
        std_out('Problem closing up the API dataframe', 'ERROR')
        return None

    if not flag_error:
        std_out(f'Device {self.id} loaded successfully from API', 'SUCCESS')

    return self.data
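# Illustration only: the readings URL that the loop above composes for one sensor.
# The device id (10712), sensor id (53), rollup ('1m') and dates are made-up values;
# the real rollup string comes from self.convert_rollup(frequency).
def _example_readings_request():
    API_BASE_URL = 'https://api.smartcitizen.me/v0/devices/'
    request = API_BASE_URL + '{}/readings?'.format(10712)
    request += 'from=2021-01-01'
    request += '&to=2021-01-02'
    request += '&rollup=1m'
    request += '&sensor_id=53'
    request += '&function=avg'
    return request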
def alphasense_803_04(dataframe, **kwargs):
    """
    Calculates pollutant concentration based on 4 electrode sensor readings (mV)
    and calibration ID. It adds a configurable background concentration and a
    correction based on AAN 803-04
    Parameters
    ----------
        from_date: string, datetime object
            Date from which this calibration id is valid from
        to_date: string, datetime object
            Date until which this calibration id is valid to. None if current
        alphasense_id: string
            Alphasense sensor ID (must be in calibrations.json)
        we: string
            Name of working electrode found in dataframe (V)
        ae: string
            Name of auxiliary electrode found in dataframe (V)
        t: string
            Name of reference temperature
        use_alternative: boolean
            Default: False
            Use alternative algorithm as shown in the AAN
        timezone: string
            Valid timezone for date localisation
    Returns
    -------
        calculation of pollutant in ppb
    """
    def comp_t(x, comp_lut):
        if isnull(x['t']): return None

        # Below min temperature, we saturate
        if x['t'] < as_t_comp[0]: return comp_lut[0]
        # Over max temperature, we saturate
        if x['t'] > as_t_comp[-1]: return comp_lut[-1]

        # Otherwise, we interpolate between the two nearest points of the LUT
        idx_2 = next(axis[0] for axis in enumerate(as_t_comp) if axis[1] > x['t'])
        idx_1 = idx_2 - 1

        delta_y = comp_lut[idx_2] - comp_lut[idx_1]
        delta_x = as_t_comp[idx_2] - as_t_comp[idx_1]

        return comp_lut[idx_1] + (x['t'] - as_t_comp[idx_1]) * delta_y / delta_x

    # Check inputs
    flag_error = False
    if 'we' not in kwargs: flag_error = True
    if 'ae' not in kwargs: flag_error = True
    if 'alphasense_id' not in kwargs: flag_error = True
    if 't' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    if kwargs['alphasense_id'] is None:
        std_out(f"Empty ID. Ignoring", 'WARNING')
        return None

    # Get Sensor data
    if kwargs['alphasense_id'] not in config.calibrations:
        std_out(f"Sensor {kwargs['alphasense_id']} not in calibration data", 'ERROR')
        return None

    # Process input dates
    if 'from_date' not in kwargs: from_date = None
    else:
        if 'timezone' not in kwargs:
            std_out('Cannot localise date without timezone')
            return None
        from_date = localise_date(kwargs['from_date'], kwargs['timezone'])

    if 'to_date' not in kwargs: to_date = None
    else:
        if 'timezone' not in kwargs:
            std_out('Cannot localise date without timezone')
            return None
        to_date = localise_date(kwargs['to_date'], kwargs['timezone'])

    # Whether or not to clip negative concentrations (defaults to False if not requested)
    avoid_negative_conc = kwargs.get('avoid_negative_conc', False)

    # Make copy
    df = dataframe.copy()
    # Trim data
    if from_date is not None: df = df[df.index > from_date]
    if to_date is not None: df = df[df.index < to_date]

    # Get sensor type
    as_type = as_sensor_codes[kwargs['alphasense_id'][0:3]]

    # Use alternative method or not
    if 'use_alternative' not in kwargs: kwargs['use_alternative'] = False
    if kwargs['use_alternative']: algorithm_idx = 1
    else: algorithm_idx = 0

    # Get algorithm name
    algorithm = list(as_sensor_algs[as_type].keys())[algorithm_idx]
    comp_type = as_sensor_algs[as_type][algorithm][0]
    comp_lut = as_sensor_algs[as_type][algorithm][1]

    # Retrieve calibration data - verify it's all float
    cal_data = config.calibrations[kwargs['alphasense_id']]
    for item in cal_data:
        try:
            cal_data[item] = float(cal_data[item])
        except:
            std_out(f"Alphasense calibration data for {kwargs['alphasense_id']} is not correct", 'ERROR')
            std_out(f'Error on {item}: \'{cal_data[item]}\'', 'ERROR')
            return

    # Remove spurious voltages (0V < electrode < 5V)
    for electrode in ['we', 'ae']:
        subkwargs = {'name': kwargs[electrode],
                     'limits': (0, 5), # In V
                     'window_size': None}
        df[f'{electrode}_clean'] = clean_ts(df, **subkwargs)

    # Compensate electronic zero
    df['we_t'] = df['we_clean'] - (cal_data['we_electronic_zero_mv'] / 1000) # in V
    df['ae_t'] = df['ae_clean'] - (cal_data['ae_electronic_zero_mv'] / 1000) # in V

    # Get requested temperature
    df['t'] = df[kwargs['t']]

    # Temperature compensation - done line by line as it has special conditions
    df[comp_type] = df.apply(lambda x: comp_t(x, comp_lut), axis=1) # temperature correction factor

    # Algorithm selection (result in V)
    if algorithm == 1:
        df['we_c'] = df['we_t'] - df['n_t'] * df['ae_t']
    elif algorithm == 2:
        df['we_c'] = df['we_t'] - df['k_t'] * (cal_data['we_sensor_zero_mv'] / cal_data['ae_sensor_zero_mv']) * df['ae_t']
    elif algorithm == 3:
        df['we_c'] = df['we_t'] - (cal_data['we_sensor_zero_mv'] - cal_data['ae_sensor_zero_mv']) / 1000.0 - df['kp_t'] * df['ae_t']
    elif algorithm == 4:
        df['we_c'] = df['we_t'] - cal_data['we_sensor_zero_mv'] / 1000.0 - df['kpp_t']

    # TODO - Check if df['we_c'] needs to always be positive and avoid spurious data

    # Verify if it has NO2 cross-sensitivity (in V)
    if cal_data['we_cross_sensitivity_no2_mv_ppb'] != float(0):
        df['we_no2_eq'] = df['NO2'] * cal_data['we_cross_sensitivity_no2_mv_ppb'] / 1000.0
        df['we_c'] -= df['we_no2_eq'] # in V

    # Calculate sensor concentration
    df['conc'] = df['we_c'] / (cal_data['we_sensitivity_mv_ppb'] / 1000.0) # in ppb

    if avoid_negative_conc:
        df['conc'].clip(lower=0, inplace=True)

    return df['conc']
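# Worked example (illustrative numbers, not a real calibration) of the NO2
# cross-sensitivity removal and the optional negative clipping performed above,
# e.g. for an OX sensor that also responds to NO2.
def _example_no2_cross_sensitivity():
    we_c = 0.0106                              # corrected WE, in V
    no2_ppb = 12.0                             # NO2 channel, in ppb
    we_cross_sensitivity_no2_mv_ppb = 0.22
    we_sensitivity_mv_ppb = 0.3

    we_no2_eq = no2_ppb * we_cross_sensitivity_no2_mv_ppb / 1000.0  # in V
    we_c -= we_no2_eq
    conc = we_c / (we_sensitivity_mv_ppb / 1000.0)                  # in ppb
    return max(conc, 0)  # clip negatives when avoid_negative_conc is set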
def alphasense_pt1000(dataframe, **kwargs):
    """
    Calculates temperature in degC of a PT1000, given positive and negative voltage
    levels (in V), considering the negative PT1000 terminal is grounded
    Parameters
    ----------
        from_date: string, datetime object
            Date from which this calibration id is valid from
        to_date: string, datetime object
            Date until which this calibration id is valid to. None if current
        pt1000plus: string
            Name of PT1000+ found in dataframe (V)
        pt1000minus: string
            Name of PT1000- found in dataframe (V)
        timezone: string
            Valid timezone for date localisation
        afe_id: string
            Alphasense AFE ID (must be in calibrations.json)
    Returns
    -------
        Calculation of temperature in degC
    """
    # Check inputs
    flag_error = False
    if 'pt1000plus' not in kwargs: flag_error = True
    if 'pt1000minus' not in kwargs: flag_error = True
    if 'afe_id' not in kwargs: flag_error = True

    if flag_error:
        std_out('Problem with input data', 'ERROR')
        return None

    if kwargs['afe_id'] is None:
        std_out(f"Empty ID. Ignoring", 'WARNING')
        return None

    # Get Sensor data
    if kwargs['afe_id'] not in config.calibrations:
        std_out(f"AFE {kwargs['afe_id']} not in calibration data", 'ERROR')
        return None

    # Process input dates
    if 'from_date' not in kwargs: from_date = None
    else:
        if 'timezone' not in kwargs:
            std_out('Cannot localise date without timezone', 'ERROR')
            return None
        from_date = localise_date(kwargs['from_date'], kwargs['timezone'])

    if 'to_date' not in kwargs: to_date = None
    else:
        if 'timezone' not in kwargs:
            std_out('Cannot localise date without timezone', 'ERROR')
            return None
        to_date = localise_date(kwargs['to_date'], kwargs['timezone'])

    # Retrieve calibration data - verify it's all float
    cal_data = config.calibrations[kwargs['afe_id']]
    for item in cal_data:
        try:
            cal_data[item] = float(cal_data[item])
        except:
            std_out(f"Alphasense calibration data for {kwargs['afe_id']} is not correct", 'ERROR')
            std_out(f'Error on {item}: \'{cal_data[item]}\'', 'ERROR')
            return

    # Make copy
    df = dataframe.copy()
    # Trim data
    if from_date is not None: df = df[df.index > from_date]
    if to_date is not None: df = df[df.index < to_date]

    # Calculate temperature
    df['v20'] = cal_data['v20'] - (cal_data['t20'] - 20.0) / 1000.0
    df['temp'] = (df[kwargs['pt1000plus']] - df['v20']) * 1000.0 + 20.0 # in degC

    return df['temp']
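# Worked example (illustrative numbers, not a real calibration) of the PT1000 conversion
# above: v20 is the voltage expected at 20 degC, shifted by the calibrated t20 offset,
# and the slope is 1 degC per mV.
def _example_pt1000_temperature():
    v20 = 1.0328        # calibration value, in V
    t20 = 23.5          # calibration value, in degC
    pt1000plus = 1.045  # measured PT1000+ voltage, in V

    v20_corr = v20 - (t20 - 20.0) / 1000.0
    temp = (pt1000plus - v20_corr) * 1000.0 + 20.0  # in degC
    return temp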
def read_csv_file(file_path, location, frequency, clean_na = None, index_name = '',
                  skiprows = None, sep = ',', encoding = 'utf-8'):
    """
    Reads a csv file and adds cleaning, localisation and resampling and
    puts it into a pandas dataframe

    Parameters
    ----------
        file_path: String
            File path for csv file
        location: String
            Time zone for the csv file
        frequency: String
            Frequency in pandas format to resample the data to
        clean_na: String or None
            None
            Whether to perform clean_na or not. Either None, 'fill' or 'drop'
        index_name: String
            ''
            Name of the column to set an index in the dataframe
        skiprows: list or None
            None
            List of rows to skip (same as skiprows in pandas.read_csv)
        sep: String
            ','
            Separator (same as sep in pandas.read_csv)
        encoding: String
            'utf-8'
            Encoding of the csv file

    Returns
    -------
        Pandas dataframe
    """
    # Read pandas dataframe
    df = read_csv(file_path, verbose = False, skiprows = skiprows, sep = sep, encoding = encoding)

    flag_found = False
    for column in df.columns:
        if index_name in column:
            df = df.set_index(column)
            flag_found = True
            break

    if not flag_found:
        std_out('Index not found. Cannot reindex', 'ERROR')
        return None

    # Set index
    df.index = localise_date(df.index, location)
    # Remove duplicates
    df = df[~df.index.duplicated(keep='first')]
    # Sort index
    df.sort_index(inplace=True)
    # Drop unnecessary columns
    df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True)
    # Check for weird things in the data
    df = df.apply(to_numeric, errors='coerce')
    # Resample
    df = df.resample(frequency).mean()
    # Remove na
    df = clean(df, clean_na, how = 'all')

    return df
def process(self, only_new=False, lmetrics=None):
    '''
    Processes device metrics, either added by the blueprint definition
    or via Device.add_metric(). See help(Device.add_metric) for more
    information about the definition of the metrics to be added

    Parameters
    ----------
    only_new: boolean
        False
        To process or not the existing channels in the Device.readings that
        are defined in Device.metrics
    lmetrics: list
        None
        List of metrics to process. If None, processes all

    Returns
    ----------
        boolean
        True if processed ok, False otherwise
    '''
    process_ok = True

    if 'metrics' not in vars(self):
        std_out(f'Device {self.id} has nothing to process. Skipping', 'WARNING')
        return process_ok

    std_out('---------------------------')
    std_out(f'Processing device {self.id}')

    if lmetrics is None: metrics = self.metrics
    else: metrics = dict([(key, self.metrics[key]) for key in lmetrics])

    for metric in metrics:
        std_out(f'Processing {metric}')

        if only_new and metric in self.readings:
            std_out(f'Skipping. Already in device')
            continue

        # Check if the metric contains a custom from_list
        if 'from_list' in metrics[metric]:
            lazy_name = metrics[metric]['from_list']
        else:
            lazy_name = f"scdata.device.process.{metrics[metric]['process']}"

        try:
            funct = LazyCallable(lazy_name)
        except ModuleNotFoundError:
            print_exc()
            process_ok &= False
            std_out('Problem adding lazy callable to metrics list', 'ERROR')
            return False

        args, kwargs = list(), dict()
        if 'args' in metrics[metric]: args = metrics[metric]['args']
        if 'kwargs' in metrics[metric]: kwargs = metrics[metric]['kwargs']

        try:
            self.readings[metric] = funct(self.readings, *args, **kwargs)
        except KeyError:
            print_exc()
            std_out('Metric args not in dataframe', 'ERROR')

        if metric in self.readings: process_ok &= True

    if process_ok:
        # Latest postprocessing to latest readings
        if self.api_device.get_postprocessing_info() is not None:
            latest_postprocessing = localise_date(self.readings.index[-1],
                                                  self.location).strftime('%Y-%m-%dT%H:%M:%S')
            self.api_device.postprocessing_info['latest_postprocessing'] = latest_postprocessing

    return process_ok
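# Illustration only: the shape of a metric definition that process() resolves above.
# The metric name, channel names and alphasense_id are made-up values; 'process' must
# name a callable in scdata.device.process (or 'from_list' any importable path).
_example_metric = {
    'CO_CONC': {
        'process': 'alphasense_803_04',
        'kwargs': {
            'alphasense_id': '162031234',
            'we': 'ADC_48_0',
            'ae': 'ADC_48_1',
            't': 'ASPT1000',
            'timezone': 'Europe/Madrid'
        }
    }
}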
def load(self, options=None, path=None, convert_units=True):
    '''
    Loads the device with some options

    Parameters:
    -----------
    options: dict()
        Default: None
        options['min_date'] = date to load data from
            Default to device min_date (from blueprint or test)
        options['max_date'] = date to load data to
            Default to device max_date (from blueprint or test)
        options['clean_na'] = clean na (drop_na, fill_na or None)
            Default to device clean_na (from blueprint or test)
        options['frequency'] = frequency to load data at in pandas format
            Default to device frequency (from blueprint or test) or '1Min'
    path: String
        Default: None
        Path where the csv file is, if any. Normally not needed to be provided,
        only for internal usage
    convert_units: bool
        Default: True
        Convert units for channels based on config._channel_lut

    Returns
    ----------
        True if loaded correctly
    '''
    # Add test overrides if we have them, otherwise set device defaults
    if options is not None: self.check_overrides(options)
    else: self.check_overrides()

    try:
        if self.source == 'csv':
            self.readings = self.readings.combine_first(
                read_csv_file(join(path, self.processed_data_file), self.location,
                              self.options['frequency'], self.options['clean_na'],
                              self.sources[self.source]['index']))
            if self.readings is not None:
                self.__convert_names__()

        elif 'api' in self.source:
            # Get device location
            self.location = self.api_device.get_device_location()

            if path is None:
                if self.load_postprocessing_info():
                    # Override dates for post-processing
                    if self.latest_postprocessing is not None:
                        hw_latest_post = localise_date(self.latest_postprocessing, self.location)
                        # Override min processing date
                        self.options['min_date'] = hw_latest_post

                df = self.api_device.get_device_data(
                    self.options['min_date'], self.options['max_date'],
                    self.options['frequency'], self.options['clean_na'])

                # API Device is not aware of other csv index data, so make it here
                if 'csv' in self.sources and df is not None:
                    df = df.reindex(df.index.rename(self.sources['csv']['index']))

                # Combine it with readings if possible
                if df is not None:
                    self.readings = self.readings.combine_first(df)

            else:
                # Cached case
                self.readings = self.readings.combine_first(
                    read_csv_file(join(path, str(self.id) + '.csv'), self.location,
                                  self.options['frequency'], self.options['clean_na'],
                                  self.sources['csv']['index']))

    except FileNotFoundError:
        # Handle error
        if 'api' in self.source:
            std_out(f'No cached data file found for device {self.id} in {path}. Moving on', 'WARNING')
        elif 'csv' in self.source:
            std_out(f'File not found for device {self.id} in {path}', 'ERROR')

        self.loaded = False
    except:
        print_exc()
        self.loaded = False
    else:
        if self.readings is not None:
            self.__check_sensors__()
            if self.load_postprocessing_info() is not None:
                self.__fill_metrics__()
            if not self.readings.empty:
                self.loaded = True
                if convert_units: self.__convert_units__()
    finally:
        return self.loaded
def dispersion_analysis(self, devices=None, min_date=None, max_date=None,
                        timezone='Europe/Madrid', smooth_window=5):
    '''
    Creates channels on a new dataframe for each device/channel combination,
    and calculates the average/std of each in a point-by-point fashion

    Parameters:
    -----------
    devices: list
        Default: None
        If the list of devices is None, it will use all devices in self.devices
    min_date: String
        Default: None
        Minimum date from which to perform the analysis
    max_date: String
        Default: None
        Maximum date up to which to perform the analysis
    timezone: String
        Default: 'Europe/Madrid'
        Timezone in which to localise the dates
    smooth_window: int
        Default: 5
        If not None, performs smoothing of the channels with rolling average

    Returns:
    ---------
        If devices is None, the group dispersion summary; otherwise a dict
        with the average standard deviation of each common channel
    '''
    dispersion_df = DataFrame()

    # Get common channels for this group
    if devices is not None:
        common_ch = self.get_common_channels(devices=devices)
        target_devices = devices
    else:
        # Default to all devices in the test when none are given (see docstring)
        if len(self.common_channels) == 0: self.get_common_channels()
        common_ch = self.common_channels
        target_devices = list(self.devices.keys())

    # Localise dates
    min_date = localise_date(min_date, timezone)
    max_date = localise_date(max_date, timezone)

    # Calculate the dispersion for the sensors present in the dataset
    warning = False

    for channel in common_ch:
        columns = list()

        if channel in config._dispersion['ignore_channels']: continue

        for device in target_devices:
            if channel in self.devices[device].readings.columns and \
                    len(self.devices[device].readings.loc[:, channel]) > 0:
                # Important to resample and bfill for unmatching measures
                if smooth_window is not None:
                    channel_new = self.devices[device].readings[channel].bfill().rolling(
                        window=smooth_window).mean()
                    dispersion_df[channel + '-' + device] = channel_new[channel_new > 0]
                else:
                    dispersion_df[channel + '-' + device] = self.devices[device].readings[
                        channel].resample('1Min').bfill()

                columns.append(channel + '-' + device)
            else:
                std_out(f'Device {device} does not contain {channel}', 'WARNING')
                warning = True

        dispersion_df.index = localise_date(dispersion_df.index, timezone)

        # Trim dataset to min and max dates (normally these tests are carried out
        # with _minutes_ of differences)
        if min_date is not None: dispersion_df = dispersion_df[dispersion_df.index > min_date]
        if max_date is not None: dispersion_df = dispersion_df[dispersion_df.index < max_date]

        # Calculate Metrics
        dispersion_df[channel + '_AVG'] = dispersion_df.loc[:, columns].mean(skipna=True, axis=1)
        dispersion_df[channel + '_STD'] = dispersion_df.loc[:, columns].std(skipna=True, axis=1)

    if not warning:
        std_out(f'All devices have the provided channels list recorded')
    else:
        std_out(f'Missing channels, review data', 'WARNING')

    if devices is None:
        self.dispersion_df = dispersion_df
        return self.dispersion_summary

    group_dispersion_summary = dict()

    for channel in common_ch:
        if channel in config._dispersion['ignore_channels']: continue
        # Calculate
        group_dispersion_summary[channel] = dispersion_df[channel + '_STD'].mean()

    return group_dispersion_summary
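# Usage sketch (hypothetical test object and device names): running the analysis above
# on a subset of devices returns a dict of per-channel mean standard deviations, while
# devices=None stores the dispersion dataframe and returns the group summary instead.
def _example_dispersion_usage(test):
    summary = test.dispersion_analysis(devices=['14638', '14639'],
                                       min_date='2021-10-27 09:00:00',
                                       max_date='2021-10-27 12:00:00',
                                       smooth_window=5)
    # Channels sorted by dispersion, most dispersed first
    return sorted(summary, key=summary.get, reverse=True)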