def get_uuid_for_subj_id_airspeck_personal(subject_id, timestamp=None):
    """Look up the Airspeck UUID stored in the datastore for a subject.

    The kind 'MobileAirspeckManualUpload' is queried first; if it yields
    nothing, 'MobileAirspeck' is queried with the same filters. In both cases
    only the first entity, ordered by 'timestamp', is fetched.

    Args:
        subject_id: Value matched against the 'subject_id' property.
        timestamp: Optional datetime. When given, only entities whose
            'timestamp' lies in [timestamp - 1h, timestamp + 1h) are
            considered.

    Returns:
        The 'airspeck_uuid' of the matching entity with all ':' removed. If
        that string is exactly 21 characters long, its first five characters
        are dropped. Returns None when neither kind has a matching entity.
    """
    client = get_datastore_client()

    filters = [('subject_id', '=', subject_id)]
    if timestamp is not None:
        window = timedelta(hours=1)
        filters += [('timestamp', '>=', timestamp - window),
                    ('timestamp', '<', timestamp + window)]

    # Manual uploads take precedence; automatic uploads are the fallback.
    matches = []
    for kind in ('MobileAirspeckManualUpload', 'MobileAirspeck'):
        lookup = client.query(kind=kind, filters=filters, order=['timestamp'])
        matches = list(lookup.fetch(1))
        if matches:
            break

    if not matches:
        return None

    uuid = matches[0]['airspeck_uuid'].replace(':', '')
    print("Extracted UUID {} (Length {})".format(uuid, len(uuid)))
    # 21-character identifiers are returned without their first five characters.
    return uuid[5:] if len(uuid) == 21 else uuid
def download_respeck_minute_from_datastore(subject_id,
                                           out_filepath,
                                           timeframe,
                                           project_name,
                                           upload_type='automatic'):
    """Write a subject's per-minute Respeck entities from the datastore to CSV.

    Args:
        subject_id: Value matched against the 'subject_id' property.
        out_filepath: Destination path of the CSV file.
        timeframe: Two-element sequence (start, end) of datetimes. If the
            start is naive, both are localised with the project's timezone
            before being converted to naive UTC; otherwise both are converted
            from their own timezone. The end is exclusive.
        project_name: Key into project_mapping; element [1] of the mapped
            value is used as the timezone name.
        upload_type: 'automatic' (kind 'RespeckAverage') or 'manual'
            (kind 'RespeckMinuteManualUpload').

    The CSV has the columns timestamp, breathing_rate, sd_br, activity_level,
    activity_type and step_count, and is written without an index column.
    """
    assert upload_type in [
        'automatic', 'manual'
    ], "Data type has to be either automatic or manual"

    project_tz = timezone(project_mapping[project_name][1])
    # Whether to localise is decided by the start of the timeframe only.
    needs_localising = timeframe[0].tzinfo is None

    def _as_naive_utc(moment):
        if needs_localising:
            moment = project_tz.localize(moment)
        return moment.astimezone(timezone('UTC')).replace(tzinfo=None)

    localised_start = _as_naive_utc(timeframe[0])
    localised_end = _as_naive_utc(timeframe[1])

    kind = ('RespeckAverage'
            if upload_type == 'automatic' else 'RespeckMinuteManualUpload')

    respeck_query = get_datastore_client().query(
        kind=kind,
        filters=[('subject_id', '=', subject_id),
                 ('timestamp', '>=', localised_start),
                 ('timestamp', '<', localised_end)],
        order=['timestamp'])
    rows = [dict(entity) for entity in respeck_query.fetch()]

    frame = pd.DataFrame(rows,
                         columns=[
                             'timestamp', 'breathing_rate', 'sd_br',
                             'activity', 'act_type', 'step_count'
                         ])
    frame = frame.rename(columns={
        'activity': 'activity_level',
        'act_type': 'activity_type'
    })

    # The timezone is only stripped when there are rows to operate on.
    if len(frame) > 0:
        frame.loc[:, 'timestamp'] = frame['timestamp'].dt.tz_localize(None)

    frame.to_csv(out_filepath, index=False)
def download_airspeck_from_datastore(subject_id, out_filepath, project_name,
                                     timeframe, upload_type):
    """Write a subject's personal Airspeck entities from the datastore to CSV.

    Args:
        subject_id: Value matched against the 'subject_id' property.
        out_filepath: Destination path; the file is opened in "w" mode.
        project_name: Key into project_mapping; element [1] of the mapped
            value is used as the timezone name.
        timeframe: Two-element sequence (start, end) of datetimes. If the
            start is naive, both are localised with the project's timezone
            and converted to naive UTC; otherwise they are passed to the
            query unchanged. The end is exclusive.
        upload_type: Must be 'automatic' (enforced by an assertion).

    The header line is written before the query is issued, followed by one
    line per entity in ascending 'timestamp' order.
    """
    assert upload_type == 'automatic', "Only automatic upload type implemented so far. Passed {}".format(
        upload_type)

    client = get_datastore_client()
    project_tz = timezone(project_mapping[project_name][1])

    if timeframe[0].tzinfo is None:
        utc_zone = timezone('UTC')
        utc_start = project_tz.localize(
            timeframe[0]).astimezone(utc_zone).replace(tzinfo=None)
        utc_end = project_tz.localize(
            timeframe[1]).astimezone(utc_zone).replace(tzinfo=None)
    else:
        utc_start, utc_end = timeframe[0], timeframe[1]

    kind = 'MobileAirspeck' if upload_type == 'automatic' else 'MobileAirspeckSDCard'

    with open(out_filepath, "w") as out:
        out.write(
            "timestamp,pm1,pm2_5,pm10,bin0,bin1,bin2,bin3,bin4,bin5,bin6,bin7,bin8,bin9,bin10,bin11,bin12,"
            "bin13,bin14,bin15,temperature,humidity,luxLevel,motion,battery,gpsLatitude,gpsLongitude,"
            "gpsAccuracy\n")

        entities = client.query(kind=kind,
                                filters=[('subject_id', '=', subject_id),
                                         ('timestamp', '>=', utc_start),
                                         ('timestamp', '<', utc_end)],
                                order=['timestamp']).fetch()

        for entity in entities:
            # Assemble the row in column order: 4 leading values, 16 bins,
            # then 8 trailing values.
            cells = [
                entity['timestamp'].replace(tzinfo=None), entity['pm1'],
                entity['pm2_5'], entity['pm10']
            ]
            cells.extend(entity['bins'][i] for i in range(16))
            cells.extend([
                entity['temperature'], entity['humidity'], entity['lux'],
                entity['motion'], entity['battery'],
                entity['location'].latitude, entity['location'].longitude,
                entity['gps_accuracy']
            ])
            out.write(",".join("{}".format(cell) for cell in cells) + "\n")
def download_static_airspeck(subj_or_uuid,
                             sensor_label=None,
                             project_name=None,
                             overwrite_if_already_exists=False,
                             timeframe=None,
                             upload_type='automatic',
                             suffix_filename="",
                             filename=None,
                             subject_visit_number=None,
                             out_directory=None):
    """Write static Airspeck entities from the datastore to a CSV file.

    Args:
        subj_or_uuid: Identifier string. Length 6 is treated as a subject ID,
            length 16 as a sensor UUID.
        sensor_label: Label used in the default filename. Defaults to
            "<subject>(<visit>)" for a 6-character ID with a visit number,
            otherwise to subj_or_uuid.
        project_name: Key into project_mapping. When None it is looked up via
            get_project_for_subject, which is only attempted for
            6-character identifiers.
        overwrite_if_already_exists: When False and the output file already
            exists, nothing is downloaded.
        timeframe: Optional (start, end) datetimes; end is exclusive.
            Defaults to 2016-01-01 until 2100-01-01. If the start is naive,
            both are localised with the project's timezone and converted to
            naive UTC; otherwise they are used unchanged.
        upload_type: 'automatic' (kind 'StaticAirspeck') or 'sd_card'
            (kind 'StaticAirspeckSDCard').
        suffix_filename: Text inserted before ".csv" in the default filename.
        filename: Explicit output filename; overrides the default one.
        subject_visit_number: Optional visit number used in the default label.
        out_directory: Output directory, concatenated directly with the
            filename. Defaults to element [2] of the project's mapping.

    Raises:
        ValueError: If project_name is None and subj_or_uuid is not six
            characters long.
    """
    assert upload_type in [
        'automatic', 'sd_card'
    ], "upload_type has to be either 'automatic' or 'sd_card'"

    looks_like_subject = len(subj_or_uuid) == 6

    if project_name is None:
        if not looks_like_subject:
            raise ValueError(
                "When passing a UUID and not a subject ID, also specify a project_name so that the "
                "correct directory can be selected")
        project_name = get_project_for_subject(subj_or_uuid)

    if out_directory is None:
        out_directory = project_mapping[project_name][2]

    if sensor_label is None:
        if looks_like_subject and subject_visit_number is not None:
            sensor_label = "{}({})".format(subj_or_uuid, subject_visit_number)
        else:
            sensor_label = subj_or_uuid

    if filename is None:
        filename = "{}_static_airspeck_{}{}.csv".format(
            sensor_label, upload_type, suffix_filename)

    out_filepath = out_directory + filename

    if os.path.isfile(out_filepath) and not overwrite_if_already_exists:
        print('Skipping file as it already exists')
        return

    client = get_datastore_client()
    is_automatic = upload_type == 'automatic'

    with open(out_filepath, "w") as out:
        out.write(
            "timestamp,pm1,pm2_5,pm10,bin0,bin1,bin2,bin3,bin4,bin5,bin6,bin7,bin8,bin9,bin10,bin11,bin12,"
            "bin13,bin14,bin15,temperature,humidity,battery,no2_ae,no2_we,ox_ae,ox_we,"
            "gpsLatitude,gpsLongitude\n")

        # Did user pass timeframe? If not, load all data
        if timeframe is None:
            timeframe = [datetime(2016, 1, 1), datetime(2100, 1, 1)]

        project_tz = timezone(project_mapping[project_name][1])
        if timeframe[0].tzinfo is None:
            utc_zone = timezone('UTC')
            utc_start = project_tz.localize(
                timeframe[0]).astimezone(utc_zone).replace(tzinfo=None)
            utc_end = project_tz.localize(
                timeframe[1]).astimezone(utc_zone).replace(tzinfo=None)
        else:
            utc_start, utc_end = timeframe[0], timeframe[1]

        # The two kinds name their UUID property differently.
        if is_automatic:
            kind_name, uuid_property = 'StaticAirspeck', 'uuid'
        else:
            kind_name, uuid_property = 'StaticAirspeckSDCard', 'airspeck_uuid'
        id_name = uuid_property if len(subj_or_uuid) == 16 else 'subject_id'

        entities = client.query(kind=kind_name,
                                filters=[(id_name, '=', subj_or_uuid),
                                         ('timestamp', '>=', utc_start),
                                         ('timestamp', '<', utc_end)],
                                order=['timestamp']).fetch()

        for entity in entities:
            cells = [
                entity['timestamp'].replace(tzinfo=None), entity['pm1'],
                entity['pm2_5'], entity['pm10']
            ]
            cells.extend(entity['bins'][i] for i in range(16))
            cells.extend([
                entity['temperature'], entity['humidity'], entity['battery'],
                entity['no2_ae'], entity['no2_we'], entity['ox_ae'],
                entity['ox_we']
            ])
            # Automatic uploads nest the coordinates under 'location';
            # SD-card uploads store them as top-level properties.
            if is_automatic:
                cells.extend([
                    entity['location']['latitude'],
                    entity['location']['longitude']
                ])
            else:
                cells.extend([entity['latitude'], entity['longitude']])
            out.write(",".join("{}".format(cell) for cell in cells) + "\n")

    print('Done')
def get_calibration_factors_airspeck(subj_or_uuid,
                                     to_be_calibrated_timestamp=None,
                                     project_name=None,
                                     calibrate_pm=False,
                                     calibrate_no2=False,
                                     calibrate_ox=False,
                                     country_name=None,
                                     use_all_features_pm=False,
                                     use_all_features_gas=True,
                                     calibration_id=None):
    """Load PM, NO2 and OX calibration factors for an Airspeck sensor.

    For each requested pollutant the datastore is queried for calibration
    entities of the sensor, and the entity whose 'time_of_calibration' is
    closest to to_be_calibrated_timestamp is used.

    Args:
        subj_or_uuid: A 6-character subject ID (resolved to a UUID via
            get_uuid_for_subj_id_airspeck_personal) or a sensor UUID.
        to_be_calibrated_timestamp: Timezone-aware datetime of the data to be
            calibrated. Required despite the None default.
        project_name: Unused by this function.
        calibrate_pm: Query 'AirspeckCalibrationFactors' for PM factors.
        calibrate_no2: Query 'AirspeckCalibrationFactorsGas' for entities with
            calibrated_data_type 'no2' (plus those of type 'both').
        calibrate_ox: Same as calibrate_no2, for type 'ox' (plus 'both').
        country_name: Optional extra equality filter on 'country_name'.
        use_all_features_pm: Use the '*_all' PM factors instead of the
            '*_simple' ones. The '*_all' factors are also used when the
            simple low-humidity factors are missing.
        use_all_features_gas: Use the '*_all_<gas>' factors instead of the
            '*_simple_<gas>' ones.
        calibration_id: Optional extra equality filter on 'calibration_id'.

    Returns:
        A tuple (result_date, factors). result_date is the date of the last
        calibration entity that was selected (PM, then NO2, then OX), or False
        if none was selected. factors is the list
        [low_pm, high_pm, threshold_pm, low_no2, high_no2, low_ox, high_ox,
        threshold_gas]. Without PM data, identity factors and a threshold of
        1000 are returned; without gas data, empty lists and a threshold of
        1000 are returned.

    Raises:
        ValueError: If to_be_calibrated_timestamp is None or naive.
    """
    # Fail early and clearly: everything below needs an aware timestamp, and
    # without this check the failure is an opaque AttributeError/pytz error.
    if (to_be_calibrated_timestamp is None
            or to_be_calibrated_timestamp.tzinfo is None):
        raise ValueError(
            "to_be_calibrated_timestamp must be a timezone-aware datetime")

    if len(subj_or_uuid) == 6:
        uuid = get_uuid_for_subj_id_airspeck_personal(
            subj_or_uuid, timestamp=to_be_calibrated_timestamp)
    else:
        uuid = subj_or_uuid

    # Calibration times are expressed in the timezone of the timestamp.
    tz = timezone(str(to_be_calibrated_timestamp.tzinfo))

    # One client for all queries. It used to be created only in the PM branch,
    # which broke gas-only calibration with an unbound name.
    client = None
    if calibrate_pm or calibrate_no2 or calibrate_ox:
        client = get_datastore_client()

    result_date = False

    # ---- PM ----
    excluded_pm_id = '2019-11 EBAM IITD'
    results_pm = []
    if calibrate_pm:
        results_pm = _query_calibration_entities(
            client, 'AirspeckCalibrationFactors', uuid, country_name,
            calibration_id)

    only_excluded_pm = (len(results_pm) == 1
                        and results_pm[0]['calibration_id'] == excluded_pm_id)
    if results_pm and not only_excluded_pm:
        # NOTE(review): the excluded calibration is skipped for every
        # candidate except index 0, so it can still be chosen when it is the
        # first of several results. Kept as in the original; confirm intent.
        result_pm = results_pm[_index_of_nearest_calibration(
            results_pm, to_be_calibrated_timestamp, tz,
            skip_calibration_id=excluded_pm_id)]
        result_date = result_pm['time_of_calibration'].astimezone(tz).date()
        print("--> Chose calibration factors (time difference {}): {}".format(
            result_pm['time_of_calibration'].astimezone(tz) -
            to_be_calibrated_timestamp, result_pm['calibration_id']))

        low_humidity_factors_pm = get_from_entity_if_present(
            result_pm, 'low_humidity_factors_simple')
        if use_all_features_pm or low_humidity_factors_pm is None:
            print("Using calibration factors for all {} features".format(
                len(calibration_columns_pm)))
            low_humidity_factors_pm = get_from_entity_if_present(
                result_pm, 'low_humidity_factors_all')
            high_humidity_factors_pm = get_from_entity_if_present(
                result_pm, 'high_humidity_factors_all')
        else:
            print("Using simple calibration factors")
            # low_humidity_factors_pm already holds the simple factors.
            high_humidity_factors_pm = get_from_entity_if_present(
                result_pm, 'high_humidity_factors_simple')
        humidity_threshold_pm = get_from_entity_if_present(
            result_pm, 'humidity_threshold')
    else:
        print("No PM calibration data available for sensor {}".format(uuid))
        # Return "identity" factors
        if use_all_features_pm:
            low_humidity_factors_pm = np.append(
                [1.] * len(calibration_columns_pm), [0.])
            high_humidity_factors_pm = np.append(
                [1.] * len(calibration_columns_pm), [0.])
        else:
            low_humidity_factors_pm = [1., 0.]
            high_humidity_factors_pm = [1., 0.]
        humidity_threshold_pm = 1000.

    # ---- Gas entities calibrated for both NO2 and OX ----
    result_both_gases = []
    if calibrate_ox or calibrate_no2:
        result_both_gases = _query_calibration_entities(
            client, 'AirspeckCalibrationFactorsGas', uuid, country_name,
            calibration_id, calibrated_data_type='both')

    # ---- NO2 ----
    results_no2 = []
    if calibrate_no2:
        results_no2 = _query_calibration_entities(
            client, 'AirspeckCalibrationFactorsGas', uuid, country_name,
            calibration_id, calibrated_data_type='no2') + result_both_gases

    if results_no2:
        result_no2 = results_no2[_index_of_nearest_calibration(
            results_no2, to_be_calibrated_timestamp, tz)]
        result_date = result_no2['time_of_calibration'].astimezone(tz).date()
        print("--> Chose calibration factors (time difference {}): {}".format(
            result_no2['time_of_calibration'].astimezone(tz) -
            to_be_calibrated_timestamp, result_no2['calibration_id']))

        if use_all_features_gas:
            print("Using gas calibration factors for all {} features for no2.".
                  format(len(calibration_columns_no2)))
            low_humidity_factors_no2 = get_from_entity_if_present(
                result_no2, 'low_humidity_factors_all_no2')
            high_humidity_factors_no2 = get_from_entity_if_present(
                result_no2, 'high_humidity_factors_all_no2')
        else:
            print("Using simple gas calibration factors for no2")
            low_humidity_factors_no2 = get_from_entity_if_present(
                result_no2, 'low_humidity_factors_simple_no2')
            high_humidity_factors_no2 = get_from_entity_if_present(
                result_no2, 'high_humidity_factors_simple_no2')
        humidity_threshold_gas = get_from_entity_if_present(
            result_no2, 'humidity_threshold')
    else:
        print("No NO2 calibration data available for sensor {}".format(uuid))
        # No factors are returned for NO2. The threshold of 1000 is never
        # reached, i.e. only the low humidity factors would ever apply.
        low_humidity_factors_no2 = []
        high_humidity_factors_no2 = []
        humidity_threshold_gas = 1000.

    # ---- OX ----
    results_ox = []
    if calibrate_ox:
        results_ox = _query_calibration_entities(
            client, 'AirspeckCalibrationFactorsGas', uuid, country_name,
            calibration_id, calibrated_data_type='ox') + result_both_gases

    # NOTE(review): both branches below overwrite humidity_threshold_gas, so a
    # threshold loaded for NO2 is replaced by the OX one (or by 1000 when there
    # is no OX data). Kept as in the original; confirm this is intended.
    if results_ox:
        result_ox = results_ox[_index_of_nearest_calibration(
            results_ox, to_be_calibrated_timestamp, tz)]
        result_date = result_ox['time_of_calibration'].astimezone(tz).date()
        print("--> Chose calibration factors (time difference {}): {}".format(
            result_ox['time_of_calibration'].astimezone(tz) -
            to_be_calibrated_timestamp, result_ox['calibration_id']))

        if use_all_features_gas:
            print("Using gas calibration factors for all {} features for ox.".
                  format(len(calibration_columns_ox)))
            low_humidity_factors_ox = get_from_entity_if_present(
                result_ox, 'low_humidity_factors_all_ox')
            high_humidity_factors_ox = get_from_entity_if_present(
                result_ox, 'high_humidity_factors_all_ox')
        else:
            print("Using simple gas calibration factors for ox")
            low_humidity_factors_ox = get_from_entity_if_present(
                result_ox, 'low_humidity_factors_simple_ox')
            high_humidity_factors_ox = get_from_entity_if_present(
                result_ox, 'high_humidity_factors_simple_ox')
        humidity_threshold_gas = get_from_entity_if_present(
            result_ox, 'humidity_threshold')
    else:
        print("No OX calibration data available for sensor {}".format(uuid))
        # No factors are returned for OX. The threshold of 1000 is never
        # reached, i.e. only the low humidity factors would ever apply.
        low_humidity_factors_ox = []
        high_humidity_factors_ox = []
        humidity_threshold_gas = 1000.

    return result_date, [
        low_humidity_factors_pm, high_humidity_factors_pm,
        humidity_threshold_pm, low_humidity_factors_no2,
        high_humidity_factors_no2, low_humidity_factors_ox,
        high_humidity_factors_ox, humidity_threshold_gas
    ]


def _query_calibration_entities(client,
                                kind,
                                uuid,
                                country_name,
                                calibration_id,
                                calibrated_data_type=None):
    """Fetch a sensor's calibration entities, ordered by descending time_of_calibration."""
    filters = [('uuid', '=', uuid)]
    if calibrated_data_type is not None:
        filters.append(('calibrated_data_type', '=', calibrated_data_type))
    if country_name is not None:
        filters.append(('country_name', '=', country_name))
    if calibration_id is not None:
        filters.append(('calibration_id', '=', calibration_id))
    return list(
        client.query(kind=kind,
                     filters=filters,
                     order=['-time_of_calibration']).fetch())


def _index_of_nearest_calibration(results,
                                  timestamp,
                                  tz,
                                  skip_calibration_id=None):
    """Return the index of the entity calibrated closest in time to timestamp."""
    best_idx = 0
    # Compare absolute timedeltas, not their .days field: .days is -1 for any
    # small negative difference, which skews "nearest" towards later entries.
    best_distance = abs(results[0]['time_of_calibration'].astimezone(tz) -
                        timestamp)
    for idx, result in enumerate(results[1:], start=1):
        if (skip_calibration_id is not None
                and result['calibration_id'] == skip_calibration_id):
            continue
        distance = abs(result['time_of_calibration'].astimezone(tz) -
                       timestamp)
        if distance < best_distance:
            best_distance = distance
            best_idx = idx
    return best_idx