示例#1
0
def get_uuid_for_subj_id_airspeck_personal(subject_id, timestamp=None):
    """Return the personal Airspeck UUID stored in the datastore for a subject.

    The 'MobileAirspeckManualUpload' kind is queried first; only when it
    yields nothing is the 'MobileAirspeck' (automatic upload) kind queried.
    In both cases the earliest matching entity (ordered by timestamp) is used.

    Args:
        subject_id: Value matched against the entities' 'subject_id' property.
        timestamp: Optional datetime. When given, only entities whose
            timestamp lies in [timestamp - 1h, timestamp + 1h) are considered.

    Returns:
        The entity's 'airspeck_uuid' with all ':' removed. If that string is
        21 characters long, its first 5 characters are dropped as well.
        Returns None when neither kind holds a matching entity.
    """
    datastore = get_datastore_client()

    conditions = [('subject_id', '=', subject_id)]
    if timestamp is not None:
        conditions += [('timestamp', '>=', timestamp - timedelta(hours=1)),
                       ('timestamp', '<', timestamp + timedelta(hours=1))]

    # Manual uploads take precedence; automatic uploads are the fallback.
    matches = []
    for kind in ('MobileAirspeckManualUpload', 'MobileAirspeck'):
        lookup = datastore.query(kind=kind,
                                 filters=conditions,
                                 order=['timestamp'])
        matches = list(lookup.fetch(1))
        if matches:
            break

    if not matches:
        return None

    uuid = matches[0]['airspeck_uuid'].replace(':', '')
    print("Extracted UUID {} (Length {})".format(uuid, len(uuid)))

    # 21-character identifiers carry a 5-character prefix that is not
    # part of the returned UUID.
    return uuid[5:] if len(uuid) == 21 else uuid
示例#2
0
def download_respeck_minute_from_datastore(subject_id,
                                           out_filepath,
                                           timeframe,
                                           project_name,
                                           upload_type='automatic'):
    """Export a subject's per-minute Respeck records from the datastore to CSV.

    Args:
        subject_id: Value matched against the entities' 'subject_id' property.
        out_filepath: Destination path of the CSV file.
        timeframe: Sequence whose first two items are the start (inclusive)
            and end (exclusive) datetimes. If the start is naive, both are
            interpreted in the project's timezone; otherwise both are taken
            as timezone-aware. Either way they are converted to naive UTC
            before querying.
        project_name: Key into ``project_mapping``; index 1 of the mapped
            value is used as the timezone name.
        upload_type: 'automatic' reads kind 'RespeckAverage', 'manual' reads
            kind 'RespeckMinuteManualUpload'.

    Returns:
        None. No file is written when the query yields no rows.
    """
    assert upload_type in ['automatic', 'manual'
                           ], "Data type has to be either automatic or manual"

    project_tz = timezone(project_mapping[project_name][1])
    utc = timezone('UTC')

    # Whether to localise is decided by the start time alone and then
    # applied to both ends of the window.
    window_start, window_end = timeframe[0], timeframe[1]
    if window_start.tzinfo is None:
        window_start = project_tz.localize(window_start)
        window_end = project_tz.localize(window_end)
    localised_start = window_start.astimezone(utc).replace(tzinfo=None)
    localised_end = window_end.astimezone(utc).replace(tzinfo=None)

    kind = ('RespeckAverage'
            if upload_type == 'automatic' else 'RespeckMinuteManualUpload')

    conditions = [('subject_id', '=', subject_id),
                  ('timestamp', '>=', localised_start),
                  ('timestamp', '<', localised_end)]
    entities = get_datastore_client().query(kind=kind,
                                            filters=conditions,
                                            order=['timestamp']).fetch()
    rows = [dict(entity) for entity in entities]

    exported_columns = [
        'timestamp', 'breathing_rate', 'sd_br', 'activity', 'act_type',
        'step_count'
    ]
    renamed_columns = {
        'activity': 'activity_level',
        'act_type': 'activity_type'
    }
    data = pd.DataFrame(rows, columns=exported_columns)
    data = data.rename(columns=renamed_columns)

    if len(data) == 0:
        return

    # Drop the timezone information so the CSV holds plain timestamps.
    data.loc[:, 'timestamp'] = data['timestamp'].dt.tz_localize(None)
    data.to_csv(out_filepath, index=False)
示例#3
0
def download_airspeck_from_datastore(subject_id, out_filepath, project_name,
                                     timeframe, upload_type):
    """Write a subject's personal Airspeck records from the datastore to CSV.

    The output file is created (and its header written) before the query
    runs, so it exists even when no entity matches.

    Args:
        subject_id: Value matched against the entities' 'subject_id' property.
        out_filepath: Destination path; an existing file is overwritten.
        project_name: Key into ``project_mapping``; index 1 of the mapped
            value is used as the timezone name.
        timeframe: Sequence whose first two items are the start (inclusive)
            and end (exclusive) datetimes. If the start is naive, both are
            interpreted in the project's timezone and converted to naive
            UTC; otherwise both are passed to the query unchanged.
        upload_type: Must be 'automatic'. A kind for other values is mapped
            below but is unreachable while the assertion is active.

    Returns:
        None.
    """
    assert upload_type == 'automatic', "Only automatic upload type implemented so far. Passed {}".format(
        upload_type)

    client = get_datastore_client()

    project_tz = timezone(project_mapping[project_name][1])

    utc_start, utc_end = timeframe[0], timeframe[1]
    if utc_start.tzinfo is None:
        utc = timezone('UTC')
        utc_start = project_tz.localize(utc_start).astimezone(utc).replace(
            tzinfo=None)
        utc_end = project_tz.localize(utc_end).astimezone(utc).replace(
            tzinfo=None)

    kind = 'MobileAirspeck' if upload_type == 'automatic' else 'MobileAirspeckSDCard'

    header = (
        "timestamp,pm1,pm2_5,pm10,bin0,bin1,bin2,bin3,bin4,bin5,bin6,bin7,bin8,bin9,bin10,bin11,bin12,"
        "bin13,bin14,bin15,temperature,humidity,luxLevel,motion,battery,gpsLatitude,gpsLongitude,"
        "gpsAccuracy\n")

    with open(out_filepath, "w") as out:
        out.write(header)

        conditions = [('subject_id', '=', subject_id),
                      ('timestamp', '>=', utc_start),
                      ('timestamp', '<', utc_end)]
        entities = client.query(kind=kind,
                                filters=conditions,
                                order=['timestamp']).fetch()

        for entity in entities:
            # Timestamp (timezone stripped) and particulate-matter columns.
            leading = (entity['timestamp'].replace(tzinfo=None),
                       entity['pm1'], entity['pm2_5'], entity['pm10'])
            out.write("{},{},{},{},".format(*leading))

            # One column per bin; each cell is written as soon as it is
            # produced.
            out.writelines("{},".format(entity['bins'][bin_idx])
                           for bin_idx in range(16))

            trailing = (entity['temperature'], entity['humidity'],
                        entity['lux'], entity['motion'], entity['battery'],
                        entity['location'].latitude,
                        entity['location'].longitude, entity['gps_accuracy'])
            out.write("{},{},{},{},{},{},{},{}\n".format(*trailing))
示例#4
0
def download_static_airspeck(subj_or_uuid,
                             sensor_label=None,
                             project_name=None,
                             overwrite_if_already_exists=False,
                             timeframe=None,
                             upload_type='automatic',
                             suffix_filename="",
                             filename=None,
                             subject_visit_number=None,
                             out_directory=None):
    """Write a static Airspeck sensor's records from the datastore to CSV.

    Args:
        subj_or_uuid: Sensor identifier. A length of 6 is treated as a
            subject ID (for project lookup and labelling); a length of 16 is
            treated as a UUID when choosing the property to filter on.
        sensor_label: Prefix of the generated filename. Defaults to
            "<subject>(<visit>)" for a subject ID with a visit number,
            otherwise to ``subj_or_uuid``.
        project_name: Key into ``project_mapping``. When omitted it is
            resolved with ``get_project_for_subject``, which is only
            attempted for 6-character identifiers.
        overwrite_if_already_exists: When False and the output file already
            exists, nothing is downloaded.
        timeframe: Sequence of start (inclusive) and end (exclusive)
            datetimes; defaults to 2016-01-01 .. 2100-01-01. If the start is
            naive, both are interpreted in the project's timezone and
            converted to naive UTC; otherwise both are used unchanged.
        upload_type: 'automatic' (kind 'StaticAirspeck') or 'sd_card'
            (kind 'StaticAirspeckSDCard').
        suffix_filename: Text inserted before ".csv" in the generated name.
        filename: Explicit output filename; overrides the generated one.
        subject_visit_number: Only used to build the default sensor label.
        out_directory: Output directory prefix; defaults to index 2 of the
            project's ``project_mapping`` entry. It is concatenated with the
            filename as-is, so it needs to end with a path separator.

    Returns:
        None.

    Raises:
        ValueError: If ``project_name`` is omitted and ``subj_or_uuid`` is
            not 6 characters long.
    """
    assert upload_type in [
        'automatic', 'sd_card'
    ], "upload_type has to be either 'automatic' or 'sd_card'"

    if project_name is None:
        if len(subj_or_uuid) != 6:
            raise ValueError(
                "When passing a UUID and not a subject ID, also specify a project_name so that the "
                "correct directory can be selected")
        project_name = get_project_for_subject(subj_or_uuid)

    if out_directory is None:
        out_directory = project_mapping[project_name][2]

    if sensor_label is None:
        labelled_by_visit = (len(subj_or_uuid) == 6
                             and subject_visit_number is not None)
        sensor_label = ("{}({})".format(subj_or_uuid, subject_visit_number)
                        if labelled_by_visit else subj_or_uuid)

    if filename is None:
        filename = "{}_static_airspeck_{}{}.csv".format(
            sensor_label, upload_type, suffix_filename)

    out_filepath = out_directory + filename

    if not overwrite_if_already_exists and os.path.isfile(out_filepath):
        print('Skipping file as it already exists')
        return

    client = get_datastore_client()

    header = (
        "timestamp,pm1,pm2_5,pm10,bin0,bin1,bin2,bin3,bin4,bin5,bin6,bin7,bin8,bin9,bin10,bin11,bin12,"
        "bin13,bin14,bin15,temperature,humidity,battery,no2_ae,no2_we,ox_ae,ox_we,"
        "gpsLatitude,gpsLongitude\n")
    is_automatic = upload_type == 'automatic'

    # The file is created and the header written before the query is
    # prepared, so the file exists even if a later step fails.
    with open(out_filepath, "w") as out:
        out.write(header)

        # Without an explicit timeframe, cover all data.
        if timeframe is None:
            timeframe = [datetime(2016, 1, 1), datetime(2100, 1, 1)]

        project_tz = timezone(project_mapping[project_name][1])

        utc_start, utc_end = timeframe[0], timeframe[1]
        if utc_start.tzinfo is None:
            utc = timezone('UTC')
            utc_start = project_tz.localize(utc_start).astimezone(
                utc).replace(tzinfo=None)
            utc_end = project_tz.localize(utc_end).astimezone(utc).replace(
                tzinfo=None)

        # The two kinds name their UUID property differently.
        if is_automatic:
            kind_name = 'StaticAirspeck'
            uuid_property = 'uuid'
        else:
            kind_name = 'StaticAirspeckSDCard'
            uuid_property = 'airspeck_uuid'
        id_name = uuid_property if len(subj_or_uuid) == 16 else 'subject_id'

        conditions = [(id_name, '=', subj_or_uuid),
                      ('timestamp', '>=', utc_start),
                      ('timestamp', '<', utc_end)]
        entities = client.query(kind=kind_name,
                                filters=conditions,
                                order=['timestamp']).fetch()

        for entity in entities:
            leading = (entity['timestamp'].replace(tzinfo=None),
                       entity['pm1'], entity['pm2_5'], entity['pm10'])
            out.write("{},{},{},{},".format(*leading))

            # One column per bin; each cell is written as soon as it is
            # produced.
            out.writelines("{},".format(entity['bins'][bin_idx])
                           for bin_idx in range(16))

            readings = [
                entity[key]
                for key in ('temperature', 'humidity', 'battery', 'no2_ae',
                            'no2_we', 'ox_ae', 'ox_we')
            ]
            # Automatic uploads nest the coordinates under 'location';
            # SD-card uploads store them as top-level properties.
            if is_automatic:
                readings += [
                    entity['location']['latitude'],
                    entity['location']['longitude']
                ]
            else:
                readings += [entity['latitude'], entity['longitude']]
            out.write("{},{},{},{},{},{},{},{},{}\n".format(*readings))

    print('Done')
示例#5
0
# PM calibration that must never be selected by
# get_calibration_factors_airspeck.
_EXCLUDED_PM_CALIBRATION_ID = '2019-11 EBAM IITD'


def _fetch_calibrations(client, kind, uuid, country_name, calibration_id,
                        calibrated_data_type=None):
    """Return all calibration entities of `kind` for `uuid`, newest first."""
    filters = [('uuid', '=', uuid)]
    if calibrated_data_type is not None:
        filters.append(('calibrated_data_type', '=', calibrated_data_type))
    if country_name is not None:
        filters.append(('country_name', '=', country_name))
    if calibration_id is not None:
        filters.append(('calibration_id', '=', calibration_id))

    return list(
        client.query(kind=kind,
                     filters=filters,
                     order=['-time_of_calibration']).fetch())


def _pick_nearest_calibration(results, reference_timestamp, tz):
    """Return `(entity, time_difference)` for the calibration nearest in time.

    `results` must be non-empty. Without a reference timestamp the newest
    calibration is returned and the time difference is None.
    """
    if reference_timestamp is None:
        newest = max(results, key=lambda entity: entity['time_of_calibration'])
        return newest, None

    chosen = None
    chosen_difference = None
    for candidate in results:
        difference = candidate['time_of_calibration'].astimezone(
            tz) - reference_timestamp
        # Compare the whole-day part of the *absolute* difference. Taking
        # `.days` of a negative timedelta first would round away from zero
        # (-1 hour has days == -1), which penalises calibrations made
        # shortly before the reference time. Ties keep the earlier entry.
        if chosen is None or abs(difference).days < abs(
                chosen_difference).days:
            chosen, chosen_difference = candidate, difference
    return chosen, chosen_difference


def _calibration_date(entity, tz):
    """Return the calendar date of a calibration, in `tz` when one is given."""
    calibrated_at = entity['time_of_calibration']
    if tz is not None:
        calibrated_at = calibrated_at.astimezone(tz)
    return calibrated_at.date()


def _gas_factors_from_entity(entity, gas, use_all_features):
    """Return `(low_factors, high_factors, humidity_threshold)` for `gas`.

    `gas` is 'no2' or 'ox' and selects the suffix of the entity properties.
    """
    if use_all_features:
        columns = (calibration_columns_no2
                   if gas == 'no2' else calibration_columns_ox)
        print("Using gas calibration factors for all {} features for {}.".
              format(len(columns), gas))
        variant = 'all'
    else:
        print("Using simple gas calibration factors for {}".format(gas))
        variant = 'simple'

    low_factors = get_from_entity_if_present(
        entity, 'low_humidity_factors_{}_{}'.format(variant, gas))
    high_factors = get_from_entity_if_present(
        entity, 'high_humidity_factors_{}_{}'.format(variant, gas))
    humidity_threshold = get_from_entity_if_present(entity,
                                                    'humidity_threshold')
    return low_factors, high_factors, humidity_threshold


def get_calibration_factors_airspeck(subj_or_uuid,
                                     to_be_calibrated_timestamp=None,
                                     project_name=None,
                                     calibrate_pm=False,
                                     calibrate_no2=False,
                                     calibrate_ox=False,
                                     country_name=None,
                                     use_all_features_pm=False,
                                     use_all_features_gas=True,
                                     calibration_id=None):
    """Look up the PM, NO2 and OX calibration factors for an Airspeck sensor.

    For every requested pollutant the stored calibrations of the sensor are
    fetched from the datastore and the one whose time of calibration is
    nearest (in whole days) to `to_be_calibrated_timestamp` is used.
    Pollutants that were not requested, or for which nothing is stored, get
    placeholder factors.

    Args:
        subj_or_uuid: A 6-character value is treated as a subject ID and
            resolved with `get_uuid_for_subj_id_airspeck_personal`; any
            other value is used directly as the sensor UUID.
        to_be_calibrated_timestamp: Timezone-aware datetime of the data that
            is going to be calibrated. When None, the newest stored
            calibration is used instead of the nearest one.
            NOTE(review): `str(timestamp.tzinfo)` must be a zone name that
            `timezone()` accepts - confirm for non-pytz tzinfo objects.
        project_name: Not used by this function.
        calibrate_pm: Query 'AirspeckCalibrationFactors' for PM factors.
        calibrate_no2: Query 'AirspeckCalibrationFactorsGas' for NO2 factors.
        calibrate_ox: Query 'AirspeckCalibrationFactorsGas' for OX factors.
        country_name: Optional equality filter on 'country_name'.
        use_all_features_pm: Use the '*_all' PM factors instead of the
            '*_simple' ones. The '*_all' factors are also used when the
            chosen calibration has no simple low-humidity factors.
        use_all_features_gas: Use the '*_all_<gas>' factors instead of the
            '*_simple_<gas>' ones.
        calibration_id: Optional equality filter on 'calibration_id'.

    Returns:
        A tuple `(result_date, factors)`. `result_date` is the date of the
        last calibration that was chosen (PM, then NO2, then OX), or False
        if none was found. `factors` is the list
        [low_humidity_factors_pm, high_humidity_factors_pm,
         humidity_threshold_pm, low_humidity_factors_no2,
         high_humidity_factors_no2, low_humidity_factors_ox,
         high_humidity_factors_ox, humidity_threshold_gas].
        Missing PM data yields identity factors with threshold 1000; missing
        gas data yields empty factor lists. `humidity_threshold_gas` is 1000
        unless a gas calibration was found; if both gases have one, the OX
        threshold is returned.
    """
    if len(subj_or_uuid) == 6:
        uuid = get_uuid_for_subj_id_airspeck_personal(
            subj_or_uuid, timestamp=to_be_calibrated_timestamp)
    else:
        uuid = subj_or_uuid

    # Calibration times are compared and reported in the timezone of the
    # timestamp that is going to be calibrated.
    tz = None
    if to_be_calibrated_timestamp is not None:
        tz = timezone(str(to_be_calibrated_timestamp.tzinfo))

    # One client serves every query below. It used to be created only for
    # PM, which broke gas-only calibration.
    client = None
    if calibrate_pm or calibrate_no2 or calibrate_ox:
        client = get_datastore_client()

    result_date = False

    # ---- PM ----------------------------------------------------------
    results_pm = []
    if calibrate_pm:
        results_pm = _fetch_calibrations(client, 'AirspeckCalibrationFactors',
                                         uuid, country_name, calibration_id)

    # Drop the excluded calibration up front so that it can never win the
    # nearest-in-time selection, wherever it sits in the result list.
    usable_pm = [
        entity for entity in results_pm
        if entity['calibration_id'] != _EXCLUDED_PM_CALIBRATION_ID
    ]

    if usable_pm:
        result_pm, time_difference = _pick_nearest_calibration(
            usable_pm, to_be_calibrated_timestamp, tz)
        result_date = _calibration_date(result_pm, tz)

        print("--> Chose calibration factors (time difference {}): {}".format(
            time_difference, result_pm['calibration_id']))

        low_humidity_factors_pm = get_from_entity_if_present(
            result_pm, 'low_humidity_factors_simple')

        if use_all_features_pm or low_humidity_factors_pm is None:
            print("Using calibration factors for all {} features".format(
                len(calibration_columns_pm)))
            low_humidity_factors_pm = get_from_entity_if_present(
                result_pm, 'low_humidity_factors_all')
            high_humidity_factors_pm = get_from_entity_if_present(
                result_pm, 'high_humidity_factors_all')
        else:
            print("Using simple calibration factors")
            high_humidity_factors_pm = get_from_entity_if_present(
                result_pm, 'high_humidity_factors_simple')
        humidity_threshold_pm = get_from_entity_if_present(
            result_pm, 'humidity_threshold')
    else:
        print("No PM calibration data available for sensor {}".format(uuid))
        # Return "identity" factors: slope(s) of 1 followed by an offset of 0.
        if use_all_features_pm:
            low_humidity_factors_pm = np.append(
                [1.] * len(calibration_columns_pm), [0.])
            high_humidity_factors_pm = np.append(
                [1.] * len(calibration_columns_pm), [0.])
        else:
            low_humidity_factors_pm = [1., 0.]
            high_humidity_factors_pm = [1., 0.]
        humidity_threshold_pm = 1000.

    # ---- Gases -------------------------------------------------------
    # Calibrations stored with data type 'both' are candidates for either
    # gas.
    result_both_gases = []
    if calibrate_ox or calibrate_no2:
        result_both_gases = _fetch_calibrations(
            client, 'AirspeckCalibrationFactorsGas', uuid, country_name,
            calibration_id, calibrated_data_type='both')

    # 1000 is a humidity threshold which is never reached, i.e. everything
    # is calibrated with the low humidity factors. It is only replaced when
    # a gas calibration is found, so a missing OX calibration no longer
    # discards the NO2 threshold.
    humidity_threshold_gas = 1000.

    results_no2 = []
    if calibrate_no2:
        results_no2 = _fetch_calibrations(
            client, 'AirspeckCalibrationFactorsGas', uuid, country_name,
            calibration_id, calibrated_data_type='no2') + result_both_gases

    if results_no2:
        result_no2, time_difference = _pick_nearest_calibration(
            results_no2, to_be_calibrated_timestamp, tz)
        result_date = _calibration_date(result_no2, tz)

        print("--> Chose calibration factors (time difference {}): {}".format(
            time_difference, result_no2['calibration_id']))

        (low_humidity_factors_no2, high_humidity_factors_no2,
         humidity_threshold_gas) = _gas_factors_from_entity(
             result_no2, 'no2', use_all_features_gas)
    else:
        print("No NO2 calibration data available for sensor {}".format(uuid))
        # Empty factors, as the uncalibrated gas data isn't usable.
        low_humidity_factors_no2 = []
        high_humidity_factors_no2 = []

    results_ox = []
    if calibrate_ox:
        results_ox = _fetch_calibrations(
            client, 'AirspeckCalibrationFactorsGas', uuid, country_name,
            calibration_id, calibrated_data_type='ox') + result_both_gases

    if results_ox:
        result_ox, time_difference = _pick_nearest_calibration(
            results_ox, to_be_calibrated_timestamp, tz)
        result_date = _calibration_date(result_ox, tz)

        print("--> Chose calibration factors (time difference {}): {}".format(
            time_difference, result_ox['calibration_id']))

        (low_humidity_factors_ox, high_humidity_factors_ox,
         humidity_threshold_gas) = _gas_factors_from_entity(
             result_ox, 'ox', use_all_features_gas)
    else:
        print("No OX calibration data available for sensor {}".format(uuid))
        # Empty factors, as the uncalibrated gas data isn't usable.
        low_humidity_factors_ox = []
        high_humidity_factors_ox = []

    return result_date, [
        low_humidity_factors_pm, high_humidity_factors_pm,
        humidity_threshold_pm, low_humidity_factors_no2,
        high_humidity_factors_no2, low_humidity_factors_ox,
        high_humidity_factors_ox, humidity_threshold_gas
    ]