def get_resampled_data(subject_id, visit_number):
    """Load one visit's personal Airspeck data, blank bad GPS fixes, resample to 1 min.

    The raw (not minute-averaged, uncalibrated) manual-upload file of the
    'peeps' project is loaded for the given subject and visit. Rows whose
    ``gpsAccuracy`` exceeds 1000, or whose coordinates fall outside the
    bounding box latitude 28.4-28.9 / longitude 76.8-77.6, get their
    latitude and longitude set to NaN. The frame is then averaged into
    1-minute bins.

    Args:
        subject_id: Identifier passed through to ``load_personal_airspeck_file``.
        visit_number: Passed as ``subject_visit_number`` to the loader.

    Returns:
        Tuple ``(calibration_date_pers, is_calibrated_pm_pers, airspeck)``
        where the first two items come straight from the loader and
        ``airspeck`` is the 1-minute mean of the filtered data.
    """
    # NOTE(review): three values are unpacked although
    # return_calibration_flag=False is passed -- confirm the loader really
    # returns a 3-tuple in that configuration.
    calibration_date_pers, is_calibrated_pm_pers, airspeck_raw = load_personal_airspeck_file(
        subject_id,
        subject_visit_number=visit_number,
        project_name='peeps',
        upload_type='manual',
        is_minute_averaged=False,
        calibrate_pm_and_gas=False,
        return_calibration_flag=False)

    # Select the GPS columns by name instead of with a label slice: a slice
    # such as 'gpsLongitude':'gpsLatitude' depends on the column order of the
    # loaded file and silently selects nothing when the order is reversed.
    gps_columns = ['gpsLatitude', 'gpsLongitude']
    latitude = airspeck_raw['gpsLatitude']
    longitude = airspeck_raw['gpsLongitude']

    # NaN coordinates compare False everywhere, so a single combined mask
    # blanks exactly the rows the individual checks would blank one by one.
    implausible = ((airspeck_raw['gpsAccuracy'] > 1000)
                   | (latitude < 28.4) | (latitude > 28.9)
                   | (longitude < 76.8) | (longitude > 77.6))
    airspeck_raw.loc[implausible, gps_columns] = np.nan

    airspeck = airspeck_raw.resample('1min').mean()

    return calibration_date_pers, is_calibrated_pm_pers, airspeck
示例#2
0
def get_resampled_data(subject_id):
    """Load calibrated personal Airspeck data, blank bad GPS fixes, resample to 1 min.

    The automatic-upload file of the 'british-heart' project is loaded with
    PM/gas calibration enabled. Rows whose ``gpsAccuracy`` exceeds 1000, or
    whose coordinates fall outside the bounding box latitude 49.88-55.79 /
    longitude -5.9-1.8, get their latitude and longitude set to NaN.

    Args:
        subject_id: Identifier passed through to ``load_personal_airspeck_file``.

    Returns:
        The filtered data averaged into 1-minute bins. The calibration date
        and flag returned by the loader are not passed on.
    """
    # Only the data frame is used; the two calibration values are discarded.
    _, _, airspeck_raw = load_personal_airspeck_file(
        subject_id,
        'british-heart',
        upload_type='automatic',
        calibrate_pm_and_gas=True,
        return_calibration_flag=True)

    # The accuracy column is converted explicitly so the numeric comparison
    # below is valid even if it was read as text.
    airspeck_raw['gpsAccuracy'] = pd.to_numeric(airspeck_raw['gpsAccuracy'])

    # Select the GPS columns by name instead of with a label slice: a slice
    # such as 'gpsLatitude':'gpsLongitude' depends on the column order of the
    # loaded file and silently selects nothing when the order is reversed.
    gps_columns = ['gpsLatitude', 'gpsLongitude']
    latitude = airspeck_raw['gpsLatitude']
    longitude = airspeck_raw['gpsLongitude']

    # NaN coordinates compare False everywhere, so a single combined mask
    # blanks exactly the rows the individual checks would blank one by one.
    implausible = ((airspeck_raw['gpsAccuracy'] > 1000)
                   | (latitude < 49.88) | (latitude > 55.79)
                   | (longitude < -5.9) | (longitude > 1.8))
    airspeck_raw.loc[implausible, gps_columns] = np.nan

    return airspeck_raw.resample('1min').mean()
示例#3
0
def plot_philap_report_graphs_for_subject(subject_id, graphs_dir):
    """Create the report graphs, map and statistics file for one subject.

    Loads the subject's manually uploaded personal Airspeck and RESpeck data
    and writes four files, each named ``graphs_dir + "<subject_id>_..."``:

    * ``_detailed_exposure.png`` -- personal PM2.5 over time with the periods
      spent near home shaded, plus the stationary home sensor if available.
    * ``_mean_exposure.png`` -- bar chart of mean PM2.5 (home / other /
      overall personal, and home ambient).
    * ``_airspeck_map.png`` -- produced by ``get_maps_image`` from the raw data.
    * ``_stats.txt`` -- step count, breathing rates and recording times.

    Both figures are also displayed with ``plt.show()``.

    Args:
        subject_id: Identifier passed to the loaders and used in file names.
        graphs_dir: String prefix for all output paths. It is concatenated
            directly with the file name, so it should end with a path
            separator.

    Returns:
        None. If the personal Airspeck or RESpeck data cannot be loaded (or
        the Airspeck data is empty) a hint is printed and nothing is plotted.
    """
    participant_details = load_philap_participant_details()

    # Loading fails when the data has not been downloaded yet. Print the hint
    # and stop, instead of continuing into a NameError further down.
    try:
        airspeck_raw = load_personal_airspeck_file(subject_id,
                                                   upload_type='manual',
                                                   is_minute_averaged=False)
        respeck = load_respeck_file(subject_id, upload_type='manual')
    except Exception:
        airspeck_raw = None
        respeck = None

    if airspeck_raw is None or respeck is None or len(airspeck_raw) == 0:
        print(
            "Please download all Peeps data via download_all_philap_data(raw_airspeck=True) "
            "before calling this function")
        return

    # Delete incorrect GPS: fixes with an accuracy value above 1000 or outside
    # the coarse box latitude 10-40 / longitude 10-80.
    # The columns are selected by name because a label slice
    # ('gpsLongitude':'gpsLatitude') depends on the column order of the file.
    gps_columns = ['gpsLatitude', 'gpsLongitude']
    latitude = airspeck_raw['gpsLatitude']
    longitude = airspeck_raw['gpsLongitude']
    implausible = ((airspeck_raw['gpsAccuracy'] > 1000)
                   | (latitude < 10) | (latitude > 40)
                   | (longitude < 10) | (longitude > 80))
    airspeck_raw.loc[implausible, gps_columns] = np.nan

    # Resample only AFTER the GPS clean-up so that the home estimate and the
    # home mask below are not skewed by the discarded fixes.
    airspeck = airspeck_raw.resample('1min').mean()

    # Estimate the home position as the mean of the samples recorded during
    # hours 2 and 3 (1 < hour <= 3).
    home_gps = airspeck.loc[(1 < airspeck.index.hour)
                            & (airspeck.index.hour <= 3)].mean()
    # If there was no personal data during the night, fall back on the GPS
    # coordinates the researchers provided
    if pd.isnull(home_gps['gpsLatitude']):
        home_gps = get_home_gps_for_subject(subject_id, participant_details)

    # Select locations near home; the tolerance grows with the reported
    # GPS accuracy value of each minute.
    radius_home = 0.002
    correction_factor = airspeck['gpsAccuracy'] * 0.00001
    home_mask = (np.abs(airspeck['gpsLatitude'] - home_gps['gpsLatitude']) < radius_home + correction_factor) & \
                (np.abs(airspeck['gpsLongitude'] - home_gps['gpsLongitude']) < radius_home + correction_factor)

    ##################################
    # Draw detailed exposure plot
    ##################################
    sns.set_style('darkgrid', {'xtick.bottom': True, 'xtick.major.size': 5})

    fig, ax = plt.subplots(figsize=(15, 5))

    # Shade every minute spent near home (no-op when the mask is all False)
    for ts in airspeck.loc[home_mask].index:
        ax.axvspan(ts,
                   ts + pd.DateOffset(minutes=1),
                   facecolor=CB_color_cycle[0],
                   alpha=0.3,
                   zorder=1)

    ax.scatter(airspeck.index, airspeck['pm2_5'], s=2, color='black', zorder=2)

    # Plot stationary airspeck home
    home_airspeck = load_static_airspeck_file(subject_id,
                                              suffix_filename='_home')
    if home_airspeck is not None and len(home_airspeck) > 0:
        ax.scatter(home_airspeck.index,
                   home_airspeck['pm2_5'],
                   s=2,
                   color='blue')

    ax.set_ylabel("PM2.5 (μg/m³)")

    # x axis runs from midnight of the first day to midnight after the last
    start = airspeck.index[0].replace(hour=0, minute=0, second=0)
    end = airspeck.index[-1].replace(hour=0, minute=0,
                                     second=0) + pd.DateOffset(days=1)
    ax.set_xlim(start, end)

    # NOTE(review): project_mapping['philap'][1] is presumably the project's
    # timezone name -- confirm against project_mapping.
    formatter = mdates.DateFormatter('%d.%m %Hh',
                                     tz=dateutil.tz.gettz(
                                         project_mapping['philap'][1]))

    ax.xaxis.set_major_formatter(formatter)

    ax.set_title(
        "Continuous PM2.5 personal exposure levels and ambient concentrations")
    fig.autofmt_xdate()

    # Hand-built legend entries: a patch for the shaded home periods and
    # marker-only proxies (transparent line colour) for the two scatters.
    home_patch = mpatches.Patch(color=CB_color_cycle[0],
                                label='Home',
                                alpha=0.3)

    airs_home_patch = Line2D(range(1),
                             range(1),
                             marker='o',
                             color='#00000000',
                             markerfacecolor="blue",
                             label='Home sensor')
    airp_patch = Line2D(range(1),
                        range(1),
                        marker='o',
                        color='#00000000',
                        markerfacecolor="black",
                        label='Personal sensor')
    plt.legend(handles=[home_patch, airp_patch, airs_home_patch])

    plt.tight_layout()
    plt.savefig(graphs_dir + "{}_detailed_exposure.png".format(subject_id),
                dpi=300)
    plt.show()

    ##################################
    # Draw summary bar graph
    ##################################
    sns.set_style('darkgrid', {'xtick.bottom': False, 'xtick.major.size': 0.0})
    home = airspeck.loc[home_mask, 'pm2_5'].mean()
    other = airspeck.loc[~home_mask, 'pm2_5'].mean()
    overall = airspeck['pm2_5'].mean()
    if home_airspeck is not None:
        home_ambient = home_airspeck['pm2_5'].mean()
    else:
        home_ambient = np.nan

    mean_values = [home, other, overall, home_ambient]

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.set_title(
        "Mean PM2.5 personal exposure levels and ambient concentrations")
    ax.bar(np.arange(len(mean_values)),
           mean_values,
           width=0.5,
           color=CB_color_cycle,
           edgecolor="none")
    ax.set_ylabel("PM2.5 (μg/m³)")
    ax.set_xlim(-0.5, len(mean_values) - 0.5)
    plt.xticks(np.arange(len(mean_values)), [
        "Home\npersonal", "Other\npersonal", "Overall\npersonal",
        "Home\nambient"
    ])
    plt.savefig(graphs_dir + "{}_mean_exposure.png".format(subject_id),
                dpi=300)
    plt.show()

    ##################################
    # Draw map
    ##################################
    get_maps_image(airspeck_raw,
                   graphs_dir + "{}_airspeck_map.png".format(subject_id),
                   zoom=13)

    ##################################
    # Other statistics
    ##################################
    # Mode 'w' creates the file or truncates an existing one, so a separate
    # "create empty file" step is not needed.
    with open(graphs_dir + "{}_stats.txt".format(subject_id), 'w') as f:
        f.write("Step count: {}\n".format(respeck['step_count'].sum()))
        # NOTE(review): the night window is 0 < hour < 6, so samples from
        # hour 0 count towards neither night nor day -- confirm intended.
        f.write(
            "Mean breathing rate during night: {:.2f} breaths per minute\n".
            format(respeck.loc[(0 < respeck.index.hour) &
                               (respeck.index.hour < 6),
                               'breathing_rate'].mean()))
        f.write("Mean breathing rate during day: {:.2f} breaths per minute\n".
                format(respeck.loc[(6 <= respeck.index.hour) &
                                   (respeck.index.hour <= 23),
                                   'breathing_rate'].mean()))

        f.write("\nStart of recording: {}\n".format(
            airspeck.index[0].replace(tzinfo=None)))
        f.write("End of recording: {}\n".format(
            airspeck.index[-1].replace(tzinfo=None)))
        f.write("Total duration: {}\n".format(airspeck.index[-1] -
                                              airspeck.index[0]))

        # One row of the resampled frame is one minute, hence / 60 for hours
        f.write("Total recording time at home: {:.1f} h\n".format(
            np.count_nonzero(home_mask) / 60.))
def create_dublin_pixelgram_for_subject(
        subject_id, overwrite_pixelgram_if_already_exists=False):
    """Build the combined pixelgram for a single Dublin subject.

    Downloads and loads the subject's manually uploaded RESpeck and personal
    Airspeck data, reads the exposure and monitoring periods from the
    participant details, shifts each by the per-subject hour correction from
    the corrections spreadsheet, localises the resulting times to the
    project's timezone and passes everything to
    ``plot_combined_pixelgram_dublin``.

    Args:
        subject_id: Subject identifier; its first three characters are used
            as the key into ``project_mapping``.
        overwrite_pixelgram_if_already_exists: Forwarded to the plotting
            function as ``overwrite_if_already_exists``.
    """

    def shifted_datetime(day, clock, hours):
        # Put the wall-clock time onto the date, then apply the correction
        stamped = day.replace(hour=clock.hour,
                              minute=clock.minute,
                              second=clock.second)
        return stamped.to_pydatetime() + timedelta(hours=hours)

    download_respeck_and_personal_airspeck_data(subject_id,
                                                upload_type='manual')
    respeck_data = load_respeck_file(subject_id, upload_type='manual')
    airspeck_data = load_personal_airspeck_file(subject_id,
                                                upload_type='manual')

    # Correction factors for timezone; missing entries count as zero hours
    corrections = pd.read_excel(dublin_timezones_correction_filepath)
    corrections = corrections.replace(np.nan, 0).set_index('subject_id')

    details = load_dublin_participant_details().loc[subject_id]

    # Exposure period: falls back to the start date when no separate end
    # date was recorded
    exposure_shift = int(corrections.loc[subject_id, 'shs_times_difference'])
    exposure_end_day = details['end_date_of_exposure_to_shs']
    if pd.isnull(exposure_end_day):
        exposure_end_day = details['date_of_exposure_to_shs']

    start_exposure = shifted_datetime(details['date_of_exposure_to_shs'],
                                      details['start_of_exposure_time_to_shs'],
                                      exposure_shift)
    end_exposure = shifted_datetime(exposure_end_day,
                                    details['end_of_exposure_time_to_shs'],
                                    exposure_shift)

    # Recording period
    recording_shift = int(
        corrections.loc[subject_id, 'recording_times_difference'])
    start_recording = shifted_datetime(details['start_date_of_monitoring'],
                                       details['start_time_of_monitoring'],
                                       recording_shift)
    end_recording = shifted_datetime(details['end_date_of_monitoring'],
                                     details['end_time_of_monitoring'],
                                     recording_shift)

    # Look up timezone
    tz = timezone(project_mapping[subject_id[:3]][1])

    print("Creating pixelgram for subject {}".format(subject_id))

    exposure_period = [tz.localize(start_exposure), tz.localize(end_exposure)]
    recording_period = [
        tz.localize(start_recording),
        tz.localize(end_recording)
    ]

    plot_combined_pixelgram_dublin(
        subject_id,
        respeck_data,
        airspeck_data,
        exposure_period=exposure_period,
        recording_period=recording_period,
        overwrite_if_already_exists=overwrite_pixelgram_if_already_exists)
示例#5
0
def download_data_and_plot_combined_pixelgram(
        subject_id,
        timeframe=None,
        filter_out_not_worn_respeck=True,
        overwrite_pixelgram_if_already_exists=False,
        subject_visit_number=None,
        overwrite_data_if_already_exists=False,
        upload_type='automatic'):
    """Download a subject's data if needed and draw the combined pixelgram.

    Args:
        subject_id: Subject whose project is looked up via
            ``get_project_for_subject``.
        timeframe: Optional ``(start, end)`` pair. If the start is naive,
            both ends are localised to the project's timezone. The data is
            cut to this range before plotting.
        filter_out_not_worn_respeck: Forwarded to ``load_respeck_file`` as
            ``filter_out_not_worn``.
        overwrite_pixelgram_if_already_exists: When False and the target
            image already exists, the subject is skipped.
        subject_visit_number: Optional visit number; appended in brackets to
            the subject id in file names and messages.
        overwrite_data_if_already_exists: Forwarded to the download helper.
        upload_type: Forwarded to the download helper and both loaders.

    Returns:
        None. Returns early (after printing a message) when the pixelgram
        already exists or either data set is empty.
    """
    project_name = get_project_for_subject(subject_id)
    project_entry = project_mapping[project_name]

    has_visit = subject_visit_number is not None
    label_files = ("{}({})".format(subject_id, subject_visit_number)
                   if has_visit else "{}".format(subject_id))

    pixelgram_filepath = project_entry[3] + "{}_combined_pixelgram.png".format(
        label_files)

    # Nothing to do if the image is there and must not be replaced
    already_there = os.path.isfile(pixelgram_filepath)
    if already_there and not overwrite_pixelgram_if_already_exists:
        print("Pixelgram for subject {} already exists. Skipping subject.".
              format(label_files))
        return

    # Download data if not present
    download_respeck_and_personal_airspeck_data(
        subject_id,
        upload_type=upload_type,
        timeframe=timeframe,
        overwrite_if_already_exists=overwrite_data_if_already_exists,
        subject_visit_number=subject_visit_number)

    # Arguments shared by both loaders
    loader_kwargs = dict(project_name=project_name,
                         upload_type=upload_type,
                         subject_visit_number=subject_visit_number)
    respeck_data = load_respeck_file(
        subject_id,
        filter_out_not_worn=filter_out_not_worn_respeck,
        **loader_kwargs)
    airspeck_data = load_personal_airspeck_file(subject_id, **loader_kwargs)

    if len(respeck_data) == 0:
        print("RESpeck data for subject {} empty. Skipping subject.".format(
            label_files))
        return

    if len(airspeck_data) == 0:
        print("Airspeck data for subject {} empty. Skipping subject.".format(
            label_files))
        return

    if timeframe is not None:
        tz = timezone(project_entry[1])

        start_time, end_time = timeframe[0], timeframe[1]
        # Awareness of the start decides how both ends are treated
        if start_time.tzinfo is None:
            start_time = tz.localize(start_time)
            end_time = tz.localize(end_time)

        # Restrict both data sets to the requested window
        respeck_data = respeck_data[start_time:end_time]
        airspeck_data = airspeck_data[start_time:end_time]

    plot_combined_pixelgram(
        subject_id,
        respeck_data,
        airspeck_data,
        pixelgram_filepath,
        overwrite_if_already_exists=overwrite_pixelgram_if_already_exists,
        subject_visit_number=subject_visit_number)