Пример #1
0
def load_satellite(df_idx, SAT_IN_DIR):
    """Load per-building satellite predictions and aggregate them by locality.

    The predictions are read from a CSV file, joined to ``df_idx`` on the
    ``index`` column, and collapsed to one row per (``ent``, ``mun``,
    ``loc``) group.

    Args:
        df_idx: DataFrame with an ``index`` column used as the join key.
            The ``ent`` / ``mun`` / ``loc`` grouping columns must exist
            after the join (presumably they come from this frame).
        SAT_IN_DIR: path or buffer handed to ``pd.read_csv``. The file must
            provide ``index``, ``area``, ``R_mean``, ``G_mean`` and
            ``B_mean`` columns.

    Returns:
        DataFrame indexed by (``ent``, ``mun``, ``loc``) with columns
        ``sat_house`` (number of non-missing ``area`` values),
        ``sat_size_mean``, ``sat_size_sum`` (both rescaled),
        ``sat_lum_mean``, ``sat_size_sum_wins`` and ``sat_size_mean_wins``.
    """
    # read satellite predictions
    df_sat = pd.read_csv(SAT_IN_DIR)
    # link to locality identifier; a left join keeps every prediction row
    df_sat = pd.merge(df_sat, df_idx, how='left', on='index')
    # per-row brightness: unweighted mean of the three colour channel means
    df_sat['RGB_mean'] = df_sat[['R_mean', 'G_mean', 'B_mean']].mean(axis=1)

    # grouping into localities
    # String reducers replace np.nanmean / np.nansum: pandas already maps
    # those callables onto its own NaN-skipping mean / sum, so the numbers are
    # unchanged, while newer pandas releases emit a FutureWarning when the
    # NumPy functions are passed.
    df_sat = df_sat.groupby(['ent', 'mun', 'loc']).agg(
        sat_house=pd.NamedAgg(column='area', aggfunc='count'),
        sat_size_mean=pd.NamedAgg(column='area', aggfunc='mean'),
        sat_size_sum=pd.NamedAgg(column='area', aggfunc='sum'),
        sat_lum_mean=pd.NamedAgg(column='RGB_mean', aggfunc='mean'),
    )

    # scale areas / distances
    # NOTE(review): the constants look like a degrees-per-tile /
    # metres-per-degree / pixels-per-tile conversion with a cos(23 deg)
    # latitude correction -- confirm against the imagery specification.
    side_scale = 0.001716 * 111000 / 800
    area_scale = (side_scale**2) * np.cos(23 / 180 * np.pi)
    df_sat[['sat_size_mean', 'sat_size_sum']] *= area_scale  # in sq meters

    # winsorize; 0 and 99 are handed to the project's winsorize helper
    # (presumably lower / upper percentiles -- confirm against its definition)
    for col in ('sat_size_sum', 'sat_size_mean'):
        df_sat.loc[:, col + '_wins'] = winsorize(df_sat[col], 0, 99)
    return df_sat
Пример #2
0
        df.loc[(df['is_eligible'] == 1) &
               (df['in_treatment_group'] == 1), :].shape[0], '; control = ',
        df.loc[(df['is_eligible'] == 1) &
               (df['in_treatment_group'] == 0), :].shape[0], '; ineligible = ',
        df.loc[df['is_eligible'] == 0, :].shape[0])

    # Split the sample by eligibility flag. .copy() gives each subset its own
    # data so the column assignments below do not write into a slice of `df`.
    df_eligible = df.loc[df['is_eligible'] == 1, :].copy()
    df_ineligible = df.loc[df['is_eligible'] == 0, :].copy()
    # drop the name of the combined frame; only the subsets are used below
    del df

    # bounds handed to winsorize() (presumably percentiles -- confirm against
    # the winsorize definition)
    wins_lower_bound = 1
    wins_upper_bound = 99
    # winsorize survey based observations ('svy_' + name columns), separately
    # within the eligible and the ineligible subset
    for x in xs:
        df_eligible.loc[:, 'svy_' + x] = winsorize(df_eligible['svy_' + x],
                                                   wins_lower_bound,
                                                   wins_upper_bound)
        df_ineligible.loc[:, 'svy_' + x] = winsorize(df_ineligible['svy_' + x],
                                                     wins_lower_bound,
                                                     wins_upper_bound)
    # winsorize satellite based observations ('sat_' + name columns); note the
    # lower bound is the literal 0 here, not wins_lower_bound
    for y in ys:
        df_eligible.loc[:, 'sat_' + y] = winsorize(df_eligible['sat_' + y], 0,
                                                   wins_upper_bound)
        df_ineligible.loc[:, 'sat_' + y] = winsorize(df_ineligible['sat_' + y],
                                                     0, wins_upper_bound)

    # split the (already winsorized) eligible subset by treatment assignment;
    # these are plain .loc selections without .copy()
    df_control = df_eligible.loc[df_eligible['in_treatment_group'] == 0, :]
    df_treat = df_eligible.loc[df_eligible['in_treatment_group'] == 1, :]

    # ================================================================
Пример #3
0
import pandas as pd
import skimage.color
from sklearn.cluster import KMeans
from maskrcnn.postprocess.analysis import winsorize

# ---- configuration ----
IN_DIR = 'data/Siaya/Merged/sat_raw.csv'  # raw table to read
OUT_DIR = 'data/Siaya/Merged/sat.csv'  # processed table to write
K = 8  # number of clusters

# columns carried through to the output file, in output order
KEPT_COLUMNS = [
    'angle',
    'R_mean',
    'G_mean',
    'B_mean',
    'area',
    'centroid_lon',
    'centroid_lat',
]
# colour channels fed to the clustering step
COLOR_COLUMNS = ['R_mean', 'G_mean', 'B_mean']
# multiplier applied to the raw area values; the result is treated as sq meters
AREA_SCALE = (0.001716 * 111000 / 800)**2

# ---- load the raw table and apply the variable selection ----
df = pd.read_csv(IN_DIR).loc[:, KEPT_COLUMNS]

# ---- area: unit conversion, then winsorize to limit the pull of outliers ----
df.loc[:, 'area'] *= AREA_SCALE
df.loc[:, 'area'] = winsorize(df['area'], 1, 99)

# ---- colour grouping: k-means on the Lab version of the channel means ----
# NOTE(review): check that the scale of R_mean / G_mean / B_mean matches the
# input range rgb2lab expects.
rgb = df[COLOR_COLUMNS].values
lab = skimage.color.rgb2lab(rgb)
m = KMeans(n_clusters=K, random_state=0).fit(lab)
df.loc[:, 'color_group'] = m.labels_

# ---- write the result without the row index ----
df.to_csv(OUT_DIR, index=False)
Пример #4
0
    # directory name for the raw data behind the figures
    OUT_RAW_DATA_DIR = 'fig_raw_data'
    # sample selection
    SAMPLE_NAME = '2019Oct9'

    # load data: the index table is shared by the satellite and census loaders
    df_idx = load_index(IDX_IN_DIR, LOG_IN_DIR)
    df_sat = load_satellite(df_idx, SAT_IN_DIR)
    df_cen = load_census(df_idx, CEN_IN_DIR)
    # merge satellite and census; how='right' keeps every census row, so
    # localities without a satellite match get missing sat_* values
    df = pd.merge(df_sat, df_cen, how='right', on=['ent', 'mun', 'loc'])
    # treat a missing satellite match as zero houses / zero total size
    df.loc[:, 'sat_house'] = df['sat_house'].fillna(0)
    df.loc[:, 'sat_size_sum'] = df['sat_size_sum'].fillna(0)
    # compute per capita values
    # NOTE(review): rows with cen_pop == 0 would yield inf / NaN here --
    # confirm whether load_census can return such rows
    df.loc[:, 'sat_size_sum_pc'] = df['sat_size_sum'] / df['cen_pop']
    # winsorize (bounds 0 and 99 are passed to the winsorize helper)
    df.loc[:, 'sat_size_sum_pc_wins'] = winsorize(df['sat_size_sum_pc'], 0, 99)
    # load nightlight values
    df = load_nightlight_from_point(df, NL_IN_DIR)
    # give the nightlight column the same 'sat_' prefix as the other
    # satellite-derived columns
    df = df.rename({'nightlight': 'sat_nightlight'}, axis=1)
    # plotting begins: one row of two panels
    fig, axes = plt.subplots(ncols=2, figsize=(6, 3))
    plot_scatter(
        col_x_key='cen_pop',
        col_x_label='Census: Population Count',
        transform_x=lambda x: np.log10(x + 1),
        xlim=(np.log10(2 + 1), np.log10(3000 + 1)),
        xticks=[np.log10(10 + 1),
                np.log10(100 + 1),
                np.log10(1000 + 1)],
        xticklabels=[10, 100, 1000],
        col_y_key='sat_house',
Пример #5
0
    # attach building raster values to the census raster; the left join keeps
    # every census grid cell, matched on the grid coordinates
    df = pd.merge(df_raster_census,
                  df_raster_building,
                  how='left',
                  on=['grid_lon', 'grid_lat'])
    # load nightlight values, using the 'lon' / 'lat' columns as the points
    df = load_nightlight_from_point(df,
                                    IN_SAT_NIGHTLIGHT_DIR,
                                    lon_col='lon',
                                    lat_col='lat')

    # subset to eligible only (rows with a positive 'eligible' value)
    # NOTE(review): the subset is taken without .copy(), and columns are
    # assigned into it below; pandas may emit SettingWithCopyWarning here --
    # consider adding .copy()
    if args.eligible_only:
        df = df.loc[df['eligible'] > 0, :]
    # winsorize outcome variables (bounds 0 and 97.5 are passed to the
    # winsorize helper)
    for varname in ['nightlight', 'area_sum', 'tin_area_sum']:
        df.loc[:, varname] = winsorize(df[varname], 0, 97.5)
    # placebo runs
    # one iteration per placebo run; args.placebo is the number of runs
    for i_simu in range(args.placebo):

        # draw saturation level: each row of sat_grps is flagged hi_sat when
        # an independent uniform [0, 1) draw exceeds 0.5
        df_draw = sat_grps.copy()
        df_draw.loc[:, 'hi_sat'] = (np.random.random(sat_grps.shape[0]) > 0.5)
        # carry the drawn flag over to the villages (left join keeps every
        # village row)
        df_draw = pd.merge(villages, df_draw, how='left', on='satlevel_name')
        # draw treatment status: start from a uniform draw per row ...
        df_draw.loc[:, 'treat'] = np.random.random(df_draw.shape[0])
        # ... then turn it into 1.0 / 0.0. The draw must exceed 0.33 when
        # hi_sat is truthy and 0.67 otherwise, i.e. treatment is more likely
        # in high-saturation rows.
        # NOTE(review): a missing hi_sat (village with no match in the merge)
        # would be NaN, which is truthy and so takes the 0.33 threshold --
        # confirm every village has a match
        df_draw.loc[:, 'treat'] = df_draw.apply(
            lambda x: float(x['treat'] > (0.33 if x['hi_sat'] else 0.67)),
            axis=1)
        # merge with full dataset
        df_draw = pd.merge(df_census_placebo,
                           df_draw,