def get_home_locations_by_census_tract(siteid, state_code, county_code):
    """Return tract-level home-location data for one site.

    Joins the census-tract geographies for the given state/county with the
    site's per-tract visitation counts, then with the SVI demographics for
    that state (inner joins throughout, so only tracts present in all three
    sources are returned).
    """
    tracts = get_from_data_source('CENSUS_TRACT_DF')
    in_county = ((tracts['state_code'] == state_code)
                 & (tracts['county_code'] == county_code))
    tracts = tracts[in_county]

    homes = get_from_data_source('HOME_LOCATIONS_DF')
    for_site = ((homes['siteid'] == siteid)
                & (homes['state_code'] == state_code)
                & (homes['county_code'] == county_code))
    site_homes = homes.loc[for_site, ['tract', 'visit_days', 'visitors_unq']]

    merged = tracts.merge(site_homes, on='tract', how='inner')

    # SVI rows carry their own state_code; drop it after filtering so the
    # merge does not produce duplicate state_code columns.
    svi = get_from_data_source('SVI_DF')
    svi = svi[svi['state_code'] == state_code].drop(columns=['state_code'])
    return merged.merge(svi, on='tract', how='inner')
def get_project_home_locations_by_census_tract(project, state_code, county_code):
    """Return tract-level home-location data aggregated over a project's sites.

    Sums visit_days and visitors_unq per tract across every site in the
    project, then joins with census-tract geographies and SVI demographics
    for the given state/county (inner joins).
    """
    census_tract_geographies = get_from_data_source('CENSUS_TRACT_DF')
    census_tract_geographies = census_tract_geographies[
        (census_tract_geographies['state_code'] == state_code)
        & (census_tract_geographies['county_code'] == county_code)]

    project_sites = get_project_sites(project)
    project_site_ids = project_sites['siteid'].drop_duplicates()

    home_locations = get_from_data_source('HOME_LOCATIONS_DF')
    project_home_locations = home_locations[
        (home_locations['siteid'].isin(project_site_ids))
        & (home_locations['state_code'] == state_code)
        & (home_locations['county_code'] == county_code)]

    # For each tract, sum visit days and unique visitors across sites.
    # FIX: selecting groupby columns with a bare tuple
    # (['visit_days', 'visitors_unq'] without the outer list) was deprecated
    # in pandas 0.25 and removed in later versions; a list selection is the
    # supported form and returns the same DataFrame.
    project_home_locations = project_home_locations.groupby(
        by=['tract'], as_index=False)[['visit_days', 'visitors_unq']].sum()

    project_home_census_data = census_tract_geographies.merge(
        project_home_locations, on='tract', how='inner')

    # Drop SVI's state_code after filtering to avoid duplicate columns in
    # the merge result.
    svi_df = get_from_data_source('SVI_DF')
    svi_df = svi_df[svi_df['state_code'] == state_code]
    svi_df = svi_df.drop(columns=['state_code'])
    project_home_census_data = project_home_census_data.merge(svi_df,
                                                              on='tract',
                                                              how='inner')
    return project_home_census_data
def get_home_locations_by_state(siteid):
    """Aggregate one site's home locations to state level and join them
    with state boundary geographies (inner join)."""
    boundaries = get_from_data_source('STATE_BOUNDARIES_DF')
    homes = get_from_data_source('HOME_LOCATIONS_DF')

    # Sum visit_days / visitors_unq over all rows of this site per state.
    per_state = (
        homes[homes['siteid'] == siteid]
        [['state_code', 'state', 'visit_days', 'visitors_unq']]
        .groupby(by=['state_code', 'state'], as_index=False)
        .sum())

    return boundaries.merge(per_state,
                            on=['state_code', 'state'],
                            how='inner')
def get_demographic_summary(siteid):
    """Join one site's home-location tracts with SVI demographic data.

    SVI rows are restricted to the census-tract states configured for the
    site's project (app_config.CENSUS_TRACT_STATES) before the join.
    """
    svi = get_from_data_source('SVI_DF')
    homes = get_from_data_source('HOME_LOCATIONS_DF')

    project = get_project_from_site(siteid)
    allowed_states = app_config.CENSUS_TRACT_STATES[project]
    # Drop state_code after filtering so the merge yields no duplicate column.
    svi = svi[svi['state_code'].isin(allowed_states)].drop(
        columns=['state_code'])

    site_homes = homes[homes['siteid'] == siteid]
    return site_homes.merge(svi, on='tract', how='inner')
def _get_project_estimates(project, period):
    """Aggregate monthly visitation estimates across a project's sites.

    period == 'monthly': per site/month mean across years, then summed over
    sites so each calendar month gets one row.
    period == 'annual': summed per year over all sites and months.
    Any other period leaves the per-site monthly rows unaggregated.
    """
    site_ids = get_project_sites(project)['siteid'].drop_duplicates()
    monthly = get_from_data_source('MONTHLY_VISITATION_DF')
    data = monthly[monthly['trail'].isin(site_ids)]

    if period == 'monthly':
        # Average each site's months across years first, then sum the sites.
        data = data.drop(columns='year').groupby(by=['trail', 'month'],
                                                 as_index=False).mean()
        data = data.drop(columns='trail').groupby(by=['month']).sum()
    elif period == 'annual':
        data = data.drop(columns=['trail', 'month']).groupby(by=['year']).sum()

    data = data[['estimate', 'log_estimate', 'flickr', 'twitter', 'instag',
                 'wta', 'alltrails', 'onsite', 'log_onsite', 'data_days']]

    # Recompute log columns from the aggregated totals: a sum (or mean) of
    # logs is not the log of the aggregated value.
    data['log_estimate'] = np.log(data['estimate'] + 1)
    data['log_onsite'] = np.log(data['onsite'] + 1)

    # Promote the groupby key (month or year) back to a regular column.
    data.reset_index(drop=False, inplace=True)
    return data
def get_project_readme(readme_type, project=None):
    """Fetch cached README text.

    'VISITS' looks up "<project>_VISITS", 'INFO' looks up the project key
    itself, and any other readme_type is used directly as the cache key.
    """
    cache = get_from_data_source('PROJECT_README')
    if readme_type == 'VISITS':
        key = project + '_VISITS'
    elif readme_type == 'INFO':
        key = project
    else:
        key = readme_type
    return cache[key]
def get_project_home_locations_by_state(project):
    """Aggregate a project's home locations to state level and join them
    with state boundary geographies (inner join)."""
    boundaries = get_from_data_source('STATE_BOUNDARIES_DF')
    homes = get_from_data_source('HOME_LOCATIONS_DF')

    site_ids = get_project_sites(project)['siteid'].drop_duplicates()
    cols = ['state_code', 'state', 'visit_days', 'visitors_unq']
    per_state = (
        homes[homes['siteid'].isin(site_ids)][cols]
        .groupby(by=['state_code', 'state'], as_index=False)
        .sum())

    return boundaries.merge(per_state,
                            on=['state_code', 'state'],
                            how='inner')
def _get_project_visitation_data(project, period):
    """Sum visitation across a project's sites.

    'monthly' aggregates the monthly table by year/month; anything else
    aggregates the weekly table by year/month/week. log_estimate is
    recomputed from the summed estimate.
    """
    site_ids = get_project_sites(project)['siteid'].drop_duplicates()

    if period == 'monthly':
        source, keys = 'MONTHLY_VISITATION_DF', ['year', 'month']
    else:
        source, keys = 'WEEKLY_VISITATION_DF', ['year', 'month', 'week']

    df = get_from_data_source(source)
    data = df[df['trail'].isin(site_ids)].drop('trail', axis=1)
    data = data.groupby(keys, as_index=False).sum()

    # Summed logs are meaningless; derive the log from the summed estimate.
    data['log_estimate'] = np.log(data['estimate'] + 1)
    return data
def get_home_locations_by_county(siteid, state_code):
    """Aggregate one site's home locations to county level within a state
    and join them with county geographies (inner join)."""
    counties = get_from_data_source('COUNTIES_DF')
    counties = counties[counties['state_code'] == state_code]

    homes = get_from_data_source('HOME_LOCATIONS_DF')
    for_site = ((homes['siteid'] == siteid)
                & (homes['state_code'] == state_code))
    per_county = (
        homes.loc[for_site,
                  ['county_code', 'county', 'visit_days', 'visitors_unq']]
        .groupby(by=['county_code', 'county'], as_index=False)
        .sum())

    return counties.merge(per_county,
                          on=['county_code', 'county'],
                          how='inner')
def get_project_demographic_summary(project):
    """Join a project's per-tract home-location totals with SVI data.

    Home locations of all project sites are summed per tract, and SVI rows
    are restricted to the project's configured census-tract states.
    """
    svi = get_from_data_source('SVI_DF')
    homes = get_from_data_source('HOME_LOCATIONS_DF')

    site_ids = get_project_sites(project)['siteid'].drop_duplicates()
    tract_totals = (
        homes[homes['siteid'].isin(site_ids)]
        .groupby(by=['tract'], as_index=False)
        .sum())

    allowed_states = app_config.CENSUS_TRACT_STATES[project]
    # Drop state_code after filtering to avoid a duplicate merge column.
    svi = svi[svi['state_code'].isin(allowed_states)].drop(
        columns=['state_code'])

    return tract_totals.merge(svi, on='tract', how='inner')
def get_project_last_year_estimates(project, year=2018):
    """Return each project site's total visitation estimate for one year.

    Parameters
    ----------
    project : project code used to look up the project's sites.
    year : calendar year to total. Defaults to 2018, the hard-coded value
        the original implementation used, so existing callers are unchanged.

    Returns a DataFrame with one row per site ('trail') and its summed
    'estimate' for that year.
    """
    project_sites = get_project_sites(project)
    project_site_ids = project_sites['siteid'].drop_duplicates()
    df = get_from_data_source('MONTHLY_VISITATION_DF')
    project_sites_data = df[df['trail'].isin(project_site_ids)]
    project_sites_data = project_sites_data[project_sites_data['year'] ==
                                            year]
    project_sites_data = project_sites_data.groupby(by=['trail'],
                                                    as_index=False).sum()
    project_sites_data = project_sites_data[['trail', 'estimate']]
    return project_sites_data
def _get_estimates(siteid, period):
    """Aggregate one site's monthly visitation estimates.

    'monthly' returns per-month means across years; any other period
    returns per-year sums. The groupby key is restored as a column before
    returning.
    """
    monthly = get_from_data_source('MONTHLY_VISITATION_DF')
    data = monthly[monthly['trail'] == siteid]

    if period == 'monthly':
        data = data.groupby(by=['month']).mean()
    else:
        data = data.groupby(by=['year']).sum()

    keep = ['estimate', 'log_estimate', 'flickr', 'twitter', 'instag',
            'wta', 'alltrails', 'ebird', 'onsite', 'log_onsite', 'data_days']
    data = data[keep]
    data.reset_index(inplace=True)
    return data
def get_project_home_locations_by_county(project, state_code):
    """Aggregate a project's home locations to county level within a state
    and join them with county geographies (inner join)."""
    counties = get_from_data_source('COUNTIES_DF')
    counties = counties[counties['state_code'] == state_code]

    homes = get_from_data_source('HOME_LOCATIONS_DF')
    site_ids = get_project_sites(project)['siteid'].drop_duplicates()
    in_scope = (homes['siteid'].isin(site_ids)
                & (homes['state_code'] == state_code))
    per_county = (
        homes.loc[in_scope,
                  ['county_code', 'county', 'visit_days', 'visitors_unq']]
        .groupby(by=['county_code', 'county'], as_index=False)
        .sum())

    return counties.merge(per_county,
                          on=['county_code', 'county'],
                          how='inner')
def get_project_home_locations(project):
    """Return a project's home locations aggregated by country/state/county,
    with 'International' totals appended, as a treefied structure.

    International rows have no state/county values, so the three-column
    groupby below drops them (NaN group keys are excluded); they are
    aggregated separately by country and re-attached afterwards.
    """
    # Local import: pandas is only needed here for concat, and the file's
    # top-of-file import block is outside this view.
    import pandas as pd

    home_locations = get_from_data_source('HOME_LOCATIONS_DF')
    project_sites = get_project_sites(project)
    project_site_ids = project_sites['siteid'].drop_duplicates()
    project_home_locations = home_locations[home_locations['siteid'].isin(
        project_site_ids)]

    international_visits = project_home_locations[
        project_home_locations['country'] == 'International']
    international_visits = international_visits.groupby(['country'],
                                                        as_index=False).sum()

    project_home_locations = project_home_locations.groupby(
        by=['country', 'state', 'county'], as_index=False).sum()

    # FIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # pandas 2.0; pd.concat with ignore_index/sort is the supported
    # equivalent and produces the same result.
    project_home_locations = pd.concat(
        [project_home_locations, international_visits],
        ignore_index=True,
        sort=False)

    return _treefy_home_locations(project, project_home_locations)
def get_project_sites(project_group):
    """Return the ALLSITES rows whose project code contains project_group.

    NOTE(review): Series.str.contains interprets project_group as a regular
    expression — confirm project codes never contain regex metacharacters.
    """
    allsites = get_from_data_source('ALLSITES_DF')
    matches = allsites['Prjct_code'].str.contains(project_group)
    return allsites[matches]
def get_monthly_visitation(siteid):
    """Return the monthly visitation rows for one site ('trail')."""
    monthly = get_from_data_source('MONTHLY_VISITATION_DF')
    return monthly[monthly['trail'] == siteid]
def get_project_from_site(siteid):
    """Return the project code ('Prjct_code') of the first ALLSITES row
    whose siteid matches. Raises if no row matches."""
    allsites = get_from_data_source('ALLSITES_DF')
    matching = allsites[allsites['siteid'] == siteid]
    return matching['Prjct_code'].iat[0]
def get_home_locations(siteid):
    """Return one site's home locations as a treefied structure."""
    homes = get_from_data_source('HOME_LOCATIONS_DF')
    selected = homes[homes['siteid'] == siteid]
    return _treefy_home_locations(siteid, selected)
def get_weekly_visitation(siteid):
    """Return the weekly visitation rows for one site ('trail')."""
    weekly = get_from_data_source('WEEKLY_VISITATION_DF')
    return weekly[weekly['trail'] == siteid]