def get_households_profile(geo, session): # head of household # gender head_gender_dist, total_households = get_stat_data( ['gender of household head'], geo, session, order_by='gender of household head') female_heads = head_gender_dist['Female']['numerators']['this'] # age db_model_u18 = get_model_from_fields( ['gender of head of household'], geo.geo_level, table_name='genderofheadofhouseholdunder18' ) objects = get_objects_by_geo(db_model_u18, geo, session) total_under_18 = float(sum(o[0] for o in objects)) # type of dwelling type_of_dwelling_dist, _ = get_stat_data( ['type of dwelling'], geo, session, recode=TYPE_OF_DWELLING_RECODE, order_by='-total') informal = type_of_dwelling_dist['Shack']['numerators']['this'] _, total_ecd_children = get_stat_data( ['age in completed years'], geo, session, table_name='ageincompletedyears', only=['0', '1', '2', '3', '4', '5']) ecd_children_per_household = ratio(total_ecd_children, total_households) return { 'total_households': { 'name': 'Households', 'values': {'this': total_households}, }, 'type_of_dwelling_distribution': type_of_dwelling_dist, 'informal': { 'name': 'Households that are informal dwellings (shacks)', 'values': {'this': percent(informal, total_households)}, 'numerators': {'this': informal}, }, 'head_of_household': { 'gender_distribution': head_gender_dist, 'female': { 'name': 'Households with women as their head', 'values': {'this': percent(female_heads, total_households)}, 'numerators': {'this': female_heads}, }, 'under_18': { 'name': 'Households with heads under 18 years old', 'values': {'this': total_under_18}, } }, 'ecd_children_per_household': { 'name': 'Average number of children (aged 0-5) in each household', 'values': {'this': ecd_children_per_household}, }, }
def get_education_profile(geo, session): db_model = get_model_from_fields(['highest educational level'], geo.geo_level, table_name='highesteducationallevel20') objects = get_objects_by_geo(db_model, geo, session) edu_dist_data = {} get_or_higher = 0.0 fet_or_higher = 0.0 total = 0.0 for i, obj in enumerate(objects): category_val = getattr(obj, 'highest educational level') # increment counters total += obj.total if category_val in EDUCATION_GET_OR_HIGHER: get_or_higher += obj.total if category_val in EDUCATION_FET_OR_HIGHER: fet_or_higher += obj.total # add data points for category edu_dist_data[str(i)] = { "name": category_val, "numerators": {"this": obj.total}, } edu_dist_data = collapse_categories(edu_dist_data, COLLAPSED_EDUCATION_CATEGORIES, key_order=EDUCATION_KEY_ORDER) edu_split_data = { 'percent_get_or_higher': { "name": "Completed Grade 9 or higher", "numerators": {"this": get_or_higher}, }, 'percent_fet_or_higher': { "name": "Completed Matric or higher", "numerators": {"this": fet_or_higher}, } } # calculate percentages for data in (edu_dist_data, edu_split_data): for fields in data.values(): fields["values"] = {"this": round(fields["numerators"]["this"] / total * 100, 2)} edu_dist_data['metadata'] = {'universe': 'Invididuals aged 20 and older'} edu_split_data['metadata'] = {'universe': 'Invididuals aged 20 and older'} add_metadata(edu_dist_data, db_model) return {'educational_attainment_distribution': edu_dist_data, 'educational_attainment': edu_split_data}
def get_education_profile(geo_code, geo_level, session): db_model = get_model_from_fields(['highest educational level'], geo_level, table_name='highesteducationallevel20') objects = get_objects_by_geo(db_model, geo_code, geo_level, session) edu_dist_data = {} get_or_higher = 0.0 fet_or_higher = 0.0 total = 0.0 for i, obj in enumerate(objects): category_val = getattr(obj, 'highest educational level') # increment counters total += obj.total if category_val in EDUCATION_GET_OR_HIGHER: get_or_higher += obj.total if category_val in EDUCATION_FET_OR_HIGHER: fet_or_higher += obj.total # add data points for category edu_dist_data[str(i)] = { "name": category_val, "numerators": {"this": obj.total}, } edu_dist_data = collapse_categories(edu_dist_data, COLLAPSED_EDUCATION_CATEGORIES, key_order=EDUCATION_KEY_ORDER) edu_split_data = { 'percent_get_or_higher': { "name": "Completed Grade 9 or higher", "numerators": {"this": get_or_higher}, }, 'percent_fet_or_higher': { "name": "Completed Matric or higher", "numerators": {"this": fet_or_higher}, } } # calculate percentages for data in (edu_dist_data, edu_split_data): for fields in data.values(): fields["values"] = {"this": round(fields["numerators"]["this"] / total * 100, 2)} edu_dist_data['metadata'] = {'universe': 'Invididuals aged 20 and older'} edu_split_data['metadata'] = {'universe': 'Invididuals aged 20 and older'} add_metadata(edu_dist_data, db_model) return {'educational_attainment_distribution': edu_dist_data, 'educational_attainment': edu_split_data}
def get_service_delivery_profile(geo, session): # water source water_src_data, total_wsrc = get_stat_data( ['source of water'], geo, session, recode=SHORT_WATER_SOURCE_CATEGORIES, order_by='-total') if 'Service provider' in water_src_data: total_water_sp = water_src_data['Service provider']['numerators'][ 'this'] else: total_water_sp = 0.0 # electricity elec_attrs = [ 'electricity for cooking', 'electricity for heating', 'electricity for lighting' ] db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level) objects = get_objects_by_geo(db_model_elec, geo, session) total_elec = 0.0 total_some_elec = 0.0 elec_access_data = { 'total_all_elec': { "name": "Have electricity for everything", "numerators": { "this": 0.0 }, }, 'total_some_not_all_elec': { "name": "Have electricity for some things", "numerators": { "this": 0.0 }, }, 'total_no_elec': { "name": "No electricity", "numerators": { "this": 0.0 }, } } for obj in objects: total_elec += obj.total has_some = False has_all = True for attr in elec_attrs: val = not getattr(obj, attr).startswith('no ') has_all = has_all and val has_some = has_some or val if has_some: total_some_elec += obj.total if has_all: elec_access_data['total_all_elec']['numerators'][ 'this'] += obj.total elif has_some: elec_access_data['total_some_not_all_elec']['numerators'][ 'this'] += obj.total else: elec_access_data['total_no_elec']['numerators'][ 'this'] += obj.total for data, total in zip((elec_access_data, ), (total_elec, )): for fields in data.values(): fields["values"] = { "this": percent(fields["numerators"]["this"], total) } add_metadata(elec_access_data, db_model_elec) # toilets toilet_data, total_toilet = get_stat_data( ['toilet facilities'], geo, session, exclude_zero=True, recode=COLLAPSED_TOILET_CATEGORIES, order_by='-total') total_flush_toilet = 0.0 total_no_toilet = 0.0 for key, data in toilet_data.iteritems(): if key.startswith('Flush') or key.startswith('Chemical'): total_flush_toilet += data['numerators']['this'] if key == 'None': total_no_toilet += data['numerators']['this'] return { 'water_source_distribution': water_src_data, 'percentage_water_from_service_provider': { "name": "Are getting water from a regional or local service provider", "numerators": { "this": total_water_sp }, "values": { "this": percent(total_water_sp, total_wsrc) }, }, 'percentage_electricity_access': { "name": "Have electricity for at least one of cooking, heating or lighting", "numerators": { "this": total_some_elec }, "values": { "this": percent(total_some_elec, total_elec) }, }, 'electricity_access_distribution': elec_access_data, 'percentage_flush_toilet_access': { "name": "Have access to flush or chemical toilets", "numerators": { "this": total_flush_toilet }, "values": { "this": percent(total_flush_toilet, total_toilet) }, }, 'percentage_no_toilet_access': { "name": "Have no access to any toilets", "numerators": { "this": total_no_toilet }, "values": { "this": percent(total_no_toilet, total_toilet) }, }, 'toilet_facilities_distribution': toilet_data, }
def get_households_profile(geo, session): # head of household # gender head_gender_dist, total_households = get_stat_data( ['gender of household head'], geo, session, order_by='gender of household head') female_heads = head_gender_dist['Female']['numerators']['this'] # age db_model_u18 = get_model_from_fields( ['gender of head of household'], geo.geo_level, table_name='genderofheadofhouseholdunder18') objects = get_objects_by_geo(db_model_u18, geo, session) total_under_18 = float(sum(o[0] for o in objects)) # type of dwelling type_of_dwelling_dist, _ = get_stat_data(['type of dwelling'], geo, session, recode=TYPE_OF_DWELLING_RECODE, order_by='-total') informal = type_of_dwelling_dist['Shack']['numerators']['this'] _, total_ecd_children = get_stat_data(['age in completed years'], geo, session, table_name='ageincompletedyears', only=['0', '1', '2', '3', '4', '5']) ecd_children_per_household = ratio(total_ecd_children, total_households) return { 'total_households': { 'name': 'Households', 'values': { 'this': total_households }, }, 'type_of_dwelling_distribution': type_of_dwelling_dist, 'informal': { 'name': 'Households that are informal dwellings (shacks)', 'values': { 'this': percent(informal, total_households) }, 'numerators': { 'this': informal }, }, 'head_of_household': { 'gender_distribution': head_gender_dist, 'female': { 'name': 'Households with women as their head', 'values': { 'this': percent(female_heads, total_households) }, 'numerators': { 'this': female_heads }, }, 'under_18': { 'name': 'Households with heads under 18 years old', 'values': { 'this': total_under_18 }, } }, 'ecd_children_per_household': { 'name': 'Average number of children (aged 0-5) in each household', 'values': { 'this': ecd_children_per_household }, }, }
def get_demographics_profile(geo_code, geo_level, session): # sex sex_dist_data, total_pop = get_stat_data( 'sex', geo_level, geo_code, session, table_fields=['age in completed years', 'sex', 'rural or urban']) # urban/rural by sex urban_dist_data, _ = get_stat_data( ['rural or urban', 'sex'], geo_level, geo_code, session, table_fields=['age in completed years', 'sex', 'rural or urban']) total_urbanised = 0 for data in urban_dist_data['Urban'].itervalues(): if 'numerators' in data: total_urbanised += data['numerators']['this'] # median age db_model_age = get_model_from_fields(['age in completed years', 'sex', 'rural or urban'], geo_level) objects = get_objects_by_geo(db_model_age, geo_code, geo_level, session, ['age in completed years']) objects = sorted((o for o in objects if getattr(o, 'age in completed years') != 'unspecified'), key=lambda x: int(getattr(x, 'age in completed years').replace('+', ''))) median = calculate_median(objects, 'age in completed years') # age in 10 year groups def age_recode(f, x): age = int(x.replace('+', '')) if age >= 80: return '80+' bucket = 10 * (age / 10) return '%d-%d' % (bucket, bucket + 9) age_dist_data, _ = get_stat_data( 'age in completed years', geo_level, geo_code, session, table_fields=['age in completed years', 'sex', 'rural or urban'], recode=age_recode, exclude=['unspecified']) # age category def age_cat_recode(f, x): age = int(x.replace('+', '')) if age < 18: return 'Under 18' elif age >= 65: return '65 and over' else: return '18 to 64' age_cats, _ = get_stat_data( 'age in completed years', geo_level, geo_code, session, table_fields=['age in completed years', 'sex', 'rural or urban'], recode=age_cat_recode, exclude=['unspecified']) final_data = { 'sex_ratio': sex_dist_data, 'urban_distribution': urban_dist_data, 'urbanised': { 'name': 'In urban areas', 'numerators': {'this': total_urbanised}, 'values': {'this': round(total_urbanised / total_pop * 100, 2)} }, 'age_group_distribution': age_dist_data, 'age_category_distribution': age_cats, 'median_age': { "name": "Median age", "values": {"this": median}, }, 'total_population': { "name": "People", "values": {"this": total_pop} }} return final_data
def get_service_delivery_profile(geo, session): # water source water_src_data, total_wsrc = get_stat_data( ['source of water'], geo, session, recode=SHORT_WATER_SOURCE_CATEGORIES, order_by='-total') if 'Service provider' in water_src_data: total_water_sp = water_src_data['Service provider']['numerators']['this'] else: total_water_sp = 0.0 # electricity elec_attrs = ['electricity for cooking', 'electricity for heating', 'electricity for lighting'] db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level) objects = get_objects_by_geo(db_model_elec, geo, session) total_elec = 0.0 total_some_elec = 0.0 elec_access_data = { 'total_all_elec': { "name": "Have electricity for everything", "numerators": {"this": 0.0}, }, 'total_some_not_all_elec': { "name": "Have electricity for some things", "numerators": {"this": 0.0}, }, 'total_no_elec': { "name": "No electricity", "numerators": {"this": 0.0}, } } for obj in objects: total_elec += obj.total has_some = False has_all = True for attr in elec_attrs: val = not getattr(obj, attr).startswith('no ') has_all = has_all and val has_some = has_some or val if has_some: total_some_elec += obj.total if has_all: elec_access_data['total_all_elec']['numerators']['this'] += obj.total elif has_some: elec_access_data['total_some_not_all_elec']['numerators']['this'] += obj.total else: elec_access_data['total_no_elec']['numerators']['this'] += obj.total for data, total in zip((elec_access_data,), (total_elec,)): for fields in data.values(): fields["values"] = {"this": percent(fields["numerators"]["this"], total)} add_metadata(elec_access_data, db_model_elec) # toilets toilet_data, total_toilet = get_stat_data( ['toilet facilities'], geo, session, exclude_zero=True, recode=COLLAPSED_TOILET_CATEGORIES, order_by='-total') total_flush_toilet = 0.0 total_no_toilet = 0.0 for key, data in toilet_data.iteritems(): if key.startswith('Flush') or key.startswith('Chemical'): total_flush_toilet += data['numerators']['this'] if key == 'None': total_no_toilet += data['numerators']['this'] return { 'water_source_distribution': water_src_data, 'percentage_water_from_service_provider': { "name": "Are getting water from a regional or local service provider", "numerators": {"this": total_water_sp}, "values": {"this": percent(total_water_sp, total_wsrc)}, }, 'percentage_electricity_access': { "name": "Have electricity for at least one of cooking, heating or lighting", "numerators": {"this": total_some_elec}, "values": {"this": percent(total_some_elec, total_elec)}, }, 'electricity_access_distribution': elec_access_data, 'percentage_flush_toilet_access': { "name": "Have access to flush or chemical toilets", "numerators": {"this": total_flush_toilet}, "values": {"this": percent(total_flush_toilet, total_toilet)}, }, 'percentage_no_toilet_access': { "name": "Have no access to any toilets", "numerators": {"this": total_no_toilet}, "values": {"this": percent(total_no_toilet, total_toilet)}, }, 'toilet_facilities_distribution': toilet_data, }
def get_schools_profile(geo, session): print geo.geo_level # ownership status schools_dist, total_schools = get_stat_data(['ownership'], geo, session) # region status region_dist, total_schools = get_stat_data(['region'], geo, session) # Choosing sorting option #Sorting will only be done using national_rank all, as regional and district ranks are unknown for some result esp historical rank_column = Base.metadata.tables['secondary_school'].c.national_rank_all # Getting top for schools with more than 40 students top_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\ .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\ .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\ .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "yes")\ .order_by(asc(cast(rank_column, Integer)))\ .all() # Getting top for schools with less than 40 students top_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\ .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\ .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\ .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "no")\ .order_by(asc(cast(rank_column, Integer)))\ .all() # Getting lowest schools with more than 40 students lowest_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\ .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\ .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\ .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "yes")\ .order_by(desc(cast(rank_column, Integer)))\ .all() # Getting lowest for schools with less than 40 students lowest_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\ .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\ .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\ .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "no")\ .order_by(desc(cast(rank_column, Integer)))\ .all() # median gpa db_model_age = get_model_from_fields(['code', 'name', 'avg_gpa'], geo.geo_level) objects = get_objects_by_geo(db_model_age, geo, session, ['avg_gpa']) median = calculate_median(objects, 'avg_gpa') # gpa in 1 point groups def gpa_recode(f, x): gpa = x if gpa >= 4: return '4+' bucket = 1 * (gpa / 1) return '%d-%d' % (bucket, bucket + 2) gpa_dist_data, _ = get_stat_data( 'avg_gpa', geo, session, table_fields=['code', 'name', 'avg_gpa'], recode=gpa_recode, exclude=['unspecified']) total_private = 0.0 for data in schools_dist['PRIVATE'].itervalues(): if 'numerators' in data: total_private += data['numerators']['this'] return { 'schools_distribution': schools_dist, 'region_distribution': region_dist, 'top_schools': top_schools, 'lowest_schools': lowest_schools, 'gpa_group_distribution': gpa_dist_data, 'median_gpa': { "name": "Median GPA", "values": {"this": median}, }, }
def get_service_delivery_profile(geo, session): # water source water_src_data, total_wsrc = get_stat_data( ['source of water'], geo, session, recode=SHORT_WATER_SOURCE_CATEGORIES, order_by='-total') if 'Service provider' in water_src_data: total_water_sp = water_src_data['Service provider']['numerators']['this'] else: total_water_sp = 0.0 # refuse disposal db_model_ref = get_model_from_fields(['refuse disposal'], geo.geo_level) objects = get_objects_by_geo(db_model_ref, geo, session, order_by='-total') refuse_disp_data = OrderedDict() total_ref = 0.0 total_ref_sp = 0.0 for obj in objects: attr = getattr(obj, 'refuse disposal') disp = SHORT_REFUSE_DISPOSAL_CATEGORIES[attr] refuse_disp_data[disp] = { "name": disp, "numerators": {"this": obj.total}, } total_ref += obj.total if attr.startswith('Removed by local authority'): total_ref_sp += obj.total set_percent_values(refuse_disp_data, total_ref) add_metadata(refuse_disp_data, db_model_ref) # electricity if geo.version == '2011': elec_attrs = ['electricity for cooking', 'electricity for heating', 'electricity for lighting'] db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level) objects = get_objects_by_geo(db_model_elec, geo, session) total_elec = 0.0 total_some_elec = 0.0 elec_access_data = { 'total_all_elec': { "name": "Have electricity for everything", "numerators": {"this": 0.0}, }, 'total_some_not_all_elec': { "name": "Have electricity for some things", "numerators": {"this": 0.0}, }, 'total_no_elec': { "name": "No electricity", "numerators": {"this": 0.0}, } } for obj in objects: total_elec += obj.total has_some = False has_all = True for attr in elec_attrs: val = not getattr(obj, attr).startswith('no ') has_all = has_all and val has_some = has_some or val if has_some: total_some_elec += obj.total if has_all: elec_access_data['total_all_elec']['numerators']['this'] += obj.total elif has_some: elec_access_data['total_some_not_all_elec']['numerators']['this'] += obj.total else: elec_access_data['total_no_elec']['numerators']['this'] += obj.total set_percent_values(elec_access_data, total_elec) add_metadata(elec_access_data, db_model_elec) # toilets toilet_data, total_toilet = get_stat_data( ['toilet facilities'], geo, session, exclude_zero=True, recode=COLLAPSED_TOILET_CATEGORIES, order_by='-total') total_flush_toilet = 0.0 total_no_toilet = 0.0 for key, data in toilet_data.iteritems(): if key.startswith('Flush') or key.startswith('Chemical'): total_flush_toilet += data['numerators']['this'] if key == 'None': total_no_toilet += data['numerators']['this'] profile = { 'water_source_distribution': water_src_data, 'percentage_water_from_service_provider': { "name": "Are getting water from a regional or local service provider", "numerators": {"this": total_water_sp}, "values": {"this": percent(total_water_sp, total_wsrc)}, }, 'refuse_disposal_distribution': refuse_disp_data, 'percentage_ref_disp_from_service_provider': { "name": "Are getting refuse disposal from a local authority or private company", "numerators": {"this": total_ref_sp}, "values": {"this": percent(total_ref_sp, total_ref)}, }, 'percentage_flush_toilet_access': { "name": "Have access to flush or chemical toilets", "numerators": {"this": total_flush_toilet}, "values": {"this": percent(total_flush_toilet, total_toilet)}, }, 'percentage_no_toilet_access': { "name": "Have no access to any toilets", "numerators": {"this": total_no_toilet}, "values": {"this": percent(total_no_toilet, total_toilet)}, }, 'toilet_facilities_distribution': toilet_data, } if geo.version == '2011': profile.update({ 'percentage_electricity_access': { "name": "Have electricity for at least one of cooking, heating or lighting", "numerators": {"this": total_some_elec}, "values": {"this": percent(total_some_elec, total_elec)}, }, 'electricity_access_distribution': elec_access_data, }) return profile
def get_households_profile(geo, session): # head of household # gender head_gender_dist, total_households = get_stat_data( ['gender of household head'], geo, session, order_by='gender of household head') female_heads = head_gender_dist['Female']['numerators']['this'] # age db_model_u18 = get_model_from_fields( ['gender of head of household'], geo.geo_level, table_name='genderofheadofhouseholdunder18' ) objects = get_objects_by_geo(db_model_u18, geo, session) total_under_18 = float(sum(o[0] for o in objects)) # tenure tenure_data, _ = get_stat_data( ['tenure status'], geo, session, recode=HOUSEHOLD_OWNERSHIP_RECODE, order_by='tenure status') owned = 0 for key, data in tenure_data.iteritems(): if key.startswith('Owned'): owned += data['numerators']['this'] # annual household income if geo.version == '2011': HOUSEHOLD_INCOME_RECODE = HOUSEHOLD_INCOME_RECODE_2011 else: HOUSEHOLD_INCOME_RECODE = COLLAPSED_ANNUAL_INCOME_CATEGORIES income_dist_data, _ = get_stat_data( ['annual household income'], geo, session, exclude=['Unspecified', 'Not applicable'], recode=HOUSEHOLD_INCOME_RECODE, key_order=HOUSEHOLD_INCOME_RECODE.values(), table_name='annualhouseholdincome_genderofhouseholdhead') # median income median = calculate_median_stat(income_dist_data) median_income = HOUSEHOLD_INCOME_ESTIMATE[median] # type of dwelling type_of_dwelling_dist, _ = get_stat_data( ['type of dwelling'], geo, session, recode=TYPE_OF_DWELLING_RECODE, order_by='-total') informal = type_of_dwelling_dist['Shack']['numerators']['this'] # household goods household_goods, _ = get_stat_data( ['household goods'], geo, session, recode=HOUSEHOLD_GOODS_RECODE, key_order=sorted(HOUSEHOLD_GOODS_RECODE.values())) return {'total_households': { 'name': 'Households', 'values': {'this': total_households}, }, 'owned': { 'name': 'Households fully owned or being paid off', 'values': {'this': percent(owned, total_households)}, 'numerators': {'this': owned}, }, 'type_of_dwelling_distribution': type_of_dwelling_dist, 'informal': { 'name': 'Households that are informal dwellings (shacks)', 'values': {'this': percent(informal, total_households)}, 'numerators': {'this': informal}, }, 'tenure_distribution': tenure_data, 'household_goods': household_goods, 'annual_income_distribution': income_dist_data, 'median_annual_income': { 'name': 'Average annual household income', 'values': {'this': median_income}, }, 'head_of_household': { 'gender_distribution': head_gender_dist, 'female': { 'name': 'Households with women as their head', 'values': {'this': percent(female_heads, total_households)}, 'numerators': {'this': female_heads}, }, 'under_18': { 'name': 'Households with heads under 18 years old', 'values': {'this': total_under_18}, } }, }
def get_demographics_profile(geo, session): # population group pop_dist_data, total_pop = get_stat_data( ['population group'], geo, session, table_dataset='Census 2011') # language language_data, _ = get_stat_data( ['language'], geo, session, order_by='-total') language_most_spoken = language_data[language_data.keys()[0]] # age groups age_dist_data, total_age = get_stat_data( ['age groups in 5 years'], geo, session, table_name='agegroupsin5years', recode=COLLAPSED_AGE_CATEGORIES, key_order=('0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80+')) # sex sex_data, _ = get_stat_data( ['gender'], geo, session, table_name='gender') final_data = { 'language_distribution': language_data, 'language_most_spoken': language_most_spoken, 'population_group_distribution': pop_dist_data, 'age_group_distribution': age_dist_data, 'sex_ratio': sex_data, 'total_population': { "name": "People", "values": {"this": total_pop}, } } if geo.square_kms: final_data['population_density'] = { 'name': "people per square kilometre", 'values': {"this": total_pop / geo.square_kms}, } # median age/age category db_model_age = get_model_from_fields( ['age in completed years'], geo.geo_level, table_name='ageincompletedyears' ) objects = sorted( get_objects_by_geo(db_model_age, geo, session), key=lambda x: int(getattr(x, 'age in completed years')) ) # median age median = calculate_median(objects, 'age in completed years') final_data['median_age'] = { "name": "Median age", "values": {"this": median}, } # age category age_dist, _ = get_stat_data( ['age in completed years'], geo, session, table_name='ageincompletedyearssimplified', key_order=['Under 18', '18 to 64', '65 and over'], recode={'< 18': 'Under 18', '>= 65': '65 and over'}) final_data['age_category_distribution'] = age_dist # citizenship citizenship_dist, _ = get_stat_data( ['citizenship'], geo, session, order_by='-total') sa_citizen = citizenship_dist['Yes']['numerators']['this'] final_data['citizenship_distribution'] = citizenship_dist final_data['citizenship_south_african'] = { 'name': 'South African citizens', 'values': {'this': percent(sa_citizen, total_pop)}, 'numerators': {'this': sa_citizen}, } # migration province_of_birth_dist, _ = get_stat_data( ['province of birth'], geo, session, exclude_zero=True, order_by='-total') final_data['province_of_birth_distribution'] = province_of_birth_dist def region_recode(field, key): if key == 'Born in South Africa': return 'South Africa' else: return { 'Not applicable': 'Other', }.get(key, key) region_of_birth_dist, _ = get_stat_data( ['region of birth'], geo, session, exclude_zero=True, order_by='-total', recode=region_recode) if 'South Africa' in region_of_birth_dist: born_in_sa = region_of_birth_dist['South Africa']['numerators']['this'] else: born_in_sa = 0 final_data['region_of_birth_distribution'] = region_of_birth_dist final_data['born_in_south_africa'] = { 'name': 'Born in South Africa', 'values': {'this': percent(born_in_sa, total_pop)}, 'numerators': {'this': born_in_sa}, } return final_data
def get_demographics_profile(geo_code, geo_level, session): # population by sex sex_dist_data, total_pop = get_stat_data( 'sex', geo_level, geo_code, session, table_fields=['disability', 'sex']) if total_pop > 0: # population by disability disability_dist_data, total_disabled = get_stat_data( 'disability', geo_level, geo_code, session, table_fields=['disability', 'sex'], recode=dict(DISABILITY_RECODES), key_order=DISABILITY_RECODES.values(), exclude=['NO_DISABILITY']) demographic_data = { 'has_data': True, 'sex_ratio': sex_dist_data, 'disability_ratio': disability_dist_data, 'total_population': { "name": "People", "values": {"this": total_pop} }, 'total_disabled': { 'name': 'People', 'values': {'this': total_disabled}, }, 'percent_disabled': { 'name': 'Are disabled', 'values': {'this': round(total_disabled / float(total_pop) * 100, 2)}, }, 'is_vdc': True } if geo_level != 'vdc': income_table = get_datatable('per_capita_income') per_capita_income, _ = income_table.get_stat_data( geo_level, geo_code, percent=False) lifeexpectancy_table = get_datatable('lifeexpectancy') life_expectancy, _ = lifeexpectancy_table.get_stat_data( geo_level, geo_code, percent=False) # population projection for 2031 pop_2031_dist_data, pop_projection_2031 = get_stat_data( 'sex', geo_level, geo_code, session, table_fields=['sex'], table_name='population_projection_2031') # poverty (UNDP and Open Nepal) poverty_dist_data, undp_survey_pop = get_stat_data( 'poverty', geo_level, geo_code, session, recode=dict(POVERTY_RECODES), key_order=POVERTY_RECODES.values()) total_in_poverty = \ poverty_dist_data['In Poverty']['numerators']['this'] # language language_data, _ = get_stat_data( ['language'], geo_level, geo_code, session, order_by='-total') language_most_spoken = language_data[language_data.keys()[0]] # caste or ethnic group caste_data, _ = get_stat_data(['caste or ethnic group'], geo_level, geo_code, session, order_by='-total') most_populous_caste = caste_data[caste_data.keys()[0]] citizenship_data, _ = get_stat_data( ['citizenship', 'sex'], geo_level, geo_code, session, order_by='-total' ) citizenship_by_sex = { 'Nepal': citizenship_data['Nepal'], 'India': citizenship_data['India'], 'China': citizenship_data['China'], 'Others': citizenship_data['Others'], 'metadata': citizenship_data['metadata'] } citizenship_distribution, _ = get_stat_data( 'citizenship', geo_level, geo_code, session, order_by='-total') # age # age in 10 year groups def age_recode(f, x): age = int(x) if age >= 80: return '80+' bucket = 10 * (age / 10) return '%d-%d' % (bucket, bucket + 9) age_dist_data, _ = get_stat_data( 'age in completed years', geo_level, geo_code, session, table_fields=['age in completed years', 'sex'], recode=age_recode, table_name='age_sex') ordered_age_dist_data = OrderedDict( sorted(age_dist_data.items(), key=lambda age_range: age_range[0]) ) # age category def age_cat_recode(f, x): age = int(x.replace('+', '')) if age < 20: return 'Under 20' elif age >= 60: return '60 and over' else: return '20 to 59' age_cats, _ = get_stat_data( 'age in completed years', geo_level, geo_code, session, table_fields=['age in completed years', 'sex'], recode=age_cat_recode, table_name='age_sex') ordered_age_cats_data = OrderedDict( [('Under 20', age_cats['Under 20']), ('20 to 59', age_cats['20 to 59']), ('60 and over', age_cats['60 and over']), ('metadata', age_cats['metadata'])] ) # median age db_model_age = get_model_from_fields( ['age in completed years', 'sex'], geo_level) objects = get_objects_by_geo(db_model_age, geo_code, geo_level, session, ['age in completed years']) objects = sorted((o for o in objects if getattr(o, 'age in completed years') != 'unspecified'), key=lambda x: int(getattr(x, 'age in completed years') .replace('+', ''))) median_age = calculate_median(objects, 'age in completed years') # add non-VDC data demographic_data['is_vdc'] = False demographic_data['per_capita_income'] = { 'name': 'Per capita income in US dollars', 'values': {'this': per_capita_income['income']['values']['this']} } demographic_data['life_expectancy'] = { 'name': 'Life expectancy in years', 'values': {'this': life_expectancy['years']['values']['this']} } demographic_data['pop_2031_dist'] = pop_2031_dist_data demographic_data['pop_projection_2031'] = { "name": "Projected in 2031", "values": {"this": pop_projection_2031} } demographic_data['poverty_dist'] = poverty_dist_data demographic_data['poverty_population'] = { 'name': 'Estimated Population', 'values': {'this': undp_survey_pop} } demographic_data['percent_impoverished'] = { 'name': 'Are in poverty', 'numerators': {'this': total_in_poverty}, 'values': { 'this': round( total_in_poverty / undp_survey_pop * 100, 2)} } demographic_data['language_distribution'] = language_data demographic_data['language_most_spoken'] = language_most_spoken demographic_data['ethnic_distribution'] = caste_data demographic_data['most_populous_caste'] = most_populous_caste demographic_data['citizenship_by_sex'] = citizenship_by_sex demographic_data['citizenship_distribution'] = citizenship_distribution demographic_data['age_group_distribution'] = ordered_age_dist_data demographic_data['age_category_distribution'] = \ ordered_age_cats_data demographic_data['median_age'] = { 'name': 'Median age', 'values': {'this': median_age}, } else: demographic_data = { 'area_has_data': False } return demographic_data
def get_demographics_profile(geo, session): # sex sex_dist_data, total_pop = get_stat_data( 'sex', geo, session, table_fields=['age in completed years', 'sex', 'rural or urban']) religion_dist_data, _ = get_stat_data( 'religion', geo, session) # urban/rural by sex urban_dist_data, _ = get_stat_data( ['rural or urban', 'sex'], geo, session, table_fields=['age in completed years', 'sex', 'rural or urban']) total_urbanised = 0 for data in urban_dist_data['Urban'].itervalues(): if 'numerators' in data: total_urbanised += data['numerators']['this'] # median age db_model_age = get_model_from_fields( ['age in completed years', 'sex', 'rural or urban'], geo.geo_level) objects = get_objects_by_geo(db_model_age, geo, session, [ 'age in completed years']) objects = sorted((o for o in objects if getattr(o, 'age in completed years') != 'unspecified'), key=lambda x: int(getattr(x, 'age in completed years').replace('+', ''))) median = calculate_median(objects, 'age in completed years') # age in 10 year groups def age_recode(f, x): age = int(x.replace('+', '')) if age >= 80: return '80+' bucket = 10 * (age / 10) return '%d-%d' % (bucket, bucket + 9) age_dist_data, _ = get_stat_data( 'age in completed years', geo, session, table_fields=['age in completed years', 'sex', 'rural or urban'], recode=age_recode, exclude=['unspecified']) # age category def age_cat_recode(f, x): age = int(x.replace('+', '')) if age < 18: return 'Under 18' elif age >= 65: return '65 and over' else: return '18 to 64' age_cats, _ = get_stat_data( 'age in completed years', geo, session, table_fields=['age in completed years', 'sex', 'rural or urban'], recode=age_cat_recode, exclude=['unspecified']) final_data = { 'sex_ratio': sex_dist_data, 'religion_ratio': religion_dist_data, 'urban_distribution': urban_dist_data, 'urbanised': { 'name': 'In urban areas', 'numerators': {'this': total_urbanised}, 'values': {'this': round(total_urbanised / total_pop * 100, 2)} }, 'age_group_distribution': age_dist_data, 'age_category_distribution': age_cats, 'median_age': { "name": "Median age", "values": {"this": median}, }, 'total_population': { "name": "People", "values": {"this": total_pop} } } return final_data
def get_households_profile(geo_code, geo_level, session): # head of household # gender head_gender_dist, total_households = get_stat_data( ['gender of household head'], geo_level, geo_code, session, order_by='gender of household head') female_heads = head_gender_dist['Female']['numerators']['this'] # age db_model_u18 = get_model_from_fields( ['gender of head of household'], geo_level, table_name='genderofheadofhouseholdunder18' ) objects = get_objects_by_geo(db_model_u18, geo_code, geo_level, session) total_under_18 = float(sum(o[0] for o in objects)) # tenure tenure_data, _ = get_stat_data( ['tenure status'], geo_level, geo_code, session, order_by='tenure status') owned = 0 for key, data in tenure_data.iteritems(): if key.startswith('Owned'): owned += data['numerators']['this'] # annual household income income_dist_data, _ = get_stat_data( ['annual household income'], geo_level, geo_code, session, exclude=['Unspecified'], recode=HOUSEHOLD_INCOME_RECODE, key_order=HOUSEHOLD_INCOME_RECODE.values(), table_name='annualhouseholdincome_genderofhouseholdhead') # median income median = calculate_median_stat(income_dist_data) median_income = HOUSEHOLD_INCOME_ESTIMATE[median] # type of dwelling type_of_dwelling_dist, _ = get_stat_data( ['type of dwelling'], geo_level, geo_code, session, recode=TYPE_OF_DWELLING_RECODE, order_by='-total') informal = type_of_dwelling_dist['Shack']['numerators']['this'] # household goods household_goods, _ = get_stat_data( ['household goods'], geo_level, geo_code, session, total=total_households, recode=HOUSEHOLD_GOODS_RECODE, exclude=['total households'], key_order=sorted(HOUSEHOLD_GOODS_RECODE.values())) return {'total_households': { 'name': 'Households', 'values': {'this': total_households}, }, 'owned': { 'name': 'Households fully owned or being paid off', 'values': {'this': percent(owned, total_households)}, 'numerators': {'this': owned}, }, 'type_of_dwelling_distribution': type_of_dwelling_dist, 'informal': { 'name': 'Households that are informal dwellings (shacks)', 'values': {'this': percent(informal, total_households)}, 'numerators': {'this': informal}, }, 'tenure_distribution': tenure_data, 'household_goods': household_goods, 'annual_income_distribution': income_dist_data, 'median_annual_income': { 'name': 'Average annual household income', 'values': {'this': median_income}, }, 'head_of_household': { 'gender_distribution': head_gender_dist, 'female': { 'name': 'Households with women as their head', 'values': {'this': percent(female_heads, total_households)}, 'numerators': {'this': female_heads}, }, 'under_18': { 'name': 'Households with heads under 18 years old', 'values': {'this': total_under_18}, } }, }
def get_demographics_profile(geo_code, geo_level, session): # population group pop_dist_data, total_pop = get_stat_data( ['population group'], geo_level, geo_code, session) # language language_data, _ = get_stat_data( ['language'], geo_level, geo_code, session, order_by='-total') language_most_spoken = language_data[language_data.keys()[0]] # age groups age_dist_data, total_age = get_stat_data( ['age groups in 5 years'], geo_level, geo_code, session, table_name='agegroupsin5years', recode=COLLAPSED_AGE_CATEGORIES, key_order=('0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80+')) # sex db_model_sex = get_model_from_fields(['gender'], geo_level, table_name='gender') query = session.query(func.sum(db_model_sex.total)) \ .filter(db_model_sex.gender == 'Male') query = query.filter(db_model_sex.geo_code == geo_code) total_male = query.one()[0] sex_data = OrderedDict(( # census data refers to sex as gender ('Female', { "name": "Female", "values": {"this": round((total_pop - total_male) / total_pop * 100, 2)}, "numerators": {"this": total_pop - total_male}, }), ('Male', { "name": "Male", "values": {"this": round(total_male / total_pop * 100, 2)}, "numerators": {"this": total_male}, }), )) add_metadata(sex_data, db_model_sex) final_data = { 'language_distribution': language_data, 'language_most_spoken': language_most_spoken, 'population_group_distribution': pop_dist_data, 'age_group_distribution': age_dist_data, 'sex_ratio': sex_data, 'total_population': { "name": "People", "values": {"this": total_pop}, } } geo = geo_data.get_geography(geo_code, geo_level) if geo.square_kms: final_data['population_density'] = { 'name': "people per square kilometre", 'values': {"this": total_pop / geo.square_kms}, } # median age/age category db_model_age = get_model_from_fields( ['age in completed years'], geo_level, table_name='ageincompletedyears' ) objects = sorted( get_objects_by_geo(db_model_age, geo_code, geo_level, session), key=lambda x: int(getattr(x, 'age in completed years')) ) # median age median = calculate_median(objects, 'age in completed years') final_data['median_age'] = { "name": "Median age", "values": {"this": median}, } # age category age_dist, _ = get_stat_data( ['age in completed years'], geo_level, geo_code, session, table_name='ageincompletedyearssimplified', key_order=['Under 18', '18 to 64', '65 and over'], recode={'< 18': 'Under 18', '>= 65': '65 and over'}) final_data['age_category_distribution'] = age_dist # citizenship citizenship_dist, _ = get_stat_data( ['citizenship'], geo_level, geo_code, session, order_by='-total') sa_citizen = citizenship_dist['Yes']['numerators']['this'] final_data['citizenship_distribution'] = citizenship_dist final_data['citizenship_south_african'] = { 'name': 'South African citizens', 'values': {'this': percent(sa_citizen, total_pop)}, 'numerators': {'this': sa_citizen}, } # migration province_of_birth_dist, _ = get_stat_data( ['province of birth'], geo_level, geo_code, session, exclude_zero=True, order_by='-total') final_data['province_of_birth_distribution'] = province_of_birth_dist def region_recode(field, key): if key == 'Born in South Africa': return 'South Africa' else: return key region_of_birth_dist, _ = get_stat_data( ['region of birth'], geo_level, geo_code, session, exclude_zero=True, order_by='-total', recode=region_recode) if 'South Africa' in region_of_birth_dist: born_in_sa = region_of_birth_dist['South Africa']['numerators']['this'] else: born_in_sa = 0 final_data['region_of_birth_distribution'] = region_of_birth_dist final_data['born_in_south_africa'] = { 'name': 'Born in South Africa', 'values': {'this': percent(born_in_sa, total_pop)}, 'numerators': {'this': born_in_sa}, } return final_data
def get_schools_profile(geo, session, year): # ownership status schools_dist, total_schools = get_stat_data(['ownership'], geo=geo, session=session, only={'year_of_result': [year]}) #school_dist_data, _ = get_stat_data('age in completed years',geo=geo, session=session, only={'year_of_result': [year]}) if geo.geo_level == "country": reg = 'region' elif geo.geo_level == "region": reg = 'district' elif geo.geo_level == "district": reg = 'ward' region_dist, total_schools = get_stat_data([reg], geo=geo, session=session, only={'year_of_result': [year]}) category_dist, _ = get_stat_data(['more_than_40'], geo=geo, session=session, only={'year_of_result': [year]}) gender_dist, _ = get_stat_data(['gender'], geo=geo, session=session, only={'year_of_result': [year]}) # Choosing sorting option #Sorting will only be done using national_rank all, as regional and district ranks are unknown for some result esp historical rank_column = Base.metadata.tables['secondary_school'].c.national_rank_all # Getting top for schools with more than 40 students top_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\ .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\ .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\ .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\ .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("yes%"))\ .order_by(asc(cast(rank_column, Integer)))\ .all() # Getting top for schools with less than 40 students top_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\ .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\ .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\ .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\ .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("no%"))\ .order_by(asc(cast(rank_column, Integer)))\ .all() # Getting lowest schools with more than 40 students lowest_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\ .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\ .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\ .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\ .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("yes%"))\ .order_by(desc(cast(rank_column, Integer)))\ .all() # Getting lowest for schools with less than 40 students lowest_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\ .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\ .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\ .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\ .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("no%"))\ .order_by(desc(cast(rank_column, Integer)))\ .all() # median gpa db_model_age = get_model_from_fields(['code', 'name', 'avg_gpa'], geo.geo_level) objects = get_objects_by_geo(db_model_age, geo, session, ['avg_gpa']) median = calculate_median(objects, 'avg_gpa') # gpa in 1 point groups def gpa_recode(f, x): gpa = x if gpa >= 4: return '4+' bucket = 1 * (gpa / 1) return '%d-%d' % (bucket, bucket + 2) gpa_dist_data, total_schools = get_stat_data( 'avg_gpa', geo, session, table_fields=['code', 'name', 'avg_gpa'], recode=gpa_recode, exclude=['unspecified'], only={'year_of_result': [year]}) total_private = 0.0 for data in schools_dist['Non-Government'].itervalues(): if 'numerators' in data: total_private += data['numerators']['this'] return { 'schools_distribution': schools_dist, 'region_distribution': region_dist, 'category_distribution': category_dist, 'best_schools_more_40': top_schools_40_more, 'worst_schools_more_40': lowest_schools_40_more, 'best_schools_less_40': top_schools_40_less, 'worst_schools_less_40': lowest_schools_40_less, 'gpa_group_distribution': gpa_dist_data, 'gender_distribution': gender_dist, 'total_schools': { "name": "Schools", "values": {"this": total_schools} }, 'median_gpa': { "name": "Median GPA", "values": {"this": median}, }, }