def get_demographics_profile(geo_code, geo_level, session):
    """Assemble the demographics section of a geography's profile.

    Every distribution is read through ``get_stat_data`` from the table keyed
    on age in completed years, sex and rural/urban.

    Args:
        geo_code: code of the geography being profiled.
        geo_level: level of that geography.
        session: database session handed through to the data helpers.

    Returns:
        dict with the keys ``sex_ratio``, ``urban_distribution``,
        ``urbanised``, ``age_group_distribution``,
        ``age_category_distribution``, ``median_age`` and
        ``total_population``.
    """
    age_field = 'age in completed years'

    def table_fields():
        # A fresh list per call so no helper can share another's argument.
        return [age_field, 'sex', 'rural or urban']

    def parse_age(raw):
        # Age values may carry a '+' suffix; strip it before converting.
        return int(raw.replace('+', ''))

    def age_recode(f, x):
        """Map one age onto its 10-year group label, capped at '80+'."""
        age = parse_age(x)
        if age >= 80:
            return '80+'
        # Floor division keeps the bucket a multiple of 10 on Python 2 and 3
        # alike; plain '/' would give e.g. 25.0 (label '25-34') on Python 3.
        bucket = 10 * (age // 10)
        return '%d-%d' % (bucket, bucket + 9)

    def age_cat_recode(f, x):
        """Map one age onto 'Under 18', '18 to 64' or '65 and over'."""
        age = parse_age(x)
        if age < 18:
            return 'Under 18'
        if age >= 65:
            return '65 and over'
        return '18 to 64'

    # sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo_level, geo_code, session, table_fields=table_fields())

    # urban/rural by sex
    urban_dist_data, _ = get_stat_data(
        ['rural or urban', 'sex'], geo_level, geo_code, session,
        table_fields=table_fields())

    # Entries without 'numerators' are skipped; a geography with no 'Urban'
    # entry counts as zero urbanised people instead of raising KeyError.
    total_urbanised = sum(
        entry['numerators']['this']
        for entry in urban_dist_data.get('Urban', {}).values()
        if 'numerators' in entry)

    # median age
    db_model_age = get_model_from_fields(table_fields(), geo_level)
    rows = get_objects_by_geo(
        db_model_age, geo_code, geo_level, session, [age_field])
    rows = sorted(
        (row for row in rows if getattr(row, age_field) != 'unspecified'),
        key=lambda row: parse_age(getattr(row, age_field)))
    median = calculate_median(rows, age_field)

    # age in 10 year groups
    age_dist_data, _ = get_stat_data(
        age_field, geo_level, geo_code, session,
        table_fields=table_fields(),
        recode=age_recode, exclude=['unspecified'])

    # age category
    age_cats, _ = get_stat_data(
        age_field, geo_level, geo_code, session,
        table_fields=table_fields(),
        recode=age_cat_recode, exclude=['unspecified'])

    # True division on both operands (integer totals would truncate to 0 on
    # Python 2) and no ZeroDivisionError for an empty geography.
    if total_pop:
        urban_percent = round(
            float(total_urbanised) / float(total_pop) * 100, 2)
    else:
        urban_percent = 0

    return {
        'sex_ratio': sex_dist_data,
        'urban_distribution': urban_dist_data,
        'urbanised': {
            'name': 'In urban areas',
            'numerators': {'this': total_urbanised},
            'values': {'this': urban_percent},
        },
        'age_group_distribution': age_dist_data,
        'age_category_distribution': age_cats,
        'median_age': {
            "name": "Median age",
            "values": {"this": median},
        },
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        },
    }
def get_schools_profile(geo, session, year):
    """Build the secondary-schools profile of ``geo`` for one results year.

    Every lookup is best-effort: when one fails the failure is logged and the
    affected entries keep their placeholder (``LOCATIONNOTFOUND`` for the
    ownership, region and category distributions; an empty list or ``0`` for
    the rest), so the caller always receives a complete dict.

    Args:
        geo: geography object; its ``geo_level`` and ``geo_code`` are read.
        session: database session used for the queries.
        year: value matched against ``year_of_result``.

    Returns:
        dict with the keys ``schools_distribution``, ``region_distribution``,
        ``category_distribution``, ``best_schools_more_40``,
        ``worst_schools_more_40``, ``best_schools_less_40``,
        ``worst_schools_less_40``, ``gpa_group_distribution``,
        ``gender_distribution``, ``total_schools`` and ``median_gpa``.
    """
    # Local import keeps this function self-contained.
    import logging
    log = logging.getLogger(__name__)

    def only_year():
        # Fresh filter dict per call, restricted to the requested year.
        return {'year_of_result': [year]}

    def ranked_schools(size_pattern, direction):
        """Rows of this geography/year whose ``more_than_40`` is LIKE
        ``size_pattern``, ordered by national rank via ``direction``."""
        table = Base.metadata.tables['secondary_school']
        # Only national_rank_all is used for sorting: regional and district
        # ranks are unknown for some results, especially historical ones.
        rank = cast(table.c.national_rank_all, Integer)
        return (session.query(table)
                .filter(table.c.geo_level == geo.geo_level)
                .filter(table.c.geo_code == geo.geo_code)
                .filter(table.c.year_of_result == year)
                .filter(table.c.more_than_40.like(size_pattern))
                .order_by(direction(rank))
                .all())

    def gpa_recode(f, x):
        """Label a GPA value; anything from 4 upwards becomes '4+'."""
        gpa = x
        if gpa >= 4:
            return '4+'
        # '%d' truncates, so 1.7 becomes '1-3'.
        # NOTE(review): a span of 2 for "1 point groups" looks odd - confirm.
        return '%d-%d' % (gpa, gpa + 2)

    schools_dist = LOCATIONNOTFOUND
    region_dist = LOCATIONNOTFOUND
    category_dist = LOCATIONNOTFOUND
    top_schools_40_more = []
    top_schools_40_less = []
    lowest_schools_40_less = []
    lowest_schools_40_more = []
    gpa_dist_data = []
    gender_dist = []
    total_schools = 0
    median = 0

    # Field to break the distribution down by: one level below ``geo``;
    # unknown levels fall back to 'region'.
    reg = {
        'country': 'region',
        'region': 'district',
        'district': 'ward',
    }.get(geo.geo_level, 'region')

    with dataset_context(year='2017'):
        try:
            schools_dist, total_schools = get_stat_data(
                ['ownership'], geo=geo, table_name='secondary_school',
                session=session, only=only_year())
        except Exception:
            log.exception('Schools profile: ownership distribution failed '
                          '(year=%s)', year)

        try:
            region_dist, total_schools = get_stat_data(
                [reg], geo=geo, session=session, only=only_year())
        except Exception:
            log.exception('Schools profile: %s distribution failed '
                          '(year=%s)', reg, year)

        try:
            category_dist, _ = get_stat_data(
                ['more_than_40'], geo=geo, session=session, only=only_year())
        except Exception:
            log.exception('Schools profile: category distribution failed '
                          '(year=%s)', year)

        try:
            gender_dist, _ = get_stat_data(
                ['gender'], geo=geo, session=session, only=only_year())
        except Exception:
            log.exception('Schools profile: gender distribution failed '
                          '(year=%s)', year)

        try:
            # Load table definitions so 'secondary_school' can be looked up.
            Base.metadata.reflect()

            top_schools_40_more = ranked_schools("yes%", asc)
            top_schools_40_less = ranked_schools("no%", asc)
            lowest_schools_40_more = ranked_schools("yes%", desc)
            lowest_schools_40_less = ranked_schools("no%", desc)

            # median gpa
            gpa_table = get_datatable('code_name_avg_gpa')
            gpa_rows = gpa_table.get_rows_for_geo(geo, session)
            median = calculate_median(gpa_rows, 'avg_gpa')

            # gpa groups
            gpa_dist_data, total_schools = get_stat_data(
                'avg_gpa', geo, session,
                table_fields=['code', 'name', 'avg_gpa'],
                recode=gpa_recode, exclude=['unspecified'],
                only=only_year())
        except Exception:
            # Steps that completed before the failure keep their results.
            log.exception('Schools profile: rankings / GPA statistics failed '
                          '(year=%s)', year)

    return {
        'schools_distribution': schools_dist,
        'region_distribution': region_dist,
        'category_distribution': category_dist,
        'best_schools_more_40': top_schools_40_more,
        'worst_schools_more_40': lowest_schools_40_more,
        'best_schools_less_40': top_schools_40_less,
        'worst_schools_less_40': lowest_schools_40_less,
        'gpa_group_distribution': gpa_dist_data,
        'gender_distribution': gender_dist,
        'total_schools': {
            "name": "Schools",
            "values": {"this": total_schools},
        },
        'median_gpa': {
            "name": "Median GPA",
            "values": {"this": median},
        },
    }
def get_demographics_profile(geo, session):
    """Assemble the demographics section of the profile for ``geo``.

    Args:
        geo: geography object; ``geo_level`` and ``square_kms`` are read.
        session: database session handed through to the data helpers.

    Returns:
        dict holding the language, population-group, age-group, sex,
        age-category, citizenship, province-of-birth and region-of-birth
        distributions plus the derived ``language_most_spoken``,
        ``total_population``, ``median_age``, ``citizenship_south_african``
        and ``born_in_south_africa`` figures. ``population_density`` is only
        present when ``geo.square_kms`` is truthy.
    """
    age_field = 'age in completed years'

    def region_recode(field, key):
        """Rename region-of-birth categories for display."""
        if key == 'Born in South Africa':
            return 'South Africa'
        return {
            'Not applicable': 'Other',
        }.get(key, key)

    # population group
    pop_dist_data, total_pop = get_stat_data(
        ['population group'], geo, session, table_dataset='Census 2011')

    # language; ordered by descending total, so the first key is the most
    # spoken one. list(...) keeps the lookup working on Python 3, where
    # keys() is a view that cannot be indexed.
    language_data, _ = get_stat_data(
        ['language'], geo, session, order_by='-total')
    language_most_spoken = language_data[list(language_data.keys())[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
        ['age groups in 5 years'], geo, session,
        table_name='agegroupsin5years',
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19', '20-29', '30-39', '40-49',
                   '50-59', '60-69', '70-79', '80+'))

    # sex
    sex_data, _ = get_stat_data(
        ['gender'], geo, session, table_name='gender')

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        },
    }

    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age
    db_model_age = get_model_from_fields(
        [age_field], geo.geo_level, table_name='ageincompletedyears')
    age_rows = sorted(
        get_objects_by_geo(db_model_age, geo, session),
        key=lambda row: int(getattr(row, age_field)))
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": calculate_median(age_rows, age_field)},
    }

    # age category
    age_dist, _ = get_stat_data(
        [age_field], geo, session,
        table_name='ageincompletedyearssimplified',
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18', '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
        ['citizenship'], geo, session, order_by='-total')
    sa_citizen = citizenship_dist['Yes']['numerators']['this']
    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {'this': percent(sa_citizen, total_pop)},
        'numerators': {'this': sa_citizen},
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(
        ['province of birth'], geo, session,
        exclude_zero=True, order_by='-total')
    final_data['province_of_birth_distribution'] = province_of_birth_dist

    region_of_birth_dist, _ = get_stat_data(
        ['region of birth'], geo, session,
        exclude_zero=True, order_by='-total', recode=region_recode)
    # Zero-valued categories are excluded above, so the key may be absent.
    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0
    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {'this': percent(born_in_sa, total_pop)},
        'numerators': {'this': born_in_sa},
    }

    return final_data
def get_schools_profile(geo, session):
    """Build the secondary-schools profile for ``geo``.

    Args:
        geo: geography object; its ``geo_level`` and ``geo_code`` are read.
        session: database session used for the queries.

    Returns:
        dict with the keys ``schools_distribution``, ``region_distribution``,
        ``best_schools_more_40``, ``worst_schools_more_40``,
        ``best_schools_less_40``, ``worst_schools_less_40``,
        ``gpa_group_distribution`` and ``median_gpa``.
    """
    def ranked_schools(size_flag, direction):
        """Rows of this geography whose ``more_than_40`` equals ``size_flag``,
        ordered by national rank via ``direction``."""
        table = Base.metadata.tables['secondary_school']
        # Only national_rank_all is used for sorting: regional and district
        # ranks are unknown for some results, especially historical ones.
        rank = cast(table.c.national_rank_all, Integer)
        return (session.query(table)
                .filter(table.c.geo_level == geo.geo_level)
                .filter(table.c.geo_code == geo.geo_code)
                .filter(table.c.more_than_40 == size_flag)
                .order_by(direction(rank))
                .all())

    def gpa_recode(f, x):
        """Label a GPA value; anything from 4 upwards becomes '4+'."""
        gpa = x
        if gpa >= 4:
            return '4+'
        # '%d' truncates, so 1.7 becomes '1-3'.
        # NOTE(review): a span of 2 for "1 point groups" looks odd - confirm.
        return '%d-%d' % (gpa, gpa + 2)

    # ownership status
    schools_dist, _ = get_stat_data(['ownership'], geo, session)

    # region status
    region_dist, _ = get_stat_data(['region'], geo, session)

    top_schools_40_more = ranked_schools("yes", asc)
    top_schools_40_less = ranked_schools("no", asc)
    lowest_schools_40_more = ranked_schools("yes", desc)
    lowest_schools_40_less = ranked_schools("no", desc)

    # median gpa
    db_model_gpa = get_model_from_fields(
        ['code', 'name', 'avg_gpa'], geo.geo_level)
    gpa_rows = get_objects_by_geo(db_model_gpa, geo, session, ['avg_gpa'])
    median = calculate_median(gpa_rows, 'avg_gpa')

    # gpa groups
    gpa_dist_data, _ = get_stat_data(
        'avg_gpa', geo, session,
        table_fields=['code', 'name', 'avg_gpa'],
        recode=gpa_recode, exclude=['unspecified'])

    # The four ranked lists are returned under explicit names; the previous
    # return statement referenced names that were never assigned
    # (top_schools / lowest_schools) and therefore always raised NameError.
    return {
        'schools_distribution': schools_dist,
        'region_distribution': region_dist,
        'best_schools_more_40': top_schools_40_more,
        'worst_schools_more_40': lowest_schools_40_more,
        'best_schools_less_40': top_schools_40_less,
        'worst_schools_less_40': lowest_schools_40_less,
        'gpa_group_distribution': gpa_dist_data,
        'median_gpa': {
            "name": "Median GPA",
            "values": {"this": median},
        },
    }
def get_demographics_profile(geo_code, geo_level, session):
    """Assemble the demographics section of a geography's profile.

    Sex and disability figures are produced for every geography that has a
    positive population. Income, life expectancy, the 2031 projection,
    poverty, language, caste/ethnicity, citizenship and age figures are
    added only when ``geo_level`` is not ``'vdc'``.

    Args:
        geo_code: code of the geography being profiled.
        geo_level: level of that geography.
        session: database session handed through to the data helpers.

    Returns:
        dict of profile entries with ``has_data`` set to True, or a dict
        flagging the absence of data when the population is not positive.
    """
    age_field = 'age in completed years'

    def parse_age(raw):
        # Age values may carry a '+' suffix; strip it before converting.
        return int(raw.replace('+', ''))

    def ratio_percent(part, whole):
        # True division on both operands (integer totals would truncate on
        # Python 2); an empty denominator yields 0 instead of raising.
        if not whole:
            return 0
        return round(float(part) / float(whole) * 100, 2)

    def age_recode(f, x):
        """Map one age onto its 10-year group label, capped at '80+'."""
        age = parse_age(x)
        if age >= 80:
            return '80+'
        # Floor division keeps the bucket a multiple of 10 on Python 3 too.
        bucket = 10 * (age // 10)
        return '%d-%d' % (bucket, bucket + 9)

    def age_cat_recode(f, x):
        """Map one age onto 'Under 20', '20 to 59' or '60 and over'."""
        age = parse_age(x)
        if age < 20:
            return 'Under 20'
        if age >= 60:
            return '60 and over'
        return '20 to 59'

    # population by sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo_level, geo_code, session,
        table_fields=['disability', 'sex'])

    if not total_pop > 0:
        # 'area_has_data' is the flag this branch has always returned;
        # 'has_data' mirrors the key used when data exists so callers can
        # test a single flag.
        return {
            'area_has_data': False,
            'has_data': False,
        }

    # population by disability
    disability_dist_data, total_disabled = get_stat_data(
        'disability', geo_level, geo_code, session,
        table_fields=['disability', 'sex'],
        recode=dict(DISABILITY_RECODES),
        key_order=list(DISABILITY_RECODES.values()),
        exclude=['NO_DISABILITY'])

    is_vdc = geo_level == 'vdc'
    demographic_data = {
        'has_data': True,
        'sex_ratio': sex_dist_data,
        'disability_ratio': disability_dist_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        },
        'total_disabled': {
            'name': 'People',
            'values': {'this': total_disabled},
        },
        'percent_disabled': {
            'name': 'Are disabled',
            'values': {'this': ratio_percent(total_disabled, total_pop)},
        },
        'is_vdc': is_vdc,
    }
    if is_vdc:
        return demographic_data

    income_table = get_datatable('per_capita_income')
    per_capita_income, _ = income_table.get_stat_data(
        geo_level, geo_code, percent=False)

    lifeexpectancy_table = get_datatable('lifeexpectancy')
    life_expectancy, _ = lifeexpectancy_table.get_stat_data(
        geo_level, geo_code, percent=False)

    # population projection for 2031
    pop_2031_dist_data, pop_projection_2031 = get_stat_data(
        'sex', geo_level, geo_code, session,
        table_fields=['sex'],
        table_name='population_projection_2031')

    # poverty (UNDP and Open Nepal)
    poverty_dist_data, undp_survey_pop = get_stat_data(
        'poverty', geo_level, geo_code, session,
        recode=dict(POVERTY_RECODES),
        key_order=list(POVERTY_RECODES.values()))
    total_in_poverty = poverty_dist_data['In Poverty']['numerators']['this']

    # language; ordered by descending total, so the first key is the largest.
    # list(...) keeps the lookup working on Python 3, where keys() is a view.
    language_data, _ = get_stat_data(
        ['language'], geo_level, geo_code, session, order_by='-total')
    language_most_spoken = language_data[list(language_data.keys())[0]]

    # caste or ethnic group
    caste_data, _ = get_stat_data(
        ['caste or ethnic group'], geo_level, geo_code, session,
        order_by='-total')
    most_populous_caste = caste_data[list(caste_data.keys())[0]]

    # citizenship
    citizenship_data, _ = get_stat_data(
        ['citizenship', 'sex'], geo_level, geo_code, session,
        order_by='-total')
    citizenship_by_sex = {
        'Nepal': citizenship_data['Nepal'],
        'India': citizenship_data['India'],
        'China': citizenship_data['China'],
        'Others': citizenship_data['Others'],
        'metadata': citizenship_data['metadata'],
    }
    citizenship_distribution, _ = get_stat_data(
        'citizenship', geo_level, geo_code, session, order_by='-total')

    # age in 10 year groups, ordered by label
    age_dist_data, _ = get_stat_data(
        age_field, geo_level, geo_code, session,
        table_fields=[age_field, 'sex'],
        recode=age_recode, table_name='age_sex')
    ordered_age_dist_data = OrderedDict(
        sorted(age_dist_data.items(), key=lambda item: item[0]))

    # age category
    age_cats, _ = get_stat_data(
        age_field, geo_level, geo_code, session,
        table_fields=[age_field, 'sex'],
        recode=age_cat_recode, table_name='age_sex')
    ordered_age_cats_data = OrderedDict(
        (key, age_cats[key])
        for key in ('Under 20', '20 to 59', '60 and over', 'metadata'))

    # median age
    db_model_age = get_model_from_fields([age_field, 'sex'], geo_level)
    age_rows = get_objects_by_geo(
        db_model_age, geo_code, geo_level, session, [age_field])
    age_rows = sorted(
        (row for row in age_rows
         if getattr(row, age_field) != 'unspecified'),
        key=lambda row: parse_age(getattr(row, age_field)))
    median_age = calculate_median(age_rows, age_field)

    # add non-VDC data
    demographic_data['per_capita_income'] = {
        'name': 'Per capita income in US dollars',
        'values': {'this': per_capita_income['income']['values']['this']},
    }
    demographic_data['life_expectancy'] = {
        'name': 'Life expectancy in years',
        'values': {'this': life_expectancy['years']['values']['this']},
    }
    demographic_data['pop_2031_dist'] = pop_2031_dist_data
    demographic_data['pop_projection_2031'] = {
        "name": "Projected in 2031",
        "values": {"this": pop_projection_2031},
    }
    demographic_data['poverty_dist'] = poverty_dist_data
    demographic_data['poverty_population'] = {
        'name': 'Estimated Population',
        'values': {'this': undp_survey_pop},
    }
    demographic_data['percent_impoverished'] = {
        'name': 'Are in poverty',
        'numerators': {'this': total_in_poverty},
        'values': {'this': ratio_percent(total_in_poverty, undp_survey_pop)},
    }
    demographic_data['language_distribution'] = language_data
    demographic_data['language_most_spoken'] = language_most_spoken
    demographic_data['ethnic_distribution'] = caste_data
    demographic_data['most_populous_caste'] = most_populous_caste
    demographic_data['citizenship_by_sex'] = citizenship_by_sex
    demographic_data['citizenship_distribution'] = citizenship_distribution
    demographic_data['age_group_distribution'] = ordered_age_dist_data
    demographic_data['age_category_distribution'] = ordered_age_cats_data
    demographic_data['median_age'] = {
        'name': 'Median age',
        'values': {'this': median_age},
    }

    return demographic_data
def get_demographics_profile(geo, session):
    """Assemble the demographics section of the profile for ``geo``.

    Apart from religion, every distribution is read from the table keyed on
    age in completed years, sex and rural/urban.

    Args:
        geo: geography object; its ``geo_level`` is read here.
        session: database session handed through to the data helpers.

    Returns:
        dict with the keys ``sex_ratio``, ``religion_ratio``,
        ``urban_distribution``, ``urbanised``, ``age_group_distribution``,
        ``age_category_distribution``, ``median_age`` and
        ``total_population``.
    """
    age_field = 'age in completed years'

    def table_fields():
        # A fresh list per call so no helper can share another's argument.
        return [age_field, 'sex', 'rural or urban']

    def parse_age(raw):
        # Age values may carry a '+' suffix; strip it before converting.
        return int(raw.replace('+', ''))

    def age_recode(f, x):
        """Map one age onto its 10-year group label, capped at '80+'."""
        age = parse_age(x)
        if age >= 80:
            return '80+'
        # Floor division keeps the bucket a multiple of 10 on Python 2 and 3
        # alike; plain '/' would give e.g. 25.0 (label '25-34') on Python 3.
        bucket = 10 * (age // 10)
        return '%d-%d' % (bucket, bucket + 9)

    def age_cat_recode(f, x):
        """Map one age onto 'Under 18', '18 to 64' or '65 and over'."""
        age = parse_age(x)
        if age < 18:
            return 'Under 18'
        if age >= 65:
            return '65 and over'
        return '18 to 64'

    # sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo, session, table_fields=table_fields())

    # religion
    religion_dist_data, _ = get_stat_data('religion', geo, session)

    # urban/rural by sex
    urban_dist_data, _ = get_stat_data(
        ['rural or urban', 'sex'], geo, session,
        table_fields=table_fields())

    # Entries without 'numerators' are skipped; a geography with no 'Urban'
    # entry counts as zero urbanised people instead of raising KeyError.
    total_urbanised = sum(
        entry['numerators']['this']
        for entry in urban_dist_data.get('Urban', {}).values()
        if 'numerators' in entry)

    # median age
    db_model_age = get_model_from_fields(table_fields(), geo.geo_level)
    rows = get_objects_by_geo(db_model_age, geo, session, [age_field])
    rows = sorted(
        (row for row in rows if getattr(row, age_field) != 'unspecified'),
        key=lambda row: parse_age(getattr(row, age_field)))
    median = calculate_median(rows, age_field)

    # age in 10 year groups
    age_dist_data, _ = get_stat_data(
        age_field, geo, session,
        table_fields=table_fields(),
        recode=age_recode, exclude=['unspecified'])

    # age category
    age_cats, _ = get_stat_data(
        age_field, geo, session,
        table_fields=table_fields(),
        recode=age_cat_recode, exclude=['unspecified'])

    # True division on both operands (integer totals would truncate to 0 on
    # Python 2) and no ZeroDivisionError for an empty geography.
    if total_pop:
        urban_percent = round(
            float(total_urbanised) / float(total_pop) * 100, 2)
    else:
        urban_percent = 0

    return {
        'sex_ratio': sex_dist_data,
        'religion_ratio': religion_dist_data,
        'urban_distribution': urban_dist_data,
        'urbanised': {
            'name': 'In urban areas',
            'numerators': {'this': total_urbanised},
            'values': {'this': urban_percent},
        },
        'age_group_distribution': age_dist_data,
        'age_category_distribution': age_cats,
        'median_age': {
            "name": "Median age",
            "values": {"this": median},
        },
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        },
    }
def get_demographics_profile(geo_code, geo_level, session):
    """Assemble the demographics section of a geography's profile.

    Args:
        geo_code: code of the geography being profiled.
        geo_level: level of that geography.
        session: database session used for queries and data helpers.

    Returns:
        dict holding the language, population-group, age-group, sex,
        age-category, citizenship, province-of-birth and region-of-birth
        distributions plus the derived ``language_most_spoken``,
        ``total_population``, ``median_age``, ``citizenship_south_african``
        and ``born_in_south_africa`` figures. ``population_density`` is only
        present when the geography's ``square_kms`` is truthy.
    """
    age_field = 'age in completed years'

    def share_of_population(part):
        # True division on both operands (integer totals would truncate on
        # Python 2); an empty population yields 0 instead of raising.
        if not total_pop:
            return 0
        return round(float(part) / float(total_pop) * 100, 2)

    def region_recode(field, key):
        """Rename the 'Born in South Africa' category for display."""
        if key == 'Born in South Africa':
            return 'South Africa'
        return key

    # population group
    pop_dist_data, total_pop = get_stat_data(
        ['population group'], geo_level, geo_code, session)

    # language; ordered by descending total, so the first key is the most
    # spoken one. list(...) keeps the lookup working on Python 3, where
    # keys() is a view that cannot be indexed.
    language_data, _ = get_stat_data(
        ['language'], geo_level, geo_code, session, order_by='-total')
    language_most_spoken = language_data[list(language_data.keys())[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
        ['age groups in 5 years'], geo_level, geo_code, session,
        table_name='agegroupsin5years',
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19', '20-29', '30-39', '40-49',
                   '50-59', '60-69', '70-79', '80+'))

    # sex (census data refers to sex as gender)
    db_model_sex = get_model_from_fields(
        ['gender'], geo_level, table_name='gender')
    # NOTE(review): rows are filtered by geo_code only - confirm whether a
    # geo_level filter is needed as well.
    male_query = (session.query(func.sum(db_model_sex.total))
                  .filter(db_model_sex.gender == 'Male')
                  .filter(db_model_sex.geo_code == geo_code))
    # SUM over zero matching rows comes back as None; count that as 0.
    total_male = male_query.one()[0] or 0
    total_female = total_pop - total_male

    sex_data = OrderedDict((
        ('Female', {
            "name": "Female",
            "values": {"this": share_of_population(total_female)},
            "numerators": {"this": total_female},
        }),
        ('Male', {
            "name": "Male",
            "values": {"this": share_of_population(total_male)},
            "numerators": {"this": total_male},
        }),
    ))
    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        },
    }

    geo = geo_data.get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age
    db_model_age = get_model_from_fields(
        [age_field], geo_level, table_name='ageincompletedyears')
    age_rows = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda row: int(getattr(row, age_field)))
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": calculate_median(age_rows, age_field)},
    }

    # age category
    age_dist, _ = get_stat_data(
        [age_field], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified',
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18', '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
        ['citizenship'], geo_level, geo_code, session, order_by='-total')
    sa_citizen = citizenship_dist['Yes']['numerators']['this']
    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {'this': percent(sa_citizen, total_pop)},
        'numerators': {'this': sa_citizen},
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(
        ['province of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total')
    final_data['province_of_birth_distribution'] = province_of_birth_dist

    region_of_birth_dist, _ = get_stat_data(
        ['region of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total', recode=region_recode)
    # Zero-valued categories are excluded above, so the key may be absent.
    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0
    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {'this': percent(born_in_sa, total_pop)},
        'numerators': {'this': born_in_sa},
    }

    return final_data
def get_demographics_profile(geo, session):
    """Assemble the demographics section of the profile for ``geo``.

    All ``get_stat_data`` lookups are pinned to the
    "Census and Community Survey" dataset.

    Args:
        geo: geography object; its ``square_kms`` is read here.
        session: database session handed through to the data helpers.

    Returns:
        dict holding the language, population-group, age-group, sex,
        age-category, citizenship, province-of-birth and region-of-birth
        distributions plus the derived ``language_most_spoken``,
        ``total_population``, ``median_age``, ``citizenship_south_african``
        and ``born_in_south_africa`` figures. ``population_density`` is only
        present when ``geo.square_kms`` is truthy.
    """
    dataset = "Census and Community Survey"
    age_field = "age in completed years"

    def region_recode(field, key):
        """Rename region-of-birth categories for display."""
        if key == "Born in South Africa":
            return "South Africa"
        return {"Not applicable": "Other"}.get(key, key)

    # population group
    pop_dist_data, total_pop = get_stat_data(
        ["population group"], geo, session, table_dataset=dataset
    )

    # language; ordered by descending total, so the first key is the most
    # spoken one. list(...) keeps the lookup working on Python 3, where
    # keys() is a view that cannot be indexed.
    language_data, _ = get_stat_data(
        ["language"],
        geo,
        session,
        table_dataset=dataset,
        order_by="-total",
    )
    language_most_spoken = language_data[list(language_data.keys())[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
        ["age groups in 5 years"],
        geo,
        session,
        table_dataset=dataset,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=(
            "0-9",
            "10-19",
            "20-29",
            "30-39",
            "40-49",
            "50-59",
            "60-69",
            "70-79",
            "80+",
        ),
    )

    # sex
    sex_data, _ = get_stat_data(
        ["gender"],
        geo,
        session,
        table_universe="Population",
        table_dataset=dataset,
    )

    final_data = {
        "language_distribution": language_data,
        "language_most_spoken": language_most_spoken,
        "population_group_distribution": pop_dist_data,
        "age_group_distribution": age_dist_data,
        "sex_ratio": sex_data,
        "total_population": {"name": "People", "values": {"this": total_pop}},
    }

    if geo.square_kms:
        final_data["population_density"] = {
            "name": "people per square kilometre",
            "values": {"this": total_pop / geo.square_kms},
        }

    # median age
    age_table = get_datatable("ageincompletedyears")
    age_rows = sorted(
        age_table.get_rows_for_geo(geo, session),
        key=lambda row: int(getattr(row, age_field)),
    )
    final_data["median_age"] = {
        "name": "Median age",
        "values": {"this": calculate_median(age_rows, age_field)},
    }

    # age category
    age_dist, _ = get_stat_data(
        [age_field],
        geo,
        session,
        table_dataset=dataset,
        table_name="ageincompletedyearssimplified",
        key_order=["Under 18", "18 to 64", "65 and over"],
        recode={"< 18": "Under 18", ">= 65": "65 and over"},
    )
    final_data["age_category_distribution"] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
        ["citizenship"],
        geo,
        session,
        table_dataset=dataset,
        order_by="-total",
    )
    sa_citizen = citizenship_dist["Yes"]["numerators"]["this"]
    final_data["citizenship_distribution"] = citizenship_dist
    final_data["citizenship_south_african"] = {
        "name": "South African citizens",
        "values": {"this": percent(sa_citizen, total_pop)},
        "numerators": {"this": sa_citizen},
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(
        ["province of birth"],
        geo,
        session,
        table_dataset=dataset,
        exclude_zero=True,
        order_by="-total",
    )
    final_data["province_of_birth_distribution"] = province_of_birth_dist

    region_of_birth_dist, _ = get_stat_data(
        ["region of birth"],
        geo,
        session,
        table_dataset=dataset,
        exclude_zero=True,
        order_by="-total",
        recode=region_recode,
    )
    # Zero-valued categories are excluded above, so the key may be absent.
    if "South Africa" in region_of_birth_dist:
        born_in_sa = region_of_birth_dist["South Africa"]["numerators"]["this"]
    else:
        born_in_sa = 0
    final_data["region_of_birth_distribution"] = region_of_birth_dist
    final_data["born_in_south_africa"] = {
        "name": "Born in South Africa",
        "values": {"this": percent(born_in_sa, total_pop)},
        "numerators": {"this": born_in_sa},
    }

    return final_data
def get_schools_profile(geo, session, year):
    """Build the secondary-schools profile of ``geo`` for one results year.

    Args:
        geo: geography object; its ``geo_level`` and ``geo_code`` are read.
        session: database session used for the queries.
        year: value matched against ``year_of_result``.

    Returns:
        dict with the keys ``schools_distribution``, ``region_distribution``,
        ``category_distribution``, ``best_schools_more_40``,
        ``worst_schools_more_40``, ``best_schools_less_40``,
        ``worst_schools_less_40``, ``gpa_group_distribution``,
        ``gender_distribution``, ``total_schools`` and ``median_gpa``.
    """
    def only_year():
        # Fresh filter dict per call, restricted to the requested year.
        return {'year_of_result': [year]}

    def ranked_schools(size_pattern, direction):
        """Rows of this geography/year whose ``more_than_40`` is LIKE
        ``size_pattern``, ordered by national rank via ``direction``."""
        table = Base.metadata.tables['secondary_school']
        # Only national_rank_all is used for sorting: regional and district
        # ranks are unknown for some results, especially historical ones.
        rank = cast(table.c.national_rank_all, Integer)
        return (session.query(table)
                .filter(table.c.geo_level == geo.geo_level)
                .filter(table.c.geo_code == geo.geo_code)
                .filter(table.c.year_of_result == year)
                .filter(table.c.more_than_40.like(size_pattern))
                .order_by(direction(rank))
                .all())

    def gpa_recode(f, x):
        """Label a GPA value; anything from 4 upwards becomes '4+'."""
        gpa = x
        if gpa >= 4:
            return '4+'
        # '%d' truncates, so 1.7 becomes '1-3'.
        # NOTE(review): a span of 2 for "1 point groups" looks odd - confirm.
        return '%d-%d' % (gpa, gpa + 2)

    # ownership status
    schools_dist, total_schools = get_stat_data(
        ['ownership'], geo=geo, session=session, only=only_year())

    # Field to break the distribution down by: one level below ``geo``.
    # Unknown levels fall back to 'region' rather than leaving the name
    # unbound (which raised UnboundLocalError).
    reg = {
        'country': 'region',
        'region': 'district',
        'district': 'ward',
    }.get(geo.geo_level, 'region')

    region_dist, total_schools = get_stat_data(
        [reg], geo=geo, session=session, only=only_year())
    category_dist, _ = get_stat_data(
        ['more_than_40'], geo=geo, session=session, only=only_year())
    gender_dist, _ = get_stat_data(
        ['gender'], geo=geo, session=session, only=only_year())

    top_schools_40_more = ranked_schools("yes%", asc)
    top_schools_40_less = ranked_schools("no%", asc)
    lowest_schools_40_more = ranked_schools("yes%", desc)
    lowest_schools_40_less = ranked_schools("no%", desc)

    # median gpa
    db_model_gpa = get_model_from_fields(
        ['code', 'name', 'avg_gpa'], geo.geo_level)
    gpa_rows = get_objects_by_geo(db_model_gpa, geo, session, ['avg_gpa'])
    median = calculate_median(gpa_rows, 'avg_gpa')

    # gpa groups
    gpa_dist_data, total_schools = get_stat_data(
        'avg_gpa', geo, session,
        table_fields=['code', 'name', 'avg_gpa'],
        recode=gpa_recode, exclude=['unspecified'],
        only=only_year())

    return {
        'schools_distribution': schools_dist,
        'region_distribution': region_dist,
        'category_distribution': category_dist,
        'best_schools_more_40': top_schools_40_more,
        'worst_schools_more_40': lowest_schools_40_more,
        'best_schools_less_40': top_schools_40_less,
        'worst_schools_less_40': lowest_schools_40_less,
        'gpa_group_distribution': gpa_dist_data,
        'gender_distribution': gender_dist,
        'total_schools': {
            "name": "Schools",
            "values": {"this": total_schools},
        },
        'median_gpa': {
            "name": "Median GPA",
            "values": {"this": median},
        },
    }