Пример #1
0
def get_households_profile(geo, session):
    # head of household
    # gender
    head_gender_dist, total_households = get_stat_data(
        ['gender of household head'], geo, session,
        order_by='gender of household head')
    female_heads = head_gender_dist['Female']['numerators']['this']

    # age
    db_model_u18 = get_model_from_fields(
        ['gender of head of household'], geo.geo_level,
        table_name='genderofheadofhouseholdunder18'
    )
    objects = get_objects_by_geo(db_model_u18, geo, session)
    total_under_18 = float(sum(o[0] for o in objects))

    # type of dwelling
    type_of_dwelling_dist, _ = get_stat_data(
        ['type of dwelling'], geo, session,
        recode=TYPE_OF_DWELLING_RECODE,
        order_by='-total')
    informal = type_of_dwelling_dist['Shack']['numerators']['this']

    _, total_ecd_children = get_stat_data(
        ['age in completed years'], geo, session,
        table_name='ageincompletedyears',
        only=['0', '1', '2', '3', '4', '5'])

    ecd_children_per_household = ratio(total_ecd_children, total_households)

    return {
        'total_households': {
            'name': 'Households',
            'values': {'this': total_households},
        },
        'type_of_dwelling_distribution': type_of_dwelling_dist,
        'informal': {
            'name': 'Households that are informal dwellings (shacks)',
            'values': {'this': percent(informal, total_households)},
            'numerators': {'this': informal},
        },
        'head_of_household': {
            'gender_distribution': head_gender_dist,
            'female': {
                'name': 'Households with women as their head',
                'values': {'this': percent(female_heads, total_households)},
                'numerators': {'this': female_heads},
            },
            'under_18': {
                'name': 'Households with heads under 18 years old',
                'values': {'this': total_under_18},
            }
        },
        'ecd_children_per_household': {
            'name': 'Average number of children (aged 0-5) in each household',
            'values': {'this': ecd_children_per_household},
        },
    }
Пример #2
0
def get_education_profile(geo, session):
    db_model = get_model_from_fields(['highest educational level'], geo.geo_level, table_name='highesteducationallevel20')
    objects = get_objects_by_geo(db_model, geo, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0
    for i, obj in enumerate(objects):
        category_val = getattr(obj, 'highest educational level')
        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total
        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }
    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        }
    }
    # calculate percentages
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            fields["values"] = {"this": round(fields["numerators"]["this"]
                                              / total * 100, 2)}

    edu_dist_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {'educational_attainment_distribution': edu_dist_data,
            'educational_attainment': edu_split_data}
Пример #3
0
def get_education_profile(geo_code, geo_level, session):
    db_model = get_model_from_fields(['highest educational level'], geo_level, table_name='highesteducationallevel20')
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0
    for i, obj in enumerate(objects):
        category_val = getattr(obj, 'highest educational level')
        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total
        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }
    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        }
    }
    # calculate percentages
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            fields["values"] = {"this": round(fields["numerators"]["this"]
                                              / total * 100, 2)}

    edu_dist_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {'educational_attainment_distribution': edu_dist_data,
            'educational_attainment': edu_split_data}
Пример #4
0
def get_service_delivery_profile(geo, session):
    # water source
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'],
        geo,
        session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')
    if 'Service provider' in water_src_data:
        total_water_sp = water_src_data['Service provider']['numerators'][
            'this']
    else:
        total_water_sp = 0.0

    # electricity
    elec_attrs = [
        'electricity for cooking', 'electricity for heating',
        'electricity for lighting'
    ]
    db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level)
    objects = get_objects_by_geo(db_model_elec, geo, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {
                "this": 0.0
            },
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {
                "this": 0.0
            },
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {
                "this": 0.0
            },
        }
    }
    for obj in objects:
        total_elec += obj.total
        has_some = False
        has_all = True
        for attr in elec_attrs:
            val = not getattr(obj, attr).startswith('no ')
            has_all = has_all and val
            has_some = has_some or val
        if has_some:
            total_some_elec += obj.total
        if has_all:
            elec_access_data['total_all_elec']['numerators'][
                'this'] += obj.total
        elif has_some:
            elec_access_data['total_some_not_all_elec']['numerators'][
                'this'] += obj.total
        else:
            elec_access_data['total_no_elec']['numerators'][
                'this'] += obj.total

    for data, total in zip((elec_access_data, ), (total_elec, )):
        for fields in data.values():
            fields["values"] = {
                "this": percent(fields["numerators"]["this"], total)
            }

    add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'],
        geo,
        session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.iteritems():
        if key.startswith('Flush') or key.startswith('Chemical'):
            total_flush_toilet += data['numerators']['this']
        if key == 'None':
            total_no_toilet += data['numerators']['this']

    return {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': {
            "name":
            "Are getting water from a regional or local service provider",
            "numerators": {
                "this": total_water_sp
            },
            "values": {
                "this": percent(total_water_sp, total_wsrc)
            },
        },
        'percentage_electricity_access': {
            "name":
            "Have electricity for at least one of cooking, heating or lighting",
            "numerators": {
                "this": total_some_elec
            },
            "values": {
                "this": percent(total_some_elec, total_elec)
            },
        },
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': {
            "name": "Have access to flush or chemical toilets",
            "numerators": {
                "this": total_flush_toilet
            },
            "values": {
                "this": percent(total_flush_toilet, total_toilet)
            },
        },
        'percentage_no_toilet_access': {
            "name": "Have no access to any toilets",
            "numerators": {
                "this": total_no_toilet
            },
            "values": {
                "this": percent(total_no_toilet, total_toilet)
            },
        },
        'toilet_facilities_distribution': toilet_data,
    }
Пример #5
0
def get_households_profile(geo, session):
    # head of household
    # gender
    head_gender_dist, total_households = get_stat_data(
        ['gender of household head'],
        geo,
        session,
        order_by='gender of household head')
    female_heads = head_gender_dist['Female']['numerators']['this']

    # age
    db_model_u18 = get_model_from_fields(
        ['gender of head of household'],
        geo.geo_level,
        table_name='genderofheadofhouseholdunder18')
    objects = get_objects_by_geo(db_model_u18, geo, session)
    total_under_18 = float(sum(o[0] for o in objects))

    # type of dwelling
    type_of_dwelling_dist, _ = get_stat_data(['type of dwelling'],
                                             geo,
                                             session,
                                             recode=TYPE_OF_DWELLING_RECODE,
                                             order_by='-total')
    informal = type_of_dwelling_dist['Shack']['numerators']['this']

    _, total_ecd_children = get_stat_data(['age in completed years'],
                                          geo,
                                          session,
                                          table_name='ageincompletedyears',
                                          only=['0', '1', '2', '3', '4', '5'])

    ecd_children_per_household = ratio(total_ecd_children, total_households)

    return {
        'total_households': {
            'name': 'Households',
            'values': {
                'this': total_households
            },
        },
        'type_of_dwelling_distribution': type_of_dwelling_dist,
        'informal': {
            'name': 'Households that are informal dwellings (shacks)',
            'values': {
                'this': percent(informal, total_households)
            },
            'numerators': {
                'this': informal
            },
        },
        'head_of_household': {
            'gender_distribution': head_gender_dist,
            'female': {
                'name': 'Households with women as their head',
                'values': {
                    'this': percent(female_heads, total_households)
                },
                'numerators': {
                    'this': female_heads
                },
            },
            'under_18': {
                'name': 'Households with heads under 18 years old',
                'values': {
                    'this': total_under_18
                },
            }
        },
        'ecd_children_per_household': {
            'name': 'Average number of children (aged 0-5) in each household',
            'values': {
                'this': ecd_children_per_household
            },
        },
    }
Пример #6
0
def get_demographics_profile(geo_code, geo_level, session):
    # sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo_level, geo_code, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'])

    # urban/rural by sex
    urban_dist_data, _ = get_stat_data(
        ['rural or urban', 'sex'], geo_level, geo_code, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'])
    total_urbanised = 0
    for data in urban_dist_data['Urban'].itervalues():
        if 'numerators' in data:
            total_urbanised += data['numerators']['this']

    # median age
    db_model_age = get_model_from_fields(['age in completed years', 'sex', 'rural or urban'], geo_level)
    objects = get_objects_by_geo(db_model_age, geo_code, geo_level, session, ['age in completed years'])
    objects = sorted((o for o in objects if getattr(o, 'age in completed years') != 'unspecified'),
                     key=lambda x: int(getattr(x, 'age in completed years').replace('+', '')))
    median = calculate_median(objects, 'age in completed years')

    # age in 10 year groups
    def age_recode(f, x):
        age = int(x.replace('+', ''))
        if age >= 80:
            return '80+'
        bucket = 10 * (age / 10)
        return '%d-%d' % (bucket, bucket + 9)

    age_dist_data, _ = get_stat_data(
        'age in completed years', geo_level, geo_code, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'],
        recode=age_recode, exclude=['unspecified'])

    # age category
    def age_cat_recode(f, x):
        age = int(x.replace('+', ''))
        if age < 18:
            return 'Under 18'
        elif age >= 65:
            return '65 and over'
        else:
            return '18 to 64'

    age_cats, _ = get_stat_data(
        'age in completed years', geo_level, geo_code, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'],
        recode=age_cat_recode,
        exclude=['unspecified'])

    final_data = {
        'sex_ratio': sex_dist_data,
        'urban_distribution': urban_dist_data,
        'urbanised': {
            'name': 'In urban areas',
            'numerators': {'this': total_urbanised},
            'values': {'this': round(total_urbanised / total_pop * 100, 2)}
        },
        'age_group_distribution': age_dist_data,
        'age_category_distribution': age_cats,
        'median_age': {
            "name": "Median age",
            "values": {"this": median},
        },
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }}

    return final_data
Пример #7
0
def get_service_delivery_profile(geo, session):
    # water source
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'], geo, session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')
    if 'Service provider' in water_src_data:
        total_water_sp = water_src_data['Service provider']['numerators']['this']
    else:
        total_water_sp = 0.0

    # electricity
    elec_attrs = ['electricity for cooking',
                  'electricity for heating',
                  'electricity for lighting']
    db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level)
    objects = get_objects_by_geo(db_model_elec, geo, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {"this": 0.0},
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {"this": 0.0},
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {"this": 0.0},
        }
    }
    for obj in objects:
        total_elec += obj.total
        has_some = False
        has_all = True
        for attr in elec_attrs:
            val = not getattr(obj, attr).startswith('no ')
            has_all = has_all and val
            has_some = has_some or val
        if has_some:
            total_some_elec += obj.total
        if has_all:
            elec_access_data['total_all_elec']['numerators']['this'] += obj.total
        elif has_some:
            elec_access_data['total_some_not_all_elec']['numerators']['this'] += obj.total
        else:
            elec_access_data['total_no_elec']['numerators']['this'] += obj.total

    for data, total in zip((elec_access_data,), (total_elec,)):
        for fields in data.values():
            fields["values"] = {"this": percent(fields["numerators"]["this"], total)}

    add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'], geo, session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.iteritems():
        if key.startswith('Flush') or key.startswith('Chemical'):
            total_flush_toilet += data['numerators']['this']
        if key == 'None':
            total_no_toilet += data['numerators']['this']

    return {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': {
            "name": "Are getting water from a regional or local service provider",
            "numerators": {"this": total_water_sp},
            "values": {"this": percent(total_water_sp, total_wsrc)},
        },
        'percentage_electricity_access': {
            "name": "Have electricity for at least one of cooking, heating or lighting",
            "numerators": {"this": total_some_elec},
            "values": {"this": percent(total_some_elec, total_elec)},
        },
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': {
            "name": "Have access to flush or chemical toilets",
            "numerators": {"this": total_flush_toilet},
            "values": {"this": percent(total_flush_toilet, total_toilet)},
        },
        'percentage_no_toilet_access': {
            "name": "Have no access to any toilets",
            "numerators": {"this": total_no_toilet},
            "values": {"this": percent(total_no_toilet, total_toilet)},
        },
        'toilet_facilities_distribution': toilet_data,
    }
Пример #8
0
def get_schools_profile(geo, session):
    print geo.geo_level
    # ownership status
    schools_dist, total_schools = get_stat_data(['ownership'], geo, session)

    # region status
    region_dist, total_schools = get_stat_data(['region'], geo, session)

    # Choosing sorting option
    #Sorting will only be done using national_rank all, as regional and district ranks are unknown for some result esp historical
    rank_column = Base.metadata.tables['secondary_school'].c.national_rank_all

    # Getting top for schools with more than 40 students
    top_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "yes")\
                    .order_by(asc(cast(rank_column, Integer)))\
                    .all()
    # Getting top for schools with less than 40 students
    top_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "no")\
                    .order_by(asc(cast(rank_column, Integer)))\
                    .all()

    # Getting lowest schools with more than 40 students
    lowest_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "yes")\
                    .order_by(desc(cast(rank_column, Integer)))\
                    .all()
    # Getting lowest for schools with less than 40 students
    lowest_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "no")\
                    .order_by(desc(cast(rank_column, Integer)))\
                    .all()

    # median gpa
    db_model_age = get_model_from_fields(['code', 'name', 'avg_gpa'], geo.geo_level)
    objects = get_objects_by_geo(db_model_age, geo, session, ['avg_gpa'])
    median = calculate_median(objects, 'avg_gpa')

    # gpa in 1 point groups
    def gpa_recode(f, x):
        gpa = x
        if gpa >= 4:
            return '4+'
        bucket = 1 * (gpa / 1)
        return '%d-%d' % (bucket, bucket + 2)

    gpa_dist_data, _ = get_stat_data(
        'avg_gpa', geo, session,
        table_fields=['code', 'name', 'avg_gpa'],
        recode=gpa_recode, exclude=['unspecified'])

    total_private = 0.0
    for data in schools_dist['PRIVATE'].itervalues():
        if 'numerators' in data:
            total_private += data['numerators']['this']

    return {
        'schools_distribution': schools_dist,
        'region_distribution': region_dist,
        'top_schools': top_schools,
        'lowest_schools': lowest_schools,
        'gpa_group_distribution': gpa_dist_data,
        'median_gpa': {
            "name": "Median GPA",
            "values": {"this": median},
        },
    }
Пример #9
0
def get_service_delivery_profile(geo, session):
    # water source
    water_src_data, total_wsrc = get_stat_data(
            ['source of water'], geo, session,
            recode=SHORT_WATER_SOURCE_CATEGORIES,
            order_by='-total')
    if 'Service provider' in water_src_data:
        total_water_sp = water_src_data['Service provider']['numerators']['this']
    else:
        total_water_sp = 0.0

    # refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo.geo_level)
    objects = get_objects_by_geo(db_model_ref, geo, session, order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for obj in objects:
        attr = getattr(obj, 'refuse disposal')
        disp = SHORT_REFUSE_DISPOSAL_CATEGORIES[attr]
        refuse_disp_data[disp] = {
            "name": disp,
            "numerators": {"this": obj.total},
        }
        total_ref += obj.total
        if attr.startswith('Removed by local authority'):
            total_ref_sp += obj.total
    set_percent_values(refuse_disp_data, total_ref)
    add_metadata(refuse_disp_data, db_model_ref)

    # electricity
    if geo.version == '2011':
        elec_attrs = ['electricity for cooking',
                      'electricity for heating',
                      'electricity for lighting']
        db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level)
        objects = get_objects_by_geo(db_model_elec, geo, session)
        total_elec = 0.0
        total_some_elec = 0.0
        elec_access_data = {
            'total_all_elec': {
                "name": "Have electricity for everything",
                "numerators": {"this": 0.0},
            },
            'total_some_not_all_elec': {
                "name": "Have electricity for some things",
                "numerators": {"this": 0.0},
            },
            'total_no_elec': {
                "name": "No electricity",
                "numerators": {"this": 0.0},
            }
        }
        for obj in objects:
            total_elec += obj.total
            has_some = False
            has_all = True
            for attr in elec_attrs:
                val = not getattr(obj, attr).startswith('no ')
                has_all = has_all and val
                has_some = has_some or val
            if has_some:
                total_some_elec += obj.total
            if has_all:
                elec_access_data['total_all_elec']['numerators']['this'] += obj.total
            elif has_some:
                elec_access_data['total_some_not_all_elec']['numerators']['this'] += obj.total
            else:
                elec_access_data['total_no_elec']['numerators']['this'] += obj.total
        set_percent_values(elec_access_data, total_elec)
        add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
            ['toilet facilities'], geo, session,
            exclude_zero=True,
            recode=COLLAPSED_TOILET_CATEGORIES,
            order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.iteritems():
        if key.startswith('Flush') or key.startswith('Chemical'):
            total_flush_toilet += data['numerators']['this']
        if key == 'None':
            total_no_toilet += data['numerators']['this']

    profile = {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': {
            "name": "Are getting water from a regional or local service provider",
            "numerators": {"this": total_water_sp},
            "values": {"this": percent(total_water_sp, total_wsrc)},
        },
        'refuse_disposal_distribution': refuse_disp_data,
        'percentage_ref_disp_from_service_provider': {
            "name": "Are getting refuse disposal from a local authority or private company",
            "numerators": {"this": total_ref_sp},
            "values": {"this": percent(total_ref_sp, total_ref)},
        },
        'percentage_flush_toilet_access': {
            "name": "Have access to flush or chemical toilets",
            "numerators": {"this": total_flush_toilet},
            "values": {"this": percent(total_flush_toilet, total_toilet)},
        },
        'percentage_no_toilet_access': {
            "name": "Have no access to any toilets",
            "numerators": {"this": total_no_toilet},
            "values": {"this": percent(total_no_toilet, total_toilet)},
        },
        'toilet_facilities_distribution': toilet_data,
    }
    if geo.version == '2011':
        profile.update({
            'percentage_electricity_access': {
                "name": "Have electricity for at least one of cooking, heating or lighting",
                "numerators": {"this": total_some_elec},
                "values": {"this": percent(total_some_elec, total_elec)},
            },
            'electricity_access_distribution': elec_access_data,
        })
    return profile
Пример #10
0
def get_households_profile(geo, session):
    # head of household
    # gender
    head_gender_dist, total_households = get_stat_data(
            ['gender of household head'], geo, session,
            order_by='gender of household head')
    female_heads = head_gender_dist['Female']['numerators']['this']

    # age
    db_model_u18 = get_model_from_fields(
        ['gender of head of household'], geo.geo_level,
        table_name='genderofheadofhouseholdunder18'
    )
    objects = get_objects_by_geo(db_model_u18, geo, session)
    total_under_18 = float(sum(o[0] for o in objects))

    # tenure
    tenure_data, _ = get_stat_data(
            ['tenure status'], geo, session,
            recode=HOUSEHOLD_OWNERSHIP_RECODE,
            order_by='tenure status')
    owned = 0
    for key, data in tenure_data.iteritems():
        if key.startswith('Owned'):
            owned += data['numerators']['this']

    # annual household income
    if geo.version == '2011':
        HOUSEHOLD_INCOME_RECODE = HOUSEHOLD_INCOME_RECODE_2011
    else:
        HOUSEHOLD_INCOME_RECODE = COLLAPSED_ANNUAL_INCOME_CATEGORIES
    income_dist_data, _ = get_stat_data(
            ['annual household income'], geo, session,
            exclude=['Unspecified', 'Not applicable'],
            recode=HOUSEHOLD_INCOME_RECODE,
            key_order=HOUSEHOLD_INCOME_RECODE.values(),
            table_name='annualhouseholdincome_genderofhouseholdhead')

    # median income
    median = calculate_median_stat(income_dist_data)
    median_income = HOUSEHOLD_INCOME_ESTIMATE[median]

    # type of dwelling
    type_of_dwelling_dist, _ = get_stat_data(
            ['type of dwelling'], geo, session,
            recode=TYPE_OF_DWELLING_RECODE,
            order_by='-total')
    informal = type_of_dwelling_dist['Shack']['numerators']['this']

    # household goods
    household_goods, _ = get_stat_data(
            ['household goods'], geo, session,
            recode=HOUSEHOLD_GOODS_RECODE,
            key_order=sorted(HOUSEHOLD_GOODS_RECODE.values()))

    return {'total_households': {
                'name': 'Households',
                'values': {'this': total_households},
                },
            'owned': {
                'name': 'Households fully owned or being paid off',
                'values': {'this': percent(owned, total_households)},
                'numerators': {'this': owned},
                },
            'type_of_dwelling_distribution': type_of_dwelling_dist,
            'informal': {
                'name': 'Households that are informal dwellings (shacks)',
                'values': {'this': percent(informal, total_households)},
                'numerators': {'this': informal},
                },
            'tenure_distribution': tenure_data,
            'household_goods': household_goods,
            'annual_income_distribution': income_dist_data,
            'median_annual_income': {
                'name': 'Average annual household income',
                'values': {'this': median_income},
                },
            'head_of_household': {
                'gender_distribution': head_gender_dist,
                'female': {
                    'name': 'Households with women as their head',
                    'values': {'this': percent(female_heads, total_households)},
                    'numerators': {'this': female_heads},
                    },
                'under_18': {
                    'name': 'Households with heads under 18 years old',
                    'values': {'this': total_under_18},
                    }
                },
           }
Пример #11
0
def get_demographics_profile(geo, session):
    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo, session, table_dataset='Census 2011')

    # language
    language_data, _ = get_stat_data(
            ['language'], geo, session, order_by='-total')
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
            ['age groups in 5 years'], geo, session,
            table_name='agegroupsin5years',
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    sex_data, _ = get_stat_data(
            ['gender'], geo, session, table_name='gender')

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo.geo_level,
        table_name='ageincompletedyears'
    )
    objects = sorted(
        get_objects_by_geo(db_model_age, geo, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo, session,
        table_name='ageincompletedyearssimplified',
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18',
                '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
            ['citizenship'], geo, session,
            order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
            'name': 'South African citizens',
            'values': {'this': percent(sa_citizen, total_pop)},
            'numerators': {'this': sa_citizen},
            }

    # migration
    province_of_birth_dist, _ = get_stat_data(
            ['province of birth'], geo, session,
            exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return {
                'Not applicable': 'Other',
                }.get(key, key)

    region_of_birth_dist, _ = get_stat_data(
            ['region of birth'], geo, session,
            exclude_zero=True, order_by='-total',
            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
            'name': 'Born in South Africa',
            'values': {'this': percent(born_in_sa, total_pop)},
            'numerators': {'this': born_in_sa},
            }

    return final_data
Пример #12
0
def get_demographics_profile(geo_code, geo_level, session):
    # population by sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo_level, geo_code, session,
        table_fields=['disability', 'sex'])

    if total_pop > 0:
        # population by disability
        disability_dist_data, total_disabled = get_stat_data(
            'disability', geo_level, geo_code, session,
            table_fields=['disability', 'sex'],
            recode=dict(DISABILITY_RECODES),
            key_order=DISABILITY_RECODES.values(),
            exclude=['NO_DISABILITY'])

        demographic_data = {
            'has_data': True,
            'sex_ratio': sex_dist_data,
            'disability_ratio': disability_dist_data,
            'total_population': {
                "name": "People",
                "values": {"this": total_pop}
            },
            'total_disabled': {
                'name': 'People',
                'values':
                    {'this': total_disabled},
            },
            'percent_disabled': {
                'name': 'Are disabled',
                'values':
                    {'this': round(total_disabled / float(total_pop) * 100, 2)},
            },
            'is_vdc': True
        }

        if geo_level != 'vdc':

            income_table = get_datatable('per_capita_income')
            per_capita_income, _ = income_table.get_stat_data(
                geo_level, geo_code, percent=False)

            lifeexpectancy_table = get_datatable('lifeexpectancy')
            life_expectancy, _ = lifeexpectancy_table.get_stat_data(
                geo_level, geo_code, percent=False)

            # population projection for 2031
            pop_2031_dist_data, pop_projection_2031 = get_stat_data(
                'sex', geo_level, geo_code, session,
                table_fields=['sex'],
                table_name='population_projection_2031')

            # poverty (UNDP and Open Nepal)
            poverty_dist_data, undp_survey_pop = get_stat_data(
                'poverty', geo_level, geo_code, session,
                recode=dict(POVERTY_RECODES),
                key_order=POVERTY_RECODES.values())

            total_in_poverty = \
                poverty_dist_data['In Poverty']['numerators']['this']

            # language
            language_data, _ = get_stat_data(
                ['language'], geo_level, geo_code, session, order_by='-total')
            language_most_spoken = language_data[language_data.keys()[0]]

            # caste or ethnic group
            caste_data, _ = get_stat_data(['caste or ethnic group'], geo_level,
                                          geo_code, session, order_by='-total')
            most_populous_caste = caste_data[caste_data.keys()[0]]

            citizenship_data, _ = get_stat_data(
                            ['citizenship', 'sex'], geo_level,
                            geo_code, session, order_by='-total'
            )
            citizenship_by_sex = {
                'Nepal': citizenship_data['Nepal'],
                'India': citizenship_data['India'],
                'China': citizenship_data['China'],
                'Others': citizenship_data['Others'],
                'metadata': citizenship_data['metadata']
            }
            citizenship_distribution, _ = get_stat_data(
                'citizenship', geo_level, geo_code, session,
                order_by='-total')

            # age
            # age in 10 year groups
            def age_recode(f, x):
                age = int(x)
                if age >= 80:
                    return '80+'
                bucket = 10 * (age / 10)
                return '%d-%d' % (bucket, bucket + 9)

            age_dist_data, _ = get_stat_data(
                'age in completed years', geo_level, geo_code, session,
                table_fields=['age in completed years', 'sex'],
                recode=age_recode,
                table_name='age_sex')

            ordered_age_dist_data = OrderedDict(
                sorted(age_dist_data.items(),
                       key=lambda age_range: age_range[0])
            )

            # age category
            def age_cat_recode(f, x):
                age = int(x.replace('+', ''))
                if age < 20:
                    return 'Under 20'
                elif age >= 60:
                    return '60 and over'
                else:
                    return '20 to 59'

            age_cats, _ = get_stat_data(
                'age in completed years', geo_level, geo_code, session,
                table_fields=['age in completed years', 'sex'],
                recode=age_cat_recode,
                table_name='age_sex')

            ordered_age_cats_data = OrderedDict(
                [('Under 20', age_cats['Under 20']),
                 ('20 to 59', age_cats['20 to 59']),
                 ('60 and over', age_cats['60 and over']),
                 ('metadata', age_cats['metadata'])]
            )

            # median age
            db_model_age = get_model_from_fields(
                ['age in completed years', 'sex'],
                geo_level)
            objects = get_objects_by_geo(db_model_age,
                                         geo_code,
                                         geo_level,
                                         session,
                                         ['age in completed years'])
            objects = sorted((o for o in objects if
                              getattr(o, 'age in completed years') !=
                              'unspecified'),
                             key=lambda x:
                             int(getattr(x, 'age in completed years')
                                 .replace('+', '')))
            median_age = calculate_median(objects, 'age in completed years')

            # add non-VDC data
            demographic_data['is_vdc'] = False

            demographic_data['per_capita_income'] = {
                'name': 'Per capita income in US dollars',
                'values': {'this': per_capita_income['income']['values']['this']}
            }

            demographic_data['life_expectancy'] = {
                'name': 'Life expectancy in years',
                'values': {'this': life_expectancy['years']['values']['this']}
            }
            demographic_data['pop_2031_dist'] = pop_2031_dist_data
            demographic_data['pop_projection_2031'] = {
                "name": "Projected in 2031",
                "values": {"this": pop_projection_2031}
            }
            demographic_data['poverty_dist'] = poverty_dist_data
            demographic_data['poverty_population'] = {
                'name': 'Estimated Population',
                'values': {'this': undp_survey_pop}
            }
            demographic_data['percent_impoverished'] = {
                'name': 'Are in poverty',
                'numerators': {'this': total_in_poverty},
                'values': {
                    'this': round(
                        total_in_poverty / undp_survey_pop * 100,
                        2)}
            }
            demographic_data['language_distribution'] = language_data
            demographic_data['language_most_spoken'] = language_most_spoken
            demographic_data['ethnic_distribution'] = caste_data
            demographic_data['most_populous_caste'] = most_populous_caste
            demographic_data['citizenship_by_sex'] = citizenship_by_sex
            demographic_data['citizenship_distribution'] = citizenship_distribution
            demographic_data['age_group_distribution'] = ordered_age_dist_data
            demographic_data['age_category_distribution'] = \
                ordered_age_cats_data
            demographic_data['median_age'] = {
                'name': 'Median age',
                'values': {'this': median_age},
            }

    else:
        demographic_data = {
            'area_has_data': False
        }

    return demographic_data
Пример #13
0
def get_demographics_profile(geo, session):
    # sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'])

    religion_dist_data, _ = get_stat_data(
        'religion', geo, session)

    # urban/rural by sex
    urban_dist_data, _ = get_stat_data(
        ['rural or urban', 'sex'], geo, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'])
    total_urbanised = 0
    for data in urban_dist_data['Urban'].itervalues():
        if 'numerators' in data:
            total_urbanised += data['numerators']['this']

    # median age
    db_model_age = get_model_from_fields(
        ['age in completed years', 'sex', 'rural or urban'], geo.geo_level)
    objects = get_objects_by_geo(db_model_age, geo, session, [
                                 'age in completed years'])
    objects = sorted((o for o in objects if getattr(o, 'age in completed years') != 'unspecified'),
                     key=lambda x: int(getattr(x, 'age in completed years').replace('+', '')))
    median = calculate_median(objects, 'age in completed years')

    # age in 10 year groups
    def age_recode(f, x):
        age = int(x.replace('+', ''))
        if age >= 80:
            return '80+'
        bucket = 10 * (age / 10)
        return '%d-%d' % (bucket, bucket + 9)

    age_dist_data, _ = get_stat_data(
        'age in completed years', geo, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'],
        recode=age_recode, exclude=['unspecified'])

    # age category
    def age_cat_recode(f, x):
        age = int(x.replace('+', ''))
        if age < 18:
            return 'Under 18'
        elif age >= 65:
            return '65 and over'
        else:
            return '18 to 64'

    age_cats, _ = get_stat_data(
        'age in completed years', geo, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'],
        recode=age_cat_recode,
        exclude=['unspecified'])

    final_data = {
        'sex_ratio': sex_dist_data,
        'religion_ratio': religion_dist_data,
        'urban_distribution': urban_dist_data,
        'urbanised': {
            'name': 'In urban areas',
            'numerators': {'this': total_urbanised},
            'values': {'this': round(total_urbanised / total_pop * 100, 2)}
        },
        'age_group_distribution': age_dist_data,
        'age_category_distribution': age_cats,
        'median_age': {
            "name": "Median age",
            "values": {"this": median},
        },
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }
    }

    return final_data
Пример #14
0
def get_households_profile(geo_code, geo_level, session):
    # head of household
    # gender
    head_gender_dist, total_households = get_stat_data(
            ['gender of household head'], geo_level, geo_code, session,
            order_by='gender of household head')
    female_heads = head_gender_dist['Female']['numerators']['this']

    # age
    db_model_u18 = get_model_from_fields(
        ['gender of head of household'], geo_level,
        table_name='genderofheadofhouseholdunder18'
    )
    objects = get_objects_by_geo(db_model_u18, geo_code, geo_level, session)
    total_under_18 = float(sum(o[0] for o in objects))

    # tenure
    tenure_data, _ = get_stat_data(
            ['tenure status'], geo_level, geo_code, session,
            order_by='tenure status')
    owned = 0
    for key, data in tenure_data.iteritems():
        if key.startswith('Owned'):
            owned += data['numerators']['this']

    # annual household income
    income_dist_data, _ = get_stat_data(
            ['annual household income'], geo_level, geo_code, session,
            exclude=['Unspecified'],
            recode=HOUSEHOLD_INCOME_RECODE,
            key_order=HOUSEHOLD_INCOME_RECODE.values(),
            table_name='annualhouseholdincome_genderofhouseholdhead')

    # median income
    median = calculate_median_stat(income_dist_data)
    median_income = HOUSEHOLD_INCOME_ESTIMATE[median]

    # type of dwelling
    type_of_dwelling_dist, _ = get_stat_data(
            ['type of dwelling'], geo_level, geo_code, session,
            recode=TYPE_OF_DWELLING_RECODE,
            order_by='-total')
    informal = type_of_dwelling_dist['Shack']['numerators']['this']

    # household goods
    household_goods, _ = get_stat_data(
            ['household goods'], geo_level, geo_code, session,
            total=total_households,
            recode=HOUSEHOLD_GOODS_RECODE,
            exclude=['total households'],
            key_order=sorted(HOUSEHOLD_GOODS_RECODE.values()))

    return {'total_households': {
                'name': 'Households',
                'values': {'this': total_households},
                },
            'owned': {
                'name': 'Households fully owned or being paid off',
                'values': {'this': percent(owned, total_households)},
                'numerators': {'this': owned},
                },
            'type_of_dwelling_distribution': type_of_dwelling_dist,
            'informal': {
                'name': 'Households that are informal dwellings (shacks)',
                'values': {'this': percent(informal, total_households)},
                'numerators': {'this': informal},
                },
            'tenure_distribution': tenure_data,
            'household_goods': household_goods,
            'annual_income_distribution': income_dist_data,
            'median_annual_income': {
                'name': 'Average annual household income',
                'values': {'this': median_income},
                },
            'head_of_household': {
                'gender_distribution': head_gender_dist,
                'female': {
                    'name': 'Households with women as their head',
                    'values': {'this': percent(female_heads, total_households)},
                    'numerators': {'this': female_heads},
                    },
                'under_18': {
                    'name': 'Households with heads under 18 years old',
                    'values': {'this': total_under_18},
                    }
                },
           }
Пример #15
0
def get_demographics_profile(geo_code, geo_level, session):
    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
            ['language'], geo_level, geo_code, session, order_by='-total')
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
            ['age groups in 5 years'], geo_level, geo_code, session,
            table_name='agegroupsin5years',
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level, table_name='gender')
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    query = query.filter(db_model_sex.geo_code == geo_code)
    total_male = query.one()[0]

    sex_data = OrderedDict((  # census data refers to sex as gender
            ('Female', {
                "name": "Female",
                "values": {"this": round((total_pop - total_male) / total_pop * 100, 2)},
                "numerators": {"this": total_pop - total_male},
            }),
            ('Male', {
                "name": "Male",
                "values": {"this": round(total_male / total_pop * 100, 2)},
                "numerators": {"this": total_male},
            }),
        ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    geo = geo_data.get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo_level,
        table_name='ageincompletedyears'
    )
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified',
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18',
                '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
            ['citizenship'], geo_level, geo_code, session,
            order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
            'name': 'South African citizens',
            'values': {'this': percent(sa_citizen, total_pop)},
            'numerators': {'this': sa_citizen},
            }

    # migration
    province_of_birth_dist, _ = get_stat_data(
            ['province of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
            ['region of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total',
            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
            'name': 'Born in South Africa',
            'values': {'this': percent(born_in_sa, total_pop)},
            'numerators': {'this': born_in_sa},
            }

    return final_data
Пример #16
0
def get_schools_profile(geo, session, year):
    # ownership status
    schools_dist, total_schools = get_stat_data(['ownership'], geo=geo, session=session, only={'year_of_result': [year]})
    #school_dist_data, _ = get_stat_data('age in completed years',geo=geo, session=session, only={'year_of_result': [year]})
    if geo.geo_level == "country":
        reg = 'region'
    elif geo.geo_level == "region":
        reg = 'district'
    elif geo.geo_level == "district":
        reg = 'ward'
    region_dist, total_schools = get_stat_data([reg], geo=geo, session=session, only={'year_of_result': [year]})

    category_dist, _ = get_stat_data(['more_than_40'], geo=geo, session=session, only={'year_of_result': [year]})

    gender_dist, _ = get_stat_data(['gender'], geo=geo, session=session, only={'year_of_result': [year]})
    # Choosing sorting option
    #Sorting will only be done using national_rank all, as regional and district ranks are unknown for some result esp historical
    rank_column = Base.metadata.tables['secondary_school'].c.national_rank_all
    # Getting top for schools with more than 40 students
    top_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("yes%"))\
                    .order_by(asc(cast(rank_column, Integer)))\
                    .all()
    # Getting top for schools with less than 40 students
    top_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("no%"))\
                    .order_by(asc(cast(rank_column, Integer)))\
                    .all()

    # Getting lowest schools with more than 40 students
    lowest_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("yes%"))\
                    .order_by(desc(cast(rank_column, Integer)))\
                    .all()
    # Getting lowest for schools with less than 40 students
    lowest_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("no%"))\
                    .order_by(desc(cast(rank_column, Integer)))\
                    .all()
    # median gpa
    db_model_age = get_model_from_fields(['code', 'name', 'avg_gpa'], geo.geo_level)
    objects = get_objects_by_geo(db_model_age, geo, session, ['avg_gpa'])
    median = calculate_median(objects, 'avg_gpa')

    # gpa in 1 point groups
    def gpa_recode(f, x):
        gpa = x
        if gpa >= 4:
            return '4+'
        bucket = 1 * (gpa / 1)
        return '%d-%d' % (bucket, bucket + 2)

    gpa_dist_data, total_schools = get_stat_data(
        'avg_gpa', geo, session,
        table_fields=['code', 'name', 'avg_gpa'],
        recode=gpa_recode, exclude=['unspecified'], only={'year_of_result': [year]})

    total_private = 0.0
    for data in schools_dist['Non-Government'].itervalues():
        if 'numerators' in data:
            total_private += data['numerators']['this']

    return {
        'schools_distribution': schools_dist,
        'region_distribution': region_dist,
        'category_distribution': category_dist,
        'best_schools_more_40': top_schools_40_more,
        'worst_schools_more_40': lowest_schools_40_more,
        'best_schools_less_40': top_schools_40_less,
        'worst_schools_less_40': lowest_schools_40_less,
        'gpa_group_distribution': gpa_dist_data,
        'gender_distribution': gender_dist,
        'total_schools': {
            "name": "Schools",
            "values": {"this": total_schools}
        },
        'median_gpa': {
            "name": "Median GPA",
            "values": {"this": median},
        },
    }