예제 #1
0
def get_demographics_profile(geo_code, geo_level, session):
    # sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo_level, geo_code, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'])

    # urban/rural by sex
    urban_dist_data, _ = get_stat_data(
        ['rural or urban', 'sex'], geo_level, geo_code, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'])
    total_urbanised = 0
    for data in urban_dist_data['Urban'].itervalues():
        if 'numerators' in data:
            total_urbanised += data['numerators']['this']

    # median age
    db_model_age = get_model_from_fields(['age in completed years', 'sex', 'rural or urban'], geo_level)
    objects = get_objects_by_geo(db_model_age, geo_code, geo_level, session, ['age in completed years'])
    objects = sorted((o for o in objects if getattr(o, 'age in completed years') != 'unspecified'),
                     key=lambda x: int(getattr(x, 'age in completed years').replace('+', '')))
    median = calculate_median(objects, 'age in completed years')

    # age in 10 year groups
    def age_recode(f, x):
        age = int(x.replace('+', ''))
        if age >= 80:
            return '80+'
        bucket = 10 * (age / 10)
        return '%d-%d' % (bucket, bucket + 9)

    age_dist_data, _ = get_stat_data(
        'age in completed years', geo_level, geo_code, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'],
        recode=age_recode, exclude=['unspecified'])

    # age category
    def age_cat_recode(f, x):
        age = int(x.replace('+', ''))
        if age < 18:
            return 'Under 18'
        elif age >= 65:
            return '65 and over'
        else:
            return '18 to 64'

    age_cats, _ = get_stat_data(
        'age in completed years', geo_level, geo_code, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'],
        recode=age_cat_recode,
        exclude=['unspecified'])

    final_data = {
        'sex_ratio': sex_dist_data,
        'urban_distribution': urban_dist_data,
        'urbanised': {
            'name': 'In urban areas',
            'numerators': {'this': total_urbanised},
            'values': {'this': round(total_urbanised / total_pop * 100, 2)}
        },
        'age_group_distribution': age_dist_data,
        'age_category_distribution': age_cats,
        'median_age': {
            "name": "Median age",
            "values": {"this": median},
        },
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }}

    return final_data
예제 #2
0
def get_schools_profile(geo, session, year):
    schools_dist = LOCATIONNOTFOUND
    region_dist = LOCATIONNOTFOUND
    category_dist = LOCATIONNOTFOUND
    top_schools_40_more = []
    top_schools_40_less = []
    lowest_schools_40_less = []
    lowest_schools_40_more = []
    gpa_dist_data = []
    gender_dist = []
    total_schools = 0
    median = 0

    reg = 'region'
    if geo.geo_level == "country":
        reg = 'region'
    elif geo.geo_level == "region":
        reg = 'district'
    elif geo.geo_level == "district":
        reg = 'ward'

    with dataset_context(year='2017'):
        try:
            schools_dist, total_schools = get_stat_data(['ownership'], geo=geo, table_name='secondary_school',
                                                        session=session, only={
                    'year_of_result': [year]})
        except Exception as e:
            pass

        try:
            region_dist, total_schools = get_stat_data([reg], geo=geo,
                                                       session=session, only={
                    'year_of_result': [year]})
        except Exception as e:
            pass

        try:
            category_dist, _ = get_stat_data(['more_than_40'], geo=geo,
                                             session=session,
                                             only={'year_of_result': [year]})
        except Exception as e:
            pass

        try:
            gender_dist, _ = get_stat_data(['gender'], geo=geo, session=session,
                                           only={'year_of_result': [year]})
        except Exception:
            pass





        try:
            # ownership status
            # school_dist_data, _ = get_stat_data('age in completed years',geo=geo, session=session, only={'year_of_result': [year]})
            # Choosing sorting option
            # Sorting will only be done using national_rank all, as regional and district ranks are unknown for some result esp historical
            Base.metadata.reflect()
            rank_column = Base.metadata.tables[
                'secondary_school'].c.national_rank_all
            # Getting top for schools with more than 40 students
            top_schools_40_more = session.query(
                Base.metadata.tables['secondary_school']) \
                .filter(Base.metadata.tables[
                            'secondary_school'].c.geo_level == geo.geo_level) \
                .filter(
                Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code) \
                .filter(
                Base.metadata.tables['secondary_school'].c.year_of_result == year) \
                .filter(
                Base.metadata.tables['secondary_school'].c.more_than_40.like(
                    "yes%")) \
                .order_by(asc(cast(rank_column, Integer))) \
                .all()
            # Getting top for schools with less than 40 students
            top_schools_40_less = session.query(
                Base.metadata.tables['secondary_school']) \
                .filter(Base.metadata.tables[
                            'secondary_school'].c.geo_level == geo.geo_level) \
                .filter(
                Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code) \
                .filter(
                Base.metadata.tables['secondary_school'].c.year_of_result == year) \
                .filter(
                Base.metadata.tables['secondary_school'].c.more_than_40.like("no%")) \
                .order_by(asc(cast(rank_column, Integer))) \
                .all()

            # Getting lowest schools with more than 40 students
            lowest_schools_40_more = session.query(
                Base.metadata.tables['secondary_school']) \
                .filter(Base.metadata.tables[
                            'secondary_school'].c.geo_level == geo.geo_level) \
                .filter(
                Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code) \
                .filter(
                Base.metadata.tables['secondary_school'].c.year_of_result == year) \
                .filter(
                Base.metadata.tables['secondary_school'].c.more_than_40.like(
                    "yes%")) \
                .order_by(desc(cast(rank_column, Integer))) \
                .all()
            # Getting lowest for schools with less than 40 students
            lowest_schools_40_less = session.query(
                Base.metadata.tables['secondary_school']) \
                .filter(Base.metadata.tables[
                            'secondary_school'].c.geo_level == geo.geo_level) \
                .filter(
                Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code) \
                .filter(
                Base.metadata.tables['secondary_school'].c.year_of_result == year) \
                .filter(
                Base.metadata.tables['secondary_school'].c.more_than_40.like("no%")) \
                .order_by(desc(cast(rank_column, Integer))) \
                .all()
            # median gpa
            db_model_age = get_datatable('code_name_avg_gpa')
            objects = db_model_age.get_rows_for_geo(geo, session)
            median = calculate_median(objects, 'avg_gpa')

            # gpa in 1 point groups
            def gpa_recode(f, x):
                gpa = x
                if gpa >= 4:
                    return '4+'
                bucket = 1 * (gpa / 1)
                return '%d-%d' % (bucket, bucket + 2)

            gpa_dist_data, total_schools = get_stat_data(
                'avg_gpa', geo, session,
                table_fields=['code', 'name', 'avg_gpa'],
                recode=gpa_recode, exclude=['unspecified'],
                only={'year_of_result': [year]})

            total_private = 0.0
            for data in schools_dist.get('Non-Government', {}).values():
                if 'numerators' in data:
                    total_private += data['numerators']['this']
        except Exception as e:
            pass

    return {
        'schools_distribution': schools_dist,
        'region_distribution': region_dist,
        'category_distribution': category_dist,
        'best_schools_more_40': top_schools_40_more,
        'worst_schools_more_40': lowest_schools_40_more,
        'best_schools_less_40': top_schools_40_less,
        'worst_schools_less_40': lowest_schools_40_less,
        'gpa_group_distribution': gpa_dist_data,
        'gender_distribution': gender_dist,
        'total_schools': {
            "name": "Schools",
            "values": {"this": total_schools}
        },
        'median_gpa': {
            "name": "Median GPA",
            "values": {"this": median},
        },
    }
예제 #3
0
def get_demographics_profile(geo, session):
    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo, session, table_dataset='Census 2011')

    # language
    language_data, _ = get_stat_data(
            ['language'], geo, session, order_by='-total')
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
            ['age groups in 5 years'], geo, session,
            table_name='agegroupsin5years',
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    sex_data, _ = get_stat_data(
            ['gender'], geo, session, table_name='gender')

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo.geo_level,
        table_name='ageincompletedyears'
    )
    objects = sorted(
        get_objects_by_geo(db_model_age, geo, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo, session,
        table_name='ageincompletedyearssimplified',
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18',
                '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
            ['citizenship'], geo, session,
            order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
            'name': 'South African citizens',
            'values': {'this': percent(sa_citizen, total_pop)},
            'numerators': {'this': sa_citizen},
            }

    # migration
    province_of_birth_dist, _ = get_stat_data(
            ['province of birth'], geo, session,
            exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return {
                'Not applicable': 'Other',
                }.get(key, key)

    region_of_birth_dist, _ = get_stat_data(
            ['region of birth'], geo, session,
            exclude_zero=True, order_by='-total',
            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
            'name': 'Born in South Africa',
            'values': {'this': percent(born_in_sa, total_pop)},
            'numerators': {'this': born_in_sa},
            }

    return final_data
예제 #4
0
def get_schools_profile(geo, session):
    print geo.geo_level
    # ownership status
    schools_dist, total_schools = get_stat_data(['ownership'], geo, session)

    # region status
    region_dist, total_schools = get_stat_data(['region'], geo, session)

    # Choosing sorting option
    #Sorting will only be done using national_rank all, as regional and district ranks are unknown for some result esp historical
    rank_column = Base.metadata.tables['secondary_school'].c.national_rank_all

    # Getting top for schools with more than 40 students
    top_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "yes")\
                    .order_by(asc(cast(rank_column, Integer)))\
                    .all()
    # Getting top for schools with less than 40 students
    top_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "no")\
                    .order_by(asc(cast(rank_column, Integer)))\
                    .all()

    # Getting lowest schools with more than 40 students
    lowest_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "yes")\
                    .order_by(desc(cast(rank_column, Integer)))\
                    .all()
    # Getting lowest for schools with less than 40 students
    lowest_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40 == "no")\
                    .order_by(desc(cast(rank_column, Integer)))\
                    .all()

    # median gpa
    db_model_age = get_model_from_fields(['code', 'name', 'avg_gpa'], geo.geo_level)
    objects = get_objects_by_geo(db_model_age, geo, session, ['avg_gpa'])
    median = calculate_median(objects, 'avg_gpa')

    # gpa in 1 point groups
    def gpa_recode(f, x):
        gpa = x
        if gpa >= 4:
            return '4+'
        bucket = 1 * (gpa / 1)
        return '%d-%d' % (bucket, bucket + 2)

    gpa_dist_data, _ = get_stat_data(
        'avg_gpa', geo, session,
        table_fields=['code', 'name', 'avg_gpa'],
        recode=gpa_recode, exclude=['unspecified'])

    total_private = 0.0
    for data in schools_dist['PRIVATE'].itervalues():
        if 'numerators' in data:
            total_private += data['numerators']['this']

    return {
        'schools_distribution': schools_dist,
        'region_distribution': region_dist,
        'top_schools': top_schools,
        'lowest_schools': lowest_schools,
        'gpa_group_distribution': gpa_dist_data,
        'median_gpa': {
            "name": "Median GPA",
            "values": {"this": median},
        },
    }
예제 #5
0
def get_demographics_profile(geo_code, geo_level, session):
    # population by sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo_level, geo_code, session,
        table_fields=['disability', 'sex'])

    if total_pop > 0:
        # population by disability
        disability_dist_data, total_disabled = get_stat_data(
            'disability', geo_level, geo_code, session,
            table_fields=['disability', 'sex'],
            recode=dict(DISABILITY_RECODES),
            key_order=DISABILITY_RECODES.values(),
            exclude=['NO_DISABILITY'])

        demographic_data = {
            'has_data': True,
            'sex_ratio': sex_dist_data,
            'disability_ratio': disability_dist_data,
            'total_population': {
                "name": "People",
                "values": {"this": total_pop}
            },
            'total_disabled': {
                'name': 'People',
                'values':
                    {'this': total_disabled},
            },
            'percent_disabled': {
                'name': 'Are disabled',
                'values':
                    {'this': round(total_disabled / float(total_pop) * 100, 2)},
            },
            'is_vdc': True
        }

        if geo_level != 'vdc':

            income_table = get_datatable('per_capita_income')
            per_capita_income, _ = income_table.get_stat_data(
                geo_level, geo_code, percent=False)

            lifeexpectancy_table = get_datatable('lifeexpectancy')
            life_expectancy, _ = lifeexpectancy_table.get_stat_data(
                geo_level, geo_code, percent=False)

            # population projection for 2031
            pop_2031_dist_data, pop_projection_2031 = get_stat_data(
                'sex', geo_level, geo_code, session,
                table_fields=['sex'],
                table_name='population_projection_2031')

            # poverty (UNDP and Open Nepal)
            poverty_dist_data, undp_survey_pop = get_stat_data(
                'poverty', geo_level, geo_code, session,
                recode=dict(POVERTY_RECODES),
                key_order=POVERTY_RECODES.values())

            total_in_poverty = \
                poverty_dist_data['In Poverty']['numerators']['this']

            # language
            language_data, _ = get_stat_data(
                ['language'], geo_level, geo_code, session, order_by='-total')
            language_most_spoken = language_data[language_data.keys()[0]]

            # caste or ethnic group
            caste_data, _ = get_stat_data(['caste or ethnic group'], geo_level,
                                          geo_code, session, order_by='-total')
            most_populous_caste = caste_data[caste_data.keys()[0]]

            citizenship_data, _ = get_stat_data(
                            ['citizenship', 'sex'], geo_level,
                            geo_code, session, order_by='-total'
            )
            citizenship_by_sex = {
                'Nepal': citizenship_data['Nepal'],
                'India': citizenship_data['India'],
                'China': citizenship_data['China'],
                'Others': citizenship_data['Others'],
                'metadata': citizenship_data['metadata']
            }
            citizenship_distribution, _ = get_stat_data(
                'citizenship', geo_level, geo_code, session,
                order_by='-total')

            # age
            # age in 10 year groups
            def age_recode(f, x):
                age = int(x)
                if age >= 80:
                    return '80+'
                bucket = 10 * (age / 10)
                return '%d-%d' % (bucket, bucket + 9)

            age_dist_data, _ = get_stat_data(
                'age in completed years', geo_level, geo_code, session,
                table_fields=['age in completed years', 'sex'],
                recode=age_recode,
                table_name='age_sex')

            ordered_age_dist_data = OrderedDict(
                sorted(age_dist_data.items(),
                       key=lambda age_range: age_range[0])
            )

            # age category
            def age_cat_recode(f, x):
                age = int(x.replace('+', ''))
                if age < 20:
                    return 'Under 20'
                elif age >= 60:
                    return '60 and over'
                else:
                    return '20 to 59'

            age_cats, _ = get_stat_data(
                'age in completed years', geo_level, geo_code, session,
                table_fields=['age in completed years', 'sex'],
                recode=age_cat_recode,
                table_name='age_sex')

            ordered_age_cats_data = OrderedDict(
                [('Under 20', age_cats['Under 20']),
                 ('20 to 59', age_cats['20 to 59']),
                 ('60 and over', age_cats['60 and over']),
                 ('metadata', age_cats['metadata'])]
            )

            # median age
            db_model_age = get_model_from_fields(
                ['age in completed years', 'sex'],
                geo_level)
            objects = get_objects_by_geo(db_model_age,
                                         geo_code,
                                         geo_level,
                                         session,
                                         ['age in completed years'])
            objects = sorted((o for o in objects if
                              getattr(o, 'age in completed years') !=
                              'unspecified'),
                             key=lambda x:
                             int(getattr(x, 'age in completed years')
                                 .replace('+', '')))
            median_age = calculate_median(objects, 'age in completed years')

            # add non-VDC data
            demographic_data['is_vdc'] = False

            demographic_data['per_capita_income'] = {
                'name': 'Per capita income in US dollars',
                'values': {'this': per_capita_income['income']['values']['this']}
            }

            demographic_data['life_expectancy'] = {
                'name': 'Life expectancy in years',
                'values': {'this': life_expectancy['years']['values']['this']}
            }
            demographic_data['pop_2031_dist'] = pop_2031_dist_data
            demographic_data['pop_projection_2031'] = {
                "name": "Projected in 2031",
                "values": {"this": pop_projection_2031}
            }
            demographic_data['poverty_dist'] = poverty_dist_data
            demographic_data['poverty_population'] = {
                'name': 'Estimated Population',
                'values': {'this': undp_survey_pop}
            }
            demographic_data['percent_impoverished'] = {
                'name': 'Are in poverty',
                'numerators': {'this': total_in_poverty},
                'values': {
                    'this': round(
                        total_in_poverty / undp_survey_pop * 100,
                        2)}
            }
            demographic_data['language_distribution'] = language_data
            demographic_data['language_most_spoken'] = language_most_spoken
            demographic_data['ethnic_distribution'] = caste_data
            demographic_data['most_populous_caste'] = most_populous_caste
            demographic_data['citizenship_by_sex'] = citizenship_by_sex
            demographic_data['citizenship_distribution'] = citizenship_distribution
            demographic_data['age_group_distribution'] = ordered_age_dist_data
            demographic_data['age_category_distribution'] = \
                ordered_age_cats_data
            demographic_data['median_age'] = {
                'name': 'Median age',
                'values': {'this': median_age},
            }

    else:
        demographic_data = {
            'area_has_data': False
        }

    return demographic_data
예제 #6
0
def get_demographics_profile(geo, session):
    # sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'])

    religion_dist_data, _ = get_stat_data(
        'religion', geo, session)

    # urban/rural by sex
    urban_dist_data, _ = get_stat_data(
        ['rural or urban', 'sex'], geo, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'])
    total_urbanised = 0
    for data in urban_dist_data['Urban'].itervalues():
        if 'numerators' in data:
            total_urbanised += data['numerators']['this']

    # median age
    db_model_age = get_model_from_fields(
        ['age in completed years', 'sex', 'rural or urban'], geo.geo_level)
    objects = get_objects_by_geo(db_model_age, geo, session, [
                                 'age in completed years'])
    objects = sorted((o for o in objects if getattr(o, 'age in completed years') != 'unspecified'),
                     key=lambda x: int(getattr(x, 'age in completed years').replace('+', '')))
    median = calculate_median(objects, 'age in completed years')

    # age in 10 year groups
    def age_recode(f, x):
        age = int(x.replace('+', ''))
        if age >= 80:
            return '80+'
        bucket = 10 * (age / 10)
        return '%d-%d' % (bucket, bucket + 9)

    age_dist_data, _ = get_stat_data(
        'age in completed years', geo, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'],
        recode=age_recode, exclude=['unspecified'])

    # age category
    def age_cat_recode(f, x):
        age = int(x.replace('+', ''))
        if age < 18:
            return 'Under 18'
        elif age >= 65:
            return '65 and over'
        else:
            return '18 to 64'

    age_cats, _ = get_stat_data(
        'age in completed years', geo, session,
        table_fields=['age in completed years', 'sex', 'rural or urban'],
        recode=age_cat_recode,
        exclude=['unspecified'])

    final_data = {
        'sex_ratio': sex_dist_data,
        'religion_ratio': religion_dist_data,
        'urban_distribution': urban_dist_data,
        'urbanised': {
            'name': 'In urban areas',
            'numerators': {'this': total_urbanised},
            'values': {'this': round(total_urbanised / total_pop * 100, 2)}
        },
        'age_group_distribution': age_dist_data,
        'age_category_distribution': age_cats,
        'median_age': {
            "name": "Median age",
            "values": {"this": median},
        },
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }
    }

    return final_data
예제 #7
0
def get_demographics_profile(geo_code, geo_level, session):
    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
            ['language'], geo_level, geo_code, session, order_by='-total')
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
            ['age groups in 5 years'], geo_level, geo_code, session,
            table_name='agegroupsin5years',
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level, table_name='gender')
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    query = query.filter(db_model_sex.geo_code == geo_code)
    total_male = query.one()[0]

    sex_data = OrderedDict((  # census data refers to sex as gender
            ('Female', {
                "name": "Female",
                "values": {"this": round((total_pop - total_male) / total_pop * 100, 2)},
                "numerators": {"this": total_pop - total_male},
            }),
            ('Male', {
                "name": "Male",
                "values": {"this": round(total_male / total_pop * 100, 2)},
                "numerators": {"this": total_male},
            }),
        ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    geo = geo_data.get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo_level,
        table_name='ageincompletedyears'
    )
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified',
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18',
                '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
            ['citizenship'], geo_level, geo_code, session,
            order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
            'name': 'South African citizens',
            'values': {'this': percent(sa_citizen, total_pop)},
            'numerators': {'this': sa_citizen},
            }

    # migration
    province_of_birth_dist, _ = get_stat_data(
            ['province of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
            ['region of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total',
            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
            'name': 'Born in South Africa',
            'values': {'this': percent(born_in_sa, total_pop)},
            'numerators': {'this': born_in_sa},
            }

    return final_data
예제 #8
0
def get_demographics_profile(geo, session):
    # population group
    pop_dist_data, total_pop = get_stat_data(
        ["population group"], geo, session, table_dataset="Census and Community Survey"
    )

    # language
    language_data, _ = get_stat_data(
        ["language"],
        geo,
        session,
        table_dataset="Census and Community Survey",
        order_by="-total",
    )
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
        ["age groups in 5 years"],
        geo,
        session,
        table_dataset="Census and Community Survey",
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=(
            "0-9",
            "10-19",
            "20-29",
            "30-39",
            "40-49",
            "50-59",
            "60-69",
            "70-79",
            "80+",
        ),
    )

    # sex
    sex_data, _ = get_stat_data(
        ["gender"],
        geo,
        session,
        table_universe="Population",
        table_dataset="Census and Community Survey",
    )

    final_data = {
        "language_distribution": language_data,
        "language_most_spoken": language_most_spoken,
        "population_group_distribution": pop_dist_data,
        "age_group_distribution": age_dist_data,
        "sex_ratio": sex_data,
        "total_population": {"name": "People", "values": {"this": total_pop}},
    }

    if geo.square_kms:
        final_data["population_density"] = {
            "name": "people per square kilometre",
            "values": {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    age_table = get_datatable("ageincompletedyears")
    objects = sorted(
        age_table.get_rows_for_geo(geo, session),
        key=lambda x: int(getattr(x, "age in completed years")),
    )

    # median age
    median = calculate_median(objects, "age in completed years")
    final_data["median_age"] = {"name": "Median age", "values": {"this": median}}

    # age category
    age_dist, _ = get_stat_data(
        ["age in completed years"],
        geo,
        session,
        table_dataset="Census and Community Survey",
        table_name="ageincompletedyearssimplified",
        key_order=["Under 18", "18 to 64", "65 and over"],
        recode={"< 18": "Under 18", ">= 65": "65 and over"},
    )
    final_data["age_category_distribution"] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
        ["citizenship"],
        geo,
        session,
        table_dataset="Census and Community Survey",
        order_by="-total",
    )

    sa_citizen = citizenship_dist["Yes"]["numerators"]["this"]

    final_data["citizenship_distribution"] = citizenship_dist
    final_data["citizenship_south_african"] = {
        "name": "South African citizens",
        "values": {"this": percent(sa_citizen, total_pop)},
        "numerators": {"this": sa_citizen},
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(
        ["province of birth"],
        geo,
        session,
        table_dataset="Census and Community Survey",
        exclude_zero=True,
        order_by="-total",
    )

    final_data["province_of_birth_distribution"] = province_of_birth_dist

    def region_recode(field, key):
        if key == "Born in South Africa":
            return "South Africa"
        else:
            return {"Not applicable": "Other"}.get(key, key)

    region_of_birth_dist, _ = get_stat_data(
        ["region of birth"],
        geo,
        session,
        table_dataset="Census and Community Survey",
        exclude_zero=True,
        order_by="-total",
        recode=region_recode,
    )

    if "South Africa" in region_of_birth_dist:
        born_in_sa = region_of_birth_dist["South Africa"]["numerators"]["this"]
    else:
        born_in_sa = 0

    final_data["region_of_birth_distribution"] = region_of_birth_dist
    final_data["born_in_south_africa"] = {
        "name": "Born in South Africa",
        "values": {"this": percent(born_in_sa, total_pop)},
        "numerators": {"this": born_in_sa},
    }

    return final_data
예제 #9
0
def get_schools_profile(geo, session, year):
    # ownership status
    schools_dist, total_schools = get_stat_data(['ownership'], geo=geo, session=session, only={'year_of_result': [year]})
    #school_dist_data, _ = get_stat_data('age in completed years',geo=geo, session=session, only={'year_of_result': [year]})
    if geo.geo_level == "country":
        reg = 'region'
    elif geo.geo_level == "region":
        reg = 'district'
    elif geo.geo_level == "district":
        reg = 'ward'
    region_dist, total_schools = get_stat_data([reg], geo=geo, session=session, only={'year_of_result': [year]})

    category_dist, _ = get_stat_data(['more_than_40'], geo=geo, session=session, only={'year_of_result': [year]})

    gender_dist, _ = get_stat_data(['gender'], geo=geo, session=session, only={'year_of_result': [year]})
    # Choosing sorting option
    #Sorting will only be done using national_rank all, as regional and district ranks are unknown for some result esp historical
    rank_column = Base.metadata.tables['secondary_school'].c.national_rank_all
    # Getting top for schools with more than 40 students
    top_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("yes%"))\
                    .order_by(asc(cast(rank_column, Integer)))\
                    .all()
    # Getting top for schools with less than 40 students
    top_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("no%"))\
                    .order_by(asc(cast(rank_column, Integer)))\
                    .all()

    # Getting lowest schools with more than 40 students
    lowest_schools_40_more = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("yes%"))\
                    .order_by(desc(cast(rank_column, Integer)))\
                    .all()
    # Getting lowest for schools with less than 40 students
    lowest_schools_40_less = session.query(Base.metadata.tables['secondary_school'])\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_level == geo.geo_level)\
                    .filter(Base.metadata.tables['secondary_school'].c.geo_code == geo.geo_code)\
                    .filter(Base.metadata.tables['secondary_school'].c.year_of_result == year)\
                    .filter(Base.metadata.tables['secondary_school'].c.more_than_40.like("no%"))\
                    .order_by(desc(cast(rank_column, Integer)))\
                    .all()
    # median gpa
    db_model_age = get_model_from_fields(['code', 'name', 'avg_gpa'], geo.geo_level)
    objects = get_objects_by_geo(db_model_age, geo, session, ['avg_gpa'])
    median = calculate_median(objects, 'avg_gpa')

    # gpa in 1 point groups
    def gpa_recode(f, x):
        gpa = x
        if gpa >= 4:
            return '4+'
        bucket = 1 * (gpa / 1)
        return '%d-%d' % (bucket, bucket + 2)

    gpa_dist_data, total_schools = get_stat_data(
        'avg_gpa', geo, session,
        table_fields=['code', 'name', 'avg_gpa'],
        recode=gpa_recode, exclude=['unspecified'], only={'year_of_result': [year]})

    total_private = 0.0
    for data in schools_dist['Non-Government'].itervalues():
        if 'numerators' in data:
            total_private += data['numerators']['this']

    return {
        'schools_distribution': schools_dist,
        'region_distribution': region_dist,
        'category_distribution': category_dist,
        'best_schools_more_40': top_schools_40_more,
        'worst_schools_more_40': lowest_schools_40_more,
        'best_schools_less_40': top_schools_40_less,
        'worst_schools_less_40': lowest_schools_40_less,
        'gpa_group_distribution': gpa_dist_data,
        'gender_distribution': gender_dist,
        'total_schools': {
            "name": "Schools",
            "values": {"this": total_schools}
        },
        'median_gpa': {
            "name": "Median GPA",
            "values": {"this": median},
        },
    }