示例#1
0
def test_employment_age_distribution(do_show, do_save, create_sample_pop_e2e,
                                     get_fig_dir_by_module):
    """
    Compare the population's employment-by-age counts with the counts implied
    by the reference employment rates, then plot employment rates by age.

    Args:
        do_show               : forwarded to the plotting call as ``do_show``
        do_save               : forwarded to the plotting call as ``do_save``
        create_sample_pop_e2e : population object; must provide
                                ``count_employment_by_age()``, ``loc_pars`` and
                                ``plot_employment_rates_by_age()``
        get_fig_dir_by_module : forwarded to the plotting call as ``figdir``

    Returns:
        None.
    """
    sp.logger.info(
        "Test employment age distribution vs the employment_rates_by_age.dat")

    def expand_counts(count_by_age):
        # Flatten {age: count} into a list with each age repeated `count`
        # times. Extending a single list is linear, whereas
        # sum(list_of_lists, []) re-copies the accumulated list on every step.
        ages = []
        for age, count in count_by_age.items():
            ages.extend([age] * count)
        return ages

    plotting_kwargs = sc.objdict(do_show=do_show,
                                 do_save=do_save,
                                 figdir=get_fig_dir_by_module)
    actual_employment_age_count = create_sample_pop_e2e.count_employment_by_age()
    total_employee = sum(actual_employment_age_count.values())
    expected_employment_age_dist = sp.norm_dic(
        sp.get_employment_rates(**create_sample_pop_e2e.loc_pars))

    # scale the normalized reference distribution to the actual head count
    expected_employment_age_count = {
        age: round(share * total_employee)
        for age, share in expected_employment_age_dist.items()
    }

    # generate lists of ages based on the actual and expected counts
    generated_actual = expand_counts(actual_employment_age_count)
    generated_expected = expand_counts(expected_employment_age_count)

    # run statistical tests for employment by age distribution
    # TODO: Need to refine the data for fair comparison
    sp.statistic_test(expected=generated_expected,
                      actual=generated_actual,
                      test=st.kstest,
                      verbose=True)
    # plot employment rates by age
    create_sample_pop_e2e.plot_employment_rates_by_age(**plotting_kwargs)
示例#2
0
def test_get_uids_potential_workers(location='seattle_metro',
                                    state_location='Washington',
                                    country_location='usa'):
    """
    Build households, find the uids in school, and derive the potential workers.

    Args:
        location         : location name forwarded to the synthpops loaders
        state_location   : state name forwarded to the synthpops loaders
        country_location : country name forwarded to the synthpops loaders

    Returns:
        tuple: (potential_worker_uids, potential_worker_uids_by_age,
        employment_rates, age_by_uid_dic)
    """
    pop_size = 10000
    place_kwargs = dict(location=location,
                        state_location=state_location,
                        country_location=country_location)

    households = sp.get_head_age_by_size_distr(
        datadir, state_location, country_location,
        file_path=None, household_size_1_included=False, use_default=True)
    homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(households, id_len=16)

    # only the first element of the school lookup is needed below
    school_lookup = sp.get_uids_in_school(
        datadir, pop_size, location, state_location, country_location,
        age_by_uid_dic, homes_by_uids, use_default=False)
    uids_in_school, _uids_in_school_by_age, _ages_in_school_count = school_lookup

    employment_rates = sp.get_employment_rates(datadir, use_default=True, **place_kwargs)

    worker_lookup = sp.get_uids_potential_workers(
        uids_in_school, employment_rates, age_by_uid_dic)
    potential_worker_uids, potential_worker_uids_by_age, ages_left_count = worker_lookup
    assert ages_left_count is not None

    return (potential_worker_uids, potential_worker_uids_by_age,
            employment_rates, age_by_uid_dic)
示例#3
0
def test_assign_teachers_to_work(location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa',
                                 folder_name='contact_networks',
                                 n=10000):
    """
    Assign teachers to the generated schools and sanity-check what comes back.

    Args:
        location         : location name forwarded to the synthpops loaders
        state_location   : state name forwarded to the synthpops loaders
        country_location : country name forwarded to the synthpops loaders
        folder_name      : forwarded to sp.read_in_age_by_uid and sp.get_uids_in_school
        n                : population size forwarded to the loaders

    Returns:
        None.
    """
    # Assign students to school
    gen_schools, gen_school_uids = test_send_students_to_school()

    employment_rates = sp.get_employment_rates(
        datadir,
        location=location,
        state_location=state_location,
        country_location=country_location,
        use_default=True)

    age_by_uid_dic = sp.read_in_age_by_uid(datadir, location, state_location,
                                           country_location, folder_name, n)

    uids_in_school = sp.get_uids_in_school(datadir,
                                           n,
                                           location,
                                           state_location,
                                           country_location,
                                           folder_name=folder_name,
                                           use_default=True)

    potential_worker_uids, potential_worker_uids_by_age, \
    potential_worker_ages_left_count = sp.get_uids_potential_workers(uids_in_school, employment_rates, age_by_uid_dic)

    workers_by_age_to_assign_count = sp.get_workers_by_age_to_assign(
        employment_rates, potential_worker_ages_left_count, age_by_uid_dic)

    # Assign teachers and update school lists
    syn_schools, syn_school_uids, potential_worker_uids, potential_worker_uids_by_age, \
    workers_by_age_to_assign_count = sp.assign_teachers_to_work(gen_schools, gen_school_uids, employment_rates,
                                                                workers_by_age_to_assign_count,
                                                                potential_worker_uids, potential_worker_uids_by_age,
                                                                potential_worker_ages_left_count,
                                                                student_teacher_ratio=30, teacher_age_min=25,
                                                                teacher_age_max=75, verbose=False)

    # Use a dedicated index so the population-size parameter `n` is not overwritten.
    for school_index, school in enumerate(syn_schools):
        print(school)
        assert school is not None
        assert syn_school_uids[school_index] is not None

    assert syn_schools == gen_schools
    assert syn_school_uids == gen_school_uids
    # Comparing a value with itself always passes, so check that the call
    # actually produced these results instead.
    # NOTE(review): confirm whether a stricter comparison was intended here.
    assert potential_worker_uids is not None
    assert potential_worker_uids_by_age is not None
    assert workers_by_age_to_assign_count is not None
def test_get_uids_potential_workers(location='seattle_metro', state_location='Washington',
                                    country_location='usa', folder_name='contact_networks'):
    """
    Load school uids, employment rates and ages, then derive the potential workers.

    Args:
        location         : location name forwarded to the synthpops loaders
        state_location   : state name forwarded to the synthpops loaders
        country_location : country name forwarded to the synthpops loaders
        folder_name      : forwarded to sp.get_uids_in_school and sp.read_in_age_by_uid

    Returns:
        tuple: (potential_worker_uids, potential_worker_uids_by_age,
        employment_rates, age_by_uid_dic)
    """
    home_count = 10000
    place_kwargs = dict(location=location,
                        state_location=state_location,
                        country_location=country_location)

    uids_in_school = sp.get_uids_in_school(
        datadir, home_count, location, state_location, country_location,
        folder_name=folder_name, use_default=True)
    employment_rates = sp.get_employment_rates(datadir, use_default=True, **place_kwargs)
    age_by_uid_dic = sp.read_in_age_by_uid(
        datadir, location, state_location, country_location, folder_name, home_count)

    worker_lookup = sp.get_uids_potential_workers(
        uids_in_school, employment_rates, age_by_uid_dic)
    potential_worker_uids, potential_worker_uids_by_age, ages_left_count = worker_lookup
    assert ages_left_count is not None

    return (potential_worker_uids, potential_worker_uids_by_age,
            employment_rates, age_by_uid_dic)
def test_generate_workplace_sizes(location='seattle_metro', state_location='Washington',
                                  country_location='usa', folder_name='contact_networks'):
    """
    Send students to school, work out how many workers remain to be assigned
    per age, and generate workplace sizes from the workplace size data.

    Args:
        location         : location name forwarded to the synthpops loaders
        state_location   : state name forwarded to the synthpops loaders
        country_location : country name forwarded to the synthpops loaders
        folder_name      : forwarded to sp.get_uids_in_school and sp.read_in_age_by_uid

    Returns:
        tuple: (workers_by_age_to_assign_count, workplace_size_brackets,
        workplace_size_distr_by_brackets, workplace_sizes)
    """
    pop_size = 10000
    place_args = (location, state_location, country_location)

    school_lookup = sp.get_uids_in_school(
        datadir, pop_size, *place_args, folder_name=folder_name, use_default=True)
    uids_in_school, uids_in_school_by_age, ages_in_school_count = school_lookup

    # school sizes
    size_distr = sp.get_school_size_distr_by_brackets(datadir, *place_args)
    size_brackets = sp.get_school_size_brackets(datadir, *place_args)
    school_sizes = sp.generate_school_sizes(size_distr, size_brackets, uids_in_school)

    # age brackets and the lookup derived from them
    brackets_path = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    # Need to instead get syn_schools now
    syn_schools, syn_school_uids = sp.send_students_to_school(
        school_sizes, uids_in_school, uids_in_school_by_age, ages_in_school_count,
        age_brackets, age_by_brackets_dic, contact_matrix_dic)

    employment_rates = sp.get_employment_rates(
        datadir, location=location, state_location=state_location,
        country_location=country_location, use_default=True)

    age_by_uid_dic = sp.read_in_age_by_uid(datadir, *place_args, folder_name, pop_size)

    worker_lookup = sp.get_uids_potential_workers(
        syn_school_uids, employment_rates, age_by_uid_dic)
    potential_worker_uids, potential_worker_uids_by_age, ages_left_count = worker_lookup

    workers_by_age_to_assign_count = sp.get_workers_by_age_to_assign(
        employment_rates, ages_left_count, age_by_uid_dic)

    # workplace sizes
    workplace_size_brackets = sp.get_workplace_size_brackets(
        datadir, *place_args, use_default=True)
    workplace_size_distr_by_brackets = sp.get_workplace_size_distr_by_brackets(
        datadir, state_location=state_location, country_location=country_location,
        use_default=True)
    workplace_sizes = sp.generate_workplace_sizes(
        workplace_size_distr_by_brackets, workplace_size_brackets,
        workers_by_age_to_assign_count)

    return (workers_by_age_to_assign_count, workplace_size_brackets,
            workplace_size_distr_by_brackets, workplace_sizes)
示例#6
0
def test_generate_workplace_sizes(location='seattle_metro',
                                  state_location='Washington',
                                  country_location='usa'):
    """
    Work out how many workers remain to be assigned per age, generate
    workplace sizes from the workplace size data, and print them.

    Args:
        location         : location name forwarded to the synthpops loaders
        state_location   : state name forwarded to the synthpops loaders
        country_location : country name forwarded to the synthpops loaders

    Returns:
        None.
    """
    pop_size = 10000
    place_args = (location, state_location, country_location)

    # only the first element of the school lookup is needed below
    school_lookup = sp.get_uids_in_school(datadir, pop_size, *place_args, use_default=True)
    uids_in_school, _uids_in_school_by_age, _uids_in_school_count = school_lookup

    employment_rates = sp.get_employment_rates(
        datadir, location=location, state_location=state_location,
        country_location=country_location, use_default=True)

    age_by_uid_dic = sp.read_in_age_by_uid(datadir, *place_args, pop_size)

    worker_lookup = sp.get_uids_potential_workers(
        uids_in_school, employment_rates, age_by_uid_dic)
    potential_worker_uids, potential_worker_uids_by_age, ages_left_count = worker_lookup

    workers_by_age_to_assign_count = sp.get_workers_by_age_to_assign(
        employment_rates, ages_left_count, age_by_uid_dic)

    size_brackets = sp.get_workplace_size_brackets(datadir, *place_args, use_default=True)
    size_distr_by_brackets = sp.get_workplace_size_distr_by_brackets(
        datadir, state_location=state_location, country_location=country_location,
        use_default=True)

    workplace_sizes = sp.generate_workplace_sizes(
        size_distr_by_brackets, size_brackets, workers_by_age_to_assign_count)
    print(workplace_sizes)
                                                uids_in_school)
    # print(gen_school_sizes)

    # print(age_brackets)
    # print(age_by_brackets_dic[34])

    gen_schools, gen_school_uids = sp.send_students_to_school(
        gen_school_sizes, uids_in_school, uids_in_school_by_age,
        ages_in_school_count, age_brackets, age_by_brackets_dic,
        contact_matrix_dic)

    # for s in range(5):
    # print(Counter(gen_schools[s]))
    # print(gen_schools[s])

    emp_rates = sp.get_employment_rates(datadir, location, state_location,
                                        country_location)
    # print(emp_rates)
    potential_worker_uids, potential_worker_uids_by_age, potential_worker_ages_left_count = sp.get_uids_potential_workers(
        uids_in_school, uids_in_school_by_age, age_by_uid_dic)
    workers_by_age_to_assign_count = sp.get_workers_by_age_to_assign(
        emp_rates, potential_worker_ages_left_count, uids_by_age_dic)
    # print(len(potential_worker_uids))
    gen_schools, gen_school_uids, potential_worker_uids, potential_worker_uids_by_age, workers_by_age_to_assign_count = sp.assign_teachers_to_work(
        gen_schools, gen_school_uids, emp_rates,
        workers_by_age_to_assign_count, potential_worker_uids,
        potential_worker_uids_by_age, potential_worker_ages_left_count)
    # print(len(potential_worker_uids))

    # for a in potential_worker_uids_by_age:
    # print(a, len(potential_worker_uids_by_age[a]))
示例#8
0
def test_get_uids_potential_workers(location='seattle_metro',
                                    state_location='Washington',
                                    country_location='usa'):
    """
    Read households, send students to school, and derive the potential workers.

    ``folder_name`` is not a parameter or a local here; it is read from the
    enclosing (module) scope.

    Args:
        location         : location name forwarded to the synthpops loaders
        state_location   : state name forwarded to the synthpops loaders
        country_location : country name forwarded to the synthpops loaders

    Returns:
        tuple: (potential_worker_uids, potential_worker_uids_by_age,
        employment_rates, age_by_uid_dic)
    """
    pop_size = 10000
    place_args = (location, state_location, country_location)

    households = sprw.read_setting_groups(
        datadir, *place_args, folder_name, 'households', pop_size, with_ages=True)
    homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(households)

    school_lookup = sp.get_uids_in_school(
        datadir, pop_size, *place_args, age_by_uid_dic, homes_by_uids,
        use_default=False)
    uids_in_school, uids_in_school_by_age, ages_in_school_count = school_lookup

    employment_rates = sp.get_employment_rates(
        datadir, location=location, state_location=state_location,
        country_location=country_location, use_default=True)

    # school sizes
    size_distr = sp.get_school_size_distr_by_brackets(datadir, *place_args)
    size_brackets = sp.get_school_size_brackets(datadir, *place_args)
    school_sizes = sp.generate_school_sizes(size_distr, size_brackets, uids_in_school)

    # age brackets and the lookup derived from them
    brackets_path = sp.get_census_age_brackets_path(
        datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)
    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    # only the school uids are needed from the assignment result
    assignment = sp.send_students_to_school(
        school_sizes, uids_in_school, uids_in_school_by_age, ages_in_school_count,
        age_brackets, age_by_brackets_dic, contact_matrix_dic, verbose=False)
    _syn_schools, syn_school_uids, _syn_school_types = assignment

    worker_lookup = sp.get_uids_potential_workers(
        syn_school_uids, employment_rates, age_by_uid_dic)
    potential_worker_uids, potential_worker_uids_by_age, ages_left_count = worker_lookup
    assert ages_left_count is not None

    return (potential_worker_uids, potential_worker_uids_by_age,
            employment_rates, age_by_uid_dic)
示例#9
0
def check_employment_age_distribution(pop,
                                      n,
                                      datadir,
                                      figdir,
                                      location=None,
                                      state_location=None,
                                      country_location=None,
                                      file_path=None,
                                      use_default=False,
                                      test_prefix="",
                                      skip_stat_check=False,
                                      do_close=True):
    """
    Check the population's employment-by-age distribution against the reference data.

    Args:
        pop              : population passed to utilities.get_ids_count_by_param
        n                : population size, used to scale the expected totals
        datadir          : root data directory that holds the reference data
        figdir           : base directory; its "employment" subfolder path is
                           handed to the plotting helper
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : forwarded to both sp.get_employment_rates and
                           sp.read_age_bracket_distr
        use_default      : forwarded to both sp.get_employment_rates and
                           sp.read_age_bracket_distr
        test_prefix      : appended to the title prefix of each plot
        skip_stat_check  : if True, skip the statistical check of the rates
        do_close         : forwarded to the rate and total plots as ``do_close``

    Returns:
        None.
    """
    figdir = os.path.join(figdir, "employment")

    # reference data
    place_kwargs = dict(location=location,
                        state_location=state_location,
                        country_location=country_location)
    source_kwargs = dict(file_path=file_path, use_default=use_default)
    er = sp.get_employment_rates(datadir=datadir, **place_kwargs, **source_kwargs)
    brackets = sp.get_census_age_brackets(datadir=datadir,
                                          state_location=state_location,
                                          country_location=country_location)
    ageindex = sp.get_age_by_brackets_dic(brackets)
    age_dist = sp.read_age_bracket_distr(datadir=datadir, **place_kwargs, **source_kwargs)

    # counting the actual population by age with employment including teachers and staffs
    employment_counts = utilities.get_ids_count_by_param(
        pop, condition_name=['wpid', 'sc_teacher', 'sc_staff'], param='age')
    actual_employed_age_dist, actual_unemployed_age_dist = employment_counts

    employed_ages = sorted(actual_employed_age_dist)
    utilities.plot_array([actual_employed_age_dist[age] for age in employed_ages],
                         datadir=figdir,
                         names=employed_ages,
                         expect_label='employed by age count',
                         xlabel_rotation=90,
                         testprefix="employeed count by age " + test_prefix)

    unemployed_ages = sorted(actual_unemployed_age_dist)
    utilities.plot_array([actual_unemployed_age_dist[age] for age in unemployed_ages],
                         datadir=figdir,
                         names=unemployed_ages,
                         expect_label='unemployed by age count',
                         xlabel_rotation=90,
                         testprefix="unemployed count by age " + test_prefix)

    # actual rate per reference age, in the reference order; ages without a rate count as 0
    actual_employed_rate = utilities.calc_rate(actual_employed_age_dist,
                                               actual_unemployed_age_dist)
    actual_values = np.array([
        actual_employed_rate[age] if age in actual_employed_rate else 0
        for age in er
    ])
    expected_values = np.array(list(er.values()))
    if not skip_stat_check:
        utilities.statistic_test(expected_values,
                                 actual_values,
                                 test="x",
                                 comments="employment rate distribution check")

    # pad with zeros below the youngest reference age for better display
    youngest = min(er)
    padding = np.zeros(youngest)
    expected_values = np.insert(expected_values, 0, padding)
    actual_values = np.insert(actual_values, 0, padding)
    ages = list(range(max(er) + 1))
    # names=None here; original note: somehow double stacks for age 100
    utilities.plot_array(expected_values,
                         actual_values,
                         names=None,
                         datadir=figdir,
                         testprefix="employment rate distribution " + test_prefix,
                         do_close=do_close)

    # check if total employment match
    rate_sum_by_bracket = dict.fromkeys(brackets, 0)
    actual_employed_brackets = dict.fromkeys(brackets, 0)
    for age in ages:
        bracket = ageindex[age]
        rate_sum_by_bracket[bracket] += expected_values[age]
        if age in actual_employed_age_dist:
            actual_employed_brackets[bracket] += actual_employed_age_dist[age]

    # mean rate within the bracket * share of population in the bracket * population size
    expected_employed_brackets = {
        bracket: rate_sum / len(brackets[bracket]) * age_dist[bracket] * n
        for bracket, rate_sum in rate_sum_by_bracket.items()
    }

    expected_total = np.array(list(expected_employed_brackets.values()))
    actual_total = np.array(list(actual_employed_brackets.values()))
    utilities.plot_array(expected_total,
                         actual_total,
                         names=brackets.keys(),
                         datadir=figdir,
                         testprefix="employment total " + test_prefix,
                         do_close=do_close)

    expected_etotal = np.round(np.sum(expected_total))
    actual_etotal = np.round(np.sum(actual_total))
    utilities.check_error_percentage(n,
                                     expected_etotal,
                                     actual_etotal,
                                     name="employee")