Exemplo n.º 1
0
def test_work_size_distribution(do_show, do_save, create_sample_pop_e2e,
                                get_fig_dir_by_module):
    """
    Compare generated workplace counts per size bracket with the expected
    bracket distribution, then plot the workplace sizes.

    The normalized expected distribution is scaled by the total number of
    counted workplaces so that expected and actual values are both counts
    before being passed to sp.statistic_test.

    Args:
        do_show               : forwarded to the plotting call
        do_save               : forwarded to the plotting call
        create_sample_pop_e2e : fixture providing the generated population
        get_fig_dir_by_module : fixture value used as the figure directory
    """
    sp.logger.info(
        "Test workplace size distribution vs the work_size_count.dat")

    pop = create_sample_pop_e2e
    plotting_kwargs = sc.objdict(do_show=do_show,
                                 do_save=do_save,
                                 figdir=get_fig_dir_by_module)

    size_brackets = sp.get_workplace_size_brackets(**pop.loc_pars)
    # presumably maps each workplace size to its bracket index -- confirm
    bracket_of_size = sp.get_index_by_brackets(size_brackets)

    workplaces_per_size = pop.count_workplace_sizes()

    # tally the workplaces by size bracket; brackets never hit stay at zero
    actual_count = dict.fromkeys(set(bracket_of_size.values()), 0)
    for size, bracket in bracket_of_size.items():
        actual_count[bracket] += workplaces_per_size.get(size, 0)

    raw_distr = sp.get_workplace_size_distr_by_brackets(**pop.loc_pars)
    expected_distr = sp.norm_dic(raw_distr)

    # scale the expected distribution by the actual number of workplaces
    n_workplaces = sum(actual_count.values())
    expected_count = {}
    for bracket in expected_distr:
        expected_count[bracket] = expected_distr[bracket] * n_workplaces

    # statistical check on both series, ordered by bracket key
    expected_sorted = [expected_count[b] for b in sorted(expected_count)]
    actual_sorted = [actual_count[b] for b in sorted(actual_count)]
    sp.statistic_test(expected_sorted, actual_sorted)

    pop.plot_workplace_sizes(**plotting_kwargs)
Exemplo n.º 2
0
def test_generate_workplace_sizes(location='seattle_metro', state_location='Washington',
                                  country_location='usa', folder_name='contact_networks'):
    """
    Run the school-assignment and worker-selection steps, then generate
    workplace sizes for the given location.

    Reads the module-level ``datadir``. All data lookups and generation steps
    are delegated to ``sp`` helpers, called in the order they depend on each
    other.

    Args:
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        folder_name      : forwarded to sp.get_uids_in_school and
                           sp.read_in_age_by_uid

    Returns:
        tuple: (workers_by_age_to_assign_count, workplace_size_brackets,
        workplace_size_distr_by_brackets, workplace_sizes)
    """
    Npeople = 10000
    place = (location, state_location, country_location)
    place_kwargs = dict(location=location,
                        state_location=state_location,
                        country_location=country_location)

    # --- students ---------------------------------------------------------
    in_school = sp.get_uids_in_school(datadir, Npeople, *place,
                                      folder_name=folder_name,
                                      use_default=True)
    uids_in_school, uids_in_school_by_age, ages_in_school_count = in_school

    school_distr = sp.get_school_size_distr_by_brackets(datadir, *place)
    school_brackets = sp.get_school_size_brackets(datadir, *place)
    school_sizes = sp.generate_school_sizes(school_distr, school_brackets,
                                            uids_in_school)

    brackets_path = sp.get_census_age_brackets_path(datadir, state_location,
                                                    country_location)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    # only the school uids are needed further down
    _, syn_school_uids = sp.send_students_to_school(
        school_sizes, uids_in_school, uids_in_school_by_age,
        ages_in_school_count, age_brackets, age_by_brackets_dic,
        contact_matrix_dic)

    # --- workers ----------------------------------------------------------
    employment_rates = sp.get_employment_rates(datadir, use_default=True,
                                               **place_kwargs)
    age_by_uid_dic = sp.read_in_age_by_uid(datadir, *place, folder_name,
                                           Npeople)

    _, _, ages_left_count = sp.get_uids_potential_workers(
        syn_school_uids, employment_rates, age_by_uid_dic)

    workers_to_assign = sp.get_workers_by_age_to_assign(
        employment_rates, ages_left_count, age_by_uid_dic)

    # --- workplaces -------------------------------------------------------
    wp_brackets = sp.get_workplace_size_brackets(datadir, *place,
                                                 use_default=True)
    wp_distr = sp.get_workplace_size_distr_by_brackets(
        datadir,
        state_location=state_location,
        country_location=country_location,
        use_default=True)
    wp_sizes = sp.generate_workplace_sizes(wp_distr, wp_brackets,
                                           workers_to_assign)

    return workers_to_assign, wp_brackets, wp_distr, wp_sizes
Exemplo n.º 3
0
def test_generate_workplace_sizes(location='seattle_metro',
                                  state_location='Washington',
                                  country_location='usa'):
    """
    Generate workplace sizes for the given location and print them.

    Reads the module-level ``datadir``. The steps are: find who is in school,
    load employment rates and ages by uid, select potential workers, work out
    how many workers of each age still need a workplace, then generate
    workplace sizes from the bracketed size distribution.

    NOTE(review): a trailing section that re-ran worker selection, assigned
    teachers and the remaining workers was dropped. It passed ``gen_schools``
    and ``gen_school_uids`` as arguments in the same statement that first
    assigned them (UnboundLocalError), relied on names never defined in this
    function (``emp_rates``, ``uids_by_age_dic``, ``age_brackets``,
    ``age_by_brackets_dic``, ``contact_matrix_dic``) and called
    ``sp.get_workplace_size_brackets`` with a different signature than the
    call below, so it could not run. Restore it as its own test if needed.

    Args:
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
    """
    Npeople = 10000
    uids_in_school, uids_in_school_by_age, uids_in_school_count = sp.get_uids_in_school(
        datadir,
        Npeople,
        location,
        state_location,
        country_location,
        use_default=True)

    employment_rates = sp.get_employment_rates(
        datadir,
        location=location,
        state_location=state_location,
        country_location=country_location,
        use_default=True)

    age_by_uid_dic = sp.read_in_age_by_uid(datadir, location, state_location,
                                           country_location, Npeople)

    # only the per-age counts of remaining potential workers are used below
    _, _, potential_worker_ages_left_count = sp.get_uids_potential_workers(
        uids_in_school, employment_rates, age_by_uid_dic)

    workers_by_age_to_assign_count = sp.get_workers_by_age_to_assign(
        employment_rates, potential_worker_ages_left_count, age_by_uid_dic)

    workplace_size_brackets = sp.get_workplace_size_brackets(datadir,
                                                             location,
                                                             state_location,
                                                             country_location,
                                                             use_default=True)

    workplace_size_distr_by_brackets = sp.get_workplace_size_distr_by_brackets(
        datadir,
        state_location=state_location,
        country_location=country_location,
        use_default=True)

    workplace_sizes = sp.generate_workplace_sizes(
        workplace_size_distr_by_brackets, workplace_size_brackets,
        workers_by_age_to_assign_count)
    print(workplace_sizes)
Exemplo n.º 5
0
def test_workplace_contact_distribution_2(create_sample_pop_e2e):
    """
    Exploratory look at per-workplace contact counts (not a strict test).

    For each workplace id returned by cn.get_contact_counts_by_layer:

    * if all members share a single contact count, the workplace is asserted
      to be fully connected (contact count + 1 == workplace size);
    * otherwise the contact counts are checked with
      sp.check_truncated_poisson, and on failure they are additionally
      compared, via sp.statistic_test, with the degrees of a seeded random
      graph of the same size.

    A summary of the pass rate and of the failures per workplace size bracket
    is printed at the end.

    Args:
        create_sample_pop_e2e: fixture providing the generated population
    """
    sp.logger.info(
        "Not a test - exploratory --- workplaces that don't match are quite close to expected results"
    )
    pop = create_sample_pop_e2e
    max_contacts = pop.max_contacts
    max_w_size = int(max_contacts['W'] // 2)
    wsize_brackets = sp.get_workplace_size_brackets(**pop.loc_pars)
    wsize_index = sp.get_index_by_brackets(wsize_brackets)
    contacts, contacts_by_id = cn.get_contact_counts_by_layer(
        pop.popdict, layer="w", with_layer_ids=True)

    max_size_full_connected = 0

    runs = 0
    passed = 0
    failedsize = []
    allsize = []
    for wpid in sorted(contacts_by_id.keys()):
        wp_contacts = contacts_by_id[wpid]
        wnc = set(wp_contacts)
        wsize = len(wp_contacts)
        allsize.append(wsize_index[wsize])

        if len(wnc) == 1:
            # a single distinct contact count: everyone must know everyone else
            assert list(wnc)[0] + 1 == wsize, 'Check Failed'
            max_size_full_connected = max(max_size_full_connected, wsize)
        else:
            print(
                f"workplace id is {wpid}, no.contacts, {wnc}, size {wsize}, mu {max_w_size}"
            )
            runs += 1
            result = sp.check_truncated_poisson(wp_contacts,
                                                mu=max_contacts['W'] - 2,
                                                lowerbound=max_contacts['W'] // 2,
                                                upperbound=wsize - 1,
                                                skipcheck=0,
                                                do_show=0)
            passed += int(result)
            if not result:
                failedsize.append(wsize_index[wsize])
                # degree distribution for an ER random graph follows a binomial distribution that is truncated
                # to the max size N. When N is large this approximates the poisson distribution. Perhaps our
                # test could look at the zero-N truncated binomial distribution
                # The reference graph is only needed for this follow-up
                # comparison, so it is built only when the check fails.
                p = (max_contacts['W'] - 1) / wsize
                G = nx.fast_gnp_random_graph(wsize, p, seed=0)
                degree = [G.degree(i) for i in G.nodes()]
                sp.statistic_test(degree, wp_contacts, verbose=True)
            print('workplace id', wpid)
            print('\n\n')

    if runs:
        print(
            f'total workplaces: {runs}, passing checks: {passed}, passed rate:{round(passed/runs,2) *100} %'
        )
    else:
        # every workplace was fully connected; there is no rate to report
        print(f'total workplaces: {runs}, passing checks: {passed}, passed rate: n/a')

    print("size brackets:\tcount")
    failed_counts = Counter(failedsize)
    all_counts = Counter(allsize)
    for k in sorted(failed_counts):
        v = failed_counts[k]
        # every failed bracket also appears in all_counts, so no zero division
        print(
            f"{min(wsize_brackets[k])}-{max(wsize_brackets[k])}:\t{v}, {v/all_counts[k] * 100:.2f}"
        )
    print('max_size_full_connected', max_size_full_connected)
def test_information_in_generation():
    """
    Basic tests that summaries are produced when synthpops generates
    populations. Summaries are stored and accessed via sp.Pop().information.

    A population is built from the module-level ``pars`` and the summary data
    for ages, households, long term care facilities (ltcfs), enrollment,
    employment and workplaces is sanity-checked. Each passing check prints a
    short confirmation.
    """
    sp.logger.info("Test summaries are produced when populations are generated.")
    sp.logger.info("Temporary basic tests. To be reorganized and converted to plotting based tests.\n")

    pop = sp.Pop(**pars)
    info = pop.information

    # check age_count information
    assert isinstance(info.age_count, dict), "Check failed"
    print(f"Check passed. Age count information exists and is a dictionary. The age range is from {min(info.age_count.keys())} to {max(info.age_count.keys())} years old.")

    assert sum(info.age_count.values()) == pop.n, f"Check failed. The sum of pop.age_count ({sum(info.age_count.values())}) does not equal the population size ({pop.n})."
    print(f"Check passed. Age count information of the generated population matches the expected size ({pop.n}).\n")

    # check household size information
    assert sum(info.household_size_count.values()) > 0, "Check failed. No people placed into unique households."
    print("Check passed. Household sizes exist in pop object and is a dictionary by household id (hhid).")

    # people counted per household must equal people counted per household size
    n_in_households = sum(info.household_sizes.values())
    assert n_in_households == sum(count * size for size, count in info.household_size_count.items()), "Household sizes information check failed."
    print("Check passed. Household sizes information check passed.\n")

    # check household size distribution
    household_size_dist = sp.get_generated_household_size_distribution(info.household_sizes)
    expected_household_size_dist = sp.get_household_size_distr(**pop.loc_pars)
    if expected_household_size_dist[1] > 0:
        assert household_size_dist[1] > 0, "Check failed. No one lives alone even though the expected household size distribution says otherwise."
        print("Check passed. At least some people live alone as expected from the household size distribution.")

    # check household head information
    assert min(info.household_head_ages.values()) >= 18, "Check failed. Min head of household age is younger than 18 years old."
    print("Check passed. All heads of households are at least 18 years old.")

    # check household head age count information
    assert sum(info.household_head_age_count.values()) == len(info.household_sizes), "Check on count of household head ages failed."
    print("Check passed. The count of household head ages matches the number of households created.\n")

    # check ltcf information
    n_in_ltcfs = sum(info.ltcf_sizes.values())
    assert n_in_ltcfs > 0, "Check failed. No people placed in ltcfs."
    print("Check passed. Ltcfs created.")

    # count only LTCF residents
    ltcf_sizes_res = pop.get_ltcf_sizes(keys_to_exclude=['ltcf_staff'])
    n_ltcf_res = sum(ltcf_sizes_res.values())
    assert n_ltcf_res < n_in_ltcfs, "Check failed. Ltcf residents is greater than or equal to all people in ltcfs."
    print("Check passed. Ltcf residents created separately.")

    # check that those living in households or LTCFs account for the entire expected population
    assert n_in_households + n_ltcf_res == pop.n, f"Check failed. Population size is {pop.n} and the sum of people generated living in households and ltcfs is {n_in_households + n_ltcf_res}."
    print("Check passed. Everyone lives either in a household or ltcf.")

    # count only LTCF staff
    ltcf_sizes_staff = pop.get_ltcf_sizes(keys_to_exclude=['ltcf_res'])
    assert n_ltcf_res + sum(ltcf_sizes_staff.values()) == n_in_ltcfs, "Check failed. The sum of ltcf residets and staff counted separately does not equal the count of them together."
    print("Check passed. Ltcf staff created separately.\n")

    # check enrollment count by age
    # The failure message reports the same quantity the assertion checks.
    n_enrolled = sum(info.enrollment_by_age.values())
    assert n_enrolled > 0, f"Check failed. Student enrollment is less than or equal to 0 ({n_enrolled})."
    print("Check passed. Student enrollment count by age exists and is greater than 0.")

    # check enrollment rates by age
    enrollment_rates_by_age = pop.enrollment_rates_by_age  # a property rather than stored data so make a copy here
    assert 0 < enrollment_rates_by_age[10] <= 1., f"Check failed. Enrollment rate for age 10 is less than or equal to 0 ({enrollment_rates_by_age[10]})."
    print(f"Check passed. Enrollment rate for age 10 is {enrollment_rates_by_age[10] * 100:.2f}%.\n")

    # check employment rates by age
    employment_rates_by_age = pop.employment_rates_by_age  # a property rather than stored data so make a copy here
    assert 0 < employment_rates_by_age[25] <= 1., f"Check failed. Employment rate for age 25 is less than or equal to 0 ({employment_rates_by_age[25]})."
    print(f"Check passed. Employment rate for age 25 is {employment_rates_by_age[25] * 100:.2f}%.")

    # check workplace sizes
    assert sum(info.workplace_sizes.values()) > 0, "Check failed. Sum of workplace sizes is less than or equal to 0."
    print("Workplace sizes exists in pop object and is a dictionary by workplace id (wpid).")

    workplace_size_brackets = sp.get_workplace_size_brackets(**pop.loc_pars)

    # check that bins and bin labels can be made
    workplace_size_bins = sp.get_bin_edges(workplace_size_brackets)
    assert len(workplace_size_bins) >= 2, "Check failed. workplace size bins contains the limits for less than one bin."
    print(f"Check passed. There are {len(workplace_size_bins) - 1} workplace size bins.")

    # check that bin labels are all strings
    workplace_size_bin_labels = sp.get_bin_labels(workplace_size_brackets)
    label_types = {type(bl) for bl in workplace_size_bin_labels}

    assert len(label_types) == 1, "Check failed. There is more than one type for the workplace size bin labels generated."
    print("Check passed. There is only one type for workplace size bin labels generated.")

    assert isinstance(workplace_size_bin_labels[0], str), "Check failed. Bin labels are not strings."
    print("Check passed. Bin labels are strings.")

    workplace_size_dist = sp.get_generated_workplace_size_distribution(info.workplace_sizes, workplace_size_bins)
    expected_workplace_size_dist = sp.norm_dic(sp.get_workplace_size_distr_by_brackets(sp.settings.datadir, state_location=pop.state_location, country_location=pop.country_location))
    if expected_workplace_size_dist[0] > 0:
        assert workplace_size_dist[0] > 0, "Check failed. Expected some workplaces to be created in the smallest bin size but there are none in this bin."
        print("Check passed for workplaces in the smallest bin.")
Exemplo n.º 7
0
def check_work_size_distribution(pop,
                                 n,
                                 datadir,
                                 figdir,
                                 location=None,
                                 state_location=None,
                                 country_location=None,
                                 file_path=None,
                                 use_default=False,
                                 test_prefix="",
                                 skip_stat_check=False,
                                 do_close=True):
    """
    Plot and optionally test the workplace size distribution of a population
    against the reference workplace size distribution.

    Workplace sizes are obtained by counting people per "wpid"; sizes above
    the largest indexed size are counted in the bracket of that largest size.

    Args:
        pop              : population dictionary
        n                : population size (not used by this function)
        datadir          : data directory passed to the reference data lookups
        figdir           : base directory; a "work_size" subfolder of it is
                           handed to the plotting helper
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : forwarded to the reference data lookups
        use_default      : forwarded to the reference data lookups
        test_prefix      : appended to the plot title prefix
        skip_stat_check  : if True, skip the statistical comparison
        do_close         : forwarded to the plotting helper

    Returns:
        None.
    """
    figdir = os.path.join(figdir, "work_size")

    # the same lookup arguments are used for the brackets and the distribution
    lookup_kwargs = dict(datadir=datadir,
                         location=location,
                         state_location=state_location,
                         country_location=country_location,
                         file_path=file_path,
                         use_default=use_default)
    wb = sp.get_workplace_size_brackets(**lookup_kwargs)
    ws = sp.norm_dic(sp.get_workplace_size_distr_by_brackets(**lookup_kwargs))
    ws_index = sp.get_index_by_brackets_dic(wb)
    upper_bound = max(ws_index.keys())

    actual_work_dist, actual_work_dist_none = utilities.get_ids_count_by_param(
        pop, "wpid")

    # number of workplaces per bracket index, clamping oversized workplaces
    actual_worksizes = {}
    for size in actual_work_dist.values():
        bracket = ws_index[upper_bound if size > upper_bound else size]
        actual_worksizes[bracket] = actual_worksizes.get(bracket, 0) + 1

    # dense float array over bracket indices 0..len(ws)-1, then normalize
    n_brackets = len(ws)
    actual_values = np.array(
        [actual_worksizes.get(i, 0) for i in range(n_brackets)], dtype=float)
    actual_values = actual_values / np.nansum(actual_values)
    expected_values = np.array(list(ws.values()))

    xlabels = []
    for b in sorted(wb.keys()):
        bracket_sizes = wb[b]
        xlabels.append(str(bracket_sizes[0]) + '-' + str(bracket_sizes[-1]))

    utilities.plot_array(expected_values,
                         actual_values,
                         names=xlabels,
                         datadir=figdir,
                         testprefix="work size distribution " + test_prefix,
                         do_close=do_close,
                         xlabel_rotation=50)

    if skip_stat_check:
        return
    utilities.statistic_test(expected_values,
                             actual_values,
                             test="x",
                             comments="work size distribution check")