Пример #1
0
def test_plot_generated_trimmed_contact_matrix(setting_code='H', n=5000, aggregate_flag=True, logcolors_flag=True,
                                               density_or_frequency='density'):
    """
    Build a Seattle-metro contact network, trim it, and plot its contact matrix.

    Args:
        setting_code (str)         : Forwarded to ``sp.plot_contact_frequency``.
        n (int)                    : Population size, forwarded as ``Npop``.
        aggregate_flag (bool)      : Forwarded to ``sp.plot_contact_frequency``.
        logcolors_flag (bool)      : Forwarded to ``sp.plot_contact_frequency``.
        density_or_frequency (str) : Forwarded to ``sp.calculate_contact_matrix``
                                     and ``sp.plot_contact_frequency``.

    Returns:
        Whatever ``sp.plot_contact_frequency`` returns (named ``fig`` by the caller).
    """
    data_root = sp.datadir

    state = 'Washington'
    metro = 'seattle_metro'
    country = 'usa'

    # NOTE(review): country_location is not forwarded to make_contacts here,
    # only to get_census_age_brackets below -- confirm this is intended.
    network = sp.make_contacts(
        {},
        state_location=state,
        location=metro,
        options_args={'use_microstructure': True},
        network_distr_args={'Npop': int(n)},
    )
    network = sp.trim_contacts(network, trimmed_size_dic=None, use_clusters=False)

    brackets = sp.get_census_age_brackets(data_root, state_location=state, country_location=country)
    bracket_of_age = sp.get_age_by_brackets_dic(brackets)

    # Tally how many people there are of each age, then roll up by bracket.
    per_age_count = Counter([network[uid]['age'] for uid in network])
    per_bracket_count = sp.get_aggregate_ages(per_age_count, bracket_of_age)

    matrices = sp.calculate_contact_matrix(network, density_or_frequency)

    return sp.plot_contact_frequency(matrices, per_age_count, per_bracket_count, brackets, bracket_of_age,
                                     setting_code, density_or_frequency, logcolors_flag, aggregate_flag)
Пример #2
0
def plot_generated_trimmed_contact_matrix(datadir,
                                          n,
                                          location='seattle_metro',
                                          state_location='Washington',
                                          country_location='usa',
                                          setting_code='H',
                                          aggregate_flag=True,
                                          logcolors_flag=True,
                                          density_or_frequency='density',
                                          trimmed_size_dic=None):
    """
    Generate a contact network, trim it, and plot its age-mixing matrix.

    Args:
        datadir                    : Data directory passed to ``sp.get_census_age_brackets``.
        n (int)                    : Population size, forwarded as ``Npop``.
        location (str)             : Forwarded to ``sp.make_contacts``.
        state_location (str)       : Forwarded to ``sp.make_contacts`` and
                                     ``sp.get_census_age_brackets``.
        country_location (str)     : Forwarded to ``sp.make_contacts`` and
                                     ``sp.get_census_age_brackets``.
        setting_code (str)         : Forwarded to ``calculate_contact_matrix`` and
                                     ``plot_contact_matrix``.
        aggregate_flag (bool)      : Forwarded to ``plot_contact_matrix``.
        logcolors_flag (bool)      : Forwarded to ``plot_contact_matrix``.
        density_or_frequency (str) : Forwarded to ``calculate_contact_matrix`` and
                                     ``plot_contact_matrix``.
        trimmed_size_dic (dict)    : Forwarded to ``sp.trim_contacts``.

    Returns:
        Whatever ``plot_contact_matrix`` returns (named ``fig`` by the caller).
    """
    contacts = sp.make_contacts({},
                                country_location=country_location,
                                state_location=state_location,
                                location=location,
                                options_args={'use_microstructure': True},
                                network_distr_args={'Npop': int(n)})
    contacts = sp.trim_contacts(contacts,
                                trimmed_size_dic=trimmed_size_dic,
                                use_clusters=False)

    age_brackets = sp.get_census_age_brackets(
        datadir,
        state_location=state_location,
        country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # Count people per age, then aggregate those counts per age bracket.
    age_count = Counter(contacts[uid]['age'] for uid in contacts)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    symmetric_matrix = calculate_contact_matrix(contacts, density_or_frequency,
                                                setting_code)

    return plot_contact_matrix(symmetric_matrix,
                               age_count,
                               aggregate_age_count,
                               age_brackets,
                               age_by_brackets_dic,
                               setting_code=setting_code,
                               density_or_frequency=density_or_frequency,
                               logcolors_flag=logcolors_flag,
                               aggregate_flag=aggregate_flag)
Пример #3
0
def test_trimmed_contacts_are_bidirectional():
    """
    Check bidirectionality of contacts for a handful of people in a trimmed network.

    Builds a contact network with ``sp.make_contacts``, trims it with
    ``sp.trim_contacts`` (``{'S': 10, 'W': 10}``), then for ``num_people``
    uids chosen by ``find_fresh_uid`` calls
    ``check_bidirectionality_of_contacts`` on that person's JSON view.

    ``location``, ``state_location``, ``country_location``, ``options_args``
    and ``network_distr_args`` are not defined here and must be available
    from the enclosing (module) scope.
    """
    num_people = 5
    close_contacts_numbers = {'S': 10, 'W': 10}
    is_debugging = False  # flip to True to dump each checked person to a JSON file

    my_contacts = sp.make_contacts(location=location,
                                   state_location=state_location,
                                   country_location=country_location,
                                   options_args=options_args,
                                   network_distr_args=network_distr_args)
    popdict = sp.trim_contacts(my_contacts,
                               trimmed_size_dic=close_contacts_numbers)

    uid_list = list(popdict.keys())
    checked_people = []
    last_index_checked = 0

    while len(checked_people) < num_people:
        # find_fresh_uid hands back the updated bookkeeping alongside the uid;
        # the loop relies on it growing checked_people to terminate.
        my_uid, last_index_checked, checked_people = \
            find_fresh_uid(uid_list=uid_list,
                           last_index_checked=last_index_checked,
                           checked_people=checked_people)

        person_json = make_person_json(popdict=popdict, target_uid=my_uid)

        if is_debugging:
            person_filename = f"DEBUG_popdict_person_{my_uid}.json"
            print(f"TEST: {my_uid}")
            if os.path.isfile(person_filename):
                os.unlink(person_filename)
            with open(person_filename, "w") as outfile:
                json.dump(person_json, outfile, indent=4, sort_keys=True)

        # Now check that each person in each network has me in their network
        check_bidirectionality_of_contacts(person_json=person_json,
                                           popdict=popdict)
Пример #4
0
def make_population(n=None,
                    max_contacts=None,
                    as_objdict=False,
                    generate=False):
    '''
    Make a full population network including both people (ages, sexes) and contacts using Seattle, Washington cached data.

    Args:
        n (int)             : The number of people to create. Defaults to 10000.
        max_contacts (dict) : A dictionary for maximum number of contacts per layer: keys must be "S" (school) and/or "W" (work).
                              Merged on top of the defaults ``{'S': 20, 'W': 10}``.
        as_objdict (bool)   : If True, change popdict type to ``sc.objdict``.
        generate (bool)     : If True, first look for cached population files and if those are not available, generate new population

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If ``generate`` is falsy and either ``n`` is not one of
            ``popsize_choices`` or the cached population could not be loaded.

    '''

    default_n = 10000
    default_max_contacts = {
        'S': 20,
        'W': 10
    }  # this can be anything but should be based on relevant average number of contacts for the population under study

    if n is None:
        n = default_n
    n = int(n)

    # Without permission to generate, only the pre-computed sizes are usable.
    if n not in popsize_choices and not generate:
        choicestr = ', '.join([str(choice) for choice in popsize_choices])
        errormsg = f'Number of people must be one of {choicestr}, not {n}'
        raise ValueError(errormsg)

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}

    # Heavy lift 1: make the contacts and their connections
    try:
        # try to read in from file
        population = sp.make_contacts(location=location,
                                      state_location=state_location,
                                      country_location=country_location,
                                      options_args=options_args,
                                      network_distr_args=network_distr_args)
    except Exception as err:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        if not generate:
            # Previously this path referenced an unbound `errormsg` and crashed
            # with UnboundLocalError; raise the intended ValueError and keep
            # the underlying failure attached as the cause.
            errormsg = (f'Could not load a cached population of {n} people '
                        f'and generate=False, so no new population was created')
            raise ValueError(errormsg) from err

        # make a new network on the fly
        population = sp.generate_synthetic_population(
            n,
            sp.datadir,
            location=location,
            state_location=state_location,
            country_location=country_location,
            sheet_name=sheet_name,
            plot=False,
            return_popdict=True)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population,
                                  trimmed_size_dic=max_contacts,
                                  use_clusters=False)

    # Change types
    if as_objdict:
        population = sc.objdict(population)
    # Snapshot the keys: entries are re-assigned while walking the population.
    for key in list(population.keys()):
        if as_objdict:
            population[key] = sc.objdict(population[key])
            population[key]['contacts'] = sc.objdict(
                population[key]['contacts'])
        # Each layer's contact collection is converted to a plain list.
        for layerkey in population[key]['contacts'].keys():
            population[key]['contacts'][layerkey] = list(
                population[key]['contacts'][layerkey])

    return population
Пример #5
0
def make_population(n=None, max_contacts=None, generate=None, with_industry_code=False, with_facilities=False,
                    use_two_group_reduction=True, average_LTCF_degree=20, ltcf_staff_age_min=20, ltcf_staff_age_max=60,
                    with_school_types=False, school_mixing_type='random', average_class_size=20, inter_grade_mixing=0.1,
                    average_student_teacher_ratio=20, average_teacher_teacher_degree=3, teacher_age_min=25, teacher_age_max=75,
                    with_non_teaching_staff=False,
                    average_student_all_staff_ratio=15, average_additional_staff_degree=20, staff_age_min=20, staff_age_max=75,
                    rand_seed=None):
    '''
    Build a Seattle-metro (Washington, USA) population network: people plus their contacts.

    The population is either read through ``sp.make_contacts`` (``generate`` falsy) or
    created on the fly (``generate`` truthy), then trimmed with ``sp.trim_contacts``.

    Args:
        n (int)                                 : Number of people; defaults to 10000.
        max_contacts (dict)                     : Per-layer contact caps, merged over the default ``{'W': 20}``.
        generate (bool)                         : True generates a new population; False reads it via ``sp.make_contacts``.
                                                  When ``n`` is not in ``popsize_choices``: False raises, anything else becomes True.
                                                  When left as None (and ``n`` is a known size) it becomes True only if ``with_facilities``.
        with_industry_code (bool)               : Stored in ``options_args['use_industry_code']``; cannot be combined with ``with_facilities`` when generating.
        with_facilities (bool)                  : Stored in ``options_args['use_long_term_care_facilities']``; selects the facilities generator.
        use_two_group_reduction (bool)          : Long term care facility option, forwarded as given.
        average_LTCF_degree (float)             : Average degree in long term care facilities.
        ltcf_staff_age_min (int)                : Long term care facility staff minimum age (facilities generator only).
        ltcf_staff_age_max (int)                : Long term care facility staff maximum age (facilities generator only).
        with_school_types (bool)                : If True, creates explicit school types.
        school_mixing_type (str or dict)        : School mixing type, forwarded as given.
        average_class_size (float)              : The average classroom size.
        inter_grade_mixing (float)              : Average fraction of mixing between grades in the same school.
        average_student_teacher_ratio (float)   : The average number of students per teacher.
        average_teacher_teacher_degree (float)  : The average number of contacts per teacher with other teachers.
        teacher_age_min (int)                   : The minimum age for teachers (generators only).
        teacher_age_max (int)                   : The maximum age for teachers (generators only).
        with_non_teaching_staff (bool)          : Stored in ``options_args['with_non_teaching_staff']``; not passed to the generators here.
        average_student_all_staff_ratio (float) : The average number of students per staff member at school.
        average_additional_staff_degree (float) : The average number of contacts per additional non teaching staff.
        staff_age_min (int)                     : The minimum age for non teaching staff (generators only).
        staff_age_max (int)                     : The maximum age for non teaching staff (generators only).
        rand_seed (int)                         : If not None, passed to ``sp.set_seed`` before anything else.

    Returns:
        network (dict): The trimmed population, with every contact layer converted to a list.

    Raises:
        ValueError: If ``generate`` is False and ``n`` is not in ``popsize_choices``, or if
            both ``with_facilities`` and ``with_industry_code`` are requested while generating.
    '''

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    n = int(10000 if n is None else n)

    # Sizes without a cached file can only be served by generating.
    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(choice) for choice in popsize_choices)
            raise ValueError(f'If generate=False, number of people must be one of {choicestr}, not {n}')
        generate = True

    # Still undecided: generate only when long term care facilities are requested.
    if generate is None:
        generate = True if with_facilities else False

    # this can be anything but should be based on relevant average number of contacts for the population under study
    max_contacts = sc.mergedicts({'W': 20}, max_contacts)

    place = {
        'location': 'seattle_metro',
        'state_location': 'Washington',
        'country_location': 'usa',
    }
    sheet_name = 'United States of America'

    if not generate:
        # Heavy lift 1 (cached): must read in from file, will fail if the data has not yet been generated
        options_args = {
            'use_microstructure': True,
            'use_industry_code': with_industry_code,
            'use_long_term_care_facilities': with_facilities,
            'use_two_group_reduction': use_two_group_reduction,
            'with_school_types': with_school_types,
            'with_non_teaching_staff': with_non_teaching_staff,
        }
        network_distr_args = {
            'Npop': int(n),
            'average_LTCF_degree': average_LTCF_degree,
            'average_class_size': average_class_size,
            'average_student_teacher_ratio': average_student_teacher_ratio,
            'average_teacher_teacher_degree': average_teacher_teacher_degree,
            'inter_grade_mixing': inter_grade_mixing,
            'average_student_all_staff_ratio': average_student_all_staff_ratio,
            'average_additional_staff_degree': average_additional_staff_degree,
            'school_mixing_type': school_mixing_type,
        }
        population = sp.make_contacts(sheet_name=sheet_name,
                                      options_args=options_args,
                                      network_distr_args=network_distr_args,
                                      **place)
    else:
        # Heavy lift 1 (fresh): make a new network on the fly
        if with_facilities and with_industry_code:
            raise ValueError(f'Requesting both long term care facilities and industries by code is not supported yet.')

        # Keyword arguments both generators receive identically.
        shared_kwargs = dict(
            place,
            sheet_name=sheet_name,
            with_school_types=with_school_types,
            school_mixing_type=school_mixing_type,
            average_class_size=average_class_size,
            inter_grade_mixing=inter_grade_mixing,
            average_student_teacher_ratio=average_student_teacher_ratio,
            average_teacher_teacher_degree=average_teacher_teacher_degree,
            teacher_age_min=teacher_age_min,
            teacher_age_max=teacher_age_max,
            average_student_all_staff_ratio=average_student_all_staff_ratio,
            average_additional_staff_degree=average_additional_staff_degree,
            staff_age_min=staff_age_min,
            staff_age_max=staff_age_max,
            return_popdict=True,
        )

        if with_facilities:
            population = sp.generate_microstructure_with_facilities(
                sp.datadir,
                n=n,
                use_two_group_reduction=use_two_group_reduction,
                average_LTCF_degree=average_LTCF_degree,
                ltcf_staff_age_min=ltcf_staff_age_min,
                ltcf_staff_age_max=ltcf_staff_age_max,
                **shared_kwargs)
        else:
            population = sp.generate_synthetic_population(n, sp.datadir, **shared_kwargs)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: every contact layer becomes a plain list
    for key in population:
        layers = population[key]['contacts']
        for layerkey in layers.keys():
            layers[layerkey] = list(layers[layerkey])
    return population
Пример #6
0
def make_population(n=None,
                    max_contacts=None,
                    generate=None,
                    with_industry_code=False,
                    with_facilities=False,
                    use_two_group_reduction=True,
                    average_LTCF_degree=20,
                    rand_seed=None):
    '''
    Build a Seattle-metro (Washington, USA) population network: people plus their contacts.

    The population is either read through ``sp.make_contacts`` (``generate`` falsy) or
    created on the fly (``generate`` truthy), then trimmed with ``sp.trim_contacts``.

    Args:
        n (int)                        : Number of people; defaults to 10000.
        max_contacts (dict)            : Per-layer contact caps, merged over the defaults ``{'S': 20, 'W': 20}``.
        generate (bool)                : True generates a new population; False reads it via ``sp.make_contacts``.
                                         When ``n`` is not in ``popsize_choices``: False raises, anything else becomes True.
                                         When left as None (and ``n`` is a known size) it becomes True only if ``with_facilities``.
        with_industry_code (bool)      : Stored in ``options_args['use_industry_code']``; cannot be combined with ``with_facilities`` when generating.
        with_facilities (bool)         : Stored in ``options_args['use_long_term_care_facilities']``; selects the facilities generator.
        use_two_group_reduction (bool) : Long term care facility option, forwarded as given.
        average_LTCF_degree (int)      : Average degree in long term care facilities.
        rand_seed (int)                : If not None, passed to ``sp.set_seed`` before anything else.

    Returns:
        network (dict): The trimmed population, with every contact layer converted to a list.

    Raises:
        ValueError: If ``generate`` is False and ``n`` is not in ``popsize_choices``, or if
            both ``with_facilities`` and ``with_industry_code`` are requested while generating.

    '''

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    n = int(10000 if n is None else n)

    # Sizes without a cached file can only be served by generating.
    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(choice) for choice in popsize_choices)
            raise ValueError(f'If generate=False, number of people must be one of {choicestr}, not {n}')
        generate = True  # If not found, generate

    # Still undecided: generate only when long term care facilities are requested.
    if generate is None:
        generate = True if with_facilities else False

    # this can be anything but should be based on relevant average number of contacts for the population under study
    max_contacts = sc.mergedicts({'S': 20, 'W': 20}, max_contacts)

    place = {
        'location': 'seattle_metro',
        'state_location': 'Washington',
        'country_location': 'usa',
    }

    if not generate:
        # Heavy lift 1 (cached): must read in from file, will fail if the data has not yet been generated
        options_args = dict()
        options_args['use_microstructure'] = True
        options_args['use_industry_code'] = with_industry_code
        options_args['use_long_term_care_facilities'] = with_facilities
        options_args['use_two_group_reduction'] = use_two_group_reduction
        options_args['average_LTCF_degree'] = average_LTCF_degree

        population = sp.make_contacts(options_args=options_args,
                                      network_distr_args={'Npop': int(n)},
                                      **place)
    elif with_facilities and with_industry_code:
        raise ValueError(f'Requesting both long term care facilities and industries by code is not supported yet.')
    elif with_facilities:
        # Heavy lift 1 (fresh, with long term care facilities)
        population = sp.generate_microstructure_with_facilities(
            sp.datadir,
            n=n,
            return_popdict=True,
            use_two_group_reduction=use_two_group_reduction,
            average_LTCF_degree=average_LTCF_degree,
            **place)
    else:
        # Heavy lift 1 (fresh, no facilities)
        population = sp.generate_synthetic_population(
            n,
            sp.datadir,
            sheet_name='United States of America',
            plot=False,
            return_popdict=True,
            **place)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population,
                                  trimmed_size_dic=max_contacts,
                                  use_clusters=False)

    # Change types: every contact layer becomes a plain list
    for key in population:
        layers = population[key]['contacts']
        for layerkey in layers.keys():
            layers[layerkey] = list(layers[layerkey])
    return population
    country_location = 'usa'
    sheet_name = 'United States of America'
    level = 'county'

    n = 10000
    verbose = True
    plot = True

    # loads population with microstructure and age demographics that approximate those of the location selected
    # files located in:
    #    datadir/demographics/contact_matrices_152_countries/state_location/

    # load population into a dictionary of individuals who know who their contacts are
    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': n}
    contacts = sp.make_contacts(location=location,
                                state_location=state_location,
                                country_location=country_location,
                                options_args=options_args,
                                network_distr_args=network_distr_args)

    # not all school and workplace contacts are going to be close contacts so create 'closer' contacts for these settings
    close_contacts_number = {'S': 20, 'W': 20}
    contacts = sp.trim_contacts(contacts,
                                trimmed_size_dic=close_contacts_number)

    verbose = True
    # verbose = False
    if verbose:
        show_layers(contacts, show_ages=True)