def test_plot_generated_trimmed_contact_matrix(setting_code='H', n=5000, aggregate_flag=True, logcolors_flag=True,
                                               density_or_frequency='density'):
    '''
    Build a trimmed contact network for Seattle metro and plot its contact matrix.

    Args:
        setting_code (str)         : Forwarded to ``sp.plot_contact_frequency``.
        n (int)                    : Population size, passed to ``sp.make_contacts`` as ``Npop``.
        aggregate_flag (bool)      : Forwarded to ``sp.plot_contact_frequency``.
        logcolors_flag (bool)      : Forwarded to ``sp.plot_contact_frequency``.
        density_or_frequency (str) : Forwarded to ``sp.calculate_contact_matrix`` and ``sp.plot_contact_frequency``.

    Returns:
        Whatever ``sp.plot_contact_frequency`` returns (bound to ``fig`` here).
    '''
    datadir = sp.datadir
    state_location = 'Washington'
    location = 'seattle_metro'
    country_location = 'usa'

    # Inputs for the cached-microstructure network
    popdict = {}
    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}

    network = sp.make_contacts(popdict,
                               state_location=state_location,
                               location=location,
                               options_args=options_args,
                               network_distr_args=network_distr_args)
    network = sp.trim_contacts(network, trimmed_size_dic=None, use_clusters=False)

    # Age bracket lookups for the chosen location
    age_brackets = sp.get_census_age_brackets(datadir,
                                              state_location=state_location,
                                              country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # Tally people per single year of age, then per bracket
    age_count = Counter([network[uid]['age'] for uid in network])
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    freq_matrix_dic = sp.calculate_contact_matrix(network, density_or_frequency)

    return sp.plot_contact_frequency(freq_matrix_dic, age_count, aggregate_age_count, age_brackets,
                                     age_by_brackets_dic, setting_code, density_or_frequency,
                                     logcolors_flag, aggregate_flag)
def plot_generated_trimmed_contact_matrix(datadir, n, location='seattle_metro', state_location='Washington',
                                          country_location='usa', setting_code='H', aggregate_flag=True,
                                          logcolors_flag=True, density_or_frequency='density',
                                          trimmed_size_dic=None):
    '''
    Make a contact network, trim it, and plot the resulting contact matrix.

    Args:
        datadir (str)              : Data directory passed to ``sp.get_census_age_brackets``.
        n (int)                    : Population size, passed to ``sp.make_contacts`` as ``Npop``.
        location (str)             : Location name passed to ``sp.make_contacts``.
        state_location (str)       : State name passed to ``sp.make_contacts`` and ``sp.get_census_age_brackets``.
        country_location (str)     : Country name passed to ``sp.make_contacts`` and ``sp.get_census_age_brackets``.
        setting_code (str)         : Forwarded to ``calculate_contact_matrix`` and ``plot_contact_matrix``.
        aggregate_flag (bool)      : Forwarded to ``plot_contact_matrix``.
        logcolors_flag (bool)      : Forwarded to ``plot_contact_matrix``.
        density_or_frequency (str) : Forwarded to ``calculate_contact_matrix`` and ``plot_contact_matrix``.
        trimmed_size_dic (dict)    : Forwarded to ``sp.trim_contacts``.

    Returns:
        Whatever ``plot_contact_matrix`` returns (bound to ``fig`` here).
    '''
    popdict = {}
    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}

    contacts = sp.make_contacts(popdict,
                                country_location=country_location,
                                state_location=state_location,
                                location=location,
                                options_args=options_args,
                                network_distr_args=network_distr_args)
    contacts = sp.trim_contacts(contacts, trimmed_size_dic=trimmed_size_dic, use_clusters=False)

    age_brackets = sp.get_census_age_brackets(datadir,
                                              state_location=state_location,
                                              country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # Count people per age, then aggregate the counts by bracket
    ages = [contacts[uid]['age'] for uid in contacts]
    age_count = Counter(ages)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    symmetric_matrix = calculate_contact_matrix(contacts, density_or_frequency, setting_code)

    fig = plot_contact_matrix(symmetric_matrix, age_count, aggregate_age_count, age_brackets,
                              age_by_brackets_dic,
                              setting_code=setting_code,
                              density_or_frequency=density_or_frequency,
                              logcolors_flag=logcolors_flag,
                              aggregate_flag=aggregate_flag)
    return fig
def test_trimmed_contacts_are_bidirectional():
    '''
    Check a sample of people from a trimmed network with ``check_bidirectionality_of_contacts``.

    A network is built with ``sp.make_contacts`` and trimmed with
    ``sp.trim_contacts`` (at most 10 contacts in each of 'S' and 'W'). Five
    uids are then drawn via ``find_fresh_uid`` and each one is checked.

    NOTE(review): ``location``, ``state_location``, ``country_location``,
    ``options_args`` and ``network_distr_args`` are not assigned here --
    presumably module-level names; confirm.
    '''
    num_people = 5
    close_contacts_numbers = {'S': 10, 'W': 10}
    is_debugging = False

    checked_people = []
    last_index_checked = 0

    untrimmed = sp.make_contacts(location=location,
                                 state_location=state_location,
                                 country_location=country_location,
                                 options_args=options_args,
                                 network_distr_args=network_distr_args)
    popdict = sp.trim_contacts(untrimmed, trimmed_size_dic=close_contacts_numbers)
    uid_list = list(popdict.keys())

    while len(checked_people) < num_people:
        picked = find_fresh_uid(uid_list=uid_list,
                                last_index_checked=last_index_checked,
                                checked_people=checked_people)
        my_uid, last_index_checked, checked_people = picked

        # Result is not used below; the lookup is kept so an unknown uid still raises here
        first_person = popdict[my_uid]
        person_json = make_person_json(popdict=popdict, target_uid=my_uid)

        if is_debugging:
            # Dump the person to a JSON file, replacing any previous dump
            person_filename = f"DEBUG_popdict_person_{my_uid}.json"
            print(f"TEST: {my_uid}")
            if os.path.isfile(person_filename):
                os.unlink(person_filename)
            with open(person_filename, "w") as outfile:
                json.dump(person_json, outfile, indent=4, sort_keys=True)

        # Now check that each person in each network has me in their network
        check_bidirectionality_of_contacts(person_json=person_json, popdict=popdict)
def make_population(n=None, max_contacts=None, as_objdict=False, generate=False):
    '''
    Make a full population network including both people (ages, sexes) and contacts using Seattle, Washington cached data.

    Args:
        n (int)             : The number of people to create.
        max_contacts (dict) : A dictionary for maximum number of contacts per layer: keys must be "S" (school) and/or "W" (work).
        as_objdict (bool)   : If True, change popdict type to ``sc.objdict``.
        generate (bool)     : If True, first look for cached population files and if those are not available, generate new population

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If ``n`` is not one of ``popsize_choices`` and ``generate`` is False,
            or if the cached population cannot be loaded and ``generate`` is False.
    '''

    default_n = 10000
    default_max_contacts = {'S': 20, 'W': 10}  # this can be anything but should be based on relevant average number of contacts for the population under study

    if n is None:
        n = default_n
    n = int(n)

    if n not in popsize_choices and not generate:
        choicestr = ', '.join([str(choice) for choice in popsize_choices])
        errormsg = f'Number of people must be one of {choicestr}, not {n}'
        raise ValueError(errormsg)

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}

    # Heavy lift 1: make the contacts and their connections
    try:
        # try to read in from file
        population = sp.make_contacts(location=location,
                                      state_location=state_location,
                                      country_location=country_location,
                                      options_args=options_args,
                                      network_distr_args=network_distr_args)
    except Exception as err:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
        if not generate:
            # This branch is only reached when n passed the size check above, so the
            # size error message was never built; report the load failure explicitly.
            errormsg = (f'Could not load a cached population of {n} people and generate=False; '
                        f'set generate=True to create a new one')
            raise ValueError(errormsg) from err
        # make a new network on the fly
        population = sp.generate_synthetic_population(n, sp.datadir,
                                                      location=location,
                                                      state_location=state_location,
                                                      country_location=country_location,
                                                      sheet_name=sheet_name,
                                                      plot=False,
                                                      return_popdict=True)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types
    if as_objdict:
        population = sc.objdict(population)
    for key in population.keys():
        if as_objdict:
            population[key] = sc.objdict(population[key])
            population[key]['contacts'] = sc.objdict(population[key]['contacts'])
        # Each layer's contacts are converted to a plain list
        layers = population[key]['contacts']
        for layerkey in layers.keys():
            layers[layerkey] = list(layers[layerkey])

    return population
def make_population(n=None, max_contacts=None, generate=None, with_industry_code=False, with_facilities=False,
                    use_two_group_reduction=True, average_LTCF_degree=20, ltcf_staff_age_min=20,
                    ltcf_staff_age_max=60, with_school_types=False, school_mixing_type='random',
                    average_class_size=20, inter_grade_mixing=0.1, average_student_teacher_ratio=20,
                    average_teacher_teacher_degree=3, teacher_age_min=25, teacher_age_max=75,
                    with_non_teaching_staff=False, average_student_all_staff_ratio=15,
                    average_additional_staff_degree=20, staff_age_min=20, staff_age_max=75, rand_seed=None):
    '''
    Build a complete population network (people with ages and sexes, plus their
    contacts) from Seattle, Washington cached data, or generate a new one.

    Args:
        n (int)                                 : Number of people to create.
        max_contacts (dict)                     : Maximum number of contacts per layer; keys must be "W" (work).
        generate (bool)                         : If True, generate a new population. Otherwise look for a cached population and, if none is available, generate a new one.
        with_industry_code (bool)               : If True, assign industry codes to workplaces; currently only possible for cached files of populations in the US.
        with_facilities (bool)                  : If True, create long term care facilities; currently only available for locations in the US.
        use_two_group_reduction (bool)          : If True, create long term care facilities with reduced contacts across both groups.
        average_LTCF_degree (float)             : Default average degree in long term care facilities.
        ltcf_staff_age_min (int)                : Minimum age of long term care facility staff.
        ltcf_staff_age_max (int)                : Maximum age of long term care facility staff.
        with_school_types (bool)                : If True, create explicit school types.
        school_mixing_type (str or dict)        : Mixing type for schools: 'random', 'age_clustered', or 'age_and_class_clustered' as a string, or a dictionary of these keyed by school type.
        average_class_size (float)              : Average classroom size.
        inter_grade_mixing (float)              : Average fraction of mixing between grades in the same school for clustered school mixing types.
        average_student_teacher_ratio (float)   : Average number of students per teacher.
        average_teacher_teacher_degree (float)  : Average number of contacts per teacher with other teachers.
        teacher_age_min (int)                   : Minimum age for teachers.
        teacher_age_max (int)                   : Maximum age for teachers.
        with_non_teaching_staff (bool)          : If True, include non teaching staff.
        average_student_all_staff_ratio (float) : Average number of students per staff member at school (teachers and non teachers combined).
        average_additional_staff_degree (float) : Average number of contacts per additional non teaching staff member in schools.
        staff_age_min (int)                     : Minimum age for non teaching staff.
        staff_age_max (int)                     : Maximum age for non teaching staff.
        rand_seed (int)                         : Seed the random sequence is started from.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.
    '''

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    default_n = 10000
    default_max_contacts = {'W': 20}  # this can be anything but should be based on relevant average number of contacts for the population under study

    n = int(default_n if n is None else n)

    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(choice) for choice in popsize_choices)
            raise ValueError(f'If generate=False, number of people must be one of {choicestr}, not {n}')
        # n is not a cached size and generate was not explicitly False: generate a new population
        generate = True

    # Default to False, unless LTCF are requested
    if generate is None:
        generate = bool(with_facilities)

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    options_args = {
        'use_microstructure': True,
        'use_industry_code': with_industry_code,
        'use_long_term_care_facilities': with_facilities,
        'use_two_group_reduction': use_two_group_reduction,
        'with_school_types': with_school_types,
        'with_non_teaching_staff': with_non_teaching_staff,
    }

    network_distr_args = {
        'Npop': int(n),
        'average_LTCF_degree': average_LTCF_degree,
        'average_class_size': average_class_size,
        'average_student_teacher_ratio': average_student_teacher_ratio,
        'average_teacher_teacher_degree': average_teacher_teacher_degree,
        'inter_grade_mixing': inter_grade_mixing,
        'average_student_all_staff_ratio': average_student_all_staff_ratio,
        'average_additional_staff_degree': average_additional_staff_degree,
        'school_mixing_type': school_mixing_type,
    }

    # Heavy lift 1: make the contacts and their connections
    if not generate:
        # must read in from file, will fail if the data has not yet been generated
        population = sp.make_contacts(location=location,
                                      state_location=state_location,
                                      country_location=country_location,
                                      sheet_name=sheet_name,
                                      options_args=options_args,
                                      network_distr_args=network_distr_args)
    elif with_facilities and with_industry_code:
        raise ValueError('Requesting both long term care facilities and industries by code is not supported yet.')
    else:
        # make a new network on the fly; both generators take the same place and school keywords
        place_kwargs = dict(location=location,
                            state_location=state_location,
                            country_location=country_location,
                            sheet_name=sheet_name)
        school_kwargs = dict(with_school_types=with_school_types,
                             school_mixing_type=school_mixing_type,
                             average_class_size=average_class_size,
                             inter_grade_mixing=inter_grade_mixing,
                             average_student_teacher_ratio=average_student_teacher_ratio,
                             average_teacher_teacher_degree=average_teacher_teacher_degree,
                             teacher_age_min=teacher_age_min,
                             teacher_age_max=teacher_age_max,
                             average_student_all_staff_ratio=average_student_all_staff_ratio,
                             average_additional_staff_degree=average_additional_staff_degree,
                             staff_age_min=staff_age_min,
                             staff_age_max=staff_age_max)
        if with_facilities:
            population = sp.generate_microstructure_with_facilities(sp.datadir,
                                                                    n=n,
                                                                    use_two_group_reduction=use_two_group_reduction,
                                                                    average_LTCF_degree=average_LTCF_degree,
                                                                    ltcf_staff_age_min=ltcf_staff_age_min,
                                                                    ltcf_staff_age_max=ltcf_staff_age_max,
                                                                    return_popdict=True,
                                                                    **place_kwargs,
                                                                    **school_kwargs)
        else:
            population = sp.generate_synthetic_population(n, sp.datadir,
                                                          return_popdict=True,
                                                          **place_kwargs,
                                                          **school_kwargs)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: each layer's contacts become a plain list
    for person in population.values():
        layers = person['contacts']
        for layerkey in layers.keys():
            layers[layerkey] = list(layers[layerkey])

    return population
def make_population(n=None, max_contacts=None, generate=None, with_industry_code=False, with_facilities=False,
                    use_two_group_reduction=True, average_LTCF_degree=20, rand_seed=None):
    '''
    Build a complete population network (people with ages and sexes, plus their
    contacts) from Seattle, Washington cached data, or generate a new one.

    Args:
        n (int)                        : Number of people to create.
        max_contacts (dict)            : Maximum number of contacts per layer; keys must be "S" (school) and/or "W" (work).
        generate (bool)                : If True, first look for cached population files and if those are not available, generate new population
        with_industry_code (bool)      : If True, assign industry codes to workplaces; currently only possible for cached files of populations in the US.
        with_facilities (bool)         : If True, create long term care facilities.
        use_two_group_reduction (bool) : If True, create long term care facilities with reduced contacts across both groups.
        average_LTCF_degree (int)      : Default average degree in long term care facilities.
        rand_seed (int)                : If not None, passed to ``sp.set_seed`` before anything else is done.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.
    '''

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    default_n = 10000
    default_max_contacts = {'S': 20, 'W': 20}  # this can be anything but should be based on relevant average number of contacts for the population under study

    n = int(default_n if n is None else n)

    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(choice) for choice in popsize_choices)
            raise ValueError(f'If generate=False, number of people must be one of {choicestr}, not {n}')
        generate = True  # If not found, generate

    # Default to False, unless LTCF are requested
    if generate is None:
        generate = bool(with_facilities)

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    options_args = {
        'use_microstructure': True,
        'use_industry_code': with_industry_code,
        'use_long_term_care_facilities': with_facilities,
        'use_two_group_reduction': use_two_group_reduction,
        'average_LTCF_degree': average_LTCF_degree,
    }
    network_distr_args = {'Npop': int(n)}

    # The same three place keywords go to whichever builder is used
    place_kwargs = dict(location=location, state_location=state_location, country_location=country_location)

    # Heavy lift 1: make the contacts and their connections
    if not generate:
        # must read in from file, will fail if the data has not yet been generated
        population = sp.make_contacts(options_args=options_args,
                                      network_distr_args=network_distr_args,
                                      **place_kwargs)
    elif with_facilities and with_industry_code:
        raise ValueError('Requesting both long term care facilities and industries by code is not supported yet.')
    elif with_facilities:
        # make a new network on the fly, with long term care facilities
        population = sp.generate_microstructure_with_facilities(sp.datadir,
                                                                n=n,
                                                                return_popdict=True,
                                                                use_two_group_reduction=use_two_group_reduction,
                                                                average_LTCF_degree=average_LTCF_degree,
                                                                **place_kwargs)
    else:
        # make a new network on the fly
        population = sp.generate_synthetic_population(n, sp.datadir,
                                                      sheet_name=sheet_name,
                                                      plot=False,
                                                      return_popdict=True,
                                                      **place_kwargs)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: each layer's contacts become a plain list
    for person in population.values():
        layers = person['contacts']
        for layerkey in layers.keys():
            layers[layerkey] = list(layers[layerkey])

    return population
# Script fragment: load a cached contact network, trim the school and work
# layers, and optionally print the layers.
# NOTE(review): `location` and `state_location` are used below but are not
# assigned in this fragment -- presumably set earlier in the script; confirm.
country_location = 'usa'
sheet_name = 'United States of America'
level = 'county'

n = 10000  # passed to sp.make_contacts as 'Npop'
verbose = True
plot = True

# loads population with microstructure and age demographics that approximate those of the location selected
# files located in:
#    datadir/demographics/contact_matrices_152_countries/state_location/

# load population into a dictionary of individuals who know who their contacts are
options_args = {'use_microstructure': True}
network_distr_args = {'Npop': n}
contacts = sp.make_contacts(location=location, state_location=state_location, country_location=country_location, options_args=options_args, network_distr_args=network_distr_args)

# not all school and workplace contacts are going to be close contacts so create 'closer' contacts for these settings
close_contacts_number = {'S': 20, 'W': 20}
contacts = sp.trim_contacts(contacts, trimmed_size_dic=close_contacts_number)

# Re-assigned here (same value as above) as a manual on/off switch for the printout
verbose = True
# verbose = False
if verbose:
    show_layers(contacts, show_ages=True)