Пример #1
0
 def get_cached_mse(self):
     """Return the pre-computed MSE fit term for this cluster / simulation.

     The value is read from
     ``self.all_fits[self.cluster_id][self.sim_key]['fit_terms']['mse']``.
     When the module-level ``scale_fit_terms`` flag is truthy the value is
     passed through ``val_scale`` together with the max / min terms that
     ``unroll_term`` extracts from the cached ``'max_terms'`` / ``'min_terms'``
     entries; otherwise the raw cached value is returned.

     Returns:
         The (optionally scaled) cached MSE term. If ``self.all_fits`` is
         empty or ``None``, ``error_loading_fit_terms('mse')`` is called and
         ``None`` is returned (unless that helper raises or exits; its
         behaviour is not visible from this method).
     """
     if not self.all_fits:
         error_loading_fit_terms('mse')
         return None

     fit_entry = self.all_fits[self.cluster_id][self.sim_key]

     if not scale_fit_terms:
         return fit_entry['fit_terms']['mse']

     min_term = unroll_term(fit_entry['min_terms'], fit_terms_types['mse'])
     max_term = unroll_term(fit_entry['max_terms'], fit_terms_types['mse'])

     # Scale the MSE value itself. The previous code handed the whole
     # 'fit_terms' dict to val_scale, which disagrees with the unscaled branch
     # above and with every other val_scale call site (all pass a scalar).
     return val_scale(fit_entry['fit_terms']['mse'], max_term, min_term)
Пример #2
0
    def __init__(self, model, sim_data, cluster_id,
                 reinfection_penalty = 0.0, reinfection_penalty_weight = 0.0,
                 clinical_cases_penalty = 0.0, clinical_cases_penalty_weight = 0.0,
                 all_fits = None):
        """Wrap a model for one cluster and establish its penalty terms.

        Stores the arguments on the instance, reads the model's ``'sim_key'``
        from ``model.get_meta()``, then either computes or loads (from
        ``all_fits``) the reinfection and clinical-cases penalties, and
        finally calls ``self.set_model_penalties()``.

        Which path is taken is controlled by module-level settings that are
        not defined in this method: ``reinf_weight``, ``load_reinf_penalty``,
        ``load_cc_penalty``, ``cc_penalty_model``, ``scale_fit_terms`` and
        ``cc_weight``.

        Args:
            model: object exposing ``get_meta()``; kept as ``self.model``.
            sim_data: simulation output; ``sim_data['cc']`` is used when the
                clinical-cases penalty is computed rather than loaded.
            cluster_id: identifier used to index ``all_fits`` and passed to
                the penalty setters.
            reinfection_penalty: initial value for ``self.reinfection_penalty``
                and ``self.reinfection_penalty_term``.
            reinfection_penalty_weight: initial reinfection penalty weight.
            clinical_cases_penalty: initial value for
                ``self.clinical_cases_penalty`` and
                ``self.clinical_cases_penalty_term``.
            clinical_cases_penalty_weight: initial clinical-cases weight.
            all_fits: optional pre-calculated fits, indexed as
                ``all_fits[cluster_id][sim_key]``.
        """
        self.cluster_id = cluster_id
        self.sim_data = sim_data

        # pre-calculated fits (may be None)
        self.all_fits = all_fits

        # reinfection penalty state
        self.reinfection_penalty = reinfection_penalty
        self.reinfection_penalty_term = reinfection_penalty
        self.reinfection_penalty_weight = reinfection_penalty_weight
        self.ref_reinfection_num_points = 0
        self.ref_avg_reinfection_rate = 0.0
        self.sim_avg_reinfection_rate = 0.0

        # correlation statistics; only filled in when a cached 'corr_*' term is loaded below
        self.rho = None
        self.p_val = None

        # clinical cases penalty state
        self.clinical_cases_penalty = clinical_cases_penalty
        self.clinical_cases_penalty_term = clinical_cases_penalty
        self.clinical_cases_penalty_weight = clinical_cases_penalty_weight
        self.ref_clinical_cases_num_points = 0

        self.model = model
        self.sim_key = self.model.get_meta()['sim_key']

        def cached_entry():
            # Looked up on every use (not hoisted) so that a missing
            # cluster / sim key only raises on the paths that need it.
            return self.all_fits[self.cluster_id][self.sim_key]

        # --- reinfection penalty: skipped entirely when reinf_weight is 0 ---
        if reinf_weight != 0:
            channels_in_model_format = sim_report_channels_model_format(reports_channels, self.sim_data)
            if load_reinf_penalty:
                if self.all_fits:
                    self.reinfection_penalty = cached_entry()['reinf_penalty']
                    self.reinfection_penalty_weight = reinf_weight
                else:
                    error_loading_fit_terms('reinfection penalty')
            else:
                self.set_reinfection_penalty(channels_in_model_format['reinfections'], self.cluster_id)

        # --- clinical cases penalty ---
        if load_cc_penalty:
            if not self.all_fits:
                error_loading_fit_terms('clinical cases penalty')
            else:
                # scaling bounds; stay at 0 if no model below provides them
                upper_term = 0
                lower_term = 0

                if 'ls_folded_norm' in cc_penalty_model:
                    self.clinical_cases_penalty = cached_entry()['fit_terms']['cc_penalty']['ls_norm']
                    if scale_fit_terms:
                        upper_term = unroll_term(cached_entry()['max_terms'], fit_terms_types['ls_norm'])
                        lower_term = unroll_term(cached_entry()['min_terms'], fit_terms_types['ls_norm'])
                elif 'ls_norm_not_folded' in cc_penalty_model:
                    self.clinical_cases_penalty = cached_entry()['fit_terms']['cc_penalty']['ls_norm_not_folded']
                    if scale_fit_terms:
                        # Placeholder carried over from the original author: this reuses the
                        # 'ls_norm' path of fit_terms_types. A dedicated entry must be added
                        # if this model is used again; otherwise this branch can be removed.
                        upper_term = unroll_term(cached_entry()['max_terms'], fit_terms_types['ls_norm'])
                        lower_term = unroll_term(cached_entry()['min_terms'], fit_terms_types['ls_norm'])

                # NOTE(review): the compute path below tests for 'ls_folded_no_norm'
                # while this load path tests for 'ls_no_norm' - confirm both are intended.
                if 'ls_no_norm' in cc_penalty_model:
                    self.clinical_cases_penalty = cached_entry()['fit_terms']['cc_penalty']['ls_no_norm']
                    if scale_fit_terms:
                        # Placeholder as above: scaling bounds come from the 'ls_norm' path.
                        upper_term = unroll_term(cached_entry()['max_terms'], fit_terms_types['ls_norm'])
                        lower_term = unroll_term(cached_entry()['min_terms'], fit_terms_types['ls_norm'])

                # The two correlation models are loaded the same way; only the key differs.
                # (The original author's note marks the 'corr_not_folded' scaling path as a
                # placeholder too.)
                for corr_kind in ('corr_folded', 'corr_not_folded'):
                    if corr_kind in cc_penalty_model:
                        self.clinical_cases_penalty = cached_entry()['fit_terms']['cc_penalty'][corr_kind]['penalty']

                        if scale_fit_terms:
                            upper_term = unroll_term(cached_entry()['max_terms'], fit_terms_types[corr_kind])
                            lower_term = unroll_term(cached_entry()['min_terms'], fit_terms_types[corr_kind])

                        self.rho = cached_entry()['fit_terms']['cc_penalty'][corr_kind]['rho']
                        self.p_val = cached_entry()['fit_terms']['cc_penalty'][corr_kind]['p_val']

                # keep the unscaled term before (optionally) scaling the penalty
                self.clinical_cases_penalty_term = self.clinical_cases_penalty
                if scale_fit_terms:
                    # proper bounds should have been found above when scale_fit_terms is True
                    self.clinical_cases_penalty = val_scale(self.clinical_cases_penalty, upper_term, lower_term)

                self.clinical_cases_penalty_weight = cc_weight
        else:
            # compute the penalty from the simulated clinical cases
            if 'ls_folded_norm' in cc_penalty_model:
                self.set_clinical_cases_penalty_by_ls(self.sim_data['cc'], self.cluster_id)
            if 'ls_folded_no_norm' in cc_penalty_model:
                self.set_clinical_cases_penalty_by_ls_no_norm(self.sim_data['cc'], self.cluster_id)
            if 'corr' in cc_penalty_model:
                self.set_clinical_cases_penalty_by_corr(self.sim_data['cc'], self.cluster_id)

        self.set_model_penalties()
Пример #3
0
    def set_reinfection_penalty(self, model_reinfection_rates, cluster_id):
        """Compute the reinfection penalty and its weight for a cluster.

        Reference reinfection data is obtained from
        ``cluster_2_reinfection_rates(cluster_id)``. For the round pairs
        (1,2), (2,3), (4,5) and (5,6) - pair (3,4) is always excluded - that
        are present in ``model_reinfection_rates``, the method:

        1. collects reference and model reinfection rates and the ratio of
           linked people to cluster population, and accumulates the average
           reference / simulated reinfection rates on ``self``;
        2. for pairs with more than 40 linked people, adds a weighted squared
           difference between the ``val_scale``-d reference and model rates to
           ``self.reinfection_penalty``;
        3. sets ``self.reinfection_penalty_weight`` to the number of round
           pairs used divided by ``max_ref_reinfection_points``.

        If there is no reference data, or no usable rate / population
        observations, ``self.reinfection_penalty`` is set to 0.0 and the
        method returns early (the weight is left untouched in that case).

        Args:
            model_reinfection_rates: mapping tested for ``'reinf_<a>_<b>'``
                keys and read via ``'round_<a>_<b>'`` keys.
            cluster_id: cluster whose reference data is used.

        Returns:
            None. Results are stored on ``self``.
        """
        ref_reinfection_rates = cluster_2_reinfection_rates(cluster_id)

        if not ref_reinfection_rates:
            # no reinfection data found so set penalty to 0
            self.reinfection_penalty = 0.0
            return

        # NOTE(review): the result is never used in this method; the call is kept
        # only in case cluster_2_pops has side effects - confirm and remove.
        cluster_pops = cluster_2_pops(cluster_id)

        # consecutive round pairs considered; (3, 4) is deliberately left out
        round_pairs = [(rnd, rnd + 1) for rnd in range(1, 6) if rnd != 3]

        reinfection_feature = []
        pop_feature = []
        total_pop = 0.0
        count_reinf = 0

        # first pass: gather the values needed to find the max / min of each feature
        for rnd_from, rnd_to in round_pairs:
            ref_key = 'reinf_%d_%d' % (rnd_from, rnd_to)
            # NOTE(review): membership is tested with the 'reinf_*' key while the model
            # value is read from the 'round_*' key below - confirm both exist in the input.
            if ref_key not in model_reinfection_rates:
                continue

            cluster_pop = get_cluster_pop_per_rnd_pair(rnd_from, rnd_to)
            num_linked = ref_reinfection_rates[ref_key]['total']
            total_pop += num_linked

            if cluster_pop:
                # list.append returns None, so the list must not be rebound to its result.
                # Float division keeps this ratio consistent with the one scaled in the
                # second pass (which also divides by cluster_pop + 0.0).
                pop_feature.append(num_linked / (cluster_pop + 0.0))

            ref_reinfection_rate = ref_reinfection_rates[ref_key]['reinf'] / (num_linked + 0.0)
            model_reinfection_rate = model_reinfection_rates['round_%d_%d' % (rnd_from, rnd_to)]
            if is_number(ref_reinfection_rate) and is_number(model_reinfection_rate):
                reinfection_feature.append(ref_reinfection_rate)
                reinfection_feature.append(model_reinfection_rate)

                self.sim_avg_reinfection_rate += model_reinfection_rate
                self.ref_avg_reinfection_rate += ref_reinfection_rate

                count_reinf += 1

        if count_reinf != 0:
            self.sim_avg_reinfection_rate = self.sim_avg_reinfection_rate / (count_reinf + 0.0)
            self.ref_avg_reinfection_rate = self.ref_avg_reinfection_rate / (count_reinf + 0.0)

        if not reinfection_feature or not pop_feature:
            # no data observed; penalty is set to 0.0
            self.reinfection_penalty = 0.0
            return

        max_reinf_val = max(reinfection_feature)
        min_reinf_val = min(reinfection_feature)
        max_pop_val = max(pop_feature)
        min_pop_val = min(pop_feature)

        # compute square error between reference and model scaled reinfection features
        # to use as a penalty if there are more than threshold number of people linked
        num_linked_threshold = 40

        self.reinfection_penalty = 0.0
        self.ref_reinfection_num_points = 0.0

        for rnd_from, rnd_to in round_pairs:
            ref_key = 'reinf_%d_%d' % (rnd_from, rnd_to)
            if ref_key not in model_reinfection_rates:
                continue

            num_linked = ref_reinfection_rates[ref_key]['total']
            if not num_linked > num_linked_threshold:
                continue

            cluster_pop = get_cluster_pop_per_rnd_pair(rnd_from, rnd_to)
            if not cluster_pop:
                continue

            ref_reinfection_rate = ref_reinfection_rates[ref_key]['reinf'] / (num_linked + 0.0)
            model_reinfection_rate = model_reinfection_rates['round_%d_%d' % (rnd_from, rnd_to)]
            if not (is_number(ref_reinfection_rate) and is_number(model_reinfection_rate)):
                continue

            self.ref_reinfection_num_points = self.ref_reinfection_num_points + 1

            # weight square error se for this round pair proportional to the number of
            # linked people for this round pair over the total number of linked people at
            # this cluster for all rounds; also multiply by a weight in [0,1] depending on
            # how close the number of linked people for this round pair is to the known
            # population of the cluster at these rounds; the closer the number of linked
            # people the closer the weight to 1; the round pair with closest number of
            # linked people is weighted the most
            rnd_pair_weight = val_scale(num_linked / (cluster_pop + 0.0), max_pop_val, min_pop_val) * num_linked / total_pop

            se = pow(val_scale(ref_reinfection_rate, max_reinf_val, min_reinf_val) - val_scale(model_reinfection_rate, max_reinf_val, min_reinf_val), 2)
            self.reinfection_penalty = self.reinfection_penalty + rnd_pair_weight * se

        # weight the reinfection penalty at this cluster based on how much data is
        # available; number of potentially available reinfection measurements is
        # max_ref_reinfection_points in kariba_settings.py
        self.reinfection_penalty_weight = self.ref_reinfection_num_points / (max_ref_reinfection_points + 0.0)

        return