def probability_of_findings_opt3(self, present_findings, absent_findings, d_i=None, showStatus=False):
    '''
    The probability that a mixture of findings will be present or absent.
    Absorbs negative findings first.

    The absent findings are folded into a local copy of the priors once, so
    the inner loop only multiplies over the present findings of each subset.

    Parameters:
        present_findings: findings observed present; every subset produced by
            `_powerset` is visited (inclusion-exclusion with alternating sign).
        absent_findings: findings observed absent. Concatenated to each subset
            with `+`, so it must be the same sequence type `_powerset` yields
            (presumably a list - confirm against `_powerset`).
        d_i: optional disease index. For this disease the prior weighting is
            skipped and only the product of (1 - Q[f, d_i]) is used.
        showStatus: when true, print progress roughly eight times.

    Returns:
        The signed sum over all subsets of present findings.
    '''
    local_PD = list(self.PD)
    # Per-disease product of (1 - Q[f, j]) over the absent findings. Kept
    # separately because the d_i branch needs this factor without the prior.
    absent_factor = {}

    # Absorb evidence from the negative findings.
    for f_absent in absent_findings:
        for j in util.get_diseases_related_to_finding(self.Q, f_absent):
            complement = 1 - self.Q[f_absent, j]
            local_PD[j] *= complement
            absent_factor[j] = absent_factor.get(j, 1) * complement

    n_subsets = 2 ** len(present_findings)
    # round() yields 0 for fewer than three present findings; clamp so the
    # modulo below can never divide by zero.
    status_step = max(1, round(n_subsets / 8))

    res = 0
    iteration = 0
    for F in _powerset(present_findings):
        sign = (-1) ** len(F)
        out_prod = 1
        for i in util.parents_of_findings(self.Q, F + absent_findings):
            inn_prod = 1
            for f in F:
                inn_prod = inn_prod * (1 - self.Q[f, i])
            if i == d_i:
                # Same quantity probability_of_findings_opt2 uses for d_i:
                # the product over present-subset AND absent findings.
                out_prod = out_prod * inn_prod * absent_factor.get(i, 1)
            else:
                out_prod = out_prod * (inn_prod * local_PD[i] + (self.oneMinusPD(i)))
        res = res + sign * out_prod
        if showStatus and iteration % status_step == 0:
            print(iteration / n_subsets * 100, '%')
        iteration += 1
    return res
def mple_dis_post_fast_v3(self, diseases, present_findings, absent_findings, return_finding_prob=False):
    '''
    Gives posterior over each disease in diseases given the set of symptoms.

    Implemented with a preprocessing step that caches some probabilities
    beforehand. This revision only iterates over relevant parents in the
    making of the dictionary.

    Parameters:
        diseases: the query diseases; each becomes a key of the result.
        present_findings: findings observed present. Concatenated with
            `absent_findings` using `+`, so both must be the same sequence
            type (presumably lists - confirm against callers).
        absent_findings: findings observed absent.
        return_finding_prob: when true, also return the denominator.

    Returns:
        A dict mapping each query disease to its posterior, or the tuple
        (dict, denominator) when `return_finding_prob` is true. If the
        denominator is 0 a message is printed and every posterior is 0.
    '''
    relevant_parents = util.parents_of_findings(self.Q, present_findings + absent_findings)
    absent = tuple(absent_findings)
    # Materialised once so both passes see exactly the same subsets.
    subsets = list(_powerset_tuple(tuple(present_findings)))

    # Preprocessing step.
    # P_only_di[F, i]: product of (1 - Q[f, i]) over f in F + absent findings.
    # dict2[F]: product over all relevant parents of the per-disease factor.
    P_only_di = {}
    dict2 = {}
    denn = 0
    for F in subsets:
        F_entry = 1
        for i in relevant_parents:
            entry_F_i = 1
            for f in F + absent:
                entry_F_i = entry_F_i * (1 - self.Q[f, i])
            P_only_di[F, i] = entry_F_i
            F_entry = F_entry * (entry_F_i * self.PD[i] + (1 - self.PD[i]))
        # The denominator is the sum (with correct sign) of the entries in dict2.
        denn = denn + (-1) ** len(F) * F_entry
        dict2[F] = F_entry

    res = {}
    if denn == 0:
        print('Probability of findings is 0. Division by zero.')
        for i in diseases:
            res[i] = 0
    else:
        parent_lookup = set(relevant_parents)
        for i in diseases:
            if i not in parent_lookup:
                # Not a parent of any observed finding: the prior is returned.
                res[i] = self.PD[i]
                continue
            prior = self.PD[i]
            res_sum = 0
            for F in subsets:
                sign = (-1) ** len(F)
                p_given_di = P_only_di[F, i]
                own_factor = p_given_di * prior + (1 - prior)
                if own_factor == 0:
                    # For probabilities in [0, 1] this happens only when
                    # prior == 1 and p_given_di == 0, so the term (other
                    # factors times p_given_di) is 0; skip the 0/0 division.
                    continue
                # Divide out this disease's own factor and multiply with the
                # factor that conditions on the disease instead.
                e = (dict2[F] / own_factor) * p_given_di
                res_sum = res_sum + sign * e
            res[i] = res_sum * prior / denn

    if return_finding_prob:
        return res, denn
    return res
def update_qlb_parameters(self, Q, PD, transformed_findings, exact_positive, exact_negative,
                          q_params_old=None, maxIter=200, change_limit=1e-7):
    '''
    Repeat the one-iteration q-parameter update until the parameters settle.

    After each call to `self.updateQlbParameters_oneIteration` the parameters
    are passed through `util.rownormalize_2D_array`, and the sum of the
    entries q[i, j] over every transformed finding i and its related diseases
    j is tracked. Iteration stops once the relative change of that sum is
    below `change_limit`, or after `maxIter` iterations.

    Parameters:
        Q, PD: passed to `Quickscore.Quickscore` and to the update step.
        transformed_findings: findings whose q-parameters are updated.
        exact_positive: findings passed as present findings when the
            posteriors are computed.
        exact_negative: forwarded to the one-iteration update only.
        q_params_old: starting parameters; defaults to `np.ones(Q.shape)`.
        maxIter: upper bound on the number of iterations.
        change_limit: relative-change threshold for convergence.

    Returns:
        The q-parameters after the last performed iteration.
    '''
    if q_params_old is None:
        # Initialize q-parameters.
        q_params_old = np.ones(Q.shape)

    relevant_diseases = util.parents_of_findings(Q, transformed_findings)
    qs = Quickscore.Quickscore(Q, PD)
    # NOTE(review): the posteriors are computed with an empty list of absent
    # findings, not `exact_negative` - confirm this is intended.
    relevant_posteriors = qs.mple_dis_post_fast_v3(relevant_diseases, exact_positive, [])

    q_sumold = 1e-10
    q_updated = q_params_old
    for i in range(maxIter):
        q_updated = self.updateQlbParameters_oneIteration(
            Q, PD, transformed_findings, exact_positive, exact_negative,
            q_updated, relevant_posteriors)
        # Normalize parameters.
        q_updated = util.rownormalize_2D_array(q_updated)

        # Variable to keep track of convergence.
        q_sum = 0
        for ii in transformed_findings:
            for j in util.get_diseases_related_to_finding(Q, ii):
                q_sum += q_updated[ii, j]

        # Break out of the loop if the sum of q parameters doesn't change
        # significantly. The change is compared in absolute value so that a
        # decreasing sum is not mistaken for convergence; a zero previous sum
        # is handled separately to avoid dividing by zero.
        if q_sumold == 0:
            converged = q_sum == 0
        else:
            converged = abs(q_sum / q_sumold - 1) < change_limit
        if converged:
            print(i)
            break
        q_sumold = q_sum
        if i == maxIter - 1:
            print(i)
    return q_updated
def optimize_parameters(self, trans_findings, exact_findings, qs, xi=None):
    '''
    Minimize `self.logP_fplus_eps` over the xi parameters.

    The disease posteriors given the exact findings are computed once up
    front and handed to the objective through `args`, so the objective does
    not recompute them on every evaluation.

    Parameters:
        trans_findings: the findings that get one xi parameter each.
        exact_findings: findings passed as present findings to
            `qs.mple_dis_post_fast_v3`.
        qs: object providing `Q` and `mple_dis_post_fast_v3`.
        xi: optional starting point; defaults to 2 for every parameter.

    Returns:
        (obj, finding_prob): the result of `optimize.minimize` and the
        finding probability returned by `qs.mple_dis_post_fast_v3`.
    '''
    self.iterationCounter = 0
    n_params = len(trans_findings)
    local_xi = [2] * n_params if xi is None else xi

    relevant_diseases = util.parents_of_findings(qs.Q, trans_findings)
    all_posts, finding_prob = qs.mple_dis_post_fast_v3(
        relevant_diseases, exact_findings, [], return_finding_prob=True)

    obj = optimize.minimize(
        self.logP_fplus_eps,
        local_xi,
        args=(trans_findings, exact_findings, qs, all_posts),
        # Every xi has a lower bound of 0.0001 and no upper bound.
        bounds=[(0.0001, None)] * n_params,
        callback=self.callback)
    return obj, finding_prob
def logP_fplus_eps(self, XI, trans_findings, exact_findings, qs, all_posts_arg=None):
    '''
    Function to calculate the log probability of the positive findings with
    some treated exactly and some treated approximately.

    Parameters:
        XI: one parameter per entry of `trans_findings`, indexed by position.
        trans_findings: the findings paired with the XI parameters.
        exact_findings: findings passed as present findings when the
            posteriors have to be computed here.
        qs: object providing `Q` and `mple_dis_post_fast_v3`.
        all_posts_arg: optional precomputed posteriors, indexable by disease.
            When None they are computed via `qs.mple_dis_post_fast_v3`.

    Returns:
        The sum of the two terms: minus the summed `self.conjugate(xi)`, plus
        the log of the product of the per-(finding, disease) factors.
    '''
    # First term: negated sum of the conjugate values.
    term1 = -sum(self.conjugate(xi) for xi in XI)

    # `is None` rather than `== None`: an array argument would otherwise be
    # compared element-wise and fail in the `if`.
    if all_posts_arg is None:
        relevant_diseases = util.parents_of_findings(qs.Q, trans_findings)
        all_posts = qs.mple_dis_post_fast_v3(relevant_diseases, exact_findings, [])
    else:
        all_posts = all_posts_arg

    # Second term: log of the expectation value. The logs of the factors are
    # summed instead of taking the log of their product, so a long product
    # cannot overflow or underflow before the log is applied.
    term2 = 0.0
    for xi_index, i in enumerate(trans_findings):
        for j in util.get_diseases_related_to_finding(qs.Q, i):
            post_dj = all_posts[j]
            factor = (1 - post_dj) + post_dj * np.exp(
                XI[xi_index] * self.theta_ij(i, j, qs.Q))
            term2 += np.log(factor)
    return term1 + term2
def probability_of_findings_opt2(self, present_findings, absent_findings, d_i=None, showStatus=False):
    '''
    Implements equation 11 from paper: The probability that a mixture of
    findings will be present or absent. Only iterates over relevant disease
    parents.

    Parameters:
        present_findings: findings observed present; every subset produced by
            `_powerset` is visited (inclusion-exclusion with alternating sign).
        absent_findings: findings observed absent. Concatenated to each subset
            with `+`, so it must be the same sequence type `_powerset` yields
            (presumably a list - confirm against `_powerset`).
        d_i: optional disease index. For this disease the prior weighting is
            skipped and only the product of (1 - Q[f, d_i]) is used.
        showStatus: when true, print progress roughly eight times.

    Returns:
        The signed sum over all subsets of present findings.
    '''
    n_subsets = 2 ** len(present_findings)
    # round() yields 0 for fewer than three present findings; clamp so the
    # modulo below can never divide by zero.
    status_step = max(1, round(n_subsets / 8))

    res = 0
    iteration = 0
    for F in _powerset(present_findings):
        sign = (-1) ** len(F)
        findings = F + absent_findings
        out_prod = 1
        for i in util.parents_of_findings(self.Q, findings):
            inn_prod = 1
            for f in findings:
                inn_prod = inn_prod * (1 - self.Q[f, i])
            if i == d_i:
                out_prod = out_prod * inn_prod
            else:
                out_prod = out_prod * (inn_prod * self.PD[i] + (self.oneMinusPD(i)))
        res = res + sign * out_prod
        if showStatus and iteration % status_step == 0:
            print(iteration / n_subsets * 100, '%')
        iteration += 1
    return res