Пример #1
0
    def probability_of_findings_opt3(self, present_findings, absent_findings, d_i=None, showStatus = False):
        '''
        The probability that a mixture of findings will be present or absent.

        Absorbs the negative findings first: every disease prior is scaled once
        by the product of (1 - Q[f, j]) over the absent findings f related to
        disease j, so the signed sum over subsets F of the present findings
        only has to multiply over the members of F.

        present_findings / absent_findings: sequences of finding indices (first
            index of self.Q). They are concatenated with ``+`` to the subsets
            yielded by _powerset, so they must be of a compatible sequence type
            (presumably lists -- confirm against _powerset).
        d_i: optional disease index (second index of self.Q). When given, that
            disease contributes only its product of (1 - Q[f, d_i]) factors
            instead of the prior-weighted mixture.
        showStatus: when True, print the progress in percent about eight times.

        Returns the signed sum over all subsets of present_findings.
        '''
        local_PD = list(self.PD)

        # The d_i branch below does not read local_PD, so the evidence from the
        # absent findings has to be tracked separately for that disease;
        # otherwise it would silently be dropped.
        absent_factor_d_i = 1

        # Absorb evidence from the negative findings
        for f in absent_findings:
            for j in util.get_diseases_related_to_finding(self.Q, f):
                factor = 1 - self.Q[f, j]
                local_PD[j] *= factor
                if d_i is not None and j == d_i:
                    absent_factor_d_i *= factor

        n_subsets = 2 ** len(present_findings)
        # Guard against a zero step (fewer than 8 subsets), which would make
        # the modulo below raise ZeroDivisionError.
        status_step = max(1, n_subsets // 8)

        res = 0
        for iteration, F in enumerate(_powerset(present_findings)):
            sign = (-1) ** len(F)
            out_prod = 1
            for i in util.parents_of_findings(self.Q, F + absent_findings):
                inn_prod = 1
                for f in F:
                    inn_prod = inn_prod * (1 - self.Q[f, i])

                if i == d_i:
                    out_prod = out_prod * (inn_prod * absent_factor_d_i)
                else:
                    out_prod = out_prod * (inn_prod * local_PD[i] + (self.oneMinusPD(i)))
            res = res + sign * out_prod
            if showStatus and iteration % status_step == 0:
                print(iteration / n_subsets * 100, '%')
        return res
Пример #2
0
    def mple_dis_post_fast_v3(self, diseases, present_findings, absent_findings, return_finding_prob=False):
        '''
        Gives the posterior over each disease in diseases given the findings.

        Implemented with a preprocessing step that caches, for every subset F
        of the present findings, the per-disease product of (1 - Q[f, i]) and
        the full product over all relevant parents. Only the parents of the
        supplied findings are iterated when building these caches.

        diseases: iterable of disease indices to query.
        present_findings / absent_findings: sequences of finding indices; they
            are concatenated with ``+`` and must therefore be of the same
            sequence type.
        return_finding_prob: when truthy, also return the joint probability of
            the findings (the denominator of the posterior).

        Returns a dict mapping each queried disease to its posterior. Diseases
        that are not a parent of any supplied finding keep their prior
        self.PD[i]. If the joint probability of the findings is 0, a message is
        printed and every posterior is set to 0.

        NOTE(review): the per-disease factor is divided out of the cached
        product, so a factor of exactly 0 (only possible when PD[i] == 1) would
        divide by zero -- confirm priors are strictly below 1.
        '''
        res = {}
        relevant_parents = util.parents_of_findings(self.Q, present_findings + absent_findings)
        absent = tuple(absent_findings)
        # Materialise the power set once; it is walked again for every query
        # disease below.
        subsets = list(_powerset_tuple(tuple(present_findings)))

        # Preprocessing step
        P_only_di = {}  # (F, i) -> product of (1 - Q[f, i]) over f in F + absent
        dict2 = {}      # F -> product over relevant parents of the mixed factor
        denn = 0
        for F in subsets:
            findings = F + absent
            F_entry = 1
            for i in relevant_parents:
                entry_F_i = 1
                for f in findings:
                    entry_F_i = entry_F_i * (1 - self.Q[f, i])
                P_only_di[F, i] = entry_F_i
                F_entry = F_entry * (entry_F_i * self.PD[i] + (1 - self.PD[i]))
            # The denominator is the signed sum of the entries in dict2.
            denn = denn + (-1) ** len(F) * F_entry
            dict2[F] = F_entry

        if denn == 0:
            print('Probability of findings is 0. Division by zero.')
            for i in diseases:
                res[i] = 0
        else:
            # Calculate posterior for each query disease
            for i in diseases:
                if i in relevant_parents:
                    prior = self.PD[i]
                    res_sum = 0
                    for F in subsets:
                        sign = (-1) ** len(F)
                        only_di = P_only_di[F, i]
                        # Divide out this disease's mixed factor from the
                        # cached product and multiply in the factor for the
                        # disease being present.
                        e = (dict2[F] / (only_di * prior + (1 - prior))) * only_di
                        res_sum = res_sum + sign * e
                    res[i] = res_sum * prior / denn
                else:
                    res[i] = self.PD[i]

        if return_finding_prob:
            return res, denn
        return res
Пример #3
0
    def update_qlb_parameters(self,
                              Q,
                              PD,
                              transformed_findings,
                              exact_positive,
                              exact_negative,
                              q_params_old=None,
                              maxIter=200,
                              change_limit=1e-7):
        '''
        Iterate the q-parameter update until the parameters stop changing.

        Each iteration calls self.updateQlbParameters_oneIteration, row
        normalises the result, and sums the parameters of every
        (finding, related disease) pair of the transformed findings. The loop
        stops when the relative change of that sum is at most change_limit, or
        after maxIter iterations.

        Q, PD: passed to Quickscore.Quickscore and to the one-iteration update.
        transformed_findings: finding indices whose parameters are updated.
        exact_positive: findings used to compute the disease posteriors.
        exact_negative: forwarded to the one-iteration update only.
        q_params_old: starting parameters; defaults to ones with Q's shape.
        maxIter: maximum number of iterations.
        change_limit: relative-change threshold for convergence.

        Prints the index of the last iteration that ran and returns the final
        parameters.
        '''
        if q_params_old is None:
            # Start from all-ones parameters with the same shape as Q.
            q_params_old = np.ones(Q.shape)

        relevant_diseases = util.parents_of_findings(Q, transformed_findings)
        qs = Quickscore.Quickscore(Q, PD)
        relevant_posteriors = qs.mple_dis_post_fast_v3(relevant_diseases,
                                                       exact_positive, [])

        # Tiny non-zero start value so the first iteration never looks converged
        # unless the sum itself is (numerically) that small.
        q_sumold = 1e-10
        q_updated = q_params_old
        for i in range(maxIter):
            q_updated = self.updateQlbParameters_oneIteration(
                Q, PD, transformed_findings, exact_positive, exact_negative,
                q_updated, relevant_posteriors)
            # Normalize parameters
            q_updated = util.rownormalize_2D_array(q_updated)
            # Variable to keep track of convergence
            q_sum = 0
            for ii in transformed_findings:
                for j in util.get_diseases_related_to_finding(Q, ii):
                    q_sum += q_updated[ii, j]

            # Break out of the loop if the sum of the q-parameters doesn't
            # change significantly. The absolute value matters: without it any
            # decrease of the sum would end the loop immediately. Written as a
            # product so a zero previous sum cannot divide by zero.
            if abs(q_sum - q_sumold) <= change_limit * abs(q_sumold):
                print(i)
                break
            q_sumold = q_sum
        else:
            # Ran out of iterations without converging (skipped for maxIter=0,
            # where no iteration index exists).
            if maxIter > 0:
                print(maxIter - 1)
        return q_updated
Пример #4
0
    def optimize_parameters(self, trans_findings, exact_findings, qs, xi=None):
        '''
        Minimise self.logP_fplus_eps over one parameter per transformed finding.

        trans_findings: findings that get an optimised parameter each.
        exact_findings: findings passed to qs.mple_dis_post_fast_v3 as present
            findings when computing the disease posteriors.
        qs: object providing Q and mple_dis_post_fast_v3.
        xi: optional starting point; defaults to 2 for every parameter.

        Resets self.iterationCounter to 0 before optimising. Every parameter is
        bounded below by 0.0001 and unbounded above; self.callback is passed as
        the optimiser callback.

        Returns (result of optimize.minimize, probability of the exact
        findings as returned by mple_dis_post_fast_v3).
        '''
        self.iterationCounter = 0
        n_params = len(trans_findings)
        local_xi = [2] * n_params if xi is None else xi

        # The posteriors do not depend on the optimised parameters, so compute
        # them once and hand them to the objective through ``args``.
        all_posts, finding_prob = qs.mple_dis_post_fast_v3(
            util.parents_of_findings(qs.Q, trans_findings),
            exact_findings, [],
            return_finding_prob=True)

        obj = optimize.minimize(self.logP_fplus_eps,
                                local_xi,
                                args=(trans_findings, exact_findings, qs,
                                      all_posts),
                                bounds=[(0.0001, None)] * n_params,
                                callback=self.callback)

        return obj, finding_prob
Пример #5
0
    def logP_fplus_eps(self,
                       XI,
                       trans_findings,
                       exact_findings,
                       qs,
                       all_posts_arg=None):
        '''Function to calculate the log probability of the positive findings with some treated exactly and some treated approximately.

        XI: one parameter per entry of trans_findings, in the same order.
        trans_findings: findings treated approximately.
        exact_findings: findings treated exactly; only used to compute the
            posteriors when all_posts_arg is not supplied.
        qs: object providing Q and mple_dis_post_fast_v3.
        all_posts_arg: optional precomputed posteriors, indexable by disease.
            When None they are computed from qs for the parents of
            trans_findings.

        Returns the sum of the negated conjugate terms of XI plus the log of
        the product, over every (finding, related disease) pair, of
        (1 - post) + post * exp(xi * theta_ij).
        '''
        # First term: minus the conjugate of every parameter.
        term1 = sum(-self.conjugate(xi) for xi in XI)

        # ``is None`` rather than ``== None``: the posteriors may be array-like,
        # for which ``==`` compares element-wise.
        if all_posts_arg is None:
            relevant_diseases = util.parents_of_findings(qs.Q, trans_findings)
            all_posts = qs.mple_dis_post_fast_v3(relevant_diseases,
                                                 exact_findings, [])
        else:
            all_posts = all_posts_arg

        # Second term: log of the expectation value.
        term2 = 1
        for xi_index, i in enumerate(trans_findings):
            for j in util.get_diseases_related_to_finding(qs.Q, i):
                post_dj = all_posts[j]
                term2 *= (1 - post_dj) + post_dj * np.exp(
                    XI[xi_index] * self.theta_ij(i, j, qs.Q))

        return term1 + np.log(term2)
Пример #6
0
    def probability_of_findings_opt2(self, present_findings, absent_findings, d_i=None, showStatus = False):
        '''
        Implements equation 11 from paper: The probability that a mixture of findings will be present or absent.
        Only iterates over relevant disease parents.

        present_findings / absent_findings: sequences of finding indices (first
            index of self.Q). They are concatenated with ``+`` to the subsets
            yielded by _powerset, so they must be of a compatible sequence type
            (presumably lists -- confirm against _powerset).
        d_i: optional disease index (second index of self.Q). When given, that
            disease contributes only its product of (1 - Q[f, d_i]) factors
            instead of the prior-weighted mixture.
        showStatus: when True, print the progress in percent about eight times.

        Returns the signed sum over all subsets of present_findings.
        '''
        n_subsets = 2 ** len(present_findings)
        # Guard against a zero step (fewer than 8 subsets), which would make
        # the modulo below raise ZeroDivisionError.
        status_step = max(1, n_subsets // 8)

        res = 0
        for iteration, F in enumerate(_powerset(present_findings)):
            sign = (-1) ** len(F)
            # Same finding set for every parent of this subset; build it once.
            findings = F + absent_findings
            out_prod = 1
            for i in util.parents_of_findings(self.Q, findings):
                inn_prod = 1
                for f in findings:
                    inn_prod = inn_prod * (1 - self.Q[f, i])

                if i == d_i:
                    out_prod = out_prod * inn_prod
                else:
                    out_prod = out_prod * (inn_prod * self.PD[i] + (self.oneMinusPD(i)))
            res = res + sign * out_prod
            if showStatus and iteration % status_step == 0:
                print(iteration / n_subsets * 100, '%')
        return res