def initializeMu(self):
    """Initialize the per-object candidate distributions ``self.mu``.

    Each object's distribution is seeded from its gamma hyper-parameters
    (``gamma[e][v] - 1.0`` per candidate, i.e. a Dirichlet-style mean),
    normalized in place, then the mu numerator/denominator caches are built.
    """
    self.mu = []
    for obj in range(self.numObjects):
        candidate_count = len(self.candidates[obj])
        # Seed weight for each candidate value from its hyper-parameter.
        weights = [self.gamma[obj][idx] - 1.0 for idx in range(candidate_count)]
        normalize1d(weights)
        self.mu.append(weights)
    self.initializeMuNumDen()
def EstepQuality(self, claimsByObjI, quality, estepParam, pop=False):
    # E-step: for every claim (source s claiming value vs on object e), fill
    # estepParam[e][i] with the 3-state responsibility vector, weighted by the
    # source's quality, then normalize it in place.
    #
    # State semantics (inferred from the branch structure — confirm against the
    # model definition elsewhere in the file):
    #   ptmp[0]: claimed value equals candidate v (exact match)
    #   ptmp[1]: claimed value is an ancestor of candidate v (hierarchy case)
    #   ptmp[2]: claimed value is unrelated/wrong
    # When pop=True, hierarchy/mismatch mass is spread by popularity weights
    # (pop_weight1/pop_weight2) instead of uniformly over parent/non-parent counts.
    for e in range(self.numObjects):
        numValues = len(self.candidates[e])
        # Objects with a single candidate carry no information; optionally skip.
        if self.skip_single_cand and self.single_cand[e]:
            continue
        for i, (s, vs) in enumerate(claimsByObjI[e]):
            # Reuse the caller-provided buffer; zero it before accumulating.
            ptmp = estepParam[e][i]
            ptmp.fill(0.0)
            if self.descendant_ancestor_relationship[e]:
                # Object has a value hierarchy: distinguish ancestor vs unrelated.
                node_vs = self.matching_nodes[e][vs]
                for v in range(numValues):
                    if vs == v:
                        ptmp[0] = self.mu[e][v]
                    elif pop:
                        node_v = self.matching_nodes[e][v]
                        if node_vs.isAncestorof(node_v):
                            # Popularity-weighted ancestor mass.
                            ptmp[1] += (self.mu[e][v] * self.pop_weight1[e][v][vs])
                        else:
                            ptmp[2] += (self.mu[e][v] * self.pop_weight2[e][v][vs])
                    else:
                        node_v = self.matching_nodes[e][v]
                        if node_vs.isAncestorof(node_v):
                            # Uniform split over the candidate's parent count.
                            ptmp[1] += (self.mu[e][v] / self.count_parents[e][v])
                        else:
                            # Uniform split over the remaining non-parent values.
                            ptmp[2] += (self.mu[e][v] / (self.count_values[e] - self.count_parents[e][v] - 1))
            else:
                # Flat value set: no ancestor state, so the exact-match mass is
                # mirrored into states 0 and 1 and everything else is "wrong".
                for v in range(numValues):
                    if vs == v:
                        ptmp[0] = self.mu[e][v]
                        ptmp[1] = self.mu[e][v]
                    elif pop:
                        ptmp[2] += (self.mu[e][v] * self.pop_weight2[e][v][vs])
                    else:
                        ptmp[2] += (self.mu[e][v] / (self.count_values[e] - self.count_parents[e][v] - 1))
            # Weight each state by the source's current quality, then normalize
            # so ptmp is the posterior responsibility for this claim.
            for k in range(3):
                ptmp[k] *= quality[s][k]
            normalize1d(ptmp)
def initializePopularity(self):
    """Build per-object popularity structures from the observed claims.

    Populates:
      * ``self.pop_weight1[e][v][vs]`` — normalized count of claims on object
        ``e`` whose claimed value ``vs`` is an ancestor of candidate ``v``.
      * ``self.pop_weight2[e][v][vs]`` — same for non-ancestor (unrelated) claims.
      * ``self.popularity[e][i]`` — fraction of all claims+answers on object
        ``e`` that named candidate ``i`` (uses the stored count ``cand[1]``).
    """
    self.pop_weight1 = []
    self.pop_weight2 = []
    for e in range(self.numObjects):
        numValues = len(self.candidates[e])
        pw1 = np.zeros((numValues, numValues))
        pw2 = np.zeros((numValues, numValues))
        for v in range(numValues):
            node_v = self.matching_nodes[e][v]
            case1 = False
            case2 = False
            # Count how often each other claimed value vs relates to candidate v.
            # (source identity and claim index are irrelevant here)
            for _, vs in self.claimsByObjI[e]:
                if vs == v:
                    continue
                node_vs = self.matching_nodes[e][vs]
                if node_vs.isAncestorof(node_v):
                    pw1[v][vs] += 1.0
                    case1 = True
                else:
                    pw2[v][vs] += 1.0
                    case2 = True
            # Only normalize rows that received any mass; normalizing an
            # all-zero row would divide by zero.
            if case1:
                normalize1d(pw1[v])
            if case2:
                normalize1d(pw2[v])
        self.pop_weight1.append(pw1)
        self.pop_weight2.append(pw2)
    popularity = []
    for e in range(self.numObjects):
        # Total observations on this object: source claims plus worker answers.
        numClaims = float(len(self.claimsByObjI[e]) + len(self.answersByObjI[e]))
        pg = np.zeros(len(self.candidates[e]))
        for i, cand in enumerate(self.candidates[e]):
            # cand[1] is presumably the claim count for this candidate — the
            # commented-out alternative in initializeMu suggests candidates are
            # (name, count) pairs; confirm against where candidates are built.
            pg[i] = cand[1] / numClaims
        popularity.append(pg)
    self.popularity = popularity
def MstepQuality(self, quality, prior, claimsByObjI, estepPrams, numSources, pop):
    """M-step: re-estimate each source's 3-state quality vector in place.

    Accumulates the E-step responsibilities ``estepPrams[e][i]`` over every
    claim made by each source, then normalizes each source's quality row.

    Args:
        quality: per-source [numStates] vectors, updated in place.
        prior: per-source Dirichlet prior; applied as ``prior - 1`` only when
            ``self.regularization`` is on.
        claimsByObjI: per-object list of (source, claimed-value-index) claims.
        estepPrams: per-object, per-claim responsibility vectors from the E-step.
        numSources: number of sources (rows of ``quality``).
        pop: unused here; kept for signature parity with EstepQuality.

    NOTE(review): when ``self.regularization`` is False, ``quality`` is not
    reset before accumulation — callers appear responsible for zeroing it;
    confirm at the call site.
    """
    numStates = 3
    if self.regularization:
        # Seed with the Dirichlet prior (pseudo-counts minus one).
        for s in range(numSources):
            for b in range(numStates):
                quality[s][b] = prior[s][b] - 1.0
    for e in range(self.numObjects):
        if self.skip_single_cand and self.single_cand[e]:
            continue
        # Add each claim's responsibility mass to its source's tally.
        for i, (s, vs) in enumerate(claimsByObjI[e]):
            for b in range(numStates):
                quality[s][b] += estepPrams[e][i][b]
    for s in range(numSources):
        normalize1d(quality[s])
def initializeConfidences(self):
    """Randomly initialize source confidences (phis) and worker confidences (psiw).

    Each 3-state vector is drawn from uniform ranges that bias state 0
    (correct) above states 1 and 2, then normalized to sum to one.
    """
    for s in range(self.numSources):
        # Removed dead code: fixed 0.5/0.3/0.2 seeds were unconditionally
        # overwritten by the random draws below (leftover #''' toggle block).
        self.phis[s][0] = np.random.uniform(0.6, 0.7)
        self.phis[s][1] = np.random.uniform(0.2, 0.3)
        self.phis[s][2] = np.random.uniform(0.1, 0.2)
        normalize1d(self.phis[s])
    for w in range(self.numWorkers):
        self.psiw[w][0] = np.random.uniform(0.65, 0.75)
        self.psiw[w][1] = np.random.uniform(0.15, 0.25)
        self.psiw[w][2] = np.random.uniform(0.15, 0.25)
        normalize1d(self.psiw[w])
def regiWorkers(self, numNewWorkers):
    """Register ``numNewWorkers`` additional workers.

    Delegates base registration to the superclass, refreshes priors, and
    appends a randomly-initialized 3-state confidence vector to ``self.psiw``
    for each new worker.
    """
    super(TDH_meta, self).regiWorkers(numNewWorkers)
    self.initPriors()
    # Removed dead code: a scan for the most-confident source (maxs/maxconf)
    # whose result was never used.
    # Sanity check hoisted out of the loop — states_wrk is loop-invariant.
    # (Only 3 states are initialized below; more would be left at 0.0.)
    if self.states_wrk > 3:
        print("num state err")
    for w in range(numNewWorkers):
        pw = [0.0] * self.states_wrk
        pw[0] = np.random.uniform(0.7, 0.8)
        pw[1] = np.random.uniform(0.2, 0.3)
        pw[2] = np.random.uniform(0.1, 0.2)
        normalize1d(pw)
        self.psiw.append(pw)
def get_estimated_distribution_matrix(self, w):
    # For worker w, estimate a posterior candidate distribution for every
    # object the worker has NOT yet answered, by (1) building the worker's
    # answer model p(a | t) per true value t, (2) sampling a hypothetical
    # answer from the marginal p(a), and (3) forming the posterior over t
    # given that sampled answer. Returns a list of (object_index, distribution).
    answered = set([answer[0] for answer in self.answersByWorkerI[w]])
    estimated_distribution_list = []
    for e in range(self.numObjects):
        if e in answered:
            continue
        estmated_distribution = np.zeros(len(self.mu[e]))
        numCand = len(estmated_distribution)
        # Single-candidate objects carry no information; skip them.
        if numCand == 1:
            continue
        #step 1
        # p_at_rev[t][a]: probability worker w answers a when the truth is t,
        # built from the worker's 3-state confidence psiw[w]
        # (0: exact, 1: ancestor/hierarchy, 2: other — mirrors EstepQuality).
        p_at_rev = np.zeros((numCand, numCand))
        # p_a[a]: marginal answer probability, mixing over mu[e][t].
        p_a = np.zeros((numCand))
        for t in range(numCand):
            node_t = self.matching_nodes[e][t]
            if self.descendant_ancestor_relationship[e]:
                # Hierarchical value set: ancestor answers get state-1 mass.
                for a in range(numCand):
                    if a == t:
                        p_tmp = self.psiw[w][0]
                    elif self.popWrk:
                        node_a = self.matching_nodes[e][a]
                        if node_a.isAncestorof(node_t):
                            # Popularity-weighted split of the ancestor mass.
                            p_tmp = self.psiw[w][1] * self.pop_weight1[e][
                                t][a]
                        else:
                            p_tmp = self.psiw[w][2] * self.pop_weight2[e][
                                t][a]
                    else:
                        node_a = self.matching_nodes[e][a]
                        if node_a.isAncestorof(node_t):
                            # Uniform split over t's parents.
                            p_tmp = self.psiw[w][1] / (
                                self.count_parents[e][t])
                        else:
                            # Uniform split over remaining non-parent values.
                            p_tmp = self.psiw[w][2] / (
                                self.count_values[e] -
                                self.count_parents[e][t] - 1)
                    p_at_rev[t][a] = p_tmp
            else:
                # Flat value set: exact answer absorbs states 0 and 1.
                for a in range(numCand):
                    if a == t:
                        p_tmp = self.psiw[w][0] + self.psiw[w][1]
                    elif self.popWrk:
                        p_tmp = self.psiw[w][2] * self.pop_weight2[e][t][a]
                    else:
                        p_tmp = self.psiw[w][2] / (
                            self.count_values[e] -
                            self.count_parents[e][t] - 1)
                    p_at_rev[t][a] = p_tmp
            # Make row t a proper conditional distribution over answers.
            normalize1d(p_at_rev[t])
            # Accumulate the answer marginal: p(a) += p(a|t) * mu(t).
            for a in range(numCand):
                p_a[a] += p_at_rev[t][a] * self.mu[e][t]
        # Diagnostic: p_a should sum to ~1 (np.random.choice requires it).
        if sum(p_a) > 1.01 or sum(p_a) < 0.99:
            print("sum")
        # Sample a hypothetical answer index from the marginal.
        li = np.random.choice(range(numCand), p=p_a)
        #step 2
        # Posterior over true values given the sampled answer li:
        # p(t | li) ∝ mu(t) * p(li | t).
        for t in range(numCand):
            estmated_distribution[t] = self.mu[e][t] * p_at_rev[li][t]
        normalize1d(estmated_distribution)
        estimated_distribution_list.append((e, estmated_distribution))
    return estimated_distribution_list