def canIrecurse(self, data, trueset):
    """
    Decide whether recursion is licensed: a word may only be the target of a
    'recurse_' rule if its F1 score against the proposed true set is high
    enough (> alpha * 2/3); otherwise return False.
    """
    d = [(datum.word, datum.X, datum.Y) for datum in data]
    hyps = [self.value[w] for w in self.all_words()]

    try:
        grammar = hyps[0].grammar
    except:
        return True  # because if it doesn't have a grammar it's a force function

    counts, inx, _ = create_counts(grammar, hyps)

    # Column indices of the 'recurse_' rules, paired with the word they recurse to.
    relinx = [(k[2], inx[k]) for k in inx.keys() if k[1] == 'recurse_']
    if len(relinx) == 0:
        return True

    # Total use of each SET rule across the whole lexicon.
    counts = np.sum(counts['SET'], axis=0)

    F1s = []
    for wi, w in enumerate(self.all_words()):
        wd = [dp for dp in d if dp[0] == w]         # word data
        pw = [dp for dp in trueset if dp[0] == w]   # proposed word data
        pId = [dp for dp in wd if dp in pw]         # proposed word data observed

        precision = float(len(set(pId))) / float(len(pw) + 1e-6)
        recall = float(len(pId)) / float(len(wd) + 1e-6)
        f1 = (2. * precision * recall) / (precision + recall + 1e-6)

        i = [ri[1] for ri in relinx if ri[0] == q(w)]
        F1s.append((counts[i], w, f1, precision, recall))

        if counts[i] >= 1 and f1 <= self.alpha * 2. / 3.:
            return False

    return True
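# A minimal, self-contained sketch (not part of the model) of the smoothed
# precision / recall / F1 computation used in canIrecurse above, run on made-up
# tuples for a single word. The toy word and data tuples are hypothetical; the
# 1e-6 terms are the same division-by-zero guards used above.
def _example_f1():
    wd = [('two', 'a', 'b'), ('two', 'c', 'd')]    # observed data for the word
    pw = [('two', 'a', 'b'), ('two', 'e', 'f')]    # proposed "true" set for the word
    pId = [dp for dp in wd if dp in pw]            # overlap: [('two', 'a', 'b')]

    precision = float(len(set(pId))) / float(len(pw) + 1e-6)       # ~0.5
    recall = float(len(pId)) / float(len(wd) + 1e-6)               # ~0.5
    f1 = (2. * precision * recall) / (precision + recall + 1e-6)   # ~0.5
    return precision, recall, f1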
def compute_reuse_prior(lex):
    # Score rule re-use across the lexicon: for each nonterminal, sum the rule
    # counts over all word hypotheses and score the summed vector with multdir
    # under a uniform alpha vector.
    counts = create_counts(grammar, [lex.value[w] for w in lex.all_words()])[0]

    prior = 0.0
    for k in counts.keys():
        c = np.sum(counts[k], axis=0)
        prior += multdir([c], np.ones(len(c)) / float(len(c)))[0]

    return prior
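# Hedged sketch: compute_reuse_prior relies on a `multdir` helper to score each
# nonterminal's summed rule-count vector under a symmetric Dirichlet. The
# function below is an illustrative stand-in for such a Dirichlet-multinomial
# log-probability (an assumption about what multdir computes, not its actual
# implementation), written against scipy.
import numpy as np
from scipy.special import gammaln

def dirichlet_multinomial_logprob(c, alpha):
    # Log P(c | alpha) for a single count vector c under a Dirichlet-multinomial.
    c = np.asarray(c, dtype=float)
    alpha = np.asarray(alpha, dtype=float)
    n = c.sum()
    return (gammaln(n + 1) - gammaln(c + 1).sum()
            + gammaln(alpha.sum()) - gammaln(alpha).sum()
            + gammaln(c + alpha).sum() - gammaln(n + alpha.sum()))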
print "# Loaded human data" from Model.Data import concept2data print "# Loaded concept2data" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Get the rule count matrices # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Inference.GrammarInference.Precompute import create_counts from Model.Grammar import grammar trees = [h.value for h in hypotheses] nt2counts, sig2idx, prior_offset = create_counts(grammar, trees, log=None) print "# Computed counts for each hypothesis & nonterminal" # print counts # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Build up the info about the data # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concepts = concept2data.keys() NYes = [] NNo = [] # Make NYes and Nno
def do_I_reuse(lex):
    # True if any grammar rule is used more than once by any hypothesis in the lexicon.
    counts = create_counts(grammar, [lex.value[w] for w in lex.all_words()])[0]
    return bool([1 for x in counts.values() if np.any(x > 1)])
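# Toy illustration (hypothetical counts, not from the model) of the reuse test
# in do_I_reuse: counts maps each nonterminal to a per-hypothesis rule-count
# matrix, and any entry greater than 1 means some hypothesis used a rule more
# than once.
import numpy as np

_toy_counts = {'SET': np.array([[1, 0, 2],     # hypothesis 0 uses its third rule twice
                                [0, 1, 0]])}   # hypothesis 1 uses each rule at most once
assert bool([1 for x in _toy_counts.values() if np.any(x > 1)])   # reuse detected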