def print_relation_stats(self, output_file):
    """Write a summary of the relations in ``self.corpus_dict``.

    ``self.corpus_dict`` is read as a mapping from a key to a list of
    relations. Three statistics are collected and written to
    ``output_file`` between a header and a footer produced by ``tools``:

    * ``relation_counts``: how many relations share each value of
      ``relation[0]`` (presumably the relation type -- confirm against
      the corpus loader),
    * ``relation_count``: the total number of relations seen,
    * ``relation_hist``: how many keys have a relation list of a given
      length.
    """
    per_label = {}
    per_length = {}
    total = 0

    for key in self.corpus_dict:
        relations = self.corpus_dict[key]

        size = len(relations)
        per_length[size] = per_length.get(size, 0) + 1

        for relation in relations:
            total += 1
            label = relation[0]
            per_label[label] = per_label.get(label, 0) + 1

    title = "Relation Distribution"
    tools.print_header(output_file, title)
    for caption, value in (
            ("relation_counts: ", per_label),
            ("relation_count: ", total),
            ("relation_hist: ", per_length)):
        output_file.write(caption + str(value) + " \n")
    tools.print_footer(output_file, title)
def run(self):
    """Run the jump test and record the resulting scores.

    Words are sampled from ``self.corpus_dict`` in batches. For every
    sampled word that has at least one relation whose first element is
    a key of ``self.relation_type_vectors``, one such relation is picked
    at random and ``self.test_link`` is asked to follow it from the
    word. Every target the word reaches through the same relation type
    is passed along as an acceptable answer. Sampling repeats until
    ``self.num_trials`` tests have been run.

    A summary with the raw count of valid answers and the number of
    tests is written to ``self.output_file``. The fractions of correct,
    valid and exact answers are stored through ``self.add_data`` under
    ``jump_score_correct``, ``jump_score_valid`` and
    ``jump_score_exact``.

    Edge cases handled here:

    * no test could be run (``num_trials`` is 0, or no word has a
      testable relation): all three scores are reported as 0.0 instead
      of dividing by zero or looping forever;
    * more trials requested than there are words: each batch is capped
      at the corpus size, so ``random.sample`` is never asked for more
      items than exist and the remaining trials come from later batches.
    """
    out = self.output_file

    testNumber = 0
    correct_score = 0
    valid_score = 0
    exact_score = 0

    corpus_size = len(self.corpus_dict)

    # Without a single testable relation the sampling loop below could
    # never make progress, so find that out up front. This scan does
    # not touch the random number generator.
    has_testable = any(
        r[0] in self.relation_type_vectors
        for key in self.corpus_dict
        for r in self.corpus_dict[key])

    while has_testable and testNumber < self.num_trials:
        # Identical to the original request whenever it was satisfiable.
        batch_size = min(self.num_trials - testNumber, corpus_size)
        words = self.rng.sample(self.corpus_dict, batch_size)

        for word in words:
            testableLinks = [
                r for r in self.corpus_dict[word]
                if r[0] in self.relation_type_vectors]

            if not testableLinks:
                continue

            prompt = self.rng.sample(testableLinks, 1)[0]

            tools.print_header(out, "New Jump Test")

            answers = [
                r[1] for r in self.corpus_dict[word]
                if r[0] == prompt[0]]
            relation_vec = self.relation_type_vectors[prompt[0]]

            _, correct, valid, exact = self.test_link(
                relation_vec, None, word, prompt[1], out,
                num_relations=len(testableLinks), answers=answers)

            # Same text as ``print >> out, "Correct goal? ", correct``.
            out.write("Correct goal?  %s\n" % (correct,))
            out.write("Valid answers?  %s\n" % (valid,))
            out.write("Exact goal?  %s\n" % (exact,))

            testNumber += 1

            if correct:
                correct_score += 1
            if valid:
                valid_score += 1
            if exact:
                exact_score += 1

    # print the score (raw counts; fractions are computed afterwards)
    title = "Jump Test Summary"
    tools.print_header(out, title)
    out.write("valid_score," + str(valid_score) + ":\n")
    out.write("totaltests," + str(testNumber) + ":\n")
    tools.print_footer(out, title)

    if testNumber > 0:
        total = float(testNumber)
        correct_score = correct_score / total
        valid_score = valid_score / total
        exact_score = exact_score / total
    else:
        correct_score = valid_score = exact_score = 0.0

    print("score," + str(correct_score))

    self.add_data("jump_score_correct", correct_score)
    self.add_data("jump_score_valid", valid_score)
    self.add_data("jump_score_exact", exact_score)
def print_config(self):
    """Write the configuration of this test to ``self.output_file``.

    Between a "WordnetTest Config" header and footer this writes the
    number of trials and the test threshold, lets the corpus and the
    extractor print their own configuration, writes the name of the
    concrete class (without a trailing newline) and finally calls the
    ``_print_config`` hook.
    """
    out = self.output_file
    title = "WordnetTest Config"

    tools.print_header(out, title)

    out.write("".join(["num_trials : ", str(self.num_trials), "\n"]))
    out.write(
        "".join(["test_threshold : ", str(self.test_threshold), "\n"]))

    self.corpus.print_config(out)
    self.extractor.print_config(out)

    class_name = self.__class__.__name__
    out.write(class_name)
    self._print_config()

    tools.print_footer(out, title)
def run(self):
    """Run the sentence test and record one score per role depth.

    For each of ``self.num_trials`` trials a sentence (a mapping from
    role to synset) is generated. When ``self.deep`` is set, one
    randomly chosen role is replaced by a second generated sentence
    whose roles are prefixed with the replaced role. The sentence is
    encoded as a single normalized HRR: the sum over roles of the role
    tag (the product of the role HRRs named in the role) times the HRR
    of the filler's id vector.

    Each role is then queried through ``self.test_link`` using its tag
    vector, and the answer is scored by depth (``len(role)``). When
    ``self.short`` is set only the first role of each sentence is
    queried. Per-sentence fractions are accumulated per depth; at the
    end the per-depth average over ``self.num_trials`` is written to
    ``self.output_file``, printed, and stored through ``self.add_data``
    as ``sentence_score_<depth>``.
    """

    def emit(*parts):
        # Produces the text ``print >> self.output_file, ...`` would:
        # the parts separated by single spaces, followed by a newline.
        self.output_file.write(" ".join(str(p) for p in parts) + "\n")

    self.dimension = len(self.id_vectors.values()[0])
    self.role_hrrs = self.create_role_hrrs()
    self.pos_map = self.create_pos_map()

    totals_by_depth = defaultdict(float)

    for _ in range(self.num_trials):
        title = "New Sentence Test" + ("- Deep" if self.deep else "")
        tools.print_header(self.output_file, title)

        sentence = self.generate_sentence()

        if self.deep:
            # Swap one role for a whole embedded sentence.
            embed = self.rng.sample(sentence.keys(), 1)[0]
            inner = self.generate_sentence()

            del sentence[embed]

            for inner_role in inner.keys():
                sentence[embed + inner_role] = inner[inner_role]

        # Bind every filler to its role tag and sum the bindings; keep
        # each tag vector to use later as the query for that role.
        tag_vectors = {}
        sentence_hrr = HRR(data=np.zeros(self.dimension))

        for role in sentence:
            parts = [self.role_hrrs[x] for x in role]
            tag = reduce(lambda left, right: left * right, parts)

            filler = sentence[role]
            sentence_hrr += tag * HRR(data=self.id_vectors[filler])

            tag_vectors[role] = tag.v

        sentence_hrr.normalize()
        sentence_vector = sentence_hrr.v

        emit("Roles in sentence:")
        emit(sentence)

        # ask about parts of the sentence
        hits = defaultdict(float)
        asked = defaultdict(float)

        for role in sentence.keys():
            answer = sentence[role]

            self.current_start_key = None
            self.current_target_keys = [answer]
            self.current_num_relations = len(sentence)

            emit("\nTesting ", role)

            result, correct, valid, exact = self.test_link(
                tag_vectors[role], sentence_vector, None, answer,
                output_file=self.output_file, return_vec=False,
                num_relations=len(sentence), answers=[answer])

            depth = len(role)

            if not correct:
                emit("Incorrect.")
            else:
                hits[depth] += 1
                emit("Correct.")

            asked[depth] += 1

            if self.short:
                break

        for d in asked:
            fraction = hits[d] / asked[d]

            emit(
                "Percent correct for current sentence at depth %d: %f"
                % (d, fraction))

            totals_by_depth[d] += fraction

    for d in totals_by_depth:
        print("Sentence test score at depth %d: %f out of %d"
              % (d, totals_by_depth[d], self.num_trials))

        percent = totals_by_depth[d] / self.num_trials

        title = "Sentence Test Summary - Depth = %d" % d
        tools.print_header(self.output_file, title)
        emit("Correct: ", totals_by_depth[d])
        emit("Total: ", self.num_trials)
        emit("Percent: ", percent)
        tools.print_footer(self.output_file, title)

        self.add_data("sentence_score_%d" % d, percent)
def run(self):
    """Check whether word A is a type of word B.

    Tests ``p = self.num_trials`` positive pairs, in which the target
    is among the parents of the start word, and ``n`` negative pairs,
    in which it is not. ``n`` equals ``p`` when ``self.do_neg`` is set
    and is 0 otherwise. ``self.relation_types`` specifies which
    relationships ``self.findAllParents`` follows during the search.

    Every pair is searched twice through ``self.findAllParents``:
    first with its fourth argument False ("symbolically", for
    comparison) and then with it True. Only the second result is
    scored. A negative pair scores when that result is -1 and a
    positive pair scores when it is greater than -1.

    The summary (false positives, correct rejections, hits, misses and
    totals) is written to ``self.output_file`` and printed. The overall
    fraction of correct decisions is stored through ``self.add_data``
    as ``hierarchical_score``.

    Returns the result of the last scored search, or None when no pair
    was tested. With ``num_trials`` equal to 0 the score is reported
    as 0.0 instead of raising.

    NOTE(review): the pair-collection loops keep sampling until enough
    pairs are found, so they do not terminate if the corpus cannot
    supply them (for example, if no word has any parent).
    """
    rtype = self.relation_types

    p = self.num_trials
    n = p if self.do_neg else 0

    p_count = 0
    n_count = 0
    p_score = 0
    n_score = 0

    negative_pairs = []
    positive_pairs = []

    # Stays None if there are no pairs to test, so the final return
    # never refers to an unbound name.
    result = None

    # find positive and negative pairs
    while n_count < n:
        start = self.rng.sample(self.corpus_dict, 1)[0]
        target = self.rng.sample(self.corpus_dict, 1)[0]

        parent_list = self.findAllParents(
            start, None, rtype, False, print_output=False)

        pair = (start, target)

        if target not in parent_list:
            negative_pairs.append(pair)
            n_count += 1
        elif p_count < p:
            # A random pair that happens to be positive is kept as long
            # as positives are still needed; otherwise it is dropped.
            positive_pairs.append(pair)
            p_count += 1

    while p_count < p:
        start = self.rng.sample(self.corpus_dict, 1)[0]

        parent_list = self.findAllParents(
            start, None, rtype, False, print_output=False)

        if len(parent_list) == 0:
            continue

        target = self.rng.sample(parent_list, 1)[0]
        positive_pairs.append((start, target))
        p_count += 1

    # now run the tests
    title = "New Hierarchical Test - Negative"
    for start, target in negative_pairs:
        tools.print_header(self.output_file, title)

        # do it symbolically first, for comparison
        self.findAllParents(
            start, target, rtype, False, print_output=True)

        result = self.findAllParents(
            start, target, rtype, True, print_output=True)

        if result == -1:
            n_score += 1

    title = "New Hierarchical Test - Positive"
    for start, target in positive_pairs:
        tools.print_header(self.output_file, title)

        # do it symbolically first, for comparison
        self.findAllParents(
            start, target, rtype, False, print_output=True)

        result = self.findAllParents(
            start, target, rtype, True, print_output=True)

        if result > -1:
            p_score += 1

    # print the score: the same lines go to the output file and stdout
    summary = [
        "Start trial:\n",
        "FP," + str(n - n_score) + "\n",
        "CR," + str(n_score) + "\n",
        "hits," + str(p_score) + "\n",
        "misses," + str(p - p_score) + "\n",
        "TS," + str(n_score + p_score) + " out of " + str(n + p) + "\n",
        "NT," + str(n) + "\n",
        "PT," + str(p) + "\n",
    ]

    title = "Hierarchical Test Summary"
    tools.print_header(self.output_file, title)
    for line in summary:
        self.output_file.write(line)
    tools.print_footer(self.output_file, title)

    for line in summary:
        # print appends its own newline after the one in the line,
        # exactly as the original statements did.
        print(line)

    total = p + n
    if total > 0:
        overall_score = float(n_score + p_score) / float(total)
    else:
        overall_score = 0.0

    self.add_data("hierarchical_score", overall_score)

    return result
def run(self):
    """Run the jump test and record the resulting scores.

    Words are sampled from ``self.corpus_dict`` in batches. For every
    sampled word that has at least one relation whose first element is
    a key of ``self.relation_type_vectors``, one such relation is picked
    at random and ``self.test_link`` is asked to follow it from the
    word. Every target the word reaches through the same relation type
    is passed along as an acceptable answer. Sampling repeats until
    ``self.num_trials`` tests have been run.

    A summary with the raw count of valid answers and the number of
    tests is written to ``self.output_file``. The fractions of correct,
    valid and exact answers are stored through ``self.add_data`` under
    ``jump_score_correct``, ``jump_score_valid`` and
    ``jump_score_exact``.

    Edge cases handled here:

    * no test could be run (``num_trials`` is 0, or no word has a
      testable relation): all three scores are reported as 0.0 instead
      of dividing by zero or looping forever;
    * more trials requested than there are words: each batch is capped
      at the corpus size, so ``random.sample`` is never asked for more
      items than exist and the remaining trials come from later batches.
    """
    out = self.output_file

    testNumber = 0
    correct_score = 0
    valid_score = 0
    exact_score = 0

    corpus_size = len(self.corpus_dict)

    # Without a single testable relation the sampling loop below could
    # never make progress, so find that out up front. This scan does
    # not touch the random number generator.
    has_testable = any(
        r[0] in self.relation_type_vectors
        for key in self.corpus_dict
        for r in self.corpus_dict[key])

    while has_testable and testNumber < self.num_trials:
        # Identical to the original request whenever it was satisfiable.
        batch_size = min(self.num_trials - testNumber, corpus_size)
        words = self.rng.sample(self.corpus_dict, batch_size)

        for word in words:
            testableLinks = [
                r for r in self.corpus_dict[word]
                if r[0] in self.relation_type_vectors]

            if not testableLinks:
                continue

            prompt = self.rng.sample(testableLinks, 1)[0]

            tools.print_header(out, "New Jump Test")

            answers = [
                r[1] for r in self.corpus_dict[word]
                if r[0] == prompt[0]]
            relation_vec = self.relation_type_vectors[prompt[0]]

            _, correct, valid, exact = self.test_link(
                relation_vec, None, word, prompt[1], out,
                num_relations=len(testableLinks), answers=answers)

            # Same text as ``print >> out, "Correct goal? ", correct``.
            out.write("Correct goal?  %s\n" % (correct,))
            out.write("Valid answers?  %s\n" % (valid,))
            out.write("Exact goal?  %s\n" % (exact,))

            testNumber += 1

            if correct:
                correct_score += 1
            if valid:
                valid_score += 1
            if exact:
                exact_score += 1

    # print the score (raw counts; fractions are computed afterwards)
    title = "Jump Test Summary"
    tools.print_header(out, title)
    out.write("valid_score," + str(valid_score) + ":\n")
    out.write("totaltests," + str(testNumber) + ":\n")
    tools.print_footer(out, title)

    if testNumber > 0:
        total = float(testNumber)
        correct_score = correct_score / total
        valid_score = valid_score / total
        exact_score = exact_score / total
    else:
        correct_score = valid_score = exact_score = 0.0

    print("score," + str(correct_score))

    self.add_data("jump_score_correct", correct_score)
    self.add_data("jump_score_valid", valid_score)
    self.add_data("jump_score_exact", exact_score)
def run(self):
    """Check whether word A is a type of word B.

    Tests ``p = self.num_trials`` positive pairs, in which the target
    is among the parents of the start word, and ``n`` negative pairs,
    in which it is not. ``n`` equals ``p`` when ``self.do_neg`` is set
    and is 0 otherwise. ``self.relation_types`` specifies which
    relationships ``self.findAllParents`` follows during the search.

    Every pair is searched twice through ``self.findAllParents``:
    first with its fourth argument False ("symbolically", for
    comparison) and then with it True. Only the second result is
    scored. A negative pair scores when that result is -1 and a
    positive pair scores when it is greater than -1.

    The summary (false positives, correct rejections, hits, misses and
    totals) is written to ``self.output_file`` and printed. The overall
    fraction of correct decisions is stored through ``self.add_data``
    as ``hierarchical_score``.

    Returns the result of the last scored search, or None when no pair
    was tested. With ``num_trials`` equal to 0 the score is reported
    as 0.0 instead of raising.

    NOTE(review): the pair-collection loops keep sampling until enough
    pairs are found, so they do not terminate if the corpus cannot
    supply them (for example, if no word has any parent).
    """
    rtype = self.relation_types

    p = self.num_trials
    n = p if self.do_neg else 0

    p_count = 0
    n_count = 0
    p_score = 0
    n_score = 0

    negative_pairs = []
    positive_pairs = []

    # Stays None if there are no pairs to test, so the final return
    # never refers to an unbound name.
    result = None

    # find positive and negative pairs
    while n_count < n:
        start = self.rng.sample(self.corpus_dict, 1)[0]
        target = self.rng.sample(self.corpus_dict, 1)[0]

        parent_list = self.findAllParents(
            start, None, rtype, False, print_output=False)

        pair = (start, target)

        if target not in parent_list:
            negative_pairs.append(pair)
            n_count += 1
        elif p_count < p:
            # A random pair that happens to be positive is kept as long
            # as positives are still needed; otherwise it is dropped.
            positive_pairs.append(pair)
            p_count += 1

    while p_count < p:
        start = self.rng.sample(self.corpus_dict, 1)[0]

        parent_list = self.findAllParents(
            start, None, rtype, False, print_output=False)

        if len(parent_list) == 0:
            continue

        target = self.rng.sample(parent_list, 1)[0]
        positive_pairs.append((start, target))
        p_count += 1

    # now run the tests
    title = "New Hierarchical Test - Negative"
    for start, target in negative_pairs:
        tools.print_header(self.output_file, title)

        # do it symbolically first, for comparison
        self.findAllParents(
            start, target, rtype, False, print_output=True)

        result = self.findAllParents(
            start, target, rtype, True, print_output=True)

        if result == -1:
            n_score += 1

    title = "New Hierarchical Test - Positive"
    for start, target in positive_pairs:
        tools.print_header(self.output_file, title)

        # do it symbolically first, for comparison
        self.findAllParents(
            start, target, rtype, False, print_output=True)

        result = self.findAllParents(
            start, target, rtype, True, print_output=True)

        if result > -1:
            p_score += 1

    # print the score: the same lines go to the output file and stdout
    summary = [
        "Start trial:\n",
        "FP," + str(n - n_score) + "\n",
        "CR," + str(n_score) + "\n",
        "hits," + str(p_score) + "\n",
        "misses," + str(p - p_score) + "\n",
        "TS," + str(n_score + p_score) + " out of " + str(n + p) + "\n",
        "NT," + str(n) + "\n",
        "PT," + str(p) + "\n",
    ]

    title = "Hierarchical Test Summary"
    tools.print_header(self.output_file, title)
    for line in summary:
        self.output_file.write(line)
    tools.print_footer(self.output_file, title)

    for line in summary:
        # print appends its own newline after the one in the line,
        # exactly as the original statements did.
        print(line)

    total = p + n
    if total > 0:
        overall_score = float(n_score + p_score) / float(total)
    else:
        overall_score = 0.0

    self.add_data("hierarchical_score", overall_score)

    return result