Пример #1
0
    def print_relation_stats(self, output_file):
        """Write summary statistics about the corpus relations.

        Three values are written to ``output_file`` between a header and
        footer titled "Relation Distribution":

        * ``relation_counts`` - how many relations exist per relation
          type, where the type is the first element of each relation.
        * ``relation_count`` - the total number of relations.
        * ``relation_hist`` - for each "number of relations per entry",
          how many entries of ``self.corpus_dict`` have that many.
        """
        per_type = {}
        per_length = {}
        total = 0

        for relations in self.corpus_dict.values():
            size = len(relations)
            per_length[size] = per_length.get(size, 0) + 1

            for relation in relations:
                total += 1
                kind = relation[0]
                per_type[kind] = per_type.get(kind, 0) + 1

        title = "Relation Distribution"
        tools.print_header(output_file, title)

        # Each statistic goes out as "<label>: <value> \n".
        for label, value in (("relation_counts", per_type),
                             ("relation_count", total),
                             ("relation_hist", per_length)):
            output_file.write(label + ": " + str(value) + " \n")

        tools.print_footer(output_file, title)
Пример #2
0
    def print_relation_stats(self, output_file):
        """Report how the relations in ``self.corpus_dict`` are distributed.

        Writes, framed by a "Relation Distribution" header and footer:
        the number of relations per relation type (a relation's type is
        its first element), the overall number of relations, and a
        histogram of how many corpus entries hold a given number of
        relations.
        """
        type_tally = {}
        size_tally = {}
        n_relations = 0

        for word in self.corpus_dict:
            entries = self.corpus_dict[word]

            n_entries = len(entries)
            size_tally[n_entries] = size_tally.get(n_entries, 0) + 1

            for entry in entries:
                n_relations += 1
                rel_type = entry[0]
                if rel_type in type_tally:
                    type_tally[rel_type] += 1
                else:
                    type_tally[rel_type] = 1

        heading = "Relation Distribution"
        tools.print_header(output_file, heading)

        output_file.write("relation_counts: %s \n" % str(type_tally))
        output_file.write("relation_count: %s \n" % str(n_relations))
        output_file.write("relation_hist: %s \n" % str(size_tally))

        tools.print_footer(output_file, heading)
Пример #3
0
    def run(self):
        testNumber = 0

        correct_score = 0
        valid_score = 0
        exact_score = 0

        while testNumber < self.num_trials:
            words = self.rng.sample(self.corpus_dict,
                                    self.num_trials-testNumber)

            for word in words:
                testableLinks = [r for r in self.corpus_dict[word]
                                 if r[0] in self.relation_type_vectors]

                if len(testableLinks) > 0:
                    prompt = self.rng.sample(testableLinks, 1)[0]

                    tools.print_header(self.output_file, "New Jump Test")

                    answers = [r[1] for r in self.corpus_dict[word]
                               if r[0] == prompt[0]]
                    relation_vec = self.relation_type_vectors[prompt[0]]

                    result, correct, valid, exact = self.test_link(
                        relation_vec, None, word, prompt[1],
                        self.output_file,
                        num_relations=len(testableLinks),
                        answers=answers)

                    print >> self.output_file, "Correct goal? ", correct
                    print >> self.output_file, "Valid answers? ", valid
                    print >> self.output_file, "Exact goal? ", exact

                    testNumber += 1

                    if correct:
                        correct_score += 1
                    if valid:
                        valid_score += 1
                    if exact:
                        exact_score += 1

        # print the score
        title = "Jump Test Summary"
        tools.print_header(self.output_file, title)
        self.output_file.write("valid_score,"+str(valid_score)+":\n")
        self.output_file.write("totaltests,"+str(testNumber)+":\n")
        tools.print_footer(self.output_file, title)

        correct_score = float(correct_score) / float(testNumber)
        valid_score = float(valid_score) / float(testNumber)
        exact_score = float(exact_score) / float(testNumber)

        print "score,"+str(correct_score)

        self.add_data("jump_score_correct", correct_score)
        self.add_data("jump_score_valid", valid_score)
        self.add_data("jump_score_exact", exact_score)
Пример #4
0
    def print_config(self):
        """Write this test's configuration to ``self.output_file``.

        The output is framed by a "WordnetTest Config" header and footer
        and contains, in order: ``num_trials``, ``test_threshold``, the
        configuration written by the corpus and by the extractor, the
        name of this object's class, and whatever
        ``self._print_config()`` emits.
        """
        heading = "WordnetTest Config"
        tools.print_header(self.output_file, heading)

        self.output_file.write("num_trials : %s\n" % str(self.num_trials))
        self.output_file.write(
            "test_threshold : %s\n" % str(self.test_threshold))

        self.corpus.print_config(self.output_file)
        self.extractor.print_config(self.output_file)

        # The class name is written without a trailing newline, directly
        # followed by the subclass-specific settings.
        class_name = self.__class__.__name__
        self.output_file.write(class_name)
        self._print_config()

        tools.print_footer(self.output_file, heading)
Пример #5
0
    def print_config(self):
        """Dump the test configuration to ``self.output_file``.

        Between a "WordnetTest Config" header and footer this writes the
        ``num_trials`` and ``test_threshold`` settings, lets the corpus
        and the extractor print their own configuration, writes the class
        name of this object and finally calls ``self._print_config()``.
        """
        banner = "WordnetTest Config"
        tools.print_header(self.output_file, banner)

        # Both settings use the same "<name> : <value>" line format.
        for name in ("num_trials", "test_threshold"):
            self.output_file.write(
                name + " : " + str(getattr(self, name)) + "\n")

        self.corpus.print_config(self.output_file)
        self.extractor.print_config(self.output_file)

        self.output_file.write(self.__class__.__name__)
        self._print_config()

        tools.print_footer(self.output_file, banner)
Пример #6
0
    def run(self):
        self.dimension = len(self.id_vectors.values()[0])

        self.role_hrrs = self.create_role_hrrs()
        self.pos_map = self.create_pos_map()

        score = defaultdict(float)

        for i in range(self.num_trials):
            title = "New Sentence Test"
            if self.deep:
                title += "- Deep"

            tools.print_header(self.output_file, title)

            sentence = self.generate_sentence()

            if self.deep:
                embed = self.rng.sample(sentence.keys(), 1)[0]

                embedded_sentence = self.generate_sentence()

                del sentence[embed]

                for role in embedded_sentence.keys():
                    sentence[embed + role] = embedded_sentence[role]

            tag_vectors = {}
            sentence_hrr = HRR(data=np.zeros(self.dimension))

            # Pick role-fillers and create HRR representing the sentence
            # Also store the hrr to use as the query to extract each synset
            # included in the sentence.
            for role in sentence:
                tag_hrr = [self.role_hrrs[x] for x in role]
                tag_hrr = reduce(lambda x, y: x * y, tag_hrr)

                synset = sentence[role]

                sentence_hrr += tag_hrr * HRR(data=self.id_vectors[synset])

                tag_vectors[role] = tag_hrr.v

            sentence_hrr.normalize()

            sentence_vector = sentence_hrr.v

            print >> self.output_file, "Roles in sentence:"
            print >> self.output_file, sentence

            # ask about parts of the sentence
            sentence_score = defaultdict(float)
            sentence_length = defaultdict(float)
            for role in sentence.keys():

                answer = sentence[role]

                self.current_start_key = None
                self.current_target_keys = [answer]
                self.current_num_relations = len(sentence)

                print >> self.output_file, "\nTesting ", role

                result, correct, valid, exact = self.test_link(
                    tag_vectors[role],
                    sentence_vector,
                    None,
                    answer,
                    output_file=self.output_file,
                    return_vec=False,
                    num_relations=len(sentence),
                    answers=[answer])

                depth = len(role)
                if correct:
                    sentence_score[depth] += 1
                    print >> self.output_file, "Correct."
                else:
                    print >> self.output_file, "Incorrect."

                sentence_length[depth] += 1

                if self.short:
                    break

            for d in sentence_length:
                sentence_percent = sentence_score[d] / sentence_length[d]

                print >> self.output_file, \
                    "Percent correct for current sentence at depth %d: %f" \
                    % (d, sentence_percent)

                score[d] = score[d] + sentence_percent

        for d in score:
            print "Sentence test score at depth %d: %f out of %d" \
                % (d, score[d], self.num_trials)

            percent = score[d] / self.num_trials

            title = "Sentence Test Summary - Depth = %d" % d
            tools.print_header(self.output_file, title)
            print >> self.output_file, "Correct: ", score[d]
            print >> self.output_file, "Total: ", self.num_trials
            print >> self.output_file, "Percent: ", percent
            tools.print_footer(self.output_file, title)

            self.add_data("sentence_score_%d" % d, percent)
Пример #7
0
    def run(self):
        """Check whether word A is a type of word B. Test with n cases in
        which word A IS NOT a descendant of word B and m cases where word
        A IS a descendent of word B. The rtype parameter specifies which
        relationships to use in the search (by default, only the isA
        relationships)."""
        rtype = self.relation_types

        p = self.num_trials

        if self.do_neg:
            n = p
        else:
            n = 0

        p_count = 0
        n_count = 0

        p_score = 0
        n_score = 0

        negative_pairs = []
        positive_pairs = []

        # find positive and negative pairs
        while n_count < n:
            start = self.rng.sample(self.corpus_dict, 1)[0]
            target = self.rng.sample(self.corpus_dict, 1)[0]

            parent_list = self.findAllParents(start,
                                              None,
                                              rtype,
                                              False,
                                              print_output=False)

            pair = (start, target)
            if target in parent_list and p_count < p:
                positive_pairs.append(pair)
                p_count += 1
            elif not (target in parent_list):
                negative_pairs.append(pair)
                n_count += 1

        while p_count < p:
            start = self.rng.sample(self.corpus_dict, 1)[0]
            parent_list = self.findAllParents(start,
                                              None,
                                              rtype,
                                              False,
                                              print_output=False)

            if len(parent_list) == 0:
                continue

            target = self.rng.sample(parent_list, 1)[0]
            positive_pairs.append((start, target))
            p_count += 1

        # now run the tests
        title = "New Hierarchical Test - Negative"
        for pair in negative_pairs:
            tools.print_header(self.output_file, title)

            # do it symbolically first, for comparison
            self.findAllParents(pair[0],
                                pair[1],
                                rtype,
                                False,
                                print_output=True)

            result = self.findAllParents(pair[0],
                                         pair[1],
                                         rtype,
                                         True,
                                         print_output=True)

            if result == -1:
                n_score += 1

        title = "New Hierarchical Test - Positive"
        for pair in positive_pairs:
            tools.print_header(self.output_file, title)

            # do it symbolically first, for comparison
            self.findAllParents(pair[0],
                                pair[1],
                                rtype,
                                False,
                                print_output=True)

            result = self.findAllParents(pair[0],
                                         pair[1],
                                         rtype,
                                         True,
                                         print_output=True)

            if result > -1:
                p_score += 1

        # print the score
        title = "Hierarchical Test Summary"
        tools.print_header(self.output_file, title)
        self.output_file.write("Start trial:\n")
        self.output_file.write("FP," + str(n - n_score) + "\n")
        self.output_file.write("CR," + str(n_score) + "\n")
        self.output_file.write("hits," + str(p_score) + "\n")
        self.output_file.write("misses," + str(p - p_score) + "\n")
        self.output_file.write("TS," + str(n_score + p_score) + " out of " +
                               str(n + p) + "\n")
        self.output_file.write("NT," + str(n) + "\n")
        self.output_file.write("PT," + str(p) + "\n")
        tools.print_footer(self.output_file, title)

        print "Start trial:\n"
        print "FP," + str(n - n_score) + "\n"
        print "CR," + str(n_score) + "\n"
        print "hits," + str(p_score) + "\n"
        print "misses," + str(p - p_score) + "\n"
        print "TS," + str(n_score + p_score) + " out of " + str(n + p) + "\n"
        print "NT," + str(n) + "\n"
        print "PT," + str(p) + "\n"

        overall_score = float(n_score + p_score) / float(p + n)
        self.add_data("hierarchical_score", overall_score)

        return result
Пример #8
0
    def run(self):
        testNumber = 0

        correct_score = 0
        valid_score = 0
        exact_score = 0

        while testNumber < self.num_trials:
            words = self.rng.sample(self.corpus_dict,
                                    self.num_trials - testNumber)

            for word in words:
                testableLinks = [
                    r for r in self.corpus_dict[word]
                    if r[0] in self.relation_type_vectors
                ]

                if len(testableLinks) > 0:
                    prompt = self.rng.sample(testableLinks, 1)[0]

                    tools.print_header(self.output_file, "New Jump Test")

                    answers = [
                        r[1] for r in self.corpus_dict[word]
                        if r[0] == prompt[0]
                    ]
                    relation_vec = self.relation_type_vectors[prompt[0]]

                    result, correct, valid, exact = self.test_link(
                        relation_vec,
                        None,
                        word,
                        prompt[1],
                        self.output_file,
                        num_relations=len(testableLinks),
                        answers=answers)

                    print >> self.output_file, "Correct goal? ", correct
                    print >> self.output_file, "Valid answers? ", valid
                    print >> self.output_file, "Exact goal? ", exact

                    testNumber += 1

                    if correct:
                        correct_score += 1
                    if valid:
                        valid_score += 1
                    if exact:
                        exact_score += 1

        # print the score
        title = "Jump Test Summary"
        tools.print_header(self.output_file, title)
        self.output_file.write("valid_score," + str(valid_score) + ":\n")
        self.output_file.write("totaltests," + str(testNumber) + ":\n")
        tools.print_footer(self.output_file, title)

        correct_score = float(correct_score) / float(testNumber)
        valid_score = float(valid_score) / float(testNumber)
        exact_score = float(exact_score) / float(testNumber)

        print "score," + str(correct_score)

        self.add_data("jump_score_correct", correct_score)
        self.add_data("jump_score_valid", valid_score)
        self.add_data("jump_score_exact", exact_score)
Пример #9
0
    def run(self):
        self.dimension = len(self.id_vectors.values()[0])

        self.role_hrrs = self.create_role_hrrs()
        self.pos_map = self.create_pos_map()

        score = defaultdict(float)

        for i in range(self.num_trials):
            title = "New Sentence Test"
            if self.deep:
                title += "- Deep"

            tools.print_header(self.output_file, title)

            sentence = self.generate_sentence()

            if self.deep:
                embed = self.rng.sample(sentence.keys(), 1)[0]

                embedded_sentence = self.generate_sentence()

                del sentence[embed]

                for role in embedded_sentence.keys():
                    sentence[embed + role] = embedded_sentence[role]

            tag_vectors = {}
            sentence_hrr = HRR(data=np.zeros(self.dimension))

            # Pick role-fillers and create HRR representing the sentence
            # Also store the hrr to use as the query to extract each synset
            # included in the sentence.
            for role in sentence:
                tag_hrr = [self.role_hrrs[x] for x in role]
                tag_hrr = reduce(lambda x, y: x * y, tag_hrr)

                synset = sentence[role]

                sentence_hrr += tag_hrr * HRR(data=self.id_vectors[synset])

                tag_vectors[role] = tag_hrr.v

            sentence_hrr.normalize()

            sentence_vector = sentence_hrr.v

            print >> self.output_file, "Roles in sentence:"
            print >> self.output_file, sentence

            # ask about parts of the sentence
            sentence_score = defaultdict(float)
            sentence_length = defaultdict(float)
            for role in sentence.keys():

                answer = sentence[role]

                self.current_start_key = None
                self.current_target_keys = [answer]
                self.current_num_relations = len(sentence)

                print >> self.output_file, "\nTesting ", role

                result, correct, valid, exact = self.test_link(
                    tag_vectors[role], sentence_vector, None, answer,
                    output_file=self.output_file, return_vec=False,
                    num_relations=len(sentence), answers=[answer])

                depth = len(role)
                if correct:
                    sentence_score[depth] += 1
                    print >> self.output_file, "Correct."
                else:
                    print >> self.output_file, "Incorrect."

                sentence_length[depth] += 1

                if self.short:
                    break

            for d in sentence_length:
                sentence_percent = sentence_score[d] / sentence_length[d]

                print >> self.output_file, \
                    "Percent correct for current sentence at depth %d: %f" \
                    % (d, sentence_percent)

                score[d] = score[d] + sentence_percent

        for d in score:
            print "Sentence test score at depth %d: %f out of %d" \
                % (d, score[d], self.num_trials)

            percent = score[d] / self.num_trials

            title = "Sentence Test Summary - Depth = %d" % d
            tools.print_header(self.output_file, title)
            print >> self.output_file, "Correct: ", score[d]
            print >> self.output_file, "Total: ", self.num_trials
            print >> self.output_file, "Percent: ", percent
            tools.print_footer(self.output_file, title)

            self.add_data("sentence_score_%d" % d, percent)
Пример #10
0
    def run(self):
        """Check whether word A is a type of word B. Test with n cases in
        which word A IS NOT a descendant of word B and m cases where word
        A IS a descendent of word B. The rtype parameter specifies which
        relationships to use in the search (by default, only the isA
        relationships)."""
        rtype = self.relation_types

        p = self.num_trials

        if self.do_neg:
            n = p
        else:
            n = 0

        p_count = 0
        n_count = 0

        p_score = 0
        n_score = 0

        negative_pairs = []
        positive_pairs = []

        # find positive and negative pairs
        while n_count < n:
            start = self.rng.sample(self.corpus_dict, 1)[0]
            target = self.rng.sample(self.corpus_dict, 1)[0]

            parent_list = self.findAllParents(
                start, None, rtype, False, print_output=False)

            pair = (start, target)
            if target in parent_list and p_count < p:
                positive_pairs.append(pair)
                p_count += 1
            elif not (target in parent_list):
                negative_pairs.append(pair)
                n_count += 1

        while p_count < p:
            start = self.rng.sample(self.corpus_dict, 1)[0]
            parent_list = self.findAllParents(
                start, None, rtype, False, print_output=False)

            if len(parent_list) == 0:
                continue

            target = self.rng.sample(parent_list, 1)[0]
            positive_pairs.append((start, target))
            p_count += 1

        # now run the tests
        title = "New Hierarchical Test - Negative"
        for pair in negative_pairs:
            tools.print_header(self.output_file, title)

            # do it symbolically first, for comparison
            self.findAllParents(
                pair[0], pair[1], rtype, False, print_output=True)

            result = self.findAllParents(
                pair[0], pair[1], rtype, True, print_output=True)

            if result == -1:
                n_score += 1

        title = "New Hierarchical Test - Positive"
        for pair in positive_pairs:
            tools.print_header(self.output_file, title)

            # do it symbolically first, for comparison
            self.findAllParents(
                pair[0], pair[1], rtype, False, print_output=True)

            result = self.findAllParents(
                pair[0], pair[1], rtype, True, print_output=True)

            if result > -1:
                p_score += 1

        # print the score
        title = "Hierarchical Test Summary"
        tools.print_header(self.output_file, title)
        self.output_file.write("Start trial:\n")
        self.output_file.write("FP,"+str(n - n_score)+"\n")
        self.output_file.write("CR,"+str(n_score)+"\n")
        self.output_file.write("hits,"+str(p_score)+"\n")
        self.output_file.write("misses,"+str(p - p_score)+"\n")
        self.output_file.write(
            "TS,"+str(n_score + p_score)+" out of "+str(n+p)+"\n")
        self.output_file.write("NT,"+str(n)+"\n")
        self.output_file.write("PT,"+str(p)+"\n")
        tools.print_footer(self.output_file, title)

        print "Start trial:\n"
        print "FP,"+str(n-n_score)+"\n"
        print "CR,"+str(n_score)+"\n"
        print "hits,"+str(p_score)+"\n"
        print "misses,"+str(p-p_score)+"\n"
        print "TS,"+str(n_score+p_score)+" out of "+str(n+p)+"\n"
        print "NT,"+str(n)+"\n"
        print "PT,"+str(p)+"\n"

        overall_score = float(n_score + p_score) / float(p + n)
        self.add_data("hierarchical_score", overall_score)

        return result