Пример #1
0
    def get_interpretation(self, words):
        """
        Try to interpret `words` with this rule.

        `self.regex`, followed by an end-of-line literal, is matched against
        `words` plus the end-of-line marker. On success the match is wrapped
        in `Match` and handed to `self.interpret`.

        Returns an `(expression, userdata)` pair. `(None, None)` is returned
        when the regex does not match, or when building/interpreting the
        match raises `BadSemantic` or `AttributeError`. When the value
        returned by `self.interpret` cannot be unpacked (TypeError), it is
        used as the expression and `userdata` is None.
        """
        rulename = self.__class__.__name__
        print("Trying to match with regex: {}".format(rulename))

        match = refo.match(self.regex + Literal(_EOL), words + [_EOL])

        if not match:
            print("No match")
            return None, None

        try:
            match = Match(match, words)
            result = self.interpret(match)
        except (BadSemantic, AttributeError) as error:
            # NOTE(review): AttributeError is treated as "rule does not
            # apply", which can also hide genuine bugs inside interpret().
            print(str(error))
            return None, None

        try:
            expression, userdata = result
        except TypeError:
            # interpret() returned a bare expression with no userdata.
            expression, userdata = result, None

        # Record which rule produced the expression.
        expression.rule_used = rulename
        return expression, userdata
Пример #2
0
    def get_interpretation(self, words):
        """
        Try to interpret `words` with this rule.

        Matches `self.regex` plus an end-of-line literal against `words`
        plus the end-of-line marker, then passes the wrapped match to
        `self.interpret`.

        Returns `(expression, userdata)`, or `(None, None)` when the regex
        does not match or `BadSemantic` is raised. Every item obtained by
        iterating the expression gets its `rule_used` attribute set to this
        rule's class name.
        """
        rulename = self.__class__.__name__
        logger.debug("Trying to match with regex: {}".format(rulename))

        anchored_regex = self.regex + Literal(_EOL)
        raw_match = refo.match(anchored_regex, words + [_EOL])
        if not raw_match:
            logger.debug("No match")
            return None, None

        try:
            result = self.interpret(Match(raw_match, words))
        except BadSemantic as error:
            logger.debug(str(error))
            return None, None

        try:
            expression, userdata = result
        except TypeError:
            # interpret() returned something that cannot be unpacked:
            # treat it as the expression itself, with no userdata.
            expression = result
            userdata = None

        for item in expression:
            item.rule_used = rulename

        return expression, userdata
Пример #3
0
def run_tests(rule_regexes, evidences, answers):
    """
    Print, per rule, the evidences it matches, then overall metrics.

    `rule_regexes` yields `(name, regex, answer)` triples. Every regex is
    matched against the tokens generated for every evidence. For evidences
    that have a non-None entry in `answers`, the real label and the
    prediction (the rule's `answer` on a match, False otherwise) are
    collected; if any were collected, a metrics table is printed.
    """
    colorama_init()
    formatter = TerminalEvidenceFormatter()

    predictions, real_labels, evidences_with_labels = [], [], []

    for name, regex, answer in rule_regexes:
        header = "Matches for rule '{}' (value: {})".format(name, answer)
        print("\n{}\n{}".format(header, "-" * len(header)))

        matched_count = 0
        for evidence in evidences:
            matched = refo.match(regex, generate_tokens_to_match(evidence))
            if matched:
                matched_count += 1
                print("  * {}".format(formatter.colored_text(evidence)))

            # Only labeled evidences contribute to the metrics.
            if evidence not in answers or answers[evidence] is None:
                continue
            evidences_with_labels.append(evidence)
            real_labels.append(answers[evidence])
            predictions.append(answer if matched else False)

        if matched_count == 0:
            print("  nothing matched")

        print()

    if not real_labels:
        return

    results = result_dict_from_predictions(
        evidences_with_labels, real_labels, predictions
    )
    results.pop("end_time")

    header = "Metrics"
    print("{}\n{}".format(header, "-" * len(header)))
    metric_names = (
        "true_positives",
        "true_negatives",
        "false_positives",
        "false_negatives",
        "precision",
        "recall",
        "accuracy",
        "f1",
    )
    for metric in metric_names:
        print("{:>15}: {:.2f}".format(metric, results[metric]))
Пример #4
0
 def test_match2(self):
     # The pattern (a?)^N a^N against "a" * N is a known pathological case
     # for Python's re module, whose implementation has exponential worst
     # case complexity.
     # This implementation has polynomial worst case complexity instead,
     # so the test should finish in a reasonable time.
     repetitions = 100
     letter = refo.Literal("a")
     pattern = refo.Question(letter) * repetitions + letter * repetitions
     text = "a" * repetitions
     result = refo.match(pattern, text)
     self.assertNotEqual(result, None)
Пример #5
0
 def test_match2(self):
     # Optional "a" repeated N times followed by mandatory "a" repeated N
     # times, matched against N letters: this input is known to kill the
     # python re module, whose implementation has exponential worst case
     # complexity. This implementation has polynomial worst case
     # complexity, so the match must come back in a reasonable time.
     count = 100
     text = "a" * count
     lit = refo.Literal("a")
     optional_part = refo.Question(lit) * count
     mandatory_part = lit * count
     outcome = refo.match(optional_part + mandatory_part, text)
     self.assertNotEqual(outcome, None)
Пример #6
0
    def match(self, evidence):
        """
        Return the answer of the first rule that matches `evidence`.

        Subject and Object patterns are built from the entity kinds of the
        evidence's left and right occurrences. Each rule in `self.rules` is
        called with them, anchored with an end-of-line literal and matched
        against the evidence tokens. Returns `rule.answer` for the first
        match, or None when no rule matches.
        """
        left_kind = evidence.left_entity_occurrence.entity.kind.name
        right_kind = evidence.right_entity_occurrence.entity.kind.name
        subject_pattern = refo.Plus(
            ConditionPredicate(is_subj=True, kinds__has=left_kind)
        )
        object_pattern = refo.Plus(
            ConditionPredicate(is_obj=True, kinds__has=right_kind)
        )

        tokens = list(self.generate_tokens_to_match(evidence))
        tokens.append(_EOL)

        for rule in self.rules:
            anchored = rule(subject_pattern, object_pattern) + refo.Literal(_EOL)
            if refo.match(anchored, tokens):
                return rule.answer
        return None
Пример #7
0
 def test_match_path(self):
     # Trailing comments name the predicates (self.x, self.y, self.z) that
     # the corresponding element is expected to satisfy.
     sequence = [
         [1, 2],  # x and y
         [1],  # x
         [1, 2, 3],  # x, y and z
         [1, 2],  # x and y
         [2, 3],  # y and z
         [0, 4, 5],
         [],
     ]
     pattern = refo.Star(self.y) + refo.Plus(self.x + self.z)
     result = refo.match(pattern, sequence, keep_path=True)
     self.assertIsInstance(result, Match)
     self.assertEqual([4, 1, 9, 1, 9], result.get_path())
Пример #8
0
 def test_match_path(self):
     # Input elements; each comment lists which of the predicates
     # self.x / self.y / self.z the element is expected to satisfy.
     elements = [[1, 2],     # x and y
                 [1],        # x
                 [1, 2, 3],  # x, y and z
                 [1, 2],     # x and y
                 [2, 3],     # y and z
                 [0, 4, 5],
                 []]
     y_run = refo.Star(self.y)
     xz_pairs = refo.Plus(self.x + self.z)
     found = refo.match(y_run + xz_pairs, elements, keep_path=True)
     self.assertIsInstance(found, Match)
     expected_path = [4, 1, 9, 1, 9]
     self.assertEqual(expected_path, found.get_path())
Пример #9
0
    def match(self, evidence):
        """
        Evaluate `self.rules` in order against `evidence`.

        Builds one-or-more patterns for the subject and the object, keyed
        on the kind names of the evidence's left and right entity
        occurrences, and matches each rule (anchored with an end-of-line
        literal) against the evidence tokens. The `answer` of the first
        matching rule is returned; None is returned if none match.
        """
        kind_of_subject = evidence.left_entity_occurrence.entity.kind.name
        kind_of_object = evidence.right_entity_occurrence.entity.kind.name

        subject_condition = ConditionPredicate(
            is_subj=True, kinds__has=kind_of_subject)
        Subject = refo.Plus(subject_condition)
        object_condition = ConditionPredicate(
            is_obj=True, kinds__has=kind_of_object)
        Object = refo.Plus(object_condition)

        token_list = list(self.generate_tokens_to_match(evidence))
        token_list += [_EOL]

        for rule in self.rules:
            rule_regex = rule(Subject, Object) + refo.Literal(_EOL)
            if not refo.match(rule_regex, token_list):
                continue
            return rule.answer
        return None
def process_corpus(input_filename, output_filename):
    """
    Annotate each question of a pickled corpus with the rules it matches.

    The corpus is unpickled from `input_filename`. For every instance, its
    `'question'` value is matched against every regex in
    `freebase_app.partial_rules` (anchored with an end-of-line literal),
    and the value is replaced by a `(question, rules)` tuple where `rules`
    lists the `repr` of each matching regex. The modified corpus is then
    pickled to `output_filename`.

    NOTE(security): `pickle.load` can execute arbitrary code while
    loading; only use this on corpus files from a trusted source.
    """
    # Pickle streams are bytes, so the files must be opened in binary mode
    # (text mode fails on Python 3). `with` closes them even on error.
    with open(input_filename, 'rb') as input_f:
        original_corpus = pickle.load(input_f)

    for instance in original_corpus:
        words = instance['question']
        rules = [
            repr(regex)
            for regex in freebase_app.partial_rules
            if refo.match(regex + refo.Literal(_EOL), words + [_EOL])
        ]
        instance['question'] = (words, rules)

    with open(output_filename, 'wb') as output_f:
        pickle.dump(original_corpus, output_f)
Пример #11
0
    def get_semantics(self, words):
        """
        Try to compute the semantics of `words` with this rule.

        `self.regex` plus an end-of-line literal is matched against `words`
        plus the end-of-line marker; on success the wrapped match is passed
        to `self.semantics`.

        Returns `(expression, userdata)`, with the expression's `rule_used`
        attribute set to this rule's class name. Returns `(None, None)`
        when the regex does not match or `BadSemantic` is raised.
        """
        rulename = self.__class__.__name__
        logger.debug("Trying to match with regex: {}".format(rulename))

        anchored_regex = self.regex + Literal(_EOL)
        raw_match = refo.match(anchored_regex, words + [_EOL])
        if not raw_match:
            logger.debug("No match")
            return None, None

        try:
            result = self.semantics(Match(raw_match, words))
        except BadSemantic as error:
            logger.debug(str(error))
            return None, None

        try:
            expression, userdata = result
        except TypeError:
            # semantics() returned a bare expression with no userdata.
            expression = result
            userdata = None

        expression.rule_used = rulename
        return expression, userdata
Пример #12
0
 def match(self, evidence):
     """
     Return the answer paired with the first regex in `self.rule_regexes`
     that matches the tokens generated for `evidence`, or None when no
     regex matches.
     """
     tokens = generate_tokens_to_match(evidence)
     matching_answers = (
         answer
         for pattern, answer in self.rule_regexes
         if refo.match(pattern, tokens)
     )
     return next(matching_answers, None)
Пример #13
0
import refo


def path_function(x):
    """
    Build a function that reports whether `x` is in a container.

    The returned function takes a container `xs` and returns `x * x` when
    `x in xs`, otherwise None.
    """
    def f(xs):
        return x * x if x in xs else None

    return f


# x, y and z accept elements containing 1, 2 and 3 respectively.
x, y, z = (refo.Predicate(path_function(value)) for value in (1, 2, 3))

# Elements, in order: x and y / x / x, y and z / z / none / none.
seq = [[1, 2], [1], [1, 2, 3], [3], [0, 4, 5], []]

# Any number of y elements, then one or more (x, z) pairs.
regex = refo.Star(y) + refo.Plus(x + z)
m = refo.match(regex, seq, keep_path=True)
print(m.get_path())
Пример #14
0
 def test_match1(self):
     # The same b, b, a, a, b sequence expressed as a refo pattern and as
     # a standard `re` pattern must give equivalent match results.
     pattern = self.b + self.b + self.a + self.a + self.b
     refo_result = refo.match(pattern, self.seq)
     re_result = re.compile("bbaab").match(self.string)
     self._eq_span_n_stuff(refo_result, re_result)
Пример #15
0
 def test_match1(self):
     # Compare refo against the standard library on the literal
     # sequence b, b, a, a, b.
     b, a = self.b, self.a
     refo_pattern = b + b + a + a + b
     stdlib_pattern = re.compile("bbaab")
     self._eq_span_n_stuff(
         refo.match(refo_pattern, self.seq),
         stdlib_pattern.match(self.string),
     )
Пример #16
0

from refo import Literal, Question, match
import re
import time

# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.

# You might want to try with N = 20, 30, 40, 100 to see what happens
N = 30

# Refo: optional "a" repeated N times, then mandatory "a" repeated N times.
a = Literal("a")
string = "a" * N
regex = Question(a) * N + a * N
start = time.time()
m = match(regex, string)
end = time.time()
# print(...) with a single argument behaves the same on Python 2 and 3.
print("Refo finished in {0:.2} seconds".format(end - start))

# The same pattern for the standard re module; "(?:...)" is a
# non-capturing group.
regex = "(?:a?){{{0}}}a{{{0}}}".format(N)
print("Trying {0}".format(regex))
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print("Python re finished in {0:.2} seconds".format(end - start))
Пример #17
0
import refo


def path_function(x):
    """
    Return a one-argument function bound to `x`.

    Called with a container `xs`, that function gives back the square of
    `x` if `xs` contains `x`, and None if it does not.
    """
    def f(xs):
        if x not in xs:
            return None
        return x * x

    return f


# x, y and z accept elements containing 1, 2 and 3 respectively.
x = refo.Predicate(path_function(1))
y = refo.Predicate(path_function(2))
z = refo.Predicate(path_function(3))

seq = [
    [1, 2],  # x and y
    [1],  # x
    [1, 2, 3],  # x, y and z
    [3],  # z
    [0, 4, 5],
    [],
]

# Any number of y elements, then one or more (x, z) pairs.
regex = refo.Star(y) + refo.Plus(x + z)
m = refo.match(regex, seq, keep_path=True)
# print(...) with a single argument behaves the same on Python 2 and 3.
print(m.get_path())
Пример #18
0
#!/usr/bin/python


from refo import Literal, Question, match
import re
import time

# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.

# You might want to try with N = 20, 30, 40, 100 to see what happens
N = 25

# Refo: optional "a" repeated N times, then mandatory "a" repeated N times.
a = Literal("a")
string = "a" * N
regex = Question(a) * N + a * N
start = time.time()
m = match(regex, string)
end = time.time()
# print(...) with a single argument behaves the same on Python 2 and 3.
print("Refo finished in {0:.2} seconds".format(end - start))

# The same pattern for the standard re module; "(?:...)" is a
# non-capturing group.
regex = "(?:a?){{{0}}}a{{{0}}}".format(N)
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print("Python re finished in {0:.2} seconds".format(end - start))
Пример #19
0
 def inner(evidence):
     """Return 1 if the compiled rule matches the evidence tokens, else 0."""
     compiled = compile_rule(rule_feature, relation)
     matched = refo.match(compiled, generate_tokens_to_match(evidence))
     return 1 if matched else 0
Пример #20
0
 def inner(evidence):
     """Compile the rule, match it on the evidence tokens; 1 on match, 0 otherwise."""
     rule_regex = compile_rule(rule_feature, relation)
     tokens = generate_tokens_to_match(evidence)
     if refo.match(rule_regex, tokens):
         return 1
     return 0