Exemplo n.º 1
0
def load_stemming(stemming_file, strip_length=False):
    """Build a StemmingRuleSet from a YAML stemming-rules file.

    The file is expected to hold a mapping from key to either an iterable of
    rule strings or a ``{"ref": other_key}`` mapping that reuses whatever is
    stored under ``other_key``. Refs are followed transitively. A rule
    string may carry one annotation after a ``;``; the annotation is passed
    to ``ruleset.add`` as a one-element set.

    Args:
        stemming_file: Path of the YAML file to read.
        strip_length: When true, every rule is passed through
            ``do_strip_length`` before it is added.

    Returns:
        The populated ``StemmingRuleSet`` (empty if the file is empty).

    Raises:
        RefDoesNotExistException: If a ``ref`` names a key that is not
            present in the file.
        ValueError: If a chain of ``ref`` entries loops back on itself, or
            if a rule string contains more than one ``;``.
    """
    ruleset = StemmingRuleSet()

    with open(stemming_file) as f:
        # safe_load instead of yaml.load: a bare yaml.load(f) needs an
        # explicit Loader on current PyYAML and can construct arbitrary
        # Python objects on older versions. Rule files only need plain
        # mappings, lists and strings.
        stemming_dict = yaml.safe_load(f)

    # An empty document parses to None; treat it as "no rules".
    if stemming_dict is None:
        return ruleset

    for key, rules in stemming_dict.items():

        # Keys already visited while resolving this entry, so a circular
        # ref chain is reported instead of spinning forever.
        visited = {key}
        while isinstance(rules, dict) and "ref" in rules:
            target = rules["ref"]
            if target not in stemming_dict:
                raise RefDoesNotExistException(
                    "ref to {} which doesn't exist".format(target))
            if target in visited:
                raise ValueError(
                    "circular ref involving {}".format(target))
            visited.add(target)
            rules = stemming_dict[target]

        for rule in rules:
            if strip_length:
                rule = do_strip_length(rule)
            if ";" in rule:
                rule, annotation = rule.split(";")
                ruleset.add(key, rule, {annotation})
            else:
                ruleset.add(key, rule)

    return ruleset
Exemplo n.º 2
0
 def setUp(self):
     # Fixture: a one-entry lexicon and a one-rule stemming rule set, both
     # registered on a fresh Inflexion instance.
     lex = Lexicon()
     lex.add("FOO", "bar", "foo")

     stem_rules = StemmingRuleSet()
     # Keep whatever add() returns so tests can refer to it as self.rule.
     self.rule = stem_rules.add("barista", "|o><|llow")

     engine = Inflexion()
     engine.add_lexicon(lex)
     engine.add_stemming_rule_set(stem_rules)
     self.inflexion = engine
Exemplo n.º 3
0
 def setUp(self):
     # Fixture: a one-entry lexicon and a one-rule stemming rule set, both
     # registered on a fresh Inflexion instance.
     lex = Lexicon()
     lex.add("FOO", "bar", "foo")

     stem_rules = StemmingRuleSet()
     # Keep whatever add() returns so tests can refer to it as self.rule.
     self.rule = stem_rules.add("barista", "|o><|llow")

     engine = Inflexion()
     engine.add_lexicon(lex)
     engine.add_stemming_rule_set(stem_rules)
     self.inflexion = engine
Exemplo n.º 4
0
 def test_inflect_1(self):
     # With a single rule under key "foo", inflecting the stem "FAB" should
     # yield exactly one result: base "FA", ending "CE", carrying the object
     # that add() returned and used_default set to False.
     rule_set = StemmingRuleSet()
     added_rule = rule_set.add("foo", "A|B>C<D|E")

     inflections = list(rule_set.inflect("FAB", "foo"))

     expected = [{
         "base": "FA",
         "ending": "CE",
         "rule": added_rule,
         "used_default": False,
     }]
     self.assertEqual(inflections, expected)
Exemplo n.º 5
0
 def test_inflect_1(self):
     # With a single rule under key "foo", inflecting the stem "FAB" should
     # yield exactly one result: base "FA", ending "CE", carrying the object
     # that add() returned and used_default set to False.
     rule_set = StemmingRuleSet()
     added_rule = rule_set.add("foo", "A|B>C<D|E")

     inflections = list(rule_set.inflect("FAB", "foo"))

     expected = [{
         "base": "FA",
         "ending": "CE",
         "rule": added_rule,
         "used_default": False,
     }]
     self.assertEqual(inflections, expected)
Exemplo n.º 6
0
 def test_inflect_4(self):
     # Three rules share the key "foo"; the second and third are added with
     # the annotation sets {"+bar"} and {"-bar"}. Inflecting without a third
     # argument should give the E and G endings, while passing {"bar"}
     # should give the E and F endings.
     rule_set = StemmingRuleSet()
     rule_set.add("foo", "A|B>C<D|E")
     rule_set.add("foo", "A|B>C<D|F", {"+bar"})
     rule_set.add("foo", "A|B>C<D|G", {"-bar"})

     def surface_forms(*extra_args):
         # Sorted base+ending strings produced for stem "FAB" under "foo".
         return sorted(
             hit["base"] + hit["ending"]
             for hit in rule_set.inflect("FAB", "foo", *extra_args))

     self.assertEqual(surface_forms(), ["FACE", "FACG"])
     self.assertEqual(surface_forms({"bar"}), ["FACE", "FACF"])
Exemplo n.º 7
0
def load_stemming(stemming_file, strip_length=False):
    """Build a StemmingRuleSet from a YAML stemming-rules file.

    The file is expected to hold a mapping from key to either an iterable of
    rule strings or a ``{"ref": other_key}`` mapping that reuses whatever is
    stored under ``other_key``. Refs are followed transitively. A rule
    string may carry one annotation after a ``;``; the annotation is passed
    to ``ruleset.add`` as a one-element set.

    Args:
        stemming_file: Path of the YAML file to read.
        strip_length: When true, every rule is passed through
            ``do_strip_length`` before it is added.

    Returns:
        The populated ``StemmingRuleSet`` (empty if the file is empty).

    Raises:
        RefDoesNotExistException: If a ``ref`` names a key that is not
            present in the file.
        ValueError: If a chain of ``ref`` entries loops back on itself, or
            if a rule string contains more than one ``;``.
    """
    ruleset = StemmingRuleSet()

    with open(stemming_file) as f:
        # safe_load instead of yaml.load: a bare yaml.load(f) needs an
        # explicit Loader on current PyYAML and can construct arbitrary
        # Python objects on older versions. Rule files only need plain
        # mappings, lists and strings.
        stemming_dict = yaml.safe_load(f)

    # An empty document parses to None; treat it as "no rules".
    if stemming_dict is None:
        return ruleset

    for key, rules in stemming_dict.items():

        # Keys already visited while resolving this entry, so a circular
        # ref chain is reported instead of spinning forever.
        visited = {key}
        while isinstance(rules, dict) and "ref" in rules:
            target = rules["ref"]
            if target not in stemming_dict:
                raise RefDoesNotExistException(
                    "ref to {} which doesn't exist".format(target))
            if target in visited:
                raise ValueError(
                    "circular ref involving {}".format(target))
            visited.add(target)
            rules = stemming_dict[target]

        for rule in rules:
            if strip_length:
                rule = do_strip_length(rule)
            if ";" in rule:
                rule, annotation = rule.split(";")
                ruleset.add(key, rule, {annotation})
            else:
                ruleset.add(key, rule)

    return ruleset
Exemplo n.º 8
0
 def test_inflect_4(self):
     # Three rules share the key "foo"; the second and third are added with
     # the annotation sets {"+bar"} and {"-bar"}. Inflecting without a third
     # argument should give the E and G endings, while passing {"bar"}
     # should give the E and F endings.
     rule_set = StemmingRuleSet()
     rule_set.add("foo", "A|B>C<D|E")
     rule_set.add("foo", "A|B>C<D|F", {"+bar"})
     rule_set.add("foo", "A|B>C<D|G", {"-bar"})

     def surface_forms(*extra_args):
         # Sorted base+ending strings produced for stem "FAB" under "foo".
         return sorted(
             hit["base"] + hit["ending"]
             for hit in rule_set.inflect("FAB", "foo", *extra_args))

     self.assertEqual(surface_forms(), ["FACE", "FACG"])
     self.assertEqual(surface_forms({"bar"}), ["FACE", "FACF"])
Exemplo n.º 9
0
 def test_possible_stems_1(self):
     # With one rule registered under "foo", the form "FACE" should map back
     # to exactly one (key, stem) candidate: ("foo", "FAB").
     rule_set = StemmingRuleSet()
     rule_set.add("foo", "A|B>C<D|E")

     candidates = list(rule_set.possible_stems("FACE"))

     self.assertEqual(candidates, [("foo", "FAB")])
Exemplo n.º 10
0
 def test_possible_stems_1(self):
     # With one rule registered under "foo", the form "FACE" should map back
     # to exactly one (key, stem) candidate: ("foo", "FAB").
     rule_set = StemmingRuleSet()
     rule_set.add("foo", "A|B>C<D|E")

     candidates = list(rule_set.possible_stems("FACE"))

     self.assertEqual(candidates, [("foo", "FAB")])