def test_rule(self):
    """Parse sample tag rules and check their constraints and rule counts.

    Each case pairs a cleaned S-expression with the expected ``str()`` of
    ``TagRule.feature_constraints`` and the expected ``len()`` of
    ``TagRule.rules``.
    """
    # (rule text, expected str(feature_constraints), expected len(rules))
    cases = [
        (
            clean_sexp("""< (?* [副詞 * * * * ((数量相対名詞修飾))] ) >"""),
            "None",
            2,
        ),
        (
            clean_sexp("""< (?*) ((ダミー1 ^ダミー2)) >"""),
            """FC<SI=['ダミー1'], SNI=['ダミー2']>""",
            1,
        ),
        (
            clean_sexp("""< (?* [* * * * * ((付属))]) >"""),
            "None",
            2,
        ),
        (
            clean_sexp("""< (?* [* * * * * ((付属))]*) >"""),
            "None",
            2,
        ),
    ]
    for rule_text, expected_fc, expected_count in cases:
        parsed = sexpdata.loads(rule_text)
        tag_rule = TagRule(parsed)
        self.assertEqual(expected_fc, str(tag_rule.feature_constraints))
        self.assertEqual(expected_count, len(tag_rule.rules))
def test_rules(self):
    """Build a TagsRule with both flag values and check it renders non-empty."""
    source = clean_sexp(
        """( ?* ^< (?*) ((ダミーA)) > * < (?*) ((^ダミーB)) >? ?* )""")
    parsed = sexpdata.loads(source)
    # The same parsed expression is used for both values of the second argument.
    for order in (True, False):
        rendered = str(TagsRule(parsed, order))
        self.assertEqual(True, len(rendered) > 0)
def test_any(self):
    """Check TagRule.is_any on a bare ``( ?* )`` and on a constrained rule."""
    constrained = clean_sexp(""" < (?* [副詞 * * * * ((数量相対名詞修飾))] ) > """)
    # (S-expression text, expected result of TagRule.is_any)
    cases = [
        (""" ( ?* ) """, True),
        (constrained, False),
    ]
    for text, expected in cases:
        parsed = sexpdata.loads(text)
        self.assertEqual(expected, TagRule.is_any(parsed))
def __init__(self, text: str):
    """Parse a rule text into surrounding/center constraints and operations.

    The cleaned text is loaded as an S-expression with at least four
    elements: element 0 feeds ``prev_constraints``, element 1 feeds
    ``constraints``, element 2 feeds ``next_constraints``, and every
    remaining element becomes one ``(remove, feature)`` operation.

    Args:
        text: Raw rule text; stored unchanged on ``self.text``.

    Raises:
        AssertionError: If the parsed expression has fewer than four
            elements (only while assertions are enabled).
    """
    self.text = text
    parsed = sexpdata.loads(clean_sexp(text))
    assert len(parsed) >= 4

    is_any = self.RuleClass.is_any
    # Stays at 1 unless a concrete center constraint overrides it below.
    self._center_len = 1

    # A part that ``is_any`` accepts is left as None rather than compiled.
    # NOTE(review): the meaning of the boolean passed to SeqRuleClass is not
    # visible here; only the preceding part gets False.
    self.prev_constraints: Optional[Any] = None
    if not is_any(parsed[0]):
        self.prev_constraints = self.SeqRuleClass(parsed[0], False)

    self.next_constraints: Optional[Any] = None
    if not is_any(parsed[2]):
        self.next_constraints = self.SeqRuleClass(parsed[2], True)

    self.constraints: Optional[Any] = None
    if not is_any(parsed[1]):
        center = self.SeqRuleClass(parsed[1], True)
        self.constraints = center
        self._center_len = len(center)

    # A leading "^" on the dumped feature marks a removal and is stripped.
    self.operations: List[Tuple[bool, str]] = []
    for item in parsed[3:]:
        dumped = sexpdata.dumps(item)
        is_removal = dumped.startswith("^")
        name = dumped[1:] if is_removal else dumped
        self.operations.append((is_removal, name))
def test_match(self):
    """Exercise TagRule.match against the first tag of "無停止だ"."""
    feature_rule = TagRule(sexpdata.loads(
        clean_sexp("""< (?* [判定詞] ) ((ダミー1 ^ダミー2)) >""")))

    sentence = "無停止だ"
    morphemes: MList = self.jumanpp.get(sentence)
    tags = Tags(morphemes, [0])

    # The rule is expected to match only once ダミー1 has been added.
    tags[0].fs.add("ダミー0")
    self.assertEqual(False, feature_rule.match(tags, 0))
    tags[0].fs.add("ダミー1")
    self.assertEqual(True, feature_rule.match(tags, 0))

    # Further rules checked against the same tag state, in this order.
    follow_ups = [
        ("""< (?* [接頭辞] ?*) >""", True),
        ("""< (?* [接頭辞] ) ((ダミー9)) >""", False),
        ("""< (?* [非マッチ] ) >""", False),
    ]
    for rule_source, expected in follow_ups:
        rule = TagRule(sexpdata.loads(clean_sexp(rule_source)))
        self.assertEqual(expected, rule.match(tags, 0))