def test_inner_node_child_categoryWithFeats(self):
    semantic_index = SemanticIndex(None)
    semantic_rules = [SemanticRule(r'cat1', r'\P.P'),
                      SemanticRule(r'NP/NP', r'\P.P'),
                      SemanticRule(r'NP', r'\P Q.(Q -> P)', {'child1_category': 'NP/NP'})]
    semantic_index.rules = semantic_rules
    sentence_str = r"""
    <sentence id="s1">
      <tokens>
        <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
        <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
      </tokens>
      <ccg root="sp1-3">
        <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
        <span terminal="t1_2" category="NP/NP[mod=xx]" end="3" begin="2" id="sp1-2"/>
        <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'_base2 -> _base1')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_match_any2(self):
    semantic_index = SemanticIndex(None)
    semantic_rules = [SemanticRule(r'cat1', r'\P.P'),
                      SemanticRule(r'cat2', r'\P.P'),
                      SemanticRule(r'cat3', r'\P.P'),
                      SemanticRule(r'NP', r'\P Q.(Q & P)', {'rule': 'lex'}),
                      SemanticRule(r'NP', r'\P Q.(Q | P)', {'child_any_pos': 'pos1'}),
                      SemanticRule(r'NP', r'\P Q.(Q -> P)', {'child_any_category': 'cat3'})]
    semantic_index.rules = semantic_rules
    sentence_str = r"""
    <sentence id="s1">
      <tokens>
        <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
        <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
        <token base="base3" pos="pos3" surf="surf3" id="t1_3"/>
      </tokens>
      <ccg root="sp1-5">
        <span terminal="t1_1" category="cat1" pos="pos1" end="2" begin="1" id="sp1-1"/>
        <span terminal="t1_2" category="cat2" pos="pos2" end="3" begin="2" id="sp1-2"/>
        <span terminal="t1_3" category="cat3" pos="pos3" end="4" begin="3" id="sp1-3"/>
        <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-4"/>
        <span child="sp1-4 sp1-3" rule="lex" category="NP" end="4" begin="1" id="sp1-5"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'_base3 -> (_base2 | _base1)')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_CFG(self):
    semantic_index = SemanticIndex(None)
    semantic_rules = [SemanticRule(r'N', r'\P.P', {}),
                      SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
                      SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)', {'rule': '>'})]
    semantic_index.rules = semantic_rules
    ccg_tree = assign_semantics_to_ccg(self.sentence, semantic_index)
    semantics = lexpr(ccg_tree.get('sem', None))
    expected_semantics = lexpr(r'(_base1 & _base2) -> (_base3 & _base4)')
    self.assertEqual(expected_semantics, semantics)
def test_RTG3Paths3Vars(self):
    semantic_index = SemanticIndex(None)
    semantic_rules = [SemanticRule(r'N', r'\P.P', {}),
                      SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
                      SemanticRule(r'NPNP', r'\F1 F2 F3.((F3 & F2) -> F1)',
                                   {'var_paths': [[0, 0], [0, 1], [1, 0]], 'rule': '>'})]
    semantic_index.rules = semantic_rules
    ccg_tree = assign_semantics_to_ccg(self.sentence, semantic_index)
    semantics = lexpr(ccg_tree.get('sem', None))
    expected_semantics = lexpr(r'((_base3 & _base2) -> _base1)')
    self.assertEqual(expected_semantics, semantics)
def test_RTG1Path(self):
    semantic_index = SemanticIndex(None)
    semantic_rules = [SemanticRule(r'N', r'\P.P', {}),
                      SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
                      SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)',
                                   {'var_paths': [[0, 1]], 'rule': '>'})]
    semantic_index.rules = semantic_rules
    ccg_tree = assign_semantics_to_ccg(self.sentence, semantic_index)
    semantics = lexpr(ccg_tree.get('sem', None))
    expected_semantics = lexpr(r'\F2.(_base2 -> F2)')
    self.assertEqual(expected_semantics, semantics)
def test_predicate2_argument1_and_2Exprs2(self):
    exprs = [lexpr('language(Python, Scala)'), lexpr('nice(Python)')]
    dynamic_library = build_dynamic_library(exprs)
    expected_dynamic_library = \
        ['Parameter nice : Entity -> Prop.',
         'Parameter Python : Entity.',
         'Parameter Scala : Entity.',
         'Parameter language : Entity -> Entity -> Prop.']
    for item in dynamic_library:
        self.assertIn(item, expected_dynamic_library)
    self.assertEqual(len(expected_dynamic_library), len(dynamic_library))
def test_pred2_prop_prop(self):
    exprs = [lexpr('nice(language(Python, Scala))'),
             lexpr('fun(language(Python, Scala))')]
    dynamic_library = build_dynamic_library(exprs)
    expected_dynamic_library = \
        ['Parameter nice : Prop -> Prop.',
         'Parameter fun : Prop -> Prop.',
         'Parameter Python : Entity.',
         'Parameter Scala : Entity.',
         'Parameter language : Entity -> Entity -> Prop.']
    for item in dynamic_library:
        self.assertIn(item, expected_dynamic_library)
    self.assertEqual(len(expected_dynamic_library), len(dynamic_library))
def get_semantic_representation(self, ccg_tree, tokens):
    rule_pattern = make_rule_pattern_from_ccg_node(ccg_tree, tokens)
    # Obtain the semantic template.
    relevant_rules = self.get_relevant_rules(rule_pattern)
    if not relevant_rules and len(ccg_tree) == 2:
        return None
    elif not relevant_rules:
        semantic_template = build_default_template(rule_pattern, ccg_tree)
        semantic_rule = None
    else:
        semantic_rule = relevant_rules.pop()
        semantic_template = semantic_rule.semantics
    # Apply the template to the relevant (current, child or children) CCG node(s).
    if len(ccg_tree) == 0:
        base = rule_pattern.attributes.get('base')
        surf = rule_pattern.attributes.get('surf')
        assert base and surf, \
            'The current CCG node should contain attributes "base" and "surf". ' \
            'CCG node: {0}\nrule_pattern attributes: {1}'.format(
                etree.tostring(ccg_tree, pretty_print=True), rule_pattern.attributes)
        predicate_string = base if base != '*' else surf
        predicate = lexpr(predicate_string)
        semantics = semantic_template(predicate).simplify()
        # Assign coq types.
        if semantic_rule is not None and 'coq_type' in semantic_rule.attributes:
            coq_types = semantic_rule.attributes['coq_type']
            ccg_tree.set('coq_type',
                         'Parameter {0} : {1}.'.format(predicate_string, coq_types))
        else:
            ccg_tree.set('coq_type', "")
    elif len(ccg_tree) == 1:
        predicate = lexpr(ccg_tree[0].get('sem'))
        semantics = semantic_template(predicate).simplify()
        # Propagate the coq type of the only child.
        ccg_tree.set('coq_type', ccg_tree[0].attrib.get('coq_type', ""))
    else:
        # Binary node: feed the semantics of the children (or grandchildren,
        # as specified by 'var_paths') into the template, one path at a time.
        var_paths = semantic_rule.attributes.get('var_paths', [[0], [1]])
        semantics = semantic_template
        coq_types_list = []
        for path in var_paths:
            child_node = get_node_at_path(ccg_tree, path)
            child_semantics = lexpr(child_node.get('sem'))
            semantics = semantics(child_semantics).simplify()
            child_coq_types = child_node.get('coq_type', None)
            if child_coq_types is not None and child_coq_types != "":
                coq_types_list.append(child_coq_types)
        if coq_types_list:
            ccg_tree.set('coq_type', ' ||| '.join(coq_types_list))
    return semantics
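# A hand-run of the var_paths branch above, mirroring test_RTG3Paths3Vars.
# This is an illustration only: the constants _base1.._base3 stand in for the
# child semantics that get_node_at_path() would read off a real CCG tree, in
# the order given by var_paths = [[0, 0], [0, 1], [1, 0]].
template = lexpr(r'\F1 F2 F3.((F3 & F2) -> F1)')
for child_sem in [lexpr('_base1'), lexpr('_base2'), lexpr('_base3')]:
    template = template(child_sem).simplify()
print(template)  # ((_base3 & _base2) -> _base1)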
def coq_string_expr(expression):
    if isinstance(expression, str):
        expression = lexpr(expression)
    expr_coq_str = ''
    # Note: AndExpression and OrExpression are subclasses of BinaryExpression,
    # so they must be dispatched before the generic binary case.
    if isinstance(expression, ApplicationExpression):
        expr_coq_str = coq_string_application_expr(expression)
    elif isinstance(expression, AbstractVariableExpression):
        expr_coq_str = coq_string_abstract_variable_expr(expression)
    elif isinstance(expression, LambdaExpression):
        expr_coq_str = coq_string_lambda_expr(expression)
    elif isinstance(expression, QuantifiedExpression):
        expr_coq_str = coq_string_quantified_expr(expression)
    elif isinstance(expression, AndExpression):
        expr_coq_str = coq_string_and_expr(expression)
    elif isinstance(expression, OrExpression):
        expr_coq_str = coq_string_or_expr(expression)
    elif isinstance(expression, NegatedExpression):
        expr_coq_str = coq_string_not_expr(expression)
    elif isinstance(expression, BinaryExpression):
        expr_coq_str = coq_string_binary_expr(expression)
    elif isinstance(expression, Variable):
        expr_coq_str = '%s' % expression
    else:
        expr_coq_str = str(expression)
    return expr_coq_str
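# Illustrative only: assuming the coq_string_* helpers above produce the
# surface form exercised by the conversion tests below (e.g. '(and (P x) Q)'),
# this is how the dispatcher would be called.
print(coq_string_expr('(P(x) & Q)'))  # (and (P x) Q)
print(coq_string_expr('-(P(x,y))'))   # (not (P x y))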
def test_token_to_const_latin(self):
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="*" pos="名詞-固有名詞-組織" surf="Scala" id="t0_0"/>
      </tokens>
      <ccg root="sp0-3">
        <span terminal="t0_0" category="NP[mod=nm,case=nc]" end="1" begin="0" id="sp0-3"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'_Scala')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_token_to_function_2args(self):
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="は" pos="助詞-係助詞" surf="は" id="t0_1"/>
      </tokens>
      <ccg root="sp0-4">
        <span terminal="t0_1" category="(S/S)\NP[mod=nm,case=nc]" end="2" begin="1" id="sp0-4"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'\x y._は(y, x)')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_token_to_function_1arg(self):
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="です" katsuyou="基本形" pos="助動詞" surf="です" id="t0_4"/>
      </tokens>
      <ccg root="sp0-10">
        <span terminal="t0_4" category="S[mod=nm,form=base]\NP[mod=nm,case=nc]" end="5" begin="4" id="sp0-10"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'\x._です(x)')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_token_to_const_japanese(self):
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="言語" pos="名詞-一般" surf="言語" id="t0_3"/>
      </tokens>
      <ccg root="sp0-9">
        <span terminal="t0_3" category="NP[mod=nm,case=nc]" end="4" begin="3" id="sp0-9"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'_言語')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_typeraising_for_unary_pred(self):
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="良い" katsuyou="基本形" pos="形容詞-自立" surf="良い" id="t0_2"/>
      </tokens>
      <ccg root="sp0-7">
        <span child="sp0-8" rule="ADN" category="NP[case=nc]/NP[case=nc]" end="3" begin="2" id="sp0-7"/>
        <span terminal="t0_2" category="S[mod=adn,form=base]" end="3" begin="2" id="sp0-8"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'\P x.(P(x) & _良い(x))')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_Lambda1exists1(self):
    exprs = [lexpr(r'\P.exist x.P(x)')]
    dynamic_library = build_dynamic_library(exprs)
    expected_dynamic_library = \
        ['Parameter P : Entity -> Prop.',
         'Parameter x : Entity.']
    for item in dynamic_library:
        self.assertIn(item, expected_dynamic_library)
    self.assertEqual(len(expected_dynamic_library), len(dynamic_library))
def test_func_combination_backward(self):
    sentence_str = r"""
    <sentence id="s1">
      <tokens>
        <token base="簡潔" pos="名詞-形容動詞語幹" surf="簡潔" id="t1_3"/>
        <token base="です" katsuyou="基本形" pos="助動詞" surf="です" id="t1_4"/>
      </tokens>
      <ccg root="sp1-7">
        <span child="sp1-8 sp1-9" rule="&lt;B" category="S[mod=nm,form=base]\NP[mod=nm,case=ga]" end="5" begin="3" id="sp1-7"/>
        <span terminal="t1_3" category="S[mod=nm,form=da]\NP[mod=nm,case=ga]" end="4" begin="3" id="sp1-8"/>
        <span terminal="t1_4" category="S[mod=nm,form=base]\S[mod=nm,form=da]" end="5" begin="4" id="sp1-9"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'\x._です(_簡潔(x))')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_func_application_backward(self):
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="*" pos="名詞-固有名詞-組織" surf="Scala" id="t0_0"/>
        <token base="は" pos="助詞-係助詞" surf="は" id="t0_1"/>
      </tokens>
      <ccg root="sp0-2">
        <span child="sp0-3 sp0-4" rule="&lt;" category="S/S" end="2" begin="0" id="sp0-2"/>
        <span terminal="t0_0" category="NP[mod=nm,case=nc]" end="1" begin="0" id="sp0-3"/>
        <span terminal="t0_1" category="(S/S)\NP[mod=nm,case=nc]" end="2" begin="1" id="sp0-4"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'\y._は(y, _Scala)')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_np_feature_no(self):
    semantic_index = SemanticIndex(None)
    semantic_index.rules = [SemanticRule(r'NP', r'\P.P')]
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="basepred" pos="pos1" surf="surfpred" id="t0_0"/>
      </tokens>
      <ccg root="sp0-3">
        <span terminal="t0_0" category="NP" end="1" begin="0" id="sp0-3"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'_basepred')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_func_combination_backwardSimpleTwoArgs(self):
    sentence_str = r"""
    <sentence id="s1">
      <tokens>
        <token base="F" pos="pos1" surf="F" id="t1_3"/>
        <token base="G" katsuyou="katsuyou2" pos="pos2" surf="G" id="t1_4"/>
      </tokens>
      <ccg root="sp1-7">
        <span child="sp1-8 sp1-9" rule="&lt;B2" category="S[mod=nm,form=base]\NP[mod=nm,case=ga]\NP" end="5" begin="3" id="sp1-7"/>
        <span terminal="t1_3" category="S[mod=nm,form=da]\NP[mod=nm,case=ga]\NP" end="4" begin="3" id="sp1-8"/>
        <span terminal="t1_4" category="S[mod=nm,form=base]\S[mod=nm,form=da]" end="5" begin="4" id="sp1-9"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'\y x._G(_F(x, y))')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_RTG3Paths2Vars(self):
    semantic_index = SemanticIndex(None)
    semantic_rules = [SemanticRule(r'N', r'\P.P', {}),
                      SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
                      SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)',
                                   {'var_paths': [[0, 0], [0, 1], [1, 0]], 'rule': '>'})]
    semantic_index.rules = semantic_rules
    ccg_tree = assign_semantics_to_ccg(self.sentence, semantic_index)
    # Three paths but only two lambda abstractions: applying the third child's
    # semantics cannot simplify, so the stored 'sem' string does not parse back.
    with self.assertRaises(nltk.sem.logic.LogicalExpressionException):
        semantics = lexpr(ccg_tree.get('sem', None))
def test_func_application_forward(self):
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="良い" katsuyou="基本形" pos="形容詞-自立" surf="良い" id="t0_2"/>
        <token base="言語" pos="名詞-一般" surf="言語" id="t0_3"/>
      </tokens>
      <ccg root="sp0-6">
        <span child="sp0-7 sp0-9" rule=">" category="NP[mod=nm,case=nc]" end="4" begin="2" id="sp0-6"/>
        <span child="sp0-8" rule="ADN" category="NP[case=nc]/NP[case=nc]" end="3" begin="2" id="sp0-7"/>
        <span terminal="t0_2" category="S[mod=adn,form=base]" end="3" begin="2" id="sp0-8"/>
        <span terminal="t0_3" category="NP[mod=nm,case=nc]" end="4" begin="3" id="sp0-9"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'\x.(_言語(x) & _良い(x))')
    self.assertEqual(expected_semantics, lexpr(semantics))
def type_raise(function, order=1):
    """
    Produce a higher-order function based on `function`. The argument `order`
    indicates the number of desired arguments of the new function.
    """
    assert order >= 0, 'The order of the type-raising should be >= 0'
    if isinstance(function, ConstantExpression):
        type_raiser = lexpr(r'\P X.P(X)')
        type_raised_function = type_raiser(function).simplify()
    else:
        if order == 1:
            type_raiser = lexpr(r'\P0 P1 X0.P0(P1(X0))')
        elif order == 2:
            type_raiser = lexpr(r'\P0 P1 X0 X1.P0(P1(X0, X1))')
        elif order == 3:
            type_raiser = lexpr(r'\P0 P1 X0 X1 X2.P0(P1(X0, X1, X2))')
        else:
            assert False, 'Type-raising at order > 3 is not supported'
        type_raised_function = type_raiser(function).simplify()
    return type_raised_function
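# A small illustration with made-up predicates (not from the test suite).
# Constants are raised with \P X.P(X); functions are composed under a new
# outer function, which is what the <B combination rules rely on.
const = lexpr(r'_nice')
print(type_raise(const))          # \X._nice(X)
func = lexpr(r'\x._nice(x)')
print(type_raise(func, order=1))  # \P1 X0._nice(P1(X0))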
def test_Lambda3exists2(self):
    exprs = [lexpr(r'\P y.\T.exist x.exists z.T(P(x, y), z)')]
    dynamic_library = build_dynamic_library(exprs)
    expected_dynamic_library = \
        ['Parameter P : Entity -> Entity -> Prop.',
         'Parameter T : Prop -> Entity -> Prop.',
         'Parameter x : Entity.',
         'Parameter y : Entity.',
         'Parameter z : Entity.']
    for item in dynamic_library:
        self.assertIn(item, expected_dynamic_library)
    self.assertEqual(len(expected_dynamic_library), len(dynamic_library))
def test_lexical_unary(self):
    semantic_index = SemanticIndex(None)
    semantic_rules = [SemanticRule(r'N', r'\P.P'),
                      SemanticRule(r'NP', r'\P.(P -> P)', {'rule': 'lex'})]
    semantic_index.rules = semantic_rules
    sentence_str = r"""
    <sentence id="s1">
      <tokens>
        <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
      </tokens>
      <ccg root="sp1-2">
        <span terminal="t1_1" category="N" end="2" begin="1" id="sp1-1"/>
        <span child="sp1-1" rule="lex" category="NP" end="2" begin="1" id="sp1-2"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'_base1 -> _base1')
    self.assertEqual(expected_semantics, lexpr(semantics))
def combine_children_exprs(ccg_tree, tokens, semantic_index):
    """
    Perform forward/backward function application/combination.
    """
    assert len(ccg_tree) >= 2, \
        'There should be at least two children to combine expressions: {0}'\
        .format(ccg_tree)
    # Assign coq types.
    coq_types_left = ccg_tree[0].attrib.get('coq_type', "")
    coq_types_right = ccg_tree[1].attrib.get('coq_type', "")
    if coq_types_left and coq_types_right:
        coq_types = coq_types_left + ' ||| ' + coq_types_right
    elif coq_types_left:
        coq_types = coq_types_left
    else:
        coq_types = coq_types_right
    ccg_tree.set('coq_type', coq_types)
    semantics = semantic_index.get_semantic_representation(ccg_tree, tokens)
    if semantics:
        ccg_tree.set('sem', str(semantics))
        return None
    # Back-off mechanism in case no semantic templates are available:
    if is_forward_operation(ccg_tree):
        function_index, argument_index = 0, 1
    else:
        function_index, argument_index = 1, 0
    function = lexpr(ccg_tree[function_index].attrib['sem'])
    argument = lexpr(ccg_tree[argument_index].attrib['sem'])
    combination_operation = get_combination_op(ccg_tree)
    if combination_operation == 'function_application':
        evaluation = function(argument).simplify()
    elif combination_operation == 'function_combination':
        num_arguments = get_num_args(ccg_tree)
        type_raised_function = type_raise(function, num_arguments)
        evaluation = type_raised_function(argument).simplify()
    else:
        assert False, \
            'This node should be a function application or combination: {0}'\
            .format(etree.tostring(ccg_tree, pretty_print=True))
    ccg_tree.set('sem', str(evaluation))
    return None
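# A hand-run of the function-combination back-off with made-up child semantics;
# it mirrors test_func_combination_backward, without building any XML.
function = lexpr(r'\x._です(x)')    # semantics of the right child of a <B node
argument = lexpr(r'\x._簡潔(x)')    # semantics of the left child
raised = type_raise(function, 1)    # \P1 X0._です(P1(X0))
print(raised(argument).simplify())  # \X0._です(_簡潔(X0))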
def test_np_feature_syntactic_featNoSubsume(self):
    semantic_index = SemanticIndex(None)
    semantic_index.rules = [SemanticRule(r'NP[feat1=val1]', r'\P.(P | P)'),
                            SemanticRule(r'NP[feat2=val1]', r'\P.(P & P)')]
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="basepred" pos="pos3" surf="surfpred" id="t0_0"/>
      </tokens>
      <ccg root="sp0-3">
        <span terminal="t0_0" category="NP" end="1" begin="0" id="sp0-3"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'_basepred')
    self.assertEqual(expected_semantics, lexpr(semantics))
def __init__(self, category, semantics, attributes={}):
    if not isinstance(category, Category):
        self.category = Category(category)
    else:
        self.category = category
    if semantics and not isinstance(semantics, Expression):
        self.semantics = lexpr(semantics)
    else:
        self.semantics = semantics
    # Deep-copy so that the shared, mutable default argument is never mutated.
    self.attributes = copy.deepcopy(attributes)
    if 'surf' in self.attributes:
        self.attributes['surf'] = normalize_token(self.attributes['surf'])
    if 'base' in self.attributes:
        self.attributes['base'] = normalize_token(self.attributes['base'])
def build_default_template(rule_pattern, ccg_tree):
    category = rule_pattern.category
    if len(ccg_tree) == 0:
        num_arguments = category.get_num_args()
    elif len(ccg_tree) == 1:
        category2 = Category(ccg_tree.get('category'))
        num_arguments = category.get_num_args() - category2.get_num_args()
    variable_names = ['x' + str(i) for i in range(num_arguments)]
    if not variable_names:
        template_string = r'\P.P'
    else:
        # Variant that discards unmatched lexical items: \E O.O applied to the
        # item's predicate reduces to the identity \O.O.
        template_string = r'\E O.O'
    template = lexpr(template_string)
    return template
def test_Lambda3exists2All1Mixed(self):
    exprs = [lexpr(r'\P y.\T.all w.exists z.T(exist x.P(x, y), z, w)')]
    dynamic_library, _ = build_dynamic_library(exprs)
    dynamic_library = nltk_sig_to_coq_lib(dynamic_library)
    expected_dynamic_library = \
        ['Parameter P : Entity -> (Entity -> Prop).',
         'Parameter T : Prop -> (Entity -> (Entity -> Prop)).',
         'Parameter w : Entity.',
         'Parameter x : Entity.',
         'Parameter y : Entity.',
         'Parameter z : Entity.']
    for item in dynamic_library:
        self.assertIn(item, expected_dynamic_library)
    self.assertEqual(len(expected_dynamic_library), len(dynamic_library))
def test_func_application_backward(self):
    # 'は' has category (S/S)\NP[mod=nm,case=nc], which is not in the
    # unittest semantic templates. Thus it is assigned the default
    # \E O.O, and 'Scala' becomes the final meaning representation.
    sentence_str = r"""
    <sentence id="s0">
      <tokens>
        <token base="*" pos="名詞-固有名詞-組織" surf="Scala" id="t0_0"/>
        <token base="は" pos="助詞-係助詞" surf="は" id="t0_1"/>
      </tokens>
      <ccg root="sp0-2">
        <span child="sp0-3 sp0-4" rule="&lt;" category="S/S" end="2" begin="0" id="sp0-2"/>
        <span terminal="t0_0" category="NP[mod=nm,case=nc]" end="1" begin="0" id="sp0-3"/>
        <span terminal="t0_1" category="(S/S)\NP[mod=nm,case=nc]" end="2" begin="1" id="sp0-4"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'_Scala')
    self.assertEqual(expected_semantics, lexpr(semantics))
def test_func_combination_backwardComplexTwoArgs(self):
    semantic_index = SemanticIndex(None)
    semantic_rules = [SemanticRule(r'S\NP\NP', r'\P y x e. P(e, x, y)'),
                      SemanticRule(r'S\S', r'\P Q e. AND(past(e), Q(e))')]
    semantic_index.rules = semantic_rules
    sentence_str = r"""
    <sentence id="s1">
      <tokens>
        <token id="s1_4" surf="ほめ" pos="動詞" pos1="自立" pos2="*" pos3="*"
               inflectionType="一段" inflectionForm="連用形" base="ほめる" reading="ホメ"/>
        <token id="s1_5" surf="た" pos="助動詞" pos1="*" pos2="*" pos3="*"
               inflectionType="特殊・タ" inflectionForm="基本形" base="た" reading="タ"/>
      </tokens>
      <ccg root="s1_sp9">
        <span id="s1_sp9" begin="4" end="6"
              category="(S[mod=nm,form=base]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]"
              rule="&lt;B2" child="s1_sp10 s1_sp11"/>
        <span id="s1_sp10" begin="4" end="5"
              category="(S[mod=nm,form=cont]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]"
              terminal="s1_4"/>
        <span id="s1_sp11" begin="5" end="6"
              category="S[mod=nm,form=base]\S[mod=nm,form=cont]" terminal="s1_5"/>
      </ccg>
    </sentence>
    """
    sentence = etree.fromstring(sentence_str)
    ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
    semantics = ccg_tree.get('sem', None)
    expected_semantics = lexpr(r'\y x e.AND(past(e), _ほめる(x, y, e))')
    self.assertEqual(expected_semantics, lexpr(semantics))
def build_default_template(rule_pattern, ccg_tree):
    category = rule_pattern.category
    if len(ccg_tree) == 0:
        num_arguments = category.get_num_args()
    elif len(ccg_tree) == 1:
        category2 = Category(ccg_tree.get('category'))
        num_arguments = category.get_num_args() - category2.get_num_args()
    variable_names = ['x' + str(i) for i in range(num_arguments)]
    if not variable_names:
        template_string = r'\P.P'
    else:
        template_string = r'\P ' + ' '.join(variable_names) \
                          + '.P(' + ', '.join(reversed(variable_names)) + ')'
    template = lexpr(template_string)
    return template
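# Worked example: for a terminal whose category takes two arguments (e.g. the
# (S/S)\NP of test_token_to_function_2args), the template string built above
# is \P x0 x1.P(x1, x0), so the lexical predicate gets its arguments reversed:
template = lexpr(r'\P x0 x1.P(x1, x0)')
print(template(lexpr(r'_は')).simplify())  # \x0 x1._は(x1,x0)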
def load_semantic_rules(fn):
    semantic_rules = []
    loaded = None
    with codecs.open(fn, 'r', 'utf-8') as infile:
        loaded = yaml.load(infile, Loader=yaml.SafeLoader)
    if not loaded:
        raise ValueError("couldn't load file: " + fn)
    for attributes in loaded:
        # Compulsory fields.
        category = attributes['category']
        semantics = lexpr(attributes['semantics'])
        del attributes['category'], attributes['semantics']
        for attr_name, attr_val in attributes.items():
            if attr_name.endswith('base') or attr_name.endswith('surf'):
                attributes[attr_name] = normalize_token(attr_val)
        new_semantic_rule = SemanticRule(category, semantics, attributes)
        semantic_rules.append(new_semantic_rule)
    return semantic_rules
def formula_to_tree(expr):
    if isinstance(expr, str):
        expr = lexpr(expr)
    expr_str = str(expr)
    G = nx.DiGraph()
    if isinstance(expr, (ConstantExpression, AbstractVariableExpression, Variable)):
        G.graph['head_node'] = next(node_id_gen)
        type_str = 'constant' if isinstance(expr, ConstantExpression) else 'variable'
        G.add_node(G.graph['head_node'], label=expr_str, type=type_str)
    elif isinstance(expr, BinaryExpression):
        G.graph['head_node'] = next(node_id_gen)
        G.add_node(G.graph['head_node'], label=expr.getOp(), type='op')
        graphs = map(formula_to_tree, [expr.first, expr.second])
        G = merge_graphs_to(G, graphs)
    elif isinstance(expr, ApplicationExpression):
        func, args = expr.uncurry()
        G = formula_to_tree(func)
        args_graphs = map(formula_to_tree, args)
        G = merge_graphs_to(G, args_graphs)
    elif isinstance(expr, NegatedExpression):
        G.graph['head_node'] = next(node_id_gen)
        G.add_node(G.graph['head_node'], label='not', type='op')
        graphs = map(formula_to_tree, [expr.term])
        G = merge_graphs_to(G, graphs)
    elif isinstance(expr, VariableBinderExpression):
        # Defaults guard against binder subtypes beyond the two cases below.
        quant, node_type = '<quant_unk>', 'binder'
        if isinstance(expr, QuantifiedExpression):
            quant = expr.getQuantifier()
            node_type = 'quantifier'
        elif isinstance(expr, LambdaExpression):
            quant = 'lambda'
        G.graph['head_node'] = next(node_id_gen)
        G.add_node(G.graph['head_node'], label=quant, type=node_type)
        var_node_id = next(node_id_gen)
        G.add_node(var_node_id, label=str(expr.variable), type='variable')
        G.add_edge(G.graph['head_node'], var_node_id, type='var_bind')
        graphs = map(formula_to_tree, [expr.term])
        G = merge_graphs_to(G, graphs)
    return G
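# Hypothetical run (assumes networkx, merge_graphs_to and the node_id_gen
# counter above are in scope); node ids depend on the counter state, but the
# multiset of labels does not.
G = formula_to_tree(lexpr(r'exists x. P(x)'))
print(sorted(d['label'] for _, d in G.nodes(data=True)))
# something like ['P', 'exists', 'x', 'x']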
def build_dynamic_library(exprs, coq_types={}):
    """
    Create a dynamic library with types of objects that appear in coq formulae.
    Optionally, it may receive partially specified signatures for objects using
    the NLTK format (e.g. {'_john' : e, '_mary' : e, '_love' : <e,<e,t>>}).
    """
    # If expressions are strings, convert them into logic formulae.
    exprs_logic = []
    for expr in exprs:
        if isinstance(expr, str):
            exprs_logic.append(lexpr(expr))
        else:
            exprs_logic.append(expr)
    signatures = [resolve_types(e) for e in exprs_logic]
    signature = combine_signatures(signatures)
    signature = remove_reserved_predicates(signature)
    dynamic_library = []
    for predicate, pred_type in signature.items():
        library_entry = build_library_entry(predicate, pred_type)
        dynamic_library.append(library_entry)
    return list(set(dynamic_library))
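# Usage sketch mirroring test_predicate2_argument1_and_2Exprs2; the expected
# entries are taken from that test (sorted here because the set order varies).
exprs = [lexpr('language(Python, Scala)'), lexpr('nice(Python)')]
for entry in sorted(build_dynamic_library(exprs)):
    print(entry)
# Parameter Python : Entity.
# Parameter Scala : Entity.
# Parameter language : Entity -> Entity -> Prop.
# Parameter nice : Entity -> Prop.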
def test_Multipredicate_concat_yesPredFSymDash3(self):
    expr_str = str(lexpr(r'F(F(lithium,ion),F(ion,battery))'))
    concat_expr_str = resolve_prefix_to_infix_operations(expr_str, 'F', '-')
    expected_concat_expr_str = 'lithium-ion-ion-battery'
    self.assertEqual(expected_concat_expr_str, concat_expr_str)

def test_predicate_concat_yesPredFSymDash(self):
    expr_str = str(lexpr(r'F(lithium,ion)'))
    concat_expr_str = resolve_prefix_to_infix_operations(expr_str, 'F', '-')
    expected_concat_expr_str = 'lithium-ion'
    self.assertEqual(expected_concat_expr_str, concat_expr_str)

def test_predicate_concat_yes(self):
    expr_str = str(lexpr(r'R(lithium,ion)'))
    concat_expr_str = resolve_prefix_to_infix_operations(expr_str)
    expected_concat_expr_str = 'lithiumion'
    self.assertEqual(expected_concat_expr_str, concat_expr_str)

def test_entity_no_concat(self):
    expr_str = str(lexpr(r'ion'))
    concat_expr_str = resolve_prefix_to_infix_operations(expr_str)
    expected_concat_expr_str = 'ion'
    self.assertEqual(expected_concat_expr_str, concat_expr_str)
def test_disjunction_predicates2(self):
    nltk_expr = lexpr(r'(P | Q)')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(or P Q)'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_var(self):
    formula = lexpr(r'x')
    eG = nx.DiGraph()
    eG.add_nodes_from([(i, {'label': s}) for i, s in enumerate('x')])
    G = formula_to_tree(formula)
    self.assert_graphs_are_equal(eG, G)

def test_quant_swap(self):
    formula1 = lexpr(r'forall x. exists y. P(x, y)')
    formula2 = lexpr(r'exists y. forall x. P(x, y)')
    graph1 = formula_to_graph(formula1, normalize=True)
    graph2 = formula_to_graph(formula2, normalize=True)
    self.assert_graphs_are_equal(graph1, graph2)

def test_universal_args2(self):
    nltk_expr = lexpr(r'all x y. P(x,y)')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(forall x y, (P x y))'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_tautology(self):
    nltk_expr = lexpr(r'all x y.TrueP')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(forall x y, True)'
    self.assertEqual(expected_coq_expr, coq_expr)
def test_predicate1_arg(self):
    nltk_expr = lexpr(r'P(x)')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(P x)'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_predicate3_args1Pred(self):
    nltk_expr = lexpr(r'P(x,y,R(z))')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(P x y (R z))'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_negation_predicate(self):
    nltk_expr = lexpr(r'-(P)')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(not P)'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_Negationpredicate2_args(self):
    nltk_expr = lexpr(r'-(P(x,y))')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(not (P x y))'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_conjunction_predicate2_arg1(self):
    nltk_expr = lexpr(r'(P(x) & Q)')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(and (P x) Q)'
    self.assertEqual(expected_coq_expr, coq_expr)
def test_Multipredicate_concat_yesPredComplexSymDash(self):
    expr_str = str(lexpr(r'O(C(lithium,ion),CONCAT(ion,battery))'))
    concat_expr_str = resolve_prefix_to_infix_operations(expr_str, 'CONCAT', '-')
    expected_concat_expr_str = 'O(C(lithium,ion),ion-battery)'
    self.assertEqual(expected_concat_expr_str, concat_expr_str)

def test_universal_arg1_proposition(self):
    nltk_expr = lexpr(r'all x. P')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(forall x, P)'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_existentialArgs2(self):
    nltk_expr = lexpr(r'exists x y. P(x,y)')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(exists x y, (P x y))'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_quant_inner(self):
    formula1 = lexpr(r'forall x. (P(x) | exists y. Q(x, y))')
    formula2 = lexpr(r'forall x. exists y. (P(x) | Q(x, y))')
    graph1 = formula_to_graph(formula1, normalize=True)
    graph2 = formula_to_graph(formula2, normalize=True)
    self.assert_graphs_are_equal(graph1, graph2)

def test_existentialArg1Proposition(self):
    nltk_expr = lexpr(r'exists x. P')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(exists x, P)'
    self.assertEqual(expected_coq_expr, coq_expr)

def test_disjunction_predicate2_arg1and1(self):
    nltk_expr = lexpr(r'(P(x) | Q(y))')
    coq_expr = normalize_interpretation(nltk_expr)
    expected_coq_expr = '(or (P x) (Q y))'
    self.assertEqual(expected_coq_expr, coq_expr)
import logging

import numpy as np

np.random.seed(seed=seed)  # 'seed' is set elsewhere in the original script.

from keras.models import Model
from keras.layers.embeddings import Embedding

from graph_emb import make_child_parent_branch

logging.basicConfig(level=logging.DEBUG)

formulas_str = [
    'exists x. pred1(x)',
    'exists y. pred1(y)',
    'exists y. all x. (pred1(y) & pred2(x, y))',
    'exists y. all x. (pred1(y) & pred2(y, x))',
    'exists y. all x. (pred2(y, x) & pred1(y))',
    'exists y. all x. (pred2(y, x) & pred1(y))']
formulas = [lexpr(f) for f in formulas_str]
graph_data = GraphData.from_formulas(formulas, emb_dim=3)
graph_data.make_matrices()

max_nodes = graph_data.get_max_nodes()
max_bi_relations = graph_data.get_max_bi_relations()
max_tri_relations = graph_data.get_max_treelets()
logging.debug('Embeddings shape: {0}'.format(graph_data.node_embs.shape))

token_emb = Embedding(
    input_dim=graph_data.node_embs.shape[0],
    output_dim=graph_data.node_embs.shape[1],
    weights=[graph_data.node_embs],
    mask_zero=False,  # Reshape layer does not support masking.
    trainable=True,
    name='token_emb')