示例#1
0
 def __init__(self, category, semantics, attributes=None):
     """Store a normalized rule: category, logic semantics, and attributes.

     Args:
         category: a Category instance, or a value Category() can parse.
         semantics: an Expression, or a string parsed with lexpr; falsy
             values (e.g. None) are stored unchanged.
         attributes: optional dict of extra attributes; it is deep-copied,
             so the caller's dict is never mutated.
     """
     # `attributes={}` was a mutable default argument; it was only safe
     # because of the deepcopy below. Use the None sentinel instead.
     if attributes is None:
         attributes = {}
     if not isinstance(category, Category):
         self.category = Category(category)
     else:
         self.category = category
     # Parse string semantics into a logic Expression.
     if semantics and not isinstance(semantics, Expression):
         self.semantics = lexpr(semantics)
     else:
         self.semantics = semantics
     # Deep copy so later normalization does not leak into the caller.
     self.attributes = copy.deepcopy(attributes)
     if 'surf' in self.attributes:
         self.attributes['surf'] = normalize_token(self.attributes['surf'])
     if 'base' in self.attributes:
         self.attributes['base'] = normalize_token(self.attributes['base'])
示例#2
0
 def __init__(self, category, semantics, attributes=None):
     """Store a normalized rule: category, logic semantics, and attributes.

     Args:
         category: a Category instance, or a value Category() can parse.
         semantics: an Expression, or a string parsed with lexpr; falsy
             values (e.g. None) are stored unchanged.
         attributes: optional dict of extra attributes; it is deep-copied,
             so the caller's dict is never mutated.
     """
     # Replace the mutable default `attributes = {}` with a None sentinel;
     # behavior is unchanged because the dict was deep-copied anyway.
     if attributes is None:
         attributes = {}
     if not isinstance(category, Category):
         self.category = Category(category)
     else:
         self.category = category
     # Parse string semantics into a logic Expression.
     if semantics and not isinstance(semantics, Expression):
         self.semantics = lexpr(semantics)
     else:
         self.semantics = semantics
     # Deep copy so later normalization does not leak into the caller.
     self.attributes = copy.deepcopy(attributes)
     # Normalize token attributes (indentation here also made consistent
     # with the rest of the body).
     if 'surf' in self.attributes:
         self.attributes['surf'] = normalize_token(self.attributes['surf'])
     if 'base' in self.attributes:
         self.attributes['base'] = normalize_token(self.attributes['base'])
示例#3
0
def merge_dynamic_libraries_(coq_lib, nltk_lib, coq_static_lib_path, doc):
    """Pick library entries for the predicates required by the document.

    Coq-style entries take precedence over NLTK-style entries; predicates
    reserved by the static Coq library are skipped. Renamed (numeric-suffixed)
    predicates from the NLTK signature are always kept. Returns a
    deduplicated list of entries.
    """
    reserved_predicates = get_reserved_preds_from_coq_static_lib(
        coq_static_lib_path)
    # Base forms, unless the base form is '*', in which case the surface form.
    required_predicates = {
        normalize_token(tok) for tok in get_tokens_from_xml_node(doc)}
    # Index each library by predicate name (second whitespace field).
    coq_lib_index = {entry.split()[1]: entry for entry in coq_lib}
    nltk_lib_index = {entry.split()[1]: entry for entry in nltk_lib}
    result_lib = []
    for predicate in required_predicates:
        if predicate in reserved_predicates:
            continue
        # Look up both libraries; coq wins when both have the predicate.
        coq_type = get_predicate_type_from_library(predicate, coq_lib_index)
        nltk_type = get_predicate_type_from_library(predicate, nltk_lib_index)
        chosen = coq_type if coq_type is not None else nltk_type
        if chosen is not None:
            result_lib.append(chosen)
    # Add possible renamed predicates for the NLTK signature.
    for coq_style_entry in nltk_lib:
        if re.match(r'\S+_[0-9]', coq_style_entry.split()[1]):
            result_lib.append(coq_style_entry)
    return list(set(result_lib))
示例#4
0
def merge_dynamic_libraries(sig_arbi, sig_auto, coq_static_lib_path, doc):
    """Merge two predicate signatures into coq-style library entries.

    Args:
        sig_arbi: signature dict whose types overwrite sig_auto's.
        sig_auto: automatically inferred signature dict (not modified).
        coq_static_lib_path: path to the static Coq library, used to find
            reserved predicate names to exclude.
        doc: XML document whose token base/surface forms define the set of
            required predicates.

    Returns:
        A deduplicated list of coq-style library entry strings.
    """
    reserved_predicates = get_reserved_preds_from_coq_static_lib(
        coq_static_lib_path)
    # Get base forms, unless the base form is '*', in which case get surf form.
    base_forms = get_tokens_from_xml_node(doc)
    required_predicates = set(normalize_token(t) for t in base_forms)
    # Copy first: the original aliased sig_auto and then mutated it
    # (update/del), clobbering the caller's dict.
    sig_merged = dict(sig_auto)
    sig_merged.update(sig_arbi)  # overwrites automatically inferred types.
    # Remove predicates that are reserved or not required (e.g. variables);
    # numeric-suffixed (renamed) predicates are always kept.
    preds_to_remove = set(reserved_predicates)
    for pred in sig_merged:
        if pred not in required_predicates and not re.match(
                r'\S+_[0-9]', pred):
            preds_to_remove.add(pred)
    for pred in preds_to_remove:
        if pred in sig_merged:
            del sig_merged[pred]
    # Convert into coq-style library entries.
    dynamic_library = [
        build_library_entry(predicate, pred_type)
        for predicate, pred_type in sig_merged.items()]
    return list(set(dynamic_library))
示例#5
0
def normalize_tokens(tokens):
    """Normalize the 'base' and 'surf' attributes of token XML nodes.

    In our format of XML trees, tokens have their own tree, separated from
    the syntactic structure. An absent base form (base="*") is replaced by
    the surface form; then both attributes, when present and not already
    prefixed with an underscore "_", are run through normalize_token.
    The nodes are modified in place and the same sequence is returned.
    """
    for token in tokens:
        # Copy the surface form into an absent ('*') base form.
        if token.get('base', None) == '*':
            token.set('base', token.get('surf', '*'))
        # Same normalization for both attributes, base first (as before).
        for attr_name in ('base', 'surf'):
            if attr_name in token.attrib and \
                    not token.get(attr_name, '').startswith('_'):
                raw_value = token.get(attr_name, '')
                token.set(attr_name, normalize_token(raw_value))
    return tokens
示例#6
0
def load_semantic_rules(fn):
    """Load a list of SemanticRule objects from a YAML file.

    Args:
        fn: path to the YAML rule file.

    Returns:
        List of SemanticRule built from each YAML mapping.

    Raises:
        ValueError: if the file is empty or yields no data.
    """
    semantic_rules = []
    with codecs.open(fn, 'r', 'utf-8') as infile:
        # SafeLoader: rule files are plain data; yaml.load without a Loader
        # is deprecated and can execute arbitrary constructors. This also
        # matches the other load_semantic_rules variant in this file.
        loaded = yaml.load(infile, Loader=yaml.SafeLoader)
    if not loaded:
        raise ValueError("couldn't load file: " + fn)

    for attributes in loaded:
        # Compulsory fields are extracted; the rest remain as attributes.
        category = attributes['category']
        semantics = lexpr(attributes['semantics'])
        del attributes['category'], attributes['semantics']
        # Normalize any token-bearing attribute (indentation here also made
        # consistent with the 4-space style of the rest of the function).
        for attr_name, attr_val in attributes.items():
            if attr_name.endswith('base') or attr_name.endswith('surf'):
                attributes[attr_name] = normalize_token(attr_val)
        semantic_rules.append(SemanticRule(category, semantics, attributes))
    return semantic_rules
示例#7
0
def load_semantic_rules(fn):
    """Read a YAML rule file and build a list of SemanticRule objects.

    Each YAML mapping must carry 'category' and 'semantics'; any remaining
    keys ending in 'base' or 'surf' are token-normalized and passed through
    as rule attributes. Raises ValueError when the file yields no data.
    """
    with codecs.open(fn, 'r', 'utf-8') as infile:
        rule_specs = yaml.load(infile, Loader=yaml.SafeLoader)
    if not rule_specs:
        raise ValueError("couldn't load file: " + fn)

    semantic_rules = []
    for attributes in rule_specs:
        # Compulsory fields.
        category = attributes['category']
        semantics = lexpr(attributes['semantics'])
        del attributes['category'], attributes['semantics']
        for attr_name, attr_val in attributes.items():
            if attr_name.endswith(('base', 'surf')):
                attributes[attr_name] = normalize_token(attr_val)
        semantic_rules.append(
            SemanticRule(category, semantics, attributes))
    return semantic_rules
示例#8
0
def merge_dynamic_libraries(coq_lib, nltk_lib, coq_static_lib_path, doc):
    """Select coq/NLTK library entries for the document's predicates.

    Predicates reserved by the static Coq library are excluded; when both
    libraries define a predicate, the coq entry wins. Returns a
    deduplicated list of entries.
    """
    reserved_predicates = get_reserved_preds_from_coq_static_lib(coq_static_lib_path)
    # Get base forms, unless the base form is '*', in which case get surf form.
    base_forms = doc.xpath("//token[not(@base='*')]/@base | //token[@base='*']/@surf")
    required_predicates = {normalize_token(form) for form in base_forms}
    # Index each library by predicate name (second whitespace field).
    coq_lib_index = {entry.split()[1]: entry for entry in coq_lib}
    nltk_lib_index = {entry.split()[1]: entry for entry in nltk_lib}
    merged_entries = []
    for predicate in required_predicates:
        if predicate in reserved_predicates:
            continue
        coq_type = get_predicate_type_from_library(predicate, coq_lib_index)
        nltk_type = get_predicate_type_from_library(predicate, nltk_lib_index)
        if coq_type is not None:
            merged_entries.append(coq_type)
        elif nltk_type is not None:
            merged_entries.append(nltk_type)
    return list(set(merged_entries))