Пример #1
0
def _lower_regex_literals(pattern):
    r"""Lowercase *pattern* while leaving backslash escapes untouched.

    A plain ``pattern.lower()`` would turn ``\D`` into ``\d`` (and likewise
    ``\W``, ``\S``, ``\B``, ``\A``, ``\Z``), changing what the expression
    matches. Only text outside of ``\x`` pairs is lowercased here.
    """
    import re

    def _keep_escapes(match):
        chunk = match.group(0)
        # A chunk starting with a backslash is an escape pair: keep verbatim.
        return chunk if chunk[0] == "\\" else chunk.lower()

    # Either one escape pair (backslash + any char) or a run of ordinary text.
    return re.sub(r"\\.|[^\\]+", _keep_escapes, pattern, flags=re.DOTALL)


def regex_parse(r, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    """
    Yield a single token-pattern dict that matches a token by regex.

    When ``op.ignore_case(config)`` is true the pattern is keyed on
    ``LOWER`` and the literal parts of ``r`` are lowercased (escape
    sequences are preserved); otherwise it is keyed on ``TEXT`` and ``r``
    is used as given. If ``op`` is not empty, its ``value`` is added under
    the ``OP`` key.
    """
    if op.ignore_case(config):
        d = {"LOWER": {"REGEX": _lower_regex_literals(r)}}
    else:
        d = {"TEXT": {"REGEX": r}}

    if not op.empty():
        d["OP"] = op.value
    yield d
Пример #2
0
def any_of_parse(lst, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    """
    Yield a single token-pattern dict matching any one string from ``lst``.

    The items are sorted and joined into an anchored regex alternation
    ``^(a|b|...)$``. When ``op.ignore_case(config)`` is true the items are
    lowercased and the pattern is keyed on ``LOWER``; otherwise it is keyed
    on ``TEXT``. If ``op`` is not empty, its ``value`` is added under the
    ``OP`` key.
    """
    import re

    if op.ignore_case(config):
        attr = "LOWER"
        words = sorted(item.lower() for item in lst)
    else:
        attr = "TEXT"
        words = sorted(lst)

    # Items are literal strings, so regex metacharacters ("C++", "a.b", ...)
    # must be escaped before being placed into the alternation.
    alternation = "|".join(re.escape(word) for word in words)
    base = {attr: {"REGEX": r"^({0})$".format(alternation)}}

    if not op.empty():
        base["OP"] = op.value
    yield base
Пример #3
0
def generic_parse(tag, value, config: "SessionConfig",
                  op: ExtendedOp) -> SpacyPattern:
    """
    Yield a single token-pattern dict of the form ``{tag: value}``.

    The one special case is ``tag == "ORTH"`` combined with
    ``op.ignore_case(config)``: the entry is then emitted as ``LOWER`` with
    the lowercased value. If ``op`` is not empty, its ``value`` is added
    under the ``OP`` key.
    """
    lowercase_orth = tag == "ORTH" and op.ignore_case(config)
    pattern = {"LOWER": value.lower()} if lowercase_orth else {tag: value}

    if op.empty():
        yield pattern
        return

    pattern["OP"] = op.value
    yield pattern
Пример #4
0
def tag_parse(values, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    """
    For generating POS/TAG patterns based on a Regex
    e.g. TAG("^NN|^JJ") for adjectives or nouns
    also deals with TAG_WORD for tag and word or tag and list

    ``values`` is a dict with a required ``"tag"`` regex and, optionally,
    either a ``"word"`` (single literal string) or a ``"list"`` (several
    literal strings, any of which may match). ``"word"`` takes precedence
    when both are present. When ``op.ignore_case(config)`` is true the
    word/list constraint is lowercased and keyed on ``LOWER``; otherwise it
    is keyed on ``TEXT``. If ``op`` is not empty, its ``value`` is added
    under the ``OP`` key. Yields exactly one pattern dict.
    """
    import re

    d = {"TAG": {"REGEX": values["tag"]}}
    ignore_case = "word" in values or "list" in values
    # Only consult the op when there is a word/list constraint to normalize,
    # matching the call pattern of the tag-only case.
    ignore_case = ignore_case and op.ignore_case(config)

    if "word" in values:
        if ignore_case:
            d["LOWER"] = values["word"].lower()
        else:
            d["TEXT"] = values["word"]
    elif "list" in values:
        lst = values["list"]
        words = sorted(item.lower() for item in lst) if ignore_case else sorted(lst)
        # List items are literal strings: escape regex metacharacters so that
        # entries like "C++" or "a.b" match themselves and nothing else.
        alternation = "|".join(re.escape(word) for word in words)
        attr = "LOWER" if ignore_case else "TEXT"
        d[attr] = {"REGEX": r"^({0})$".format(alternation)}

    if not op.empty():
        d["OP"] = op.value
    yield d