def process(fname, chunk, fmt='turboparser'):
    """Process a memmapped chunk of a large file.

    The comparison detection logic is called from here.

    Parameters
    ----------

    fname : string
        The path to the file to be opened.

    chunk : tuple (int, int)
        Starting offset and length of the slice, in bytes.  The code
        essentially processes ``f.seek(chunk[0]); f.read(chunk[1])``.

    fmt : ('turboparser'|'wacky')
        CoNLL dependency format to use.

    Returns
    -------
    chunk_matches : list
        List of tuples (sentence, matches) where the second element is a list
        of (pattern_no, dict) containing the slots matched by the pattern.

    """

    global filemap, fileobj
    chunk_matches = []
    if filemap is None or fileobj.name != fname:
        if filemap is not None:
            # release the mapping of the previously processed file
            filemap.close()
            fileobj.close()
        fileobj = open(fname, 'rb')  # mmap needs a binary-mode file object
        filemap = mmap.mmap(fileobj.fileno(), os.path.getsize(fname),
                            access=mmap.ACCESS_READ)

    filemap.seek(chunk[0])
    lines = filemap.read(chunk[1]).splitlines()
    sents = get_sents_wacky(lines) if fmt == 'wacky' else get_sents(lines)
    for sent in sents:
        try:
            for s, root in read(sent + ["\n"], return_tree=True):
                matches = [(pat_no, m)
                           for pat_no, pat in enumerate(patterns)
                           for m in match(root, pat)]
                if matches:
                    matches = deduplicate(matches)
                    chunk_matches.append((str(s), matches))
        except ValueError:
            pass  # sentence without root
    return chunk_matches
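A minimal driver sketch (not part of the source; the chunk count and the
use of the bundled 'data/hanks_tp_lemma.conll' corpus are assumptions) for
computing offsets that end on blank lines, so each slice passed to
process() holds whole CoNLL sentences:

import os

filemap, fileobj = None, None  # module globals consumed by process()


def make_chunks(fname, n_chunks=4):
    """Split fname into (offset, length) pairs aligned on blank lines."""
    size = os.path.getsize(fname)
    step = size // n_chunks
    offsets = [0]
    with open(fname, 'rb') as f:
        for i in range(1, n_chunks):
            f.seek(i * step)
            f.readline()                 # drop the partial line at the cut
            while f.readline().strip():  # scan to the next blank line,
                pass                     # i.e. a CoNLL sentence boundary
            offsets.append(f.tell())
    offsets.append(size)
    return [(lo, hi - lo) for lo, hi in zip(offsets, offsets[1:]) if hi > lo]


corpus = 'data/hanks_tp_lemma.conll'
results = [hit for chunk in make_chunks(corpus)
           for hit in process(corpus, chunk)]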
Example #3
"""This script shows a simple way of using this package to extract
comparisons from a parsed English corpus.  For example, you can run it
against the 'data/hanks_tp_lemma.conll' file provided.

By default this prints the dependency root of each comparison slot (topic,
vehicle, etc.), but the entire subtrees are extracted and available.
"""
from __future__ import print_function
import fileinput

from compattern.dependency import match
from compattern.dependency.seed_patterns import patterns


def _lemma_or_form(tok):
    return tok.form.lower() if tok.lemma == '_' else tok.lemma.lower()


if __name__ == '__main__':
    from compattern.dependency.conll import read

    sents = read(fileinput.input(), return_tree=True)
    for sent, root in sents:
        print(sent)
        for pat in patterns:
            for m in match(root, pat):
                print("\n".join("{}: {}".format(key, val.form)
                                for key, val in m.items()))
                print()
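Note that _lemma_or_form is defined above but unused in the printing loop;
a small variation (a sketch, not part of the source) would print the
normalized lemma of each slot instead of its surface form:

for sent, root in read(fileinput.input(), return_tree=True):
    print(sent)
    for pat in patterns:
        for m in match(root, pat):
            # lemma when the parser provided one, lowercased form otherwise
            print("\n".join("{}: {}".format(key, _lemma_or_form(val))
                            for key, val in m.items()))
            print()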
Example #4
        print '.'
        if f:
            f.close()
        # open() must accept an encoding= kwarg here (e.g. io.open on Python 2)
        f = open('bnc_similes/{}/{:03d}.txt'.format(sys.argv[1], ii / 20),
                 'w',
                 encoding='utf-8')
    try:
        tree = GlarfTree.glarf_parse(gf, gt)
        # GLARF comparison candidates, keeping only those whose 'C' slot is
        # the target word and whose 'V' slot is present and non-empty
        args = [get_args(*node) for node in find_comparison_nodes(tree)]
        args = [
            arg_dict for arg_dict in args
            if arg_dict['C'].lower() == sys.argv[1] and 'V' in arg_dict
            and arg_dict['V'].strip() != ""
        ]
    except ValueError:
        # GLARF failed to parse this sentence: record nothing and skip it
        args = []
        continue

    # dependency matches from the first two seed patterns whose 'C' slot is
    # the target word given on the command line
    dep_args = [
        m for pat in patterns[:2] for m in match(dep, pat)
        if m['C'].form.lower() == sys.argv[1]
    ]

    if args:
        matches += 1
    if dep_args:
        dep_matches += 1

    # every sentence goes to the main output file; sentences caught by only
    # one of the two systems additionally go to a per-system file
    print_to = [f]
    if args and not dep_args:
        print_to.append(only_glarf)
    elif dep_args and not args:
        print_to.append(only_dep)

    for dest in print_to:
        print >> dest, sent
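The excerpt assumes several names bound earlier in the original script; a
hypothetical setup (every path and counter here is an assumption, not from
the source) might look like:

from io import open  # gives open() an encoding= kwarg on Python 2
import sys

matches = dep_matches = 0  # sentences caught by GLARF / by the dep patterns
f = None                   # current output file, rotated per the ii / 20 naming
# sentences found by only one of the two systems, kept for error analysis
only_glarf = open('bnc_similes/only_glarf.txt', 'w', encoding='utf-8')
only_dep = open('bnc_similes/only_dep.txt', 'w', encoding='utf-8')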
Example #5
def test_as():
    sent, root = read(example_as, return_tree=True)[0]
    matches = match(root, seed_patterns.as_1)
    assert_greater(len(matches), 0)
Example #7
def test_like_t2():
    sent, root = read(example_like_t2, return_tree=True)[0]
    matches = match(root, seed_patterns.like_t2)
    assert_greater(len(matches), 0)
Example #8
def test_than():
    sent, root = read(example_rbr, return_tree=True)[0]
    matches = match(root, seed_patterns.than_2)
    assert_greater(len(matches), 0)
Example #9
def test_aussi_lemma():
    sent, root = read(ex_aussi, return_tree=True)[0]
    matches = match(root, aussi)
    assert_greater(len(matches), 0)
Example #10
def test_like():
    sent, root = read(example_like, return_tree=True)[0]
    matches = match(root, seed_patterns.like)
    assert_greater(len(matches), 0)
    assert_in('T', list(matches[0].keys()))
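A sketch (not from the test suite) of inspecting a match directly, using
only the API visible in these examples. match() returns a list of dicts
mapping slot labels such as 'T' to the matched tokens:

def show_matches(conll_lines, pattern):
    """Print every slot of every match of pattern in one sentence."""
    sent, root = read(conll_lines, return_tree=True)[0]
    for m in match(root, pattern):
        for slot, tok in sorted(m.items()):
            print('{}: {}'.format(slot, tok.form))
        print()


show_matches(example_like, seed_patterns.like)  # expect at least a 'T' slot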