Example #1
    def __init__(self, lst_treetagger=None):
        """
        MWEChunker constructor
        :param lst_treetagger: list generated by the TreeTagger POS-tagging method
        """
        self.MLE_THR = 0.05
        self._list_tt = lst_treetagger

        self._new_list_tt = []
        self._raw_mwes = []
        self._counter = {}

        self.DICE_THR = 0.065
        # Prepositional phrases
        self._pp_rule_set = [ChunkRule("<IN><NP>", "PrepPHR")]

        # Noun compounds
        # 2-gram rules
        self._nc_2gram_set = [
            ChunkRule("<NN><NN.?>", descr="(SUB(Plur)?)? (SUB(Plur)?)?"),
            ChunkRule("<JJ><NN.?>", descr="ADJ (SUB(Plur)?)?"),
            ChunkRule("<PPH><NN.?>", descr="AAN (SUB(Plur)?)?"),
            ChunkRule("<NN.?><JJ>", descr="(SUB(Plur)?)? ADJ"),
            ChunkRule("<NN.?><PPH>", descr="(SUB(Plur)?)? AAN")
        ]

        # n-gram rules
        self._nc_ngram_set = [
            ChunkRule("(<JJ.?>|<PPH>)+<NN><NN.?>?",
                      descr="(ADJ|PrepPHR)+ SUB SUB?"),
            ChunkRule("<NN><NN.?>(<JJ.?>|<PPH>)*",
                      descr="SUB SUB (ADJ|PrepPHR)*")
        ]
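As a usage sketch (not part of the original class), one of the 2-gram rules above can be handed straight to RegexpChunkParser; the tagged sentence is invented:

from nltk.chunk.regexp import ChunkRule, RegexpChunkParser
from nltk.tree import Tree

rules = [ChunkRule("<JJ><NN.?>", descr="ADJ (SUB(Plur)?)?")]
chunker = RegexpChunkParser(rules, chunk_label='NC')
print(chunker.parse(Tree('S', [('green', 'JJ'), ('tea', 'NN'), ('helps', 'VBZ')])))
# -> (S (NC green/JJ tea/NN) helps/VBZ)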
Example #2
    def __init__(self, w_pattern, w_split_left, w_split_right):

        self._pattern = ChunkRule(w_pattern,
                                  'chunk compose clause between conjunction')
        self._split = SplitRule(right_tag_pattern=w_split_right,
                                left_tag_pattern=w_split_left,
                                descr='split the subordinate clause')
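To see what SplitRule does in isolation, here is a standalone sketch (the patterns are illustrative, not the constructor's w_split_* arguments):

from nltk.chunk.regexp import ChunkString, ChunkRule, SplitRule
from nltk.tree import Tree

cs = ChunkString(Tree('S', [('the', 'DT'), ('cat', 'NN'),
                            ('the', 'DT'), ('dog', 'NN')]))
ChunkRule('<DT><NN><DT><NN>', 'chunk everything').apply(cs)
# split the single chunk between a noun and the following determiner
SplitRule('<NN>', '<DT>', 'split between NN and DT').apply(cs)
print(cs.to_chunkstruct())
# -> (S (CHUNK the/DT cat/NN) (CHUNK the/DT dog/NN))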
Example #3
# Loading Libraries
from nltk.chunk.regexp import ChunkString, ChunkRule, ChinkRule
from nltk.tree import Tree

# ChunkString() starts with the flat tree
tree = Tree('S', [('the', 'DT'), ('book', 'NN'), ('has', 'VBZ'),
                  ('many', 'JJ'), ('chapters', 'NNS')])

# Initializing ChunkString()
chunk_string = ChunkString(tree)
print("Chunk String : ", chunk_string)

# Initializing ChunkRule
chunk_rule = ChunkRule('<DT><NN.*><.*>*<NN.*>', 'chunk determiners and nouns')
chunk_rule.apply(chunk_string)
print("\nApplied ChunkRule : ", chunk_string)

# Initializing ChinkRule
chink_rule = ChinkRule('<VB.*>', 'chink verbs')
chink_rule.apply(chunk_string)
print("\nApplied ChinkRule : ", chunk_string, "\n")

# Back to a chunked sub-tree
print(chunk_string.to_chunkstruct())
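The edit can also be reversed: UnChunkRule (used again in Example #5 below) strips the braces from any chunk whose contents match its pattern. Continuing from chunk_string above, a short sketch:

from nltk.chunk.regexp import UnChunkRule

UnChunkRule('<JJ><NNS>', 'unchunk adjective + plural noun').apply(chunk_string)
print(chunk_string)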
Example #4
from nltk.chunk.regexp import (ChunkString, ChunkRule, ChinkRule,
                               RegexpParser, RegexpChunkParser)
from nltk.tree import Tree

s = [('the', 'DT'), ('book', 'NN'), ('has', 'VBZ'), ('many', 'JJ'),
     ('chapters', 'NNS')]
# forward: chunk with a grammar string
chunker = RegexpParser(r'''
NP:
    {<DT><NN.*><.*>*<NN.*>}
    }<VB.*>{''')

print(chunker.parse(s))

# back: rebuild the chunk structure manually, rule by rule
t = Tree('S', s)
cs = ChunkString(t)
print(cs)

ur = ChunkRule('<DT><NN.*><.*>*<NN.*>', 'chunk determiners and nouns')
ur.apply(cs)
print(cs)

ir = ChinkRule('<VB.*>', 'chink verbs')
ir.apply(cs)
print(cs)

print(cs.to_chunkstruct())
# cs.to_chunkstruct().draw()

chunker = RegexpChunkParser([ur, ir])
print(chunker.parse(t))

# set the chunk label
chunker = RegexpChunkParser([ur, ir], chunk_label='CP')
print(chunker.parse(t))
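For comparison, the same pair of rules can be written as one grammar string with the same CP label; this sketch simply mirrors the rules above:

cp_chunker = RegexpParser(r'''
CP:
    {<DT><NN.*><.*>*<NN.*>}
    }<VB.*>{''')
print(cp_chunker.parse(s))
# -> (S (CP the/DT book/NN) has/VBZ (CP many/JJ chapters/NNS))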
Example #5
    rssproc.summarizetexts('htmltext')

    # filcounts = filter(rssproc.filtercount,counts.values())
    # highestcount = [x for x in filcounts][-1]['count']
    # filcounts = filter(rssproc.filtercount, counts.values())
    # filweights = {}
    # for x in filcounts:
    #     item =x
    #     item['count'] = item['count']/highestcount
    #     filweights[x['word']] = item
    # print([x for x in filweights.values()])
    senttokens = rssproc.senttokenizedtext[1]['htmltext']

    s = 'there are 12 boxes in the closet'

    # ChunkRule makes a chunk; the Expand rules then grow an existing chunk
    # over neighbouring tags
    ur = ChunkRule('<CD>', 'chunk cardinal numbers')
    el = ExpandLeftRule('<NNS>', '<CD>', 'expand left over a plural noun')
    er = ExpandRightRule('<CD>', '<NNS>', 'expand right over a plural noun')
    un = UnChunkRule('<DT><NN.*>*', 'unchunk determiner + nouns')

    chunker = RegexpChunkParser([ur, el, er])

    print(chunker.parse(pos_tag(word_tokenize(s))))

    d = []
    for sent in senttokens:
        tk = word_tokenize(sent)
        tkpos = pos_tag(tk)
        for x in tkpos:
            if x[1] == 'CD':
                # the source truncates here; as a plausible completion,
                # collect the tagged sentence and move on
                d.append(tkpos)
                break
Example #6
# Loading Libraries
from nltk.chunk.regexp import ChunkString, ChunkRule, ChinkRule
from nltk.tree import Tree
from nltk.chunk import RegexpChunkParser

# ChunkString() starts with the flat tree
tree = Tree('S', [('the', 'DT'), ('book', 'NN'), ('has', 'VBZ'),
                  ('many', 'JJ'), ('chapters', 'NNS')])

# Initializing ChunkRule
chunk_rule = ChunkRule('<DT><NN.*><.*>*<NN.*>', 'chunk determiners and nouns')

# Initializing ChinkRule
chink_rule = ChinkRule('<VB.*>', 'chink verbs')

# Applying RegexpChunkParser
chunker = RegexpChunkParser([chunk_rule, chink_rule], chunk_label='CP')
print(chunker.parse(tree))
# -> (S (CP the/DT book/NN) has/VBZ (CP many/JJ chapters/NNS))
Example #7
from nltk import word_tokenize, HunposTagger
from nltk.chunk.regexp import ChunkRule, RegexpChunkParser

from talkytalky.util.util import get_project_root

"""
Rules for making phrases from tokens already labelled with parts of speech
"""
# Grammar source: https://github.com/ICTRC/Parsivar/blob/master/parsivar/chunker.py
PARSIVAR_CHUNK_RULES = [
    ChunkRule('<ADJ_SIM><V_PRS>', 'VP'),
    ChunkRule('<ADJ_INO><V.*>', 'VP'),
    ChunkRule('<V_PRS><N_SING><V_SUB>', 'VP'),
    ChunkRule('<N_SING><ADJ.*><N_SING>', 'NP'),
    ChunkRule('<N.*><PRO>', 'NP'),
    ChunkRule('<N_SING><V_.*>', 'VP'),
    ChunkRule('<V.*>+', 'VP'),
    ChunkRule('<ADJ.*>?<N.*>+<ADJ.*>?', 'NP'),
    ChunkRule('<DET><NP>', 'DNP'),
    ChunkRule('<ADJ_CMPR><P>', 'PP'),
    ChunkRule('<ADJ_SIM><P>', 'PP'),
    ChunkRule('<P><N_SING>', 'PP'),
    ChunkRule('<P>*', 'PP'),
    ChunkRule('<NP><DNP>', 'DDNP'),
    ChunkRule('<PP><NP>+', 'NPP')
]
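Note that RegexpChunkParser assigns one chunk_label to every chunk it creates, so the 'VP'/'NP' strings above act only as rule descriptions. A minimal sketch of wiring the rules up follows; the model path, chunk label, and input are assumptions, not from the original module:

# hypothetical usage: 'persian.model' stands in for a real HunPos model file
tagger = HunposTagger('persian.model')
tokens = word_tokenize('...')  # some Persian input text
# nltk's HunposTagger may return tags as bytes, so normalize to str
tagged = [(w, t.decode('utf-8')) if isinstance(t, bytes) else (w, t)
          for w, t in tagger.tag(tokens)]
chunker = RegexpChunkParser(PARSIVAR_CHUNK_RULES, chunk_label='PHRASE')
print(chunker.parse(tagged))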

# Grammar source: https://github.com/nicolashernandez/PyRATA/blob/master/do_benchmark.py
# Doesn't appear to work for clauses.
PYRATA_CHUNK_RULES = [
    ChunkRule('<DT|JJ|NN.*>+', 'NP'),