Примеры использования Python NltkTools.pos_tag

Язык программирования: Python

Пространство имен/Пакет: langtools.nltk.nltktools

Класс/Тип: NltkTools

Метод/Функция: pos_tag

Примеров на hotexamples.com: 3

Python NltkTools.pos_tag — найдено 3 примера. Это лучшие примеры кода на Python для langtools.nltk.nltktools.NltkTools.pos_tag, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

NltkTools(5)

pos_tag(2)

stem(2)

tag_raw(2)

filter_long_sentences(1)

sen_tokenize(1)

starts_with_upper(1)

word_tokenize(1)

Пример #1

Показать файл

Файл: tool_wrapper.py Проект: WajihCZ/hunmisc

class HunposPosTagger(PosTaggerWrapper):
    """
    POS tagger that delegates to NltkTools, which in turn wraps HunPos.

    For NLTK to locate the hunpos executable, the $HUNPOS environment
    variable must point to the directory containing hunpos-tag.

    Recognised entries of ``params``:
    - hunpos_model: the hunpos model file. It is read with
      ``params['hunpos_model']``, so a missing key raises KeyError here.
      NOTE(review): earlier docs mention a default of
      $HUNPOS/english.model; this constructor does not supply one --
      confirm whether that is intended.
    - hunpos_encoding: the encoding used by the hunpos model file.
      Defaults to iso-8859-1.
    """
    def __init__(self, params):
        model_path = params['hunpos_model']
        self.nt = NltkTools(pos=True, pos_model=model_path)
        self.encoding = params.get('hunpos_encoding', 'iso-8859-1')

    def pos_tag(self, tokens):
        """
        Append a POS tag to every token of every sentence, in place.

        ``tokens`` is indexed as ``tokens[sentence][token]``; each token must
        support ``[0]`` (the word form) and ``append``. Nothing is returned.
        """
        codec = self.encoding
        for sentence_index, sentence in enumerate(tokens):
            # The tagger is fed the word forms encoded in the model's encoding.
            encoded_words = [token[0].encode(codec) for token in sentence]
            tagged = self.nt.pos_tag(encoded_words)
            for token_index, pair in enumerate(tagged):
                try:
                    _, tag = [part.decode(codec) for part in pair]
                except ValueError:
                    # Covers both a pair that does not unpack into exactly
                    # two items and a decoding failure (UnicodeDecodeError is
                    # a ValueError subclass); such tokens get no tag.
                    continue
                tokens[sentence_index][token_index].append(tag)

Пример #2

Показать файл

class HunposPosTagger(PosTaggerWrapper):
    """
    POS tagger that delegates to NltkTools, which in turn wraps HunPos.

    For NLTK to locate the hunpos executable, the $HUNPOS environment
    variable must point to the directory containing hunpos-tag.

    Recognised entries of ``params``:
    - hunpos_model: the hunpos model file. It is read with
      ``params['hunpos_model']``, so a missing key raises KeyError here.
      NOTE(review): earlier docs mention a default of
      $HUNPOS/english.model; this constructor does not supply one --
      confirm whether that is intended.
    - hunpos_encoding: the encoding used by the hunpos model file.
      Defaults to iso-8859-1.
    """
    def __init__(self, params):
        model_path = params['hunpos_model']
        self.nt = NltkTools(pos=True, pos_model=model_path)
        self.encoding = params.get('hunpos_encoding', 'iso-8859-1')

    def pos_tag(self, tokens):
        """
        Append a POS tag to every token of every sentence, in place.

        ``tokens`` is indexed as ``tokens[sentence][token]``; each token must
        support ``[0]`` (the word form) and ``append``. Nothing is returned.
        """
        codec = self.encoding
        for sentence_index, sentence in enumerate(tokens):
            # The tagger is fed the word forms encoded in the model's encoding.
            encoded_words = [token[0].encode(codec) for token in sentence]
            tagged = self.nt.pos_tag(encoded_words)
            for token_index, pair in enumerate(tagged):
                try:
                    _, tag = [part.decode(codec) for part in pair]
                except ValueError:
                    # Covers both a pair that does not unpack into exactly
                    # two items and a decoding failure (UnicodeDecodeError is
                    # a ValueError subclass); such tokens get no tag.
                    continue
                tokens[sentence_index][token_index].append(tag)

Пример #3

Показать файл

pageSep = "%%#PAGE"
actPage = None
starter = False
for line in sys.stdin:
    l = line.strip().decode("utf-8")
    if l.startswith(pageSep):
        if actPage is not None:
            print
        
        actPage = l.split(" ", 1)[1]
        starter = True
        print l.encode("utf-8").replace(" ", "\t", 1)
        print "%%#Field\tTitle"
        titleTokens = nt.word_tokenize(actPage)
        titleTokensWithPos = list(nt.pos_tag(titleTokens))
        stemmedTitleTokens = nt.stem(titleTokensWithPos)
        hardStemmedTitleTokens = list(nt.stem(((x[0][0].lower() + x[0][1:] if x[0][0].isupper() and x[0][1:].islower() else x[0]), x[1]) for x in titleTokensWithPos))
        for i, (tok, pos, stem) in enumerate(stemmedTitleTokens):
            print u"{0}\t{1}\t{2}\t{3}\t{4}\t{5}".format(tok, "word", "0", pos, stem, hardStemmedTitleTokens[i][2]).encode("utf-8")
        print
    elif starter and l.startswith("Templates:"):
        try:
            templates = l.split("\t", 1)[1]
            print u"%%#Templates\t{0}".format(templates).encode("utf-8")
        except IndexError:
            pass
    elif starter and l.startswith("REDIRECT"):
        print "%%#Redirect"
    else:
        if starter: