def __init__(self, model):
    """Set up the segmenter around a trained model.

    :param model: a trained segmentation model; it is put into
        evaluation mode here so no dropout/grad state leaks into inference.
    """
    # Punctuation that terminates a sentence (used by cut_sent).
    self._eos = ['!', '。', '?']
    # Opening -> closing quote pairs.
    self._pairs = {'“': "”", "「": "」"}
    self.model = model
    self.model.eval()
    # Constituency parser, used when a sentence has no precomputed parse.
    self.parser = BerkeleyParser()
    # Dependency parser (LTP) — presumably consumed by feature extraction
    # elsewhere; not referenced in this chunk.
    self.dep_parser = LTPParser()
class RNNSegmenter(SegmenterI):
    """EDU segmenter that tags word/POS sequences with an RNN.

    Each word is labeled B/other by the model; a "B" closes the current EDU.
    """

    def __init__(self, model):
        """Wrap a trained RNN tagger; the model is switched to eval mode."""
        # Sentence-final punctuation used by cut_sent().
        self._eos = ['!', '。', '?']
        # Opening -> closing quote pairs.
        self._pairs = {'“': "”", "「": "」"}
        self.model = model
        self.model.eval()
        # Constituency parser for sentences lacking precomputed parses.
        self.parser = BerkeleyParser()

    def cut(self, text):
        """Segment raw text into a Paragraph of Sentences of EDUs."""
        sentences = self.cut_sent(text)
        for i, sent in enumerate(sentences):
            sentences[i] = Sentence(self.cut_edu(sent))
        return Paragraph(sentences)

    def cut_sent(self, text, sid=None):
        """Split *text* on sentence-final punctuation into Sentence objects.

        Bug fix: the original iterated over ``range(0, len(text) - 1)`` and
        guarded the tail with ``last_cut < len(text) - 1``, which silently
        dropped a trailing one-character remainder (e.g. the "b" in "a。b")
        and returned nothing at all for single-character input. We now scan
        every character and keep any non-empty remainder.
        """
        last_cut = 0
        sentences = []
        for i in range(len(text)):
            if text[i] in self._eos:
                sentences.append(Sentence([TEXT(text[last_cut:i + 1])]))
                last_cut = i + 1
        if last_cut < len(text):
            # Trailing material without a closing punctuation mark.
            sentences.append(Sentence([TEXT(text[last_cut:])]))
        return sentences

    def cut_edu(self, sent):
        """Split one Sentence into EDUs using the RNN's B-tag predictions.

        If the sentence lacks cached ``words``/``tags``, they are recovered
        from its parse tree (parsing the raw text if no parse is cached).
        """
        if (not hasattr(sent, "words")) or (not hasattr(sent, "tags")):
            if hasattr(sent, "parse"):
                parse = getattr(sent, "parse")
            else:
                parse = self.parser.parse(sent.text)
            # Height-2 subtrees are (POS word) pre-terminals; skip traces.
            children = list(
                parse.subtrees(
                    lambda t: t.height() == 2 and t.label() != '-NONE-'))
            setattr(sent, "words", [child[0] for child in children])
            setattr(sent, "tags", [child.label() for child in children])
        word_ids = [self.model.word_vocab[word] for word in sent.words]
        pos_ids = [self.model.pos_vocab[pos] for pos in sent.tags]
        # Batch dimension of 1; move to GPU only if the model lives there.
        word_ids = torch.tensor([word_ids]).long()
        pos_ids = torch.tensor([pos_ids]).long()
        if self.model.use_gpu:
            word_ids = word_ids.cuda()
            pos_ids = pos_ids.cuda()
        pred = self.model(word_ids, pos_ids).squeeze(0)
        labels = [self.model.tag_label.id2label[t] for t in pred.argmax(-1)]
        edus = []
        last_edu_words = []
        last_edu_tags = []
        for word, pos, label in zip(sent.words, sent.tags, labels):
            last_edu_words.append(word)
            last_edu_tags.append(pos)
            if label == "B":
                # "B" marks the last token of the current EDU: flush it.
                text = "".join(last_edu_words)
                edu = EDU([TEXT(text)])
                setattr(edu, "words", last_edu_words)
                setattr(edu, "tags", last_edu_tags)
                edus.append(edu)
                last_edu_words = []
                last_edu_tags = []
        if last_edu_words:
            # Flush any words left over after the final boundary.
            text = "".join(last_edu_words)
            edu = EDU([TEXT(text)])
            setattr(edu, "words", last_edu_words)
            setattr(edu, "tags", last_edu_tags)
            edus.append(edu)
        return edus
def __init__(self, model):
    """Store the boundary-classification model and its parsing backend.

    :param model: trained classifier exposing a ``candidate`` set of
        tokens that may end an EDU.
    """
    # Punctuation that terminates a sentence (used by cut_sent).
    self._eos = ['!', '。', '?']
    # Opening -> closing quote pairs.
    self._pairs = {'“': "”", "「": "」"}
    self.model = model
    # Cache the candidate boundary-token set for quick membership tests.
    self.candidate = model.candidate
    # Constituency parser, used when a sentence has no precomputed parse.
    self.parser = BerkeleyParser()
class SVMSegmenter(SegmenterI):
    """EDU segmenter that queries an SVM at candidate boundary tokens.

    A boundary is placed after sentence-final punctuation, or after a
    candidate token when ``model.predict`` accepts it.
    """

    def __init__(self, model):
        """Store the boundary classifier and create the parsing backend."""
        # Sentence-final punctuation used by cut_sent() and cut_edu().
        self._eos = ['!', '。', '?']
        # Opening -> closing quote pairs.
        self._pairs = {'“': "”", "「": "」"}
        self.model = model
        # Tokens at which the SVM is consulted for a boundary decision.
        self.candidate = model.candidate
        self.parser = BerkeleyParser()

    def cut(self, text):
        """Segment raw text into a Paragraph of Sentences of EDUs."""
        sentences = self.cut_sent(text)
        for i, sent in enumerate(sentences):
            sentences[i] = Sentence(self.cut_edu(sent))
        return Paragraph(sentences)

    def cut_sent(self, text: str, sid=None) -> List[Sentence]:
        """Split *text* on sentence-final punctuation into Sentence objects.

        Bug fix: the original iterated over ``range(0, len(text) - 1)`` and
        guarded the tail with ``last_cut < len(text) - 1``, which silently
        dropped a trailing one-character remainder (e.g. the "b" in "a。b")
        and returned nothing at all for single-character input. We now scan
        every character and keep any non-empty remainder.
        """
        last_cut = 0
        sentences = []
        for i in range(len(text)):
            if text[i] in self._eos:
                sentences.append(Sentence([TEXT(text[last_cut:i + 1])]))
                last_cut = i + 1
        if last_cut < len(text):
            # Trailing material without a closing punctuation mark.
            sentences.append(Sentence([TEXT(text[last_cut:])]))
        return sentences

    def cut_edu(self, sent: Sentence) -> List[EDU]:
        """Split one Sentence into EDUs via SVM decisions at candidates.

        Uses the sentence's cached parse when available, otherwise parses
        its raw text.
        """
        if not hasattr(sent, "parse"):
            # NOTE(review): a leftover debug `print(sent.text)` was removed
            # from this branch.
            parse = self.parser.parse(sent.text)
        else:
            parse = getattr(sent, "parse")
        # Re-root as a ParentedTree so the model can walk parent links.
        parse = ParentedTree.fromstring(parse.pformat())
        # Height-2 subtrees are (POS word) pre-terminals; skip traces.
        children = list(
            parse.subtrees(
                lambda t: t.height() == 2 and t.label() != '-NONE-'))
        edus = []
        last_edu_words = []
        last_edu_tags = []
        offset = 0
        for child in children:
            # Undo PTB bracket escaping so offsets match the raw text.
            if child[0] == '-LRB-':
                child[0] = '('
            if child[0] == '-RRB-':
                child[0] = ')'
            last_edu_words.append(child[0])
            last_edu_tags.append(child.label())
            # Always break after sentence-final punctuation; otherwise ask
            # the SVM only at candidate tokens.
            if child[0] in self._eos or (child[0] in self.candidate
                                         and self.model.predict(offset, parse)):
                text = "".join(last_edu_words)
                edu = EDU([TEXT(text)])
                setattr(edu, "words", last_edu_words)
                setattr(edu, "tags", last_edu_tags)
                edus.append(edu)
                last_edu_words = []
                last_edu_tags = []
            offset += len(child[0])
        if last_edu_words:
            # Flush any words left over after the final boundary.
            text = "".join(last_edu_words)
            edu = EDU([TEXT(text)])
            setattr(edu, "words", last_edu_words)
            setattr(edu, "tags", last_edu_tags)
            edus.append(edu)
        return edus
# coding: UTF-8 import re import os from nltk.tree import Tree as ParseTree from util.berkely import BerkeleyParser from tqdm import tqdm if __name__ == '__main__': ctb_dir = "data/CTB" save_dir = "data/CTB_auto" encoding = "UTF-8" ctb = {} s_pat = re.compile(r"<S ID=(?P<sid>\S+?)>(?P<sparse>.*?)</S>", re.M | re.DOTALL) parser = BerkeleyParser() for file in tqdm(os.listdir(ctb_dir)): if os.path.isfile(os.path.join(save_dir, file)): continue print(file) with open(os.path.join(ctb_dir, file), "r", encoding=encoding) as fd: doc = fd.read() parses = [] for match in s_pat.finditer(doc): sid = match.group("sid") sparse = ParseTree.fromstring(match.group("sparse")) pairs = [(node[0], node.label()) for node in sparse.subtrees() if node.height() == 2 and node.label() != "-NONE-"] words, tags = list(zip(*pairs)) print(sid, " ".join(words)) if sid == "5133": parse = sparse