def cut_sent(self, text: str, sid=None) -> List[Sentence]:
    """Split ``text`` into sentences at end-of-sentence characters.

    Every character that is a member of ``self._eos`` closes the current
    sentence and is kept as that sentence's final character. Text left over
    after the last such character becomes one trailing sentence.

    Args:
        text: Raw text to split.
        sid: Not used by this method.

    Returns:
        ``Sentence`` objects in order of appearance, each wrapping a single
        ``TEXT`` node. The list is empty when ``text`` is empty.
    """
    sentences = []
    last_cut = 0
    # Scan the whole text, including its final character, so a terminator in
    # the last position closes a sentence like any other.
    for i, char in enumerate(text):
        if char in self._eos:
            sentences.append(Sentence([TEXT(text[last_cut:i + 1])]))
            last_cut = i + 1
    # Keep any non-empty remainder, even a single character; comparing
    # against ``len(text) - 1`` here would silently drop a one-character tail.
    if last_cut < len(text):
        sentences.append(Sentence([TEXT(text[last_cut:])]))
    return sentences
def evaluate(dataset, model):
    """Segment every paragraph in ``dataset`` with ``model`` and score it.

    ``model.eval()`` is called first, then the model is wrapped in an
    ``RNNSegmenter``. Each paragraph's sentences are re-segmented with
    ``cut_edu``; sentences containing no ``EDU`` node are left out. A
    paragraph that ends up with no re-segmented sentences is excluded from
    both the predictions and the gold list, so the two stay aligned.

    Args:
        dataset: Iterable of paragraph collections; they are chained
            together and visited in order.
        model: Model handed to ``RNNSegmenter``.

    Returns:
        Whatever ``edu_eval`` returns for the predicted and gold paragraphs.
    """
    model.eval()
    segmenter = RNNSegmenter(model)

    predicted = []
    reference = []
    for gold_paragraph in chain(*dataset):
        # Only sentences that actually contain EDUs are re-segmented.
        resegmented = [
            Sentence(segmenter.cut_edu(gold_sentence))
            for gold_sentence in gold_paragraph.sentences()
            if list(gold_sentence.iterfind(node_type_filter(EDU)))
        ]
        if not resegmented:
            continue
        predicted.append(Paragraph(resegmented))
        reference.append(gold_paragraph)

    return edu_eval(predicted, reference)
def cut(self, text):
    """Segment raw ``text`` into a ``Paragraph``.

    The text is first split with ``cut_sent``; each resulting sentence is
    then passed to ``cut_edu`` and the result is wrapped in a new
    ``Sentence``.

    Args:
        text: Raw text to segment.

    Returns:
        A ``Paragraph`` built from the re-segmented sentences, in order.
    """
    segmented = [
        Sentence(self.cut_edu(raw_sentence))
        for raw_sentence in self.cut_sent(text)
    ]
    return Paragraph(segmented)