示例#1
0
        if result is not None and len(result.dictionary) == 0:
            self.Warning.no_token_left()
            result = None
        self.send(Output.PP_CORPUS, result)
        self.progressBarFinished(None)

    def set_minimal_width(self):
        max_width = 250
        for widget in self.stages:
            if widget.enabled:
                max_width = max(max_width, widget.sizeHint().width())
        self.scroll.setMinimumWidth(max_width + 20)

    @pyqtSlot()
    def settings_invalidated(self):
        self.set_minimal_width()
        self.commit()

    def send_report(self):
        self.report_items('Preprocessor', self.preprocessor.report())


if __name__ == '__main__':
    app = QApplication([])
    widget = OWPreprocess()
    widget.show()
    corpus = Corpus.from_file('bookexcerpts')
    widget.set_data(corpus)
    app.exec()
    widget.saveSettings()
示例#2
0
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)


if __name__ == '__main__':
    from orangecontrib.text.tag import pos_tagger
    app = QApplication([])
    widget = OWCorpusViewer()
    widget.show()
    corpus = Corpus.from_file('book-excerpts')
    corpus = corpus[:3]
    corpus = pos_tagger.tag_corpus(corpus)
    corpus.ngram_range = (1, 2)
    widget.set_data(corpus)
    app.exec()
示例#3
0
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)

    def send_report(self):
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
        ))


if __name__ == '__main__':
    from orangecontrib.text.tag.pos import AveragedPerceptronTagger
    app = QApplication([])
    widget = OWCorpusViewer()
    widget.show()
    corpus = Corpus.from_file('book-excerpts')
    corpus = corpus[:3]
    tagger = AveragedPerceptronTagger()
    tagged_corpus = tagger.tag_corpus(corpus)
    tagged_corpus.ngram_range = (1, 2)
    widget.set_data(tagged_corpus)
    app.exec()