Пример #1
0
    def test_word_appearance(self):
        """Check the word-appearance values for every tested aggregation."""
        texts = [
            "Lorem ipsum dolor sit ipsum, consectetur adipiscing elit.",
            "Sed eu sollicitudin velit lorem.",
            "lorem ipsum eu",
        ]
        corpus = self.create_corpus(texts)
        words = create_words_table(["lorem", "ipsum", "eu"])
        widget = self.widget

        def scores():
            # Column 1 of each model row is the value under test.
            return [row[1] for row in widget.model]

        self.send_signal(widget.Inputs.corpus, corpus)
        self.send_signal(widget.Inputs.words, words)
        # Click both checkboxes: switches word_frequency off and
        # word_appearance on.
        widget.controls.word_frequency.click()
        widget.controls.word_appearance.click()
        self.wait_until_finished()
        # Expected values under the aggregation the widget starts with.
        self.assertListEqual(scores(), [2 / 3, 2 / 3, 1])

        aggregation_box = widget.controls.aggregation
        expected_by_aggregation = (
            ("Max", [1, 1, 1]),
            ("Min", [0, 0, 1]),
            ("Median", [1, 1, 1]),
        )
        for label, expected in expected_by_aggregation:
            simulate.combobox_activate_item(aggregation_box, label)
            self.wait_until_finished()
            self.assertListEqual(scores(), expected)
Пример #2
0
 def test_input_words(self):
     """The words output holds every input word except ``foo``."""
     table = create_words_table(["foo", "graph", "minors", "trees"])
     inputs = self.widget.Inputs
     self.send_signal(inputs.corpus, self.corpus)
     self.send_signal(inputs.words, table)
     self.wait_until_finished()

     result = self.get_output(self.widget.Outputs.words)
     # The words are read from the first meta column of the output.
     first_meta_column = result.metas[:, 0]
     self.assertListEqual(list(first_meta_column),
                          ['graph', 'minors', 'trees'])
Пример #3
0
 def setUp(self):
     """Replace ``SemanticSearch`` with ``DummySearch`` and build fixtures."""
     target = ("orangecontrib.text.widgets.owsemanticviewer."
               "SemanticSearch")
     self.patcher = patch(target, new=DummySearch)
     self.patcher.start()
     # The widget is created only once the patch is active.
     self.widget = self.create_widget(OWSemanticViewer)
     self.corpus = Corpus.from_file("deerwester")
     word_list = ["foo", "graph", "minors", "trees"]
     self.words = create_words_table(word_list)
Пример #4
0
 def test_send_report(self):
     """Call ``send_report`` with a corpus, with words, and without corpus."""
     widget = self.widget
     corpus_input = widget.Inputs.corpus

     # Corpus only.
     self.send_signal(corpus_input, self.corpus)
     self.wait_until_finished()
     widget.send_report()

     # Corpus and words.
     word_table = create_words_table(["human", "graph", "minors", "trees"])
     self.send_signal(widget.Inputs.words, word_table)
     self.wait_until_finished()
     widget.send_report()

     # Corpus removed again.
     self.send_signal(corpus_input, None)
     widget.send_report()
Пример #5
0
 def test_output_unique(self):
     """Output variable names stay unique when the corpus has "Word count".

     The input corpus already carries a ``Word count`` column; the
     selected documents output is expected to contain a
     ``Word count (1)`` variable (presumably the widget's own column,
     renamed to avoid the clash -- confirm against the widget).
     """
     corpus = Corpus.from_file("book-excerpts")
     var = ContinuousVariable("Word count")
     # One constant value per document is enough to occupy the name.
     corpus = corpus.add_column(var, np.array([1] * len(corpus)))
     words = create_words_table(["doctor", "rum", "house"])
     self.send_signal(self.widget.Inputs.corpus, corpus)
     self.send_signal(self.widget.Inputs.words, words)
     self.wait_until_finished()
     output = self.get_output(self.widget.Outputs.selected_documents)
     # assertIn reports both the name and the container on failure.
     self.assertIn("Word count (1)", output.domain)
Пример #6
0
    def commit(self):
        """Remember the selected words and send both word outputs.

        ``words`` receives the annotated table of all words and
        ``selected_words`` the selected rows only; each is ``None`` when
        there is nothing to send.
        """
        indices = self._get_selected_words_indices()
        all_words = np.array(self.words_model)
        self.selected_words = set(all_words[indices])

        annotated = None
        chosen = None
        if self.words_model:
            table = create_words_table(self.words_model)
            # An empty selection yields no selected-words table at all.
            chosen = table[indices] if indices else None
            annotated = create_annotated_table(table, indices)
        self.Outputs.words.send(annotated)
        self.Outputs.selected_words.send(chosen)
Пример #7
0
 def commit(self):
     """Send the significant words with their p-values and FDR values.

     When there are no significant words, ``None`` is sent and nothing
     else is done. Otherwise the rows of ``tree_to_table()`` (without the
     header row) become a words table with two numeric columns,
     "p-values" and "FDR values", filled from columns 1 and 2 of those
     rows.
     """
     if not self.sig_words:
         # Stop here: falling through would send a second value on the
         # same output right after this ``None``.
         self.Outputs.words.send(None)
         return
     # retrieve the data except the header
     tree = np.array(self.tree_to_table(), dtype=object)[1:]
     attrs = (ContinuousVariable("p-values"),
              ContinuousVariable("FDR values"))
     # Column 0 of the tree rows holds the words themselves.
     words = create_words_table(tree[:, 0])
     words = words.transform(Domain(attrs, metas=words.domain.metas))
     if len(tree[:, 0]):
         # X must be unlocked before it can be written in place.
         with words.unlocked(words.X):
             words.X[:, 0] = tree[:, 1]
             words.X[:, 1] = tree[:, 2]
     self.Outputs.words.send(words)
Пример #8
0
 def test_embedding_similarity(self):
     """Every embedding-similarity value is expected to be close to 1."""
     texts = [
         "Lorem ipsum dolor sit ipsum, consectetur adipiscing elit.",
         "Sed eu sollicitudin velit lorem.",
         "lorem ipsum eu",
     ]
     corpus = self.create_corpus(texts)
     words = create_words_table(["lorem", "ipsum", "eu"])
     widget = self.widget
     self.send_signal(widget.Inputs.corpus, corpus)
     self.send_signal(widget.Inputs.words, words)

     # Click both checkboxes: switches word_frequency off and
     # embedding_similarity on.
     controls = widget.controls
     controls.word_frequency.click()
     controls.embedding_similarity.click()
     self.wait_until_finished()

     # Column 1 of each model row is the value under test.
     scores = [row[1] for row in widget.model]
     self.assertTrue(all(isclose(score, 1) for score in scores))
Пример #9
0
    def test_input_words(self):
        """Check the selected row and shown ontology after a words input.

        Once the words arrive, row 0 is selected and the first ontology is
        shown; selecting row 1 and then row 0 again switches the shown
        data accordingly.
        """
        widget = self.widget
        get_ontology_data = widget._OWOntology__ontology_view.get_data
        get_row = widget._OWOntology__get_selected_row
        set_row = widget._OWOntology__set_selected_row

        self.send_signal(widget.Inputs.words, create_words_table(["foo"]))
        self.assertEqual(get_row(), 0)
        self.assertEqual(get_ontology_data(), self._ontology_1)

        # Switch to the second row and back to the first one.
        for row, ontology in ((1, self._ontology_2), (0, self._ontology_1)):
            set_row(row)
            self.assertEqual(get_row(), row)
            self.assertEqual(get_ontology_data(), ontology)
Пример #10
0
    def commit(self):
        """Send the documents containing a selected word and the words table.

        The corpus output is ``None`` without an input corpus; the selected
        words output is ``None`` when no word is selected.
        """
        def has_selected_word(doc):
            # True as soon as one selected word occurs in the document.
            for word in self.selected_words:
                if word in doc:
                    return True
            return False

        matching = None
        if self.corpus is not None:
            hits = []
            for index, doc in enumerate(self.corpus.ngrams):
                if has_selected_word(doc):
                    hits.append(index)
            matching = self.corpus[hits]
        self.Outputs.corpus.send(matching)

        chosen = list(self.selected_words)
        table = None
        if chosen:
            table = create_words_table(chosen)
            table.name = "Selected Words"
        self.Outputs.selected_words.send(table)
Пример #11
0
    def setUp(self) -> None:
        """Create the widget, a preprocessed corpus and a words table."""
        self.widget: OWScoreDocuments = self.create_widget(OWScoreDocuments)

        # Corpus fixture: every preprocessor runs on the result of the
        # previous one, in the order listed.
        corpus = Corpus.from_file("book-excerpts")
        for preprocessor in (
            preprocess.LowercaseTransformer(),
            preprocess.StripAccentsTransformer(),
            preprocess.UrlRemover(),
            preprocess.SnowballStemmer(),
        ):
            corpus = preprocessor(corpus)
        self.corpus = corpus

        # Words fixture.
        self.words = create_words_table(
            ["house", "doctor", "boy", "way", "Rum"]
        )
Пример #12
0
    def test_output_words(self):
        """No words are output initially; selecting row 0 outputs its words."""
        def select_rows(rows):
            # Replace the tree's current selection with the given rows.
            view = self.widget._OWOntology__ontology_view
            tree = view._EditableTreeView__tree
            model = view._EditableTreeView__model
            picked = QItemSelection()
            for row in rows:
                picked.append(QItemSelectionRange(model.index(row, 0)))
            tree.selectionModel().select(
                picked, QItemSelectionModel.ClearAndSelect)

        words_output = self.widget.Outputs.words
        self.assertIsNone(self.get_output(words_output))

        select_rows(range(1))
        expected = create_words_table(["bar1", "baz1", "foo1"])
        self.assert_table_equal(expected, self.get_output(words_output))
Пример #13
0
    def send_scores(self):
        """Send the keyword scores table, or ``None`` without scores.

        The table lists the keywords and gets, for every cluster group, one
        ``Score(<label>)`` and one ``p_value(<label>)`` column.
        """
        cluster_scores = self.clusters.scores
        if cluster_scores is None:
            self.Outputs.scores.send(None)
            return

        keywords, scores, p_values = cluster_scores
        result = create_words_table(keywords)
        result.name = "Scores"
        for row, group in enumerate(self.clusters.groups):
            label = index_to_cluster_label(group)
            score_var = ContinuousVariable(f"Score({label})")
            result = result.add_column(score_var, scores[row])
            p_value_var = ContinuousVariable(f"p_value({label})")
            result = result.add_column(p_value_var, p_values[row])
        self.Outputs.scores.send(result)
Пример #14
0
    def commit(self):
        """Send the selected words with the model's numeric columns.

        Rows follow the current sort column and order; ``None`` is sent
        when no word is selected.
        """
        if not self.selected_words:
            self.Outputs.words.send(None)
            return

        sort_column, reverse = self.sort_column_order
        model = self.model
        # One continuous variable per model column after the word column.
        score_vars = [
            ContinuousVariable(model.headerData(column, Qt.Horizontal))
            for column in range(1, model.columnCount())
        ]

        ordered = sorted(
            model, key=lambda row: row[sort_column], reverse=reverse
        )
        chosen = [row for row in ordered if row[0] in self.selected_words]

        table = create_words_table([row[0] for row in chosen])
        table = table.transform(Domain(score_vars, metas=table.domain.metas))
        with table.unlocked(table.X):
            # Model column ``column`` lands in X column ``column - 1``.
            for column in range(1, len(score_vars) + 1):
                table.X[:, column - 1] = [row[column] for row in chosen]
        self.Outputs.words.send(table)
Пример #15
0
        elif self.sel_method == SelectionMethods.N_BEST:
            n_sel = min(self.n_selected, n_rows)
            selection = QItemSelection(
                proxy_model.index(0, 0),
                proxy_model.index(n_sel - 1, n_columns - 1))
        else:
            raise NotImplementedError

        self.view.selectionModel().select(selection,
                                          QItemSelectionModel.ClearAndSelect)


if __name__ == "__main__":
    from orangewidget.utils.widgetpreview import WidgetPreview

    from orangecontrib.text import preprocess

    # Manual preview: preprocess a sample corpus and open the widget on it.
    corpus_ = Corpus.from_file("book-excerpts")
    # corpus_.set_title_variable("Text")

    # Each preprocessor runs on the result of the previous one.
    for transform in (
        preprocess.LowercaseTransformer(),
        preprocess.StripAccentsTransformer(),
        preprocess.SnowballStemmer(),
    ):
        corpus_ = transform(corpus_)

    words_ = create_words_table(["house", "doctor", "boy", "way", "Rum"])
    WidgetPreview(OWScoreDocuments).run(set_data=corpus_, set_words=words_)
Пример #16
0
            mask = np.zeros(len(self.corpus), dtype=bool)
            mask[self.selection] = True
            matched = self.corpus[mask] if sum(mask) else None
            other = self.corpus[~mask] if sum(~mask) else None
            annotated = create_annotated_table(self.corpus, self.selection)
        self.Outputs.matching_docs.send(matched)
        self.Outputs.other_docs.send(other)
        self.Outputs.corpus.send(annotated)

    def send_report(self):
        """Report the corpus and, when words are set, the words and list view.

        Nothing is reported while the corpus is missing or empty.
        """
        if self.corpus:
            self.report_data("Corpus", self.corpus)
            if self.words is not None:
                joined_words = ", ".join(self.words)
                self.report_paragraph("Words", joined_words)
                self.report_table(self._list_view, num_format="{:.3f}")

    def copy_to_clipboard(self):
        """Put the text currently selected in the web view on the clipboard."""
        selected = self._web_view.selectedText()
        clipboard = QApplication.clipboard()
        clipboard.setText(selected)


if __name__ == "__main__":
    # pylint: disable=ungrouped-imports
    from Orange.widgets.utils.widgetpreview import WidgetPreview

    # Manual preview of the widget with a sample corpus and word list.
    words_ = create_words_table(["human", "graph", "minors", "trees"])
    # Sample datasets mentioned here: deerwester, book-excerpts
    corpus_ = Corpus.from_file("deerwester")
    preview = WidgetPreview(OWSemanticViewer)
    preview.run(set_corpus=corpus_, set_words=words_)
Пример #17
0
        "Stvar",
        "Agent",
        "Organ",
        "Državni organ",
        "Drug državni organ",
        "Organ državne uprave",
        "Organ lokalne skupnosti",
        "Organ občine",
        "Organ upravljanja",
        "Registrski organ",
        "Nadzorni organ",
        "Upravni organ",
        "Ministrstvo",
        "Organ v sestavi ministrstva",
        "Upravna enota",
        "Bančni račun",
        "Transakcijski račun",
        "Delež",
        "Delež v družbi",
        "Lastniški delež",
        "Dovoljenje",
        "Dražba",
        "Izplačilo",
        "Plača",
        "Pravni akt",
        "Odločba",
        "Sklep",
    ]
    words_ = create_words_table(ls)
    WidgetPreview(OWOntology).run(words_)
Пример #18
0
 def _create_output_table(words: List[str]) -> Optional[Table]:
     """Return a words table for ``words``, or ``None`` when it is empty."""
     return create_words_table(words) if words else None