Exemplo n.º 1
0
    def test_soa_doc_freq(self):
        """Score a document-frequency matrix with SOA and check result types.

        Converts ``self.input_dict`` into a ``DataCsrMatrix`` via
        ``DataConverter.convert_multi_docs2document_frequency_matrix``,
        feeds its sparse matrix and per-label document counts to
        ``soa_python3.SOA().fit_transform``, and verifies that the scored
        matrix can be turned into a list of score records.
        """
        converter = data_converter.DataConverter()
        data_csr_matrix = (
            converter.convert_multi_docs2document_frequency_matrix(
                labeled_documents=self.input_dict, n_jobs=5))
        # Use the TestCase helper instead of a bare ``assert``: it is not
        # stripped under ``python -O`` and it reports the offending type.
        self.assertIsInstance(data_csr_matrix, data_converter.DataCsrMatrix)

        label2id_dict = data_csr_matrix.label2id_dict
        csr_matrix_ = data_csr_matrix.csr_matrix_
        n_docs_distribution = data_csr_matrix.n_docs_distribution
        vocabulary = data_csr_matrix.vocabulary

        scored_matrix_doc_freq = soa_python3.SOA().fit_transform(
            X=csr_matrix_, unit_distribution=n_docs_distribution, verbose=True)

        soa_scores_doc_freq = ScoredResultObject(
            scored_matrix=scored_matrix_doc_freq,
            label2id_dict=label2id_dict,
            feature2id_dict=vocabulary).convert_score_matrix2score_record()
        self.assertIsInstance(soa_scores_doc_freq, list)
Exemplo n.º 2
0
    def test_soa_doc_freq(self):
        """Run SOA scoring on a document-frequency matrix and print the scores.

        Builds a ``DataCsrMatrix`` from ``self.input_dict`` with
        ``DataConverter.labeledMultiDocs2DocFreqMatrix``, scores it with
        ``soa_python3.SOA().fit_transform``, converts the scored matrix with
        ``ScoredResultObject.ScoreMatrix2ScoreDictionary`` and pretty-prints
        the outcome for manual inspection.
        """
        import pprint

        converter = data_converter.DataConverter()
        doc_freq_data = converter.labeledMultiDocs2DocFreqMatrix(
            labeled_documents=self.input_dict,
            n_jobs=5,
        )
        assert isinstance(doc_freq_data, data_converter.DataCsrMatrix)

        # Pull out everything the scorer and the result wrapper need.
        label_ids = doc_freq_data.label2id_dict
        doc_freq_matrix = doc_freq_data.csr_matrix_
        docs_per_label = doc_freq_data.n_docs_distribution
        feature_ids = doc_freq_data.vocabulary

        scorer = soa_python3.SOA()
        scored = scorer.fit_transform(
            X=doc_freq_matrix,
            unit_distribution=docs_per_label,
            verbose=True,
        )

        result_wrapper = ScoredResultObject(
            scored_matrix=scored,
            label2id_dict=label_ids,
            feature2id_dict=feature_ids,
        )
        soa_scores = result_wrapper.ScoreMatrix2ScoreDictionary()

        print('doc freq based soa')
        pprint.pprint(soa_scores)