Python LogOddsRatioInformativeDirichletPrior示例

编程语言: Python

命名空间/包名称: scattertext

类/类型: LogOddsRatioInformativeDirichletPrior

hotexamples.com的示例: 3

Python LogOddsRatioInformativeDirichletPrior - 已找到3个示例。这些是从开源项目中提取的、最受好评的scattertext.LogOddsRatioInformativeDirichletPrior真实Python代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

LogOddsRatioInformativeDirichletPrior(3)

示例#1

显示文件

文件： test_PriorFactory.py 项目： weizhao-BME/scattertext

 def test_align_to_target(self):
     """Scoring needs priors aligned to the corpus being scored.

     Priors are built from the full test corpus, while the scored corpus
     has the 'swift' category removed. With the unaligned priors,
     ``get_scores`` is expected to raise ``ValueError``; once the priors
     have gone through ``align_to_target`` the same call is expected to
     run without raising.
     """
     complete_corpus = get_test_corpus()
     reduced_corpus = complete_corpus.remove_categories(['swift'])

     # Unaligned priors: the scorer is built and invoked inside the
     # context manager so a ValueError from either step is accepted.
     unaligned_priors = (
         PriorFactory(complete_corpus).use_all_categories().get_priors()
     )
     with self.assertRaises(ValueError):
         unaligned_scorer = LogOddsRatioInformativeDirichletPrior(
             unaligned_priors)
         unaligned_scorer.get_scores(
             *reduced_corpus.get_term_freq_df().values.T)

     # Aligned priors: the identical scoring call should now go through.
     prior_factory = PriorFactory(complete_corpus).use_all_categories()
     aligned_priors = (
         prior_factory.align_to_target(reduced_corpus).get_priors()
     )
     aligned_scorer = LogOddsRatioInformativeDirichletPrior(aligned_priors)
     aligned_scorer.get_scores(
         *reduced_corpus.get_term_freq_df().values.T)

示例#2

显示文件

import scattertext as st
from scattertext import LogOddsRatioInformativeDirichletPrior

# Build a corpus from the Rotten Tomatoes sample data, derive priors for the
# 'fresh' vs. 'rotten' comparison, render the "fightin' words" explorer with
# a LogOddsRatioInformativeDirichletPrior term scorer, and save it to `fn`.
fn = 'rotten_fresh2.html'
df = st.SampleCorpora.RottenTomatoes.get_data()
corpus = st.CorpusFromPandas(
    df,
    category_col='category',
    text_col='text',
    nlp=st.whitespace_nlp_with_sentences,
).build()
priors = (
    st.PriorFactory(
        corpus,
        category='fresh',
        not_categories=['rotten'],
        starting_count=1,
    )
    .use_general_term_frequencies()
    .use_all_categories()
    .get_priors()
)
html = st.produce_fightin_words_explorer(
    corpus,
    category='fresh',
    not_categories=['rotten'],
    metadata=df['movie_name'],
    term_scorer=LogOddsRatioInformativeDirichletPrior(priors, alpha_w=10),
)
# Render before opening the output file so that a failure while building the
# explorer does not leave a truncated file behind, and use a context manager
# so the handle is flushed and closed deterministically.
with open(fn, 'wb') as out_file:
    out_file.write(html.encode('utf-8'))
print(fn)

示例#3

显示文件

文件： demo_log_odds_ratio_prior.py 项目： xuezhizeng/scattertext

from scattertext.termcompaction.CompactTerms import CompactTerms

import scattertext as st
from scattertext import LogOddsRatioInformativeDirichletPrior

# Build a corpus from the Rotten Tomatoes sample data, derive priors for the
# 'fresh' vs. 'rotten' comparison, render the frequency explorer with a
# LogOddsRatioInformativeDirichletPrior term scorer, and save it to `fn`.
fn = 'demo_log_odds_ratio_prior.html'
df = st.SampleCorpora.RottenTomatoes.get_data()
corpus = st.CorpusFromPandas(
    df,
    category_col='category',
    text_col='text',
    nlp=st.whitespace_nlp_with_sentences,
).build()
# NOTE(review): the original carried a commented-out
# `.use_general_term_frequencies()` step in this chain; it stays disabled
# here, presumably on purpose -- confirm before re-enabling.
priors = (
    st.PriorFactory(
        corpus,
        category='fresh',
        not_categories=['rotten'],
        starting_count=1,
    )
    .use_all_categories()
    .get_priors()
)
html = st.produce_frequency_explorer(
    corpus,
    category='fresh',
    not_categories=['rotten'],
    metadata=df['movie_name'],
    term_scorer=LogOddsRatioInformativeDirichletPrior(priors, 1),
)
# Render before opening the output file so that a failure while building the
# explorer does not leave a truncated file behind, and use a context manager
# so the handle is flushed and closed deterministically.
with open(fn, 'wb') as out_file:
    out_file.write(html.encode('utf-8'))
print(fn)