Python LdaModel.bound примеры использования

Язык программирования: Python

Пространство имен/Пакет: gensim.models.ldamodel

Класс/Тип: LdaModel

Метод/Функция: bound

Примеров на hotexamples.com: 4

Python LdaModel.bound - 4 примера найдено. Это лучшие примеры Python кода для gensim.models.ldamodel.LdaModel.bound, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

LdaModel(30)

get_document_topics(30)

show_topics(30)

save(30)

print_topics(30)

load(30)

show_topic(24)

log_perplexity(16)

get_topic_terms(14)

print_topic(10)

get_topics(9)

update(9)

sync_state(5)

top_topics(4)

bound(3)

__getitem__(2)

do_estep(1)

get_term_topics(1)

inference(1)

build_vocab(1)

__init__(1)

train(1)

Пример #1

Показать файл

Файл: topic_modeling.py Проект: LauraCarter/amnesty

def train_model(texts, **kwargs):
  """Fit an LDA model on a corpus and optionally print a short report.

  The corpus and vocabulary are either loaded from ``pickles/corpus.p`` and
  ``pickles/id2word.p`` or built from ``texts`` with ``process_texts``.

  Keyword options (all optional):
    filter_stopwords: forwarded to ``process_texts`` (default True).
    normalizer: forwarded to ``process_texts`` (default 'porter').
    tfidf: wrap the corpus with a ``TfidfModel`` before fitting (default True).
    num_topics: number of topics passed to ``LdaModel`` (default 20).
    min_freq: forwarded to ``process_texts`` (default 2).
    use_pickle: load the pickled corpus instead of processing ``texts``
      (default True); ``texts`` is not read in that case.
    update_pickle: write a freshly built corpus back to the pickle files
      (default True); only consulted when ``use_pickle`` is false.
    report: print the bound and the topics (default True).
    distributed: forwarded to ``LdaModel`` (default False).

  Returns:
    A ``(lda, corpus, id2word)`` tuple. ``corpus`` is the tf-idf wrapped
    corpus when the ``tfidf`` option is enabled.
  """
  # parse args
  filter_stopwords = kwargs.get('filter_stopwords', True)
  normalizer = kwargs.get('normalizer', 'porter')
  use_tfidf = kwargs.get('tfidf', True)
  num_topics = kwargs.get('num_topics', 20)
  min_freq = kwargs.get('min_freq', 2)
  use_pickle = kwargs.get('use_pickle', True)
  update_pickle = kwargs.get('update_pickle', True)
  report = kwargs.get('report', True)
  distributed = kwargs.get('distributed', False)

  # Single-argument print(...) calls are used throughout so the function
  # parses on Python 3 while emitting the same text as the Python 2 print
  # statements they replace.

  # build corpus or read it in from pickle
  if use_pickle:
    print("INFO: loading pickled corpus and word hash")
    # NOTE: unpickling can execute arbitrary code; only load pickle files
    # that this project wrote itself.
    with open("pickles/corpus.p", "rb") as corpus_file:
      corpus = pickle.load(corpus_file)
    with open("pickles/id2word.p", "rb") as id2word_file:
      id2word = pickle.load(id2word_file)

  else:
    print("INFO: processing text and building corpus...")
    corpus, id2word = process_texts(
      texts = texts,
      filter_stopwords = filter_stopwords,
      normalizer = normalizer,
      min_freq = min_freq
    )

    if update_pickle:
      # pickle files; the context managers make sure the data is flushed
      # and the handles are closed even if dumping fails
      print("INFO: updating pickled coprus and word hash")
      with open("pickles/corpus.p", "wb") as corpus_file:
        pickle.dump(corpus, corpus_file)
      with open("pickles/id2word.p", "wb") as id2word_file:
        pickle.dump(id2word, id2word_file)

  # optional tfidf transformation
  if use_tfidf:
    print("INFO: applying tfidf transformation...")
    # Kept under its own name so the boolean option is not overwritten.
    tfidf_model = TfidfModel(corpus)
    corpus = tfidf_model[corpus]

  # fit model
  print("INFO: fitting model...")
  lda = LdaModel(
    corpus = corpus,
    id2word = id2word,
    num_topics = num_topics,
    distributed = distributed
  )

  # report
  if report:
    # NOTE(review): the value printed under the "perplexity" label is
    # lda.bound(corpus); the gensim docs describe bound() as a variational
    # bound score, not a perplexity. The label is kept for output
    # compatibility -- confirm whether log_perplexity() was intended.
    perplexity = lda.bound(corpus)
    print("RESULTS:")
    # The spacing mirrors what the Python 2 statement
    # `print "\nperplexity: ", perplexity, "\n"` wrote.
    print("\nperplexity:  %s \n" % (perplexity,))
    topics = lda.show_topics(num_topics)
    for i, t in enumerate(topics):
      print("topic %d:" % i)
      print(t)

  return lda, corpus, id2word

Пример #2

Показать файл

def train_model(texts, **kwargs):
    """Fit an LDA model on a corpus and optionally print a short report.

    The corpus and vocabulary are either loaded from ``pickles/corpus.p``
    and ``pickles/id2word.p`` or built from ``texts`` with ``process_texts``.

    Keyword options (all optional):
        filter_stopwords: forwarded to ``process_texts`` (default True).
        normalizer: forwarded to ``process_texts`` (default 'porter').
        tfidf: wrap the corpus with a ``TfidfModel`` before fitting
            (default True).
        num_topics: number of topics passed to ``LdaModel`` (default 20).
        min_freq: forwarded to ``process_texts`` (default 2).
        use_pickle: load the pickled corpus instead of processing ``texts``
            (default True); ``texts`` is not read in that case.
        update_pickle: write a freshly built corpus back to the pickle
            files (default True); only consulted when ``use_pickle`` is
            false.
        report: print the bound and the topics (default True).
        distributed: forwarded to ``LdaModel`` (default False).

    Returns:
        A ``(lda, corpus, id2word)`` tuple. ``corpus`` is the tf-idf wrapped
        corpus when the ``tfidf`` option is enabled.
    """
    # parse args
    filter_stopwords = kwargs.get('filter_stopwords', True)
    normalizer = kwargs.get('normalizer', 'porter')
    use_tfidf = kwargs.get('tfidf', True)
    num_topics = kwargs.get('num_topics', 20)
    min_freq = kwargs.get('min_freq', 2)
    use_pickle = kwargs.get('use_pickle', True)
    update_pickle = kwargs.get('update_pickle', True)
    report = kwargs.get('report', True)
    distributed = kwargs.get('distributed', False)

    # Single-argument print(...) calls are used throughout so the function
    # parses on Python 3 while emitting the same text as the Python 2 print
    # statements they replace.

    # build corpus or read it in from pickle
    if use_pickle:
        print("INFO: loading pickled corpus and word hash")
        # NOTE: unpickling can execute arbitrary code; only load pickle
        # files that this project wrote itself.
        with open("pickles/corpus.p", "rb") as corpus_file:
            corpus = pickle.load(corpus_file)
        with open("pickles/id2word.p", "rb") as id2word_file:
            id2word = pickle.load(id2word_file)

    else:
        print("INFO: processing text and building corpus...")
        corpus, id2word = process_texts(texts=texts,
                                        filter_stopwords=filter_stopwords,
                                        normalizer=normalizer,
                                        min_freq=min_freq)

        if update_pickle:
            # pickle files; the context managers make sure the data is
            # flushed and the handles are closed even if dumping fails
            print("INFO: updating pickled coprus and word hash")
            with open("pickles/corpus.p", "wb") as corpus_file:
                pickle.dump(corpus, corpus_file)
            with open("pickles/id2word.p", "wb") as id2word_file:
                pickle.dump(id2word, id2word_file)

    # optional tfidf transformation
    if use_tfidf:
        print("INFO: applying tfidf transformation...")
        # Kept under its own name so the boolean option is not overwritten.
        tfidf_model = TfidfModel(corpus)
        corpus = tfidf_model[corpus]

    # fit model
    print("INFO: fitting model...")
    lda = LdaModel(corpus=corpus,
                   id2word=id2word,
                   num_topics=num_topics,
                   distributed=distributed)

    # report
    if report:
        # NOTE(review): the value printed under the "perplexity" label is
        # lda.bound(corpus); the gensim docs describe bound() as a
        # variational bound score, not a perplexity. The label is kept for
        # output compatibility -- confirm whether log_perplexity() was
        # intended.
        perplexity = lda.bound(corpus)
        print("RESULTS:")
        # The spacing mirrors what the Python 2 statement
        # `print "\nperplexity: ", perplexity, "\n"` wrote.
        print("\nperplexity:  %s \n" % (perplexity,))
        topics = lda.show_topics(num_topics)
        for i, t in enumerate(topics):
            print("topic %d:" % i)
            print(t)

    return lda, corpus, id2word

Пример #3

Показать файл

def ldamodel(doc_clean, n_topics, n_words, description, tfidfmodel=False, unseen_docs=None, num_topics=16):
    """Train an LDA model on ``doc_clean``, print diagnostics and save artifacts.

    Args:
        doc_clean: iterable of raw documents; each one is passed through
            ``min_char`` and split on whitespace into tokens.
        n_topics: how many topics to print; also forwarded to
            ``print_result``.
        n_words: number of terms shown per printed topic.
        description: prefix for the three output files
            (``<description>.pkl``, ``<description>dictionary.gensim``,
            ``<description>_ldamodel.gensim``); also forwarded to
            ``print_result``.
        tfidfmodel: when true, the bag-of-words corpus is wrapped with a
            ``TfidfModel`` (``smartirs='ntc'``) before training.
        unseen_docs: optional sequence of documents to score with the
            trained model; each is passed to ``dictionary.doc2bow`` as is.
        num_topics: number of topics the model is trained with. Defaults to
            16, the value that used to be hard-coded.

    Returns:
        None. Results are printed and written to disk.
    """
    doc_clean = [min_char(doc).split() for doc in doc_clean]

    dictionary = corpora.Dictionary(doc_clean)
    # Converting list of documents (corpus) into Document Term Matrix using dictionary prepared above.
    corpus = [dictionary.doc2bow(doc) for doc in doc_clean]
    # The return value is not used here; the helper is called for whatever
    # it reports on its own.
    compute_coherence_values(dictionary=dictionary, corpus=corpus, texts=doc_clean, start=2, limit=40, step=6)
    if tfidfmodel:
        tfidf = TfidfModel(corpus, id2word=dictionary, smartirs='ntc')
        corpus = tfidf[corpus]

    # Named `lda` so the local no longer shadows this function's own name.
    lda = LdaModel(corpus, num_topics=num_topics, id2word=dictionary, random_state=1, passes=50, per_word_topics=True)
    print("#Tópicos LDA")
    # Never ask for more topics than the model holds: indexing past
    # num_topics fails inside show_topic.
    for topic_id in range(min(n_topics, num_topics)):
        terms = lda.show_topic(topic_id, n_words)
        print("Topic #" + str(topic_id) + ": ", ", ".join([word + '*' + str(weight) for word, weight in terms]))
    print('Bound: ', lda.bound(corpus))
    # Compute Perplexity
    print('Perplexity: ', lda.log_perplexity(corpus))
    # Compute Coherence Score
    coherence_model_lda = CoherenceModel(model=lda, texts=doc_clean, dictionary=dictionary, coherence='c_v')
    coherence_lda = coherence_model_lda.get_coherence()
    print('\nCoherence Score: ', coherence_lda)
    if unseen_docs:
        # NOTE(review): unseen documents are scored as plain bag-of-words
        # even when the model was trained on the tf-idf corpus -- confirm
        # this is intended.
        corpus_new = [dictionary.doc2bow(doc) for doc in unseen_docs]
        for i, unseen_doc in enumerate(corpus_new):
            topic = None
            score = 0
            # With per_word_topics=True the lookup returns several parts;
            # element 0 is iterated as (topic index, score) pairs.
            inference_doc = lda[unseen_doc]
            print(unseen_docs[i])
            for index, tmp_score in inference_doc[0]:
                if tmp_score > score:
                    score = tmp_score
                    topic = lda.print_topic(index, 5)
            print("Score: {}\t Topic: {}".format(score, topic))
        print("Log perplexity for new corpus is", lda.log_perplexity(corpus_new))

    print_result(lda, doc_clean, corpus, n_topics, description)
    # Context manager so the pickle is flushed and the handle closed.
    with open(description + '.pkl', 'wb') as corpus_file:
        pickle.dump(corpus, corpus_file)
    dictionary.save(description + 'dictionary.gensim')
    lda.save(description + '_ldamodel.gensim')

Пример #4

Показать файл

Файл: check_coherence_perplexity.py Проект: yasunobuigarashi/try_samples

# Load the tokenised sentences, dropping any with fewer than two tokens.
sentences = list(filter(lambda s: len(s) >= 2, word2vec.LineSentence(data_file)))

# Vocabulary and bag-of-words corpus shared by every model trained below.
dic = Dictionary(sentences)
corpus = list(map(dic.doc2bow, sentences))

# CSV header for the per-topic-count rows printed by the loop.
print('topic_num,avg,bound,perplexity,coherence')

# Train one model per topic count, from 1 up to and including max_topic_num.
for i in range(1, max_topic_num + 1):
    lda = LdaModel(
        corpus=corpus,
        id2word=dic,
        num_topics=i,
        alpha=alpha,
        random_state=1,
    )

    # Mean length of the per-document topic lists returned by the model.
    avg_topics = mean([len(lda[c]) for c in corpus])

    bound = lda.bound(corpus)

    # log_perplexity gives a per-word bound; 2 ** (-bound) is reported as
    # the perplexity.
    perwordbound = lda.log_perplexity(corpus)
    perplexity = np.exp2(-perwordbound)

    cm = CoherenceModel(
        model=lda,
        corpus=corpus,
        coherence='u_mass',
        processes=1,
    )
    coherence = cm.get_coherence()

    print(f"{i},{avg_topics},{bound},{perplexity},{coherence}")