import os
import shutil

import tom_lib.utils as utils  # assuming `utils` in this snippet refers to tom_lib.utils

# Wipe and recreate the browser's data directory before exporting
if os.path.exists('browser/static/data'):
    shutil.rmtree('browser/static/data')
os.makedirs('browser/static/data')

# Export topic cloud
utils.save_topic_cloud(topic_model, 'browser/static/data/topic_cloud.json')

# Export details about topics
for topic_id in range(topic_model.nb_topics):
    utils.save_word_distribution(topic_model.top_words(topic_id, 20),
                                 'browser/static/data/word_distribution' + str(topic_id) + '.tsv')
    utils.save_affiliation_repartition(topic_model.affiliation_repartition(topic_id),
                                       'browser/static/data/affiliation_repartition' + str(topic_id) + '.tsv')
    # Yearly topic frequency from 2012 through 2015
    evolution = []
    for i in range(2012, 2016):
        evolution.append((i, topic_model.topic_frequency(topic_id, date=i)))
    utils.save_topic_evolution(evolution, 'browser/static/data/frequency' + str(topic_id) + '.tsv')

# Export details about documents
for doc_id in range(topic_model.corpus.size):
    utils.save_topic_distribution(topic_model.topic_distribution_for_document(doc_id),
                                  'browser/static/data/topic_distribution_d' + str(doc_id) + '.tsv')

# Export details about words
for word_id in range(len(topic_model.corpus.vocabulary)):
    utils.save_topic_distribution(topic_model.topic_distribution_for_word(word_id),
                                  'browser/static/data/topic_distribution_w' + str(word_id) + '.tsv')

# Associate documents with topics
topic_associations = topic_model.documents_per_topic()
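
Both examples assume a topic_model that has already been fitted with tom_lib. A minimal sketch of that setup, with an illustrative corpus path and model parameters that are assumptions rather than values from the original snippets, might look like this:

from tom_lib.structure.corpus import Corpus
from tom_lib.nlp.topic_model import NonNegativeMatrixFactorization

# Illustrative corpus file and parameters -- not taken from the original snippets
corpus = Corpus(source_file_path='input/documents.csv',
                vectorization='tfidf',
                max_relative_frequency=0.8,
                min_absolute_frequency=4)
topic_model = NonNegativeMatrixFactorization(corpus=corpus)
topic_model.infer_topics(num_topics=15)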
Example #2
        topic_model.print_topics(num_words=10)

        # Export topic cloud
        utils.save_topic_cloud(topic_model, path.join(timeframe_dir, 'topic_cloud.json'))

        # Export details about topics
        for topic_id in range(topic_model.nb_topics):
            custom_save_word_distribution(custom_top_words(topic_model, topic_id, 20),
                                          path.join(timeframe_dir, 'word_distribution' + str(topic_id) + '.tsv'))
            utils.save_affiliation_repartition(topic_model.affiliation_repartition(topic_id),
                                               path.join(timeframe_dir,
                                                         'affiliation_repartition' + str(topic_id) + '.tsv'))
            # Daily topic frequency over the last `timeframe` days
            evolution = []
            for i in range(timeframe):
                d = today - dt.timedelta(days=timeframe - i)
                date_str = d.strftime("%Y-%m-%d")
                evolution.append((date_str, topic_model.topic_frequency(topic_id, date=date_str)))
            utils.save_topic_evolution(evolution, path.join(timeframe_dir, 'frequency' + str(topic_id) + '.tsv'))

        # Export details about documents
        for doc_id in range(topic_model.corpus.size):
            utils.save_topic_distribution(topic_model.topic_distribution_for_document(doc_id),
                                          path.join(timeframe_dir, 'topic_distribution_d' + str(doc_id) + '.tsv'))

        # Export details about words
        for word_id in range(len(topic_model.corpus.vocabulary)):
            utils.save_topic_distribution(
                topic_model.topic_distribution_for_word(word_id),
                path.join(timeframe_dir, 'topic_distribution_w' + str(word_id) + '.tsv'))

        # Associate documents with topics
        topic_associations = topic_model.documents_per_topic()
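
Example #2 calls custom_top_words and custom_save_word_distribution, which are not defined in the snippet; it also refers to timeframe, timeframe_dir and today, which come from the surrounding (not shown) code. The sketch below is an assumption about the shape of the two helpers, thin wrappers around topic_model.top_words and a TSV writer, not the original implementation:

import codecs

def custom_top_words(topic_model, topic_id, num_words=20):
    # Hypothetical wrapper: reuse tom_lib's own ranking of a topic's top words
    return topic_model.top_words(topic_id, num_words)

def custom_save_word_distribution(distribution, file_path):
    # Hypothetical wrapper: write (word, weight) pairs as a TSV file,
    # matching the word_distribution<topic_id>.tsv files produced in Example #1
    with codecs.open(file_path, 'w', encoding='utf-8') as f:
        f.write('word\tweight\n')
        for word, weight in distribution:
            f.write('{}\t{}\n'.format(word, weight))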