Пример #1
0
    def load(self, model_name):
        """Load a saved topic model of the requested kind from the model file.

        The given ``model_name`` is always recorded on the instance. For
        ``'lda'`` the model is read with ``LdaMulticore.load`` and for
        ``'nmf'`` with ``Nmf.load``, in both cases from ``self.__modelFile``,
        and stored as ``self.__model``. Any other name leaves the stored
        model untouched.
        """
        self.__modelName = model_name

        # Pick the class whose ``load`` matches the requested model type.
        loader = None
        if model_name == 'lda':
            loader = LdaMulticore
        elif model_name == 'nmf':
            loader = Nmf

        if loader is not None:
            self.__model = loader.load(self.__modelFile)
Пример #2
0
# TODO: Several useful descriptive insight methods are described in these articles:
# https://www.machinelearningplus.com/nlp/topic-modeling-gensim-python/#15visualizethetopicskeywords
# https://www.machinelearningplus.com/nlp/topic-modeling-visualization-how-to-present-results-lda-models/#14.-pyLDAVis

# Make sure the output directory for generated images exists.
# makedirs(exist_ok=True) replaces the exists()-then-mkdir() pair, which can
# race with another process creating the directory in between; it still
# raises if "img" exists but is not a directory.
os.makedirs("img", exist_ok=True)

#####################################################################################
# Wordcloud by topic
#####################################################################################

# Load model(s)
# Restore the artifacts stored under the models/ directory.
# lda = LdaModel.load("models/lda")
# presumably the gensim Dictionary saved alongside the models — confirm
dct = gensim.utils.SaveLoad.load("models/dct")
# Corpus file opened through gensim's MmCorpus reader
corpus = gensim.corpora.MmCorpus("models/corpus")
# NMF topic model; queried with show_topic() in the word-cloud loop
nmf = Nmf.load("models/nmf")

# Generate word cloud for final model
# Display names for the topics, keyed 1 through 6.
# NOTE(review): the word-cloud loop iterates topic ids starting at 0 while
# these keys start at 1 — confirm the offset wherever labels are looked up.
labels = {
    1: "Economic activity",
    2: "Policy action",
    3: "Economic outlook",
    4: "Employment",  # stray leading space removed to match the other labels
    5: "Financial Markets",
    6: "Inflation",
}
# Build one word cloud per topic of the NMF model.
for topic in range(NUM_TOPICS):
    # Top 50 terms of this topic, as returned by the model.
    termsnmf = nmf.show_topic(topic, topn=50)
    # The model hands back a list of tuples, whereas the word cloud expects
    # a dictionary of frequencies, hence the dict() conversion.
    cloud_builder = WordCloud(background_color="white")
    wordcloudnmf = cloud_builder.generate_from_frequencies(dict(termsnmf))
# import argparse

from gensim.corpora import Dictionary, MmCorpus
from gensim.models.nmf import Nmf
from gensim.models import TfidfModel

from codebase.utils import TweetRawCorpusStream
from codebase.topic_utilities import export_dtm

if __name__ == "__main__":
    # Script entry point: load one saved NMF model and run export_dtm over
    # each listed TF-IDF corpus, writing one "-dtm.csv" file per corpus into
    # the models directory.
    # NOTE(review): export_dtm is a project helper; presumably it writes a
    # document-topic matrix — confirm against codebase.topic_utilities.

    # Directory layout.
    model_path = "./models/"
    corpora_path = "./corpora/"

    # Which trained model to use.
    num_topics = 50
    modelTag = "Seventh-and-EighthWeek-Tweets-Rolling"
    model_suffix = "-{}topics".format(num_topics)

    model_file = "{}{}{}.model".format(model_path, modelTag, model_suffix)
    nmf = Nmf.load(model_file)

    # Corpora to run through the model.
    fileTag_list = ["First-and-SecondWeek-Tweets-Rolling"]
    for fileTag in fileTag_list:
        corpus_file = '{}{}-tf-idf.mm'.format(corpora_path, fileTag)
        tfidf_corpus = MmCorpus(corpus_file)

        dtm_file = "{}{}{}-dtm.csv".format(model_path, fileTag, model_suffix)
        export_dtm(
            nmf=nmf,
            corpus=tfidf_corpus,
            out_path=dtm_file,
            stop_at=None,
        )