def load(self, model_name):
    """Remember the model name and load the matching model from disk.

    The name is always stored in ``self.__modelName``. For ``'lda'`` the
    model is loaded with ``LdaMulticore.load`` and for ``'nmf'`` with
    ``Nmf.load``, both reading from ``self.__modelFile``; the result is
    stored in ``self.__model``. Any other name leaves ``self.__model``
    untouched.

    Args:
        model_name: Identifier of the model type, ``'lda'`` or ``'nmf'``.
    """
    self.__modelName = model_name

    # Select the loader class lazily so only the requested backend is looked up.
    if model_name == 'lda':
        loader = LdaMulticore
    elif model_name == 'nmf':
        loader = Nmf
    else:
        # Unrecognised names only record the name; nothing is loaded.
        return

    self.__model = loader.load(self.__modelFile)
# TODO: Several useful descriptive insight methods are covered in these articles:
# https://www.machinelearningplus.com/nlp/topic-modeling-gensim-python/#15visualizethetopicskeywords
# https://www.machinelearningplus.com/nlp/topic-modeling-visualization-how-to-present-results-lda-models/#14.-pyLDAVis

# Make sure the image output directory exists. makedirs(exist_ok=True) creates
# it in one step, so there is no gap between checking for the directory and
# creating it (the exists()/mkdir() pair could fail if it appeared in between).
os.makedirs("img", exist_ok=True)

#####################################################################################
# Wordcloud by topic
#####################################################################################

# Load model(s)
# lda = LdaModel.load("models/lda")
dct = gensim.utils.SaveLoad.load("models/dct")
corpus = gensim.corpora.MmCorpus("models/corpus")
nmf = Nmf.load("models/nmf")

# Generate word cloud for final model
# NOTE(review): labels are keyed 1..6 while the loop below counts topics from
# 0, and " Employment" carries a leading space -- confirm both against the
# code that consumes `labels`.
labels = {
    1: "Economic activity",
    2: "Policy action",
    3: "Economic outlook",
    4: " Employment",
    5: "Financial Markets",
    6: "Inflation"
}

for topic in range(NUM_TOPICS):
    # Top 50 terms of this topic as (term, weight) tuples.
    termsnmf = nmf.show_topic(topic, topn=50)
    # Model returns list of tuples, wordcloud wants a dictionary instead
    wordcloudnmf = WordCloud(
        background_color="white").generate_from_frequencies(dict(termsnmf))
# import argparse
from gensim.corpora import Dictionary, MmCorpus
from gensim.models.nmf import Nmf
from gensim.models import TfidfModel
from codebase.utils import TweetRawCorpusStream
from codebase.topic_utilities import export_dtm


if __name__ == "__main__":
    # Script entry point: load one trained NMF model and, for every corpus
    # tag listed below, pass the model and that tag's tf-idf corpus to
    # export_dtm, which is given a CSV output path under the model directory.
    # (Presumably "dtm" is a document-topic matrix -- confirm in export_dtm.)
    corpora_path = "./corpora/"
    model_path = "./models/"
    num_topics = 50
    model_suffix = "-{}topics".format(num_topics)

    # The model file name is built from the model tag plus the topic-count suffix.
    modelTag = "Seventh-and-EighthWeek-Tweets-Rolling"
    model_file = "{}{}{}.model".format(model_path, modelTag, model_suffix)
    nmf = Nmf.load(model_file)

    # Corpora to run through the loaded model.
    fileTag_list = ["First-and-SecondWeek-Tweets-Rolling"]
    for fileTag in fileTag_list:
        corpus_file = '{}{}-tf-idf.mm'.format(corpora_path, fileTag)
        dtm_file = "{}{}{}-dtm.csv".format(model_path, fileTag, model_suffix)

        tfidf_corpus = MmCorpus(corpus_file)
        export_dtm(
            nmf=nmf,
            corpus=tfidf_corpus,
            out_path=dtm_file,
            stop_at=None
        )