def visualize_topics(self):
    """Render the LSI topic graph as an interactive Plotly HTML page.

    Fetches the topics from the model and hands them to the project's
    semantic-network plotter; the output lands in ``../outputs/lsi_out.html``.
    """
    topics = self.get_topics()
    # LSI supplies no per-topic word association list, hence the empty second argument.
    visualize_semantic_netwrok(
        topics,
        [],
        visualize_method='plotly',
        filename="../outputs/lsi_out.html",
        title='Latent Semantic Indexing',
    )
# Fit an LDA model, then size a second model by the number of topics that are
# actually active (non-zero theta) in document `doc_id`.
lda = LDAModel(config)
num_topics = sum(lda.get_theta_matrix()[doc_id, :] != 0)

########### visualize
lda_config_file = "/home/rohola/codes/topical_language_generation/configs/generated_fake_alexa_lda_config.json"
config = LDAConfig.from_json_file(lda_config_file)
config.num_topics = num_topics

## save the generated text to disk so the model can be built from it
if not os.path.isdir(config.dataset_dir):
    os.mkdir(config.dataset_dir)
with open(os.path.join(config.dataset_dir, "generated_text.txt"), 'w') as file_writer:
    file_writer.write(all_text)

lda = LDAModel(config, build=True)
all_topic_tokens = lda.get_all_topic_tokens(num_words=15)

# clean up words: strip the 'Ġ' BPE space marker from each token
topic_words = [[(t[0].strip('Ġ'), t[1]) for t in tw] for tw in all_topic_tokens]
for tw in topic_words:
    # bugfix: previously printed the whole `topic_words` list on every
    # iteration instead of the current topic's word list
    print(tw)

plot_config = PlotConfig.from_json_file("configs/lda_plot_config.json")
fig = visualize_semantic_netwrok(plot_config, topic_words)
# Train a gensim LDA model with a fixed random seed for reproducibility.
state = np.random.RandomState(random_seed)
lda = LdaModel(corpus=corpus,
               id2word=dictionary,
               num_topics=config.num_topics,
               random_state=state,
               update_every=1,
               passes=10,
               alpha=config.alpha,
               eta='auto')

# show_topics returns (topic_id, [(word, prob), ...]); keep only the word lists.
topic_words = lda.show_topics(config.num_topics_to_show,
                              num_words=config.num_words,
                              formatted=False)
topic_words = [j for (i, j) in topic_words]

for topic in topic_words:
    for word, p in topic:
        print(word)
    print('\n')

# Map the configured dimensionality onto a visualization backend.
if config.dimension == 2:
    visualize_method = 'plotly'
elif config.dimension == 3:
    visualize_method = 'plotly3d'
else:
    # bugfix: the original `raise ("...")` raised a bare string, which is a
    # TypeError at runtime (exceptions must derive from BaseException).
    raise ValueError("Wrong dimension, can accept only 2 or 3")

topic_modeling_semantic_network.visualize_semantic_netwrok(
    config, topic_words, visualize_method=visualize_method)
# Alternative datasets — uncomment one to switch corpora.
#config_file = "configs/alexa_lda_config.json"
#config_file = "configs/nytimes_lda_config.json"
#config_file = "configs/anes_lda_config.json"
config_file = "configs/congress_lda_config.json"

lda = LDAModel(config_file=config_file)
lda._start()

all_topic_tokens = lda.get_all_topic_tokens(num_words=15)

# Clean up words: drop the 'Ġ' BPE space marker from every (token, weight) pair.
topic_words = [[(t[0].strip('Ġ'), t[1]) for t in tw] for tw in all_topic_tokens]
for topic in topic_words:
    print(topic)


# todo remove dataclass and replace it with VisualizationConfig class
@dataclass
class config:
    # Plot settings consumed by the semantic-network visualizer.
    dimension: int = 2
    threshold: float = 0.00001
    node_size: float = 30
    color_scale: str = "Viridis"
    title: str = "LDA"
    out_file_name: str = lda.config.cached_dir + "/lda_viz.html"


visualize_semantic_netwrok(config, topic_words)
    # NOTE(review): fragment — this is the tail of the "lda" branch of an
    # if/elif on session_state.topic_model whose `if` lies outside this view;
    # indentation below is reconstructed. TODO confirm against the full file.
    # Rebuild the LDA model with the topic count / alpha chosen in the UI.
    session_state.config.num_topics = session_state.num_topics
    session_state.config.alpha = session_state.alpha
    lda = LDAModel(session_state.config, build=True)
    all_topic_tokens = lda.get_all_topic_tokens(num_words=15)
    a = lda.get_psi_matrix()
    # Debug print of the psi matrix maximum — presumably a sanity check.
    print("first time", a.max())
    # clean up words: strip the 'Ġ' BPE space marker from each token
    session_state.topic_words = [[(t[0].strip('Ġ'), t[1]) for t in tw] for tw in all_topic_tokens]
    plot_config = PlotConfig.from_json_file(
        "configs/lda_plot_config.json")
    fig = visualize_semantic_netwrok(plot_config,
                                     session_state.topic_words,
                                     auto_open=False)
    st.plotly_chart(fig)
    session_state.fig = fig
elif session_state.topic_model == "lsi":
    session_state.config = get_draft_config(session_state.topic_model,
                                            session_state.dataset)
    session_state.config.num_topics = session_state.num_topics
    lsi = LSIModel(session_state.config, build=True)
    # get_topic_words appears to return (id, words) pairs; keep the word lists.
    tw = lsi.get_topic_words(num_words=10)
    topic_words = [t[1] for t in tw]
    # clean up words
    # NOTE(review): `tw` is rebound by this loop and `t` by the inner
    # comprehension — looks like the LDA-branch cleanup was copied here;
    # verify the element shape matches (token, weight) pairs.
    session_state.topic_words = [[(t[0].strip('Ġ'), t[1]) for t in tw] for tw in topic_words]
def visualize_topics(self):
    """Render the LDA topic graph as an interactive Plotly HTML page.

    Plots the topics together with their per-topic word associations as
    returned by ``get_topics``; output goes to ``../outputs/lda_out.html``.
    """
    topics, word_based_on_topic = self.get_topics()
    visualize_semantic_netwrok(
        topics,
        word_based_on_topic,
        visualize_method='plotly',
        filename="../outputs/lda_out.html",
        # bugfix: LDA is Latent Dirichlet *Allocation*; the old title
        # said "Analysis", which is the wrong model name.
        title="Latent Dirichlet Allocation",
    )