def load_dataset(cfg=None):
    """Load or generate a dataset, selected by ``cfg['load_data']``.

    Supported values of ``cfg['load_data']``:

    - ``'uci'`` or ``1``: load ``cfg['data_name']`` through ``data.load_uci``
      and store the matrix shape in ``cfg['N']`` / ``cfg['M']``.
    - ``2``: generate the data with ``gen_real(cfg)``.
    - ``3``: load the UCI data as above, load the stored ``Phi_<data_name>``
      and ``Theta_<data_name>`` objects and combine everything with
      ``merge_halfmodel``.
    - ``4``: identity matrices of size ``cfg['T']`` for ``F``, ``Phi_r`` and
      ``Theta_r``; the shape is stored in ``cfg['N']`` / ``cfg['M']``.
    - ``5``: like ``2``, but first sets ``cfg['real_theta_sparsity']`` and
      ``cfg['real_phi_sparsity']`` to ``1.``.

    Parameters
    ----------
    cfg : dict-like, optional
        Configuration; it is modified in place by several modes. When
        omitted, a fresh ``config.default_config()`` is created per call
        (a default built once at definition time would be shared and
        mutated across calls).

    Returns
    -------
    tuple
        ``(F, vocab, N, M, Phi_r, Theta_r)``. ``vocab`` is ``None`` for the
        generated modes; ``Phi_r`` and ``Theta_r`` are ``None`` for mode
        ``'uci'`` / ``1``.

    Raises
    ------
    ValueError
        If ``cfg['load_data']`` is not one of the supported values.
    """
    if cfg is None:
        cfg = config.default_config()

    mode = cfg['load_data']

    if mode in ('uci', 1):
        print("uci")
        F, vocab = data.load_uci(cfg['data_name'], cfg)
        N, M = F.shape
        cfg['N'], cfg['M'] = N, M
        print('Dimensions of F:', N, M)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, vocab, N, M, None, None

    if mode == 2:
        F, Phi_r, Theta_r = gen_real(cfg)
        print(Phi_r)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, None, F.shape[0], F.shape[1], Phi_r, Theta_r

    if mode == 3:
        print("uci halfmodel", cfg["alpha"])
        F, vocab = data.load_uci(cfg['data_name'], cfg)
        N, M = F.shape
        cfg['N'], cfg['M'] = N, M
        Phi_r = load_obj('Phi_' + cfg['data_name'])
        Theta_r = load_obj('Theta_' + cfg['data_name'])
        F_merged = merge_halfmodel(F, Phi_r, Theta_r, cfg)
        # NOTE(review): N and M describe the loaded F, not F_merged; this
        # presumably relies on merge_halfmodel keeping the shape - confirm.
        print('Dimensions of F:', N, M)
        print('Checking assumption on F:', np.sum(F_merged, axis=0).max())
        return F_merged, vocab, N, M, Phi_r, Theta_r

    if mode == 4:
        size = cfg['T']
        F = np.eye(size)
        cfg['N'], cfg['M'] = F.shape
        # Separate arrays on purpose: callers may modify one of them in place.
        Phi_r = np.eye(size)
        Theta_r = np.eye(size)
        return F, None, size, size, Phi_r, Theta_r

    if mode == 5:
        cfg['real_theta_sparsity'] = 1.
        cfg['real_phi_sparsity'] = 1.
        F, Phi_r, Theta_r = gen_real(cfg)
        print('Checking assumption on F:', np.sum(F, axis=0).max())
        return F, None, F.shape[0], F.shape[1], Phi_r, Theta_r

    # Previously an unknown mode fell through and returned None, which only
    # failed later when the caller unpacked the result.
    raise ValueError("Unsupported cfg['load_data'] value: %r" % (mode,))
def _is_unset(value):
    """Return True when an optional sequence argument is None or empty."""
    if value is None:
        return True
    try:
        return len(value) == 0
    except TypeError:
        # Objects without a length (e.g. iterators) are used as given.
        return False


def save_topics(W, filename, vocab=None, topic_idxs=None):
    """Write the words of each topic, ordered by decreasing weight, to a file.

    For every selected topic a header line ``topic # <idx> :`` is written,
    followed by one `` <word>:<weight>`` line per row of ``W``, sorted by
    descending ``W[:, idx]``.

    Parameters
    ----------
    W : 2-D array
        Indexed as ``W[word, topic]``.
    filename : str
        Output path; the file is opened in ``'w'`` mode and overwritten.
    vocab : sequence, optional
        Label for each row of ``W``. ``None`` or an empty sequence means
        the row indices themselves are used as labels.
    topic_idxs : sequence of int, optional
        Columns of ``W`` to write. ``None`` or an empty sequence means all
        columns.
    """
    # Plain truthiness (`not vocab`) raises ValueError for NumPy arrays with
    # more than one element, so test for None / emptiness explicitly.
    if _is_unset(vocab):
        vocab = range(W.shape[0])
    if _is_unset(topic_idxs):
        topic_idxs = range(W.shape[1])

    with open(filename, 'w') as f:
        for topic_idx in topic_idxs:
            # Negate the column so argsort yields the largest weights first.
            ranked = np.argsort(-W[:, topic_idx])
            print('topic #', topic_idx, ':', file=f)
            lines = [
                ' ' + str(vocab[i]) + ':' + str(W[i, topic_idx])
                for i in ranked
            ]
            print('\n'.join(lines), file=f)


if __name__ != '__main__':
    # Imported as a module: only configure the rc font settings.
    ml.rcdefaults()  # reset the rc settings to their defaults
    ml.rcParams['font.family'] = 'fantasy'
    ml.rcParams['font.fantasy'] = 'Times New Roman', 'Ubuntu', 'Arial', 'Tahoma', 'Calibri'
else:
    # Run as a script: plot the saved W matrix of the configured experiment
    # and store the figure as a PDF next to it.
    import os
    from os.path import join
    from data import load_uci

    ml.rcdefaults()  # reset the rc settings to their defaults
    ml.rcParams['font.family'] = 'fantasy'
    ml.rcParams['font.fantasy'] = 'Times New Roman', 'Ubuntu', 'Arial', 'Tahoma', 'Calibri'

    cfg = config.load()
    _, vocab = load_uci(cfg['data_name'], cfg)
    W = np.loadtxt(join(cfg['result_dir'], cfg['experiment'] + '_W.csv'))
    # The title is a runtime string (Russian for "Distribution of words in topics").
    res = plot_matrix(W, u'Распределение слов в темах', vocab=vocab)
    filename = join(cfg['result_dir'], cfg['experiment'] + '_W.pdf')
    plt.savefig(filename, format='pdf')
    plt.show()
vocab = range(W.shape[0]) if not topic_idxs: topic_idxs = range(W.shape[1]) with open(filename, "w") as f: for topic_idx in topic_idxs: words = np.argsort(-W[:, topic_idx]) print("topic #", topic_idx, ":", file=f) str_words = [" " + str(vocab[i]) + ":" + str(W[i, topic_idx]) for i in words] print("\n".join(str_words), file=f) if __name__ != "__main__": ml.rcdefaults() # cбрасываем настройки на "по умолчанию" ml.rcParams["font.family"] = "fantasy" ml.rcParams["font.fantasy"] = "Times New Roman", "Ubuntu", "Arial", "Tahoma", "Calibri" else: import os from os.path import join from data import load_uci ml.rcdefaults() # cбрасываем настройки на "по умолчанию" ml.rcParams["font.family"] = "fantasy" ml.rcParams["font.fantasy"] = "Times New Roman", "Ubuntu", "Arial", "Tahoma", "Calibri" cfg = config.load() _, vocab = load_uci(cfg["data_name"], cfg) W = np.loadtxt(join(cfg["result_dir"], cfg["experiment"] + "_W.csv")) res = plot_matrix(W, u"Распределение слов в темах", vocab=vocab) filename = join(cfg["result_dir"], cfg["experiment"] + "_W.pdf") plt.savefig(filename, format="pdf") plt.show()