def load_lda_parameters(mdl_cfg):
    """Load the LDA artifacts and the theta/beta matrices named in *mdl_cfg*.

    Configuration entries read:

    * ``mdl_cfg['CORPUS']['dict_file']`` and
      ``mdl_cfg['CORPUS']['path_index_file']``
    * ``mdl_cfg['LDA']['lda_model_file']`` and
      ``mdl_cfg['LDA']['lda_cos_index_file']``
    * ``mdl_cfg['LDA']['lda_theta_file']`` and
      ``mdl_cfg['LDA']['lda_beta_file']``

    A one-line summary of each matrix (its two dimensions and its minimum
    value) is printed to stdout, followed by a blank line.

    Returns:
        tuple: ``(lda_dictionary, lda_mdl, lda_index, lda_file_path_index,
        lda_theta, lda_beta)``.

    Raises:
        IOError: if the ``nexists`` check fails for the dictionary/path-index
            pair or for the model/cosine-index pair. Previously the function
            fell through and crashed with ``UnboundLocalError`` at the
            ``return`` statement.
    """
    corpus_cfg = mdl_cfg['CORPUS']
    lda_cfg = mdl_cfg['LDA']

    dictionary_file = corpus_cfg['dict_file']
    path_index_file = corpus_cfg['path_index_file']
    lda_mdl_file = lda_cfg['lda_model_file']
    lda_cos_index_file = lda_cfg['lda_cos_index_file']

    # NOTE(review): nexists is defined elsewhere in the file; it is assumed to
    # be a path-existence predicate -- confirm against its definition.
    if not (nexists(dictionary_file) and nexists(path_index_file)):
        raise IOError(
            'Cannot load the dictionary / file path index; check %r and %r'
            % (dictionary_file, path_index_file))
    lda_file_path_index = load_file_paths_index(path_index_file)
    lda_dictionary = load_dictionary(dictionary_file)

    if not (nexists(lda_mdl_file) and nexists(lda_cos_index_file)):
        raise IOError(
            'Cannot load the LDA model / cosine index; check %r and %r'
            % (lda_mdl_file, lda_cos_index_file))
    lda_mdl, lda_index = load_lda_variables(lda_mdl_file, lda_cos_index_file)

    # Theta matrix: the shape is unpacked as (documents, topics).
    lda_theta = np.loadtxt(lda_cfg['lda_theta_file'])
    num_docs, num_topics = lda_theta.shape
    # np.min without an axis already reduces over the whole array.
    min_lda_theta = np.min(lda_theta)
    # A single pre-formatted string prints the same text as the former
    # comma-separated print statement.
    print('LDA-theta is loaded: # of documents: %s # of topics: %s '
          'min(Theta): %s' % (num_docs, num_topics, min_lda_theta))

    # Beta matrix: the shape is unpacked as (topics, vocabulary terms).
    lda_beta = np.loadtxt(lda_cfg['lda_beta_file'])
    num_topics, vocab_size = lda_beta.shape
    min_lda_beta = np.min(lda_beta)
    # The 'min(Bheta)' label is kept exactly as it was emitted before.
    print('LDA-beta is loaded: # of topics: %s # of terms in the vocabulary: '
          '%s min(Bheta): %s' % (num_topics, vocab_size, min_lda_beta))
    print('')

    return (lda_dictionary, lda_mdl, lda_index, lda_file_path_index,
            lda_theta, lda_beta)
def load_tm(mdl_cfg):
    """Load the dictionary, file path index, LDA model and cosine index.

    Configuration entries read:

    * ``mdl_cfg['CORPUS']['dict_file']`` and
      ``mdl_cfg['CORPUS']['path_index_file']``
    * ``mdl_cfg['LDA']['lda_model_file']`` and
      ``mdl_cfg['LDA']['lda_cos_index_file']``

    Returns:
        tuple: ``(lda_dictionary, lda_mdl, lda_index, lda_file_path_index)``.

    Raises:
        IOError: if the ``nexists`` check fails for the dictionary/path-index
            pair or for the model/cosine-index pair. Previously the function
            fell through and crashed with ``UnboundLocalError`` at the
            ``return`` statement.
    """
    corpus_cfg = mdl_cfg['CORPUS']
    lda_cfg = mdl_cfg['LDA']

    dictionary_file = corpus_cfg['dict_file']
    path_index_file = corpus_cfg['path_index_file']
    lda_mdl_file = lda_cfg['lda_model_file']
    lda_cos_index_file = lda_cfg['lda_cos_index_file']

    # NOTE(review): nexists is defined elsewhere in the file; it is assumed to
    # be a path-existence predicate -- confirm against its definition.
    if not (nexists(dictionary_file) and nexists(path_index_file)):
        raise IOError(
            'Cannot load the dictionary / file path index; check %r and %r'
            % (dictionary_file, path_index_file))
    lda_file_path_index = load_file_paths_index(path_index_file)
    lda_dictionary = load_dictionary(dictionary_file)

    if not (nexists(lda_mdl_file) and nexists(lda_cos_index_file)):
        raise IOError(
            'Cannot load the LDA model / cosine index; check %r and %r'
            % (lda_mdl_file, lda_cos_index_file))
    lda_mdl, lda_index = load_lda_variables(lda_mdl_file, lda_cos_index_file)

    return lda_dictionary, lda_mdl, lda_index, lda_file_path_index