def load_lda_parameters(mdl_cfg):
    """Load the LDA artifacts and the theta/beta matrices named in *mdl_cfg*.

    Args:
        mdl_cfg: nested mapping indexed as ``mdl_cfg[section][key]``. The keys
            read are ``CORPUS/dict_file``, ``CORPUS/path_index_file``,
            ``LDA/lda_model_file``, ``LDA/lda_cos_index_file``,
            ``LDA/lda_theta_file`` and ``LDA/lda_beta_file``.

    Returns:
        The tuple ``(lda_dictionary, lda_mdl, lda_index, lda_file_path_index,
        lda_theta, lda_beta)``. ``lda_theta`` and ``lda_beta`` are 2-D NumPy
        arrays read with ``np.loadtxt``; the other four values are whatever
        ``load_dictionary``, ``load_lda_variables`` and
        ``load_file_paths_index`` return.

    Raises:
        IOError: if ``nexists`` reports that any of the dictionary, path
            index, model or cosine-index files is missing. (Previously this
            surfaced as an ``UnboundLocalError`` at the ``return`` statement.)
    """
    dictionary_file = mdl_cfg['CORPUS']['dict_file']
    path_index_file = mdl_cfg['CORPUS']['path_index_file']
    lda_mdl_file = mdl_cfg['LDA']['lda_model_file']
    lda_cos_index_file = mdl_cfg['LDA']['lda_cos_index_file']

    # Fail early with a clear message instead of leaving the result
    # variables unbound and crashing at the final return.
    if not (nexists(dictionary_file) and nexists(path_index_file)):
        raise IOError(
            'Missing corpus file(s): dictionary %r and/or path index %r'
            % (dictionary_file, path_index_file))
    lda_file_path_index = load_file_paths_index(path_index_file)
    lda_dictionary = load_dictionary(dictionary_file)

    if not (nexists(lda_mdl_file) and nexists(lda_cos_index_file)):
        raise IOError(
            'Missing LDA file(s): model %r and/or cosine index %r'
            % (lda_mdl_file, lda_cos_index_file))
    lda_mdl, lda_index = load_lda_variables(lda_mdl_file, lda_cos_index_file)

    # ndmin=2 keeps a single-row or single-column file two-dimensional so
    # the two-value shape unpacking below cannot fail.
    lda_theta_file = mdl_cfg['LDA']['lda_theta_file']
    lda_theta = np.loadtxt(lda_theta_file, ndmin=2)  # LDA theta matrix
    num_docs, num_topics = lda_theta.shape
    min_lda_theta = lda_theta.min()  # min() already reduces over all axes
    # Single-string print(...) produces the same text as the old
    # comma-separated print statement on Python 2 and also parses on Python 3.
    print('LDA-theta is loaded: # of documents: %s # of topics: %s '
          'min(Theta): %s' % (num_docs, num_topics, min_lda_theta))

    lda_beta_file = mdl_cfg['LDA']['lda_beta_file']
    lda_beta = np.loadtxt(lda_beta_file, ndmin=2)  # LDA beta matrix
    num_topics, vocab_size = lda_beta.shape
    min_lda_beta = lda_beta.min()
    print('LDA-beta is loaded: # of topics: %s '
          '# of terms in the vocabulary: %s '
          'min(Bheta): %s' % (num_topics, vocab_size, min_lda_beta))
    print('')  # blank separator line, as before

    return (lda_dictionary, lda_mdl, lda_index, lda_file_path_index,
            lda_theta, lda_beta)
示例#2
0
def load_tm(mdl_cfg):
    """Load the dictionary, LDA model, index and path index named in *mdl_cfg*.

    Args:
        mdl_cfg: nested mapping indexed as ``mdl_cfg[section][key]``. The keys
            read are ``CORPUS/dict_file``, ``CORPUS/path_index_file``,
            ``LDA/lda_model_file`` and ``LDA/lda_cos_index_file``.

    Returns:
        The tuple ``(lda_dictionary, lda_mdl, lda_index,
        lda_file_path_index)``, holding whatever ``load_dictionary``,
        ``load_lda_variables`` and ``load_file_paths_index`` return.

    Raises:
        IOError: if ``nexists`` reports that any of the four files is
            missing. (Previously this surfaced as an ``UnboundLocalError``
            at the ``return`` statement.)
    """
    dictionary_file = mdl_cfg['CORPUS']['dict_file']
    path_index_file = mdl_cfg['CORPUS']['path_index_file']
    lda_mdl_file = mdl_cfg['LDA']['lda_model_file']
    lda_cos_index_file = mdl_cfg['LDA']['lda_cos_index_file']

    # Guard clauses: without them a missing file left the result variables
    # unbound and the function crashed at its return statement.
    if not (nexists(dictionary_file) and nexists(path_index_file)):
        raise IOError(
            'Missing corpus file(s): dictionary %r and/or path index %r'
            % (dictionary_file, path_index_file))
    lda_file_path_index = load_file_paths_index(path_index_file)
    lda_dictionary = load_dictionary(dictionary_file)

    if not (nexists(lda_mdl_file) and nexists(lda_cos_index_file)):
        raise IOError(
            'Missing LDA file(s): model %r and/or cosine index %r'
            % (lda_mdl_file, lda_cos_index_file))
    lda_mdl, lda_index = load_lda_variables(lda_mdl_file, lda_cos_index_file)

    return lda_dictionary, lda_mdl, lda_index, lda_file_path_index