path_test = '20news-bydate-test'

# Preprocess the test data.
test, test_label = ps.data_perprocess(path_test)

# Build the test word-document matrix from the existing dictionary and
# token ids; note that `test` is rebound to the resulting matrix.
corpus_test = ps.corpus(dictionary, test)
test = ps.word_document(corpus_test, token_id)

# Calculate the perplexity (ppl) on the test data.
# Load the result stored under 'result/rsm_result_5' from disk.
result = dsl.load('result/rsm_result_5')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']

# The perplexity returned here is the score used to assess the topic model.
Eppl_CD5 = ppl.rsmppl(w_v, w_h, w_vh, test)
print("Eppl_CD5=", Eppl_CD5)

# Load the result stored under 'result/rsm_result_1' from disk and compute
# its perplexity on the same test data.
result = dsl.load('result/rsm_result_1')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']

Eppl_CD1 = ppl.rsmppl(w_v, w_h, w_vh, test)
print("Eppl_CD1=", Eppl_CD1)


# Save text, train_label, dictionary and token_id under 'result/'.
dsl.save(text, 'result/text')
dsl.save(train_label, 'result/train_label')
dsl.save(dictionary, 'result/dictionary')
dsl.save(token_id, 'result/token_id')
path_test = '20news-bydate-test'

# Preprocess the test data.
test, test_label = ps.data_perprocess(path_test)
# Build the test word-document matrix from the existing dictionary and
# token ids; note that `test` is rebound to the resulting matrix.
corpus_test = ps.corpus(dictionary, test)
test = ps.word_document(corpus_test, token_id)

# Calculate the perplexity (ppl) on the test data.
# Load the result stored under 'result/rsm_result_5' from disk.
result = dsl.load('result/rsm_result_5')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']

# The perplexity returned here is the score used to assess the topic model.
Eppl_CD5 = ppl.rsmppl(w_v, w_h, w_vh, test)
print("Eppl_CD5=", Eppl_CD5)

# Load the result stored under 'result/rsm_result_1' from disk and compute
# its perplexity on the same test data.
result = dsl.load('result/rsm_result_1')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']

Eppl_CD1 = ppl.rsmppl(w_v, w_h, w_vh, test)
print("Eppl_CD1=", Eppl_CD1)

# Save text, train_label, dictionary, token_id and train under 'result/'.
dsl.save(text, 'result/text')
dsl.save(train_label, 'result/train_label')
dsl.save(dictionary, 'result/dictionary')
dsl.save(token_id, 'result/token_id')
dsl.save(train, 'result/train')
# Example #3
# 0
    ppl_lda.append(pplt)

# Convert the accumulated ppl_lda values to a NumPy array and save it.
ppl_lda = np.array(ppl_lda)
dsl.save(ppl_lda, 'result/ppl_lda')

# Load the rsm_5 result from disk and pull out its three weight entries.
result = dsl.load('result/rsm_result_5')
w_vh = result['w_vh']
w_v = result['w_v']
w_h = result['w_h']

# Calculate the ppl of the rsm_5 model: one value per sample, each computed
# on test[sample_id[i]].
ppl_rsm_5 = []
for i in xrange(sample):
    test_sample = test[sample_id[i]]
    pplt = ppl.rsmppl(w_v, w_h, w_vh, test_sample)
    ppl_rsm_5.append(pplt)

ppl_rsm_5 = np.array(ppl_rsm_5)
# Save ppl_rsm_5 itself under its own path. Passing ppl_lda here would
# overwrite 'result/ppl_rsm_5' with the LDA values and leave the numbers
# computed above unsaved.
dsl.save(ppl_rsm_5, 'result/ppl_rsm_5')

# Load the rsm_1 result from disk and pull out its three weight entries.
result = dsl.load('result/rsm_result_1')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']

# Calculate the ppl of the rsm_1 model.
# NOTE(review): as written, this loop never calls ppl.rsmppl and never fills
# ppl_rsm_1; it appends the current value of `pplt` to ppl_lda instead.
# This looks like a truncated loop spliced with the start of another
# snippet -- confirm against the original script before relying on it.
ppl_rsm_1 = []
for i in xrange(sample):
    test_sample = test[sample_id[i]]
    ppl_lda.append(pplt)

# Convert the accumulated ppl_lda values to a NumPy array and save it.
ppl_lda = np.array(ppl_lda)
dsl.save(ppl_lda, 'result/ppl_lda')

# Load the rsm_5 result from disk and pull out its three weight entries.
result = dsl.load('result/rsm_result_5')
w_vh = result['w_vh']
w_v = result['w_v']
w_h = result['w_h']

# Calculate the ppl of the rsm_5 model: one value per sample, each computed
# on test[sample_id[i]].
ppl_rsm_5 = []
for i in xrange(sample):
    test_sample = test[sample_id[i]]
    pplt = ppl.rsmppl(w_v, w_h, w_vh, test_sample)
    ppl_rsm_5.append(pplt)

ppl_rsm_5 = np.array(ppl_rsm_5)
# Save ppl_rsm_5 itself under its own path. Passing ppl_lda here would
# overwrite 'result/ppl_rsm_5' with the LDA values and leave the numbers
# computed above unsaved.
dsl.save(ppl_rsm_5, 'result/ppl_rsm_5')


# Load the rsm_1 result from disk and pull out its three weight entries.
result = dsl.load('result/rsm_result_1')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']


#calculate the ppl of rsm_1 model
ppl_rsm_1=[]