Exemplo n.º 1
0
    def compute(filename):
        """Score a randomly drawn summary of a document against its gold summary.

        Loads ``LDATester.PATH + filename + "_gold.txt"`` and
        ``LDATester.PATH + filename + ".txt"`` as ``Document`` objects, draws
        as many distinct sentences at random from the document as the gold
        document contains (fewer when the document itself is shorter), and
        joins their original text with single spaces.

        Args:
            filename: Base name of the document, without the ``.txt`` or
                ``_gold.txt`` suffix; it is appended to ``LDATester.PATH``.

        Returns:
            The result of ``BLEU.computeNormalize`` for the gold document text
            and the random summary (presumably a normalised BLEU score that
            serves as a random baseline -- confirm against ``BLEU``).
        """
        prefix = LDATester.PATH + filename
        gold_doc = Document(prefix + "_gold.txt")
        doc = Document(prefix + ".txt")

        ## Random summary: shuffle every sentence position, then keep only
        ## the leading ones so no sentence is picked twice.
        positions = list(range(len(doc.sentences)))
        random.shuffle(positions)
        sample_size = len(gold_doc.sentences)
        picked = [doc.sentences[pos] for pos in positions[:sample_size]]

        ## Map each picked sentence back to its original text before scoring.
        summary = " ".join(map(doc.getSentenceOrginal, picked))
        return BLEU.computeNormalize(gold_doc.document, summary)
Exemplo n.º 2
0
print 'Conditional Test previous'
print
## Test Conditional Frequency Distribution Previous
print 'most common word to follow Start', doc.cfdistPrev['Start'].max() ## most common word after Start
print 'most common word after',doc.freq_dist.max(),doc.cfdistPrev[doc.freq_dist.max()].max() ## most common word after long

print 'Conditional Test after'
print

## Test Conditional Frequency Distribution Next
print 'most common word to precede End', doc.cfdistNext['End'].max() ## most common word after Start
print 'most common word before',doc.freq_dist.max(),doc.cfdistNext[doc.freq_dist.max()].max() ## most common word after long

## get closest sentences to doc freq dist. WE WANT LDA DIST
sent = doc.setencesByFreqCloseness()
print '1', doc.getSentenceOrginal(sent[0])
print '2', doc.getSentenceOrginal(sent[1])
print '3', doc.getSentenceOrginal(sent[2])
print '4', doc.getSentenceOrginal(sent[3])
print '5', doc.getSentenceOrginal(sent[4])

with open(str(filename) + '_output.txt', 'w') as file:
    for i in [0,1,2,3,4]:
        file.write(doc.getSentenceOrginal(sent[i]) + ' ')
print ' '


## LDA

from collections import Counter
## get MAP sentences by lda topic i
Exemplo n.º 3
0
from Project import Document
from collections import Counter
import numpy as np
# filename = raw_input("file name ")
filename = 'economist1'
doc = Document(filename + '.txt')

## Key sentence: the first entry returned by setencesByFreqCloseness(),
## printed in its original form.
sent = doc.setencesByFreqCloseness()
maxSent = sent[0]
print(doc.getSentenceOrginal(maxSent))

## Run LDA with argument 5 (presumably the number of topics -- confirm in
## Document.getLDA), then look up the (topic, score) pair of the key sentence.
doc.getLDA(5)
topicAndScore = doc.getTopicAndScore()
maxTopic, maxScore = topicAndScore[maxSent]
print(topicAndScore[maxSent])

## Group the keys of topicAndScore by the first element of their value
## (the topic): sentByTopics maps topic -> list of keys in iteration order.
sentByTopics = {}
for key in topicAndScore:
    value = topicAndScore[key]
    topic = value[0]
    sentByTopics.setdefault(topic, []).append(key)