def compute(filename):
    """Compare a random sentence selection against the gold summary.

    Loads ``LDATester.PATH + filename + "_gold.txt"`` (the gold summary) and
    ``LDATester.PATH + filename + ".txt"`` (the full document) as ``Document``
    objects, draws as many distinct sentences from the document as the gold
    summary contains, joins their original text with single spaces and
    returns the result of ``BLEU.computeNormalize(gold_text, random_text)``.

    When the gold summary has more sentences than the document, every
    document sentence is used (in shuffled order).
    """
    reference = Document(LDATester.PATH + filename + "_gold.txt")
    source = Document(LDATester.PATH + filename + ".txt")

    ## Get random summary: shuffle all sentence positions, then keep the
    ## first len(gold) of them.
    order = list(range(len(source.sentences)))
    random.shuffle(order)
    picked = order[:len(reference.sentences)]

    random_summary = " ".join(
        [source.getSentenceOrginal(source.sentences[pos]) for pos in picked]
    )
    return BLEU.computeNormalize(reference.document, random_summary)
print 'Conditional Test previous' print ## Test Conditional Frequency Distribution Previous print 'most common word to follow Start', doc.cfdistPrev['Start'].max() ## most common word after Start print 'most common word after',doc.freq_dist.max(),doc.cfdistPrev[doc.freq_dist.max()].max() ## most common word after long print 'Conditional Test after' print ## Test Conditional Frequency Distribution Next print 'most common word to precede End', doc.cfdistNext['End'].max() ## most common word after Start print 'most common word before',doc.freq_dist.max(),doc.cfdistNext[doc.freq_dist.max()].max() ## most common word after long ## get closest sentences to doc freq dist. WE WANT LDA DIST sent = doc.setencesByFreqCloseness() print '1', doc.getSentenceOrginal(sent[0]) print '2', doc.getSentenceOrginal(sent[1]) print '3', doc.getSentenceOrginal(sent[2]) print '4', doc.getSentenceOrginal(sent[3]) print '5', doc.getSentenceOrginal(sent[4]) with open(str(filename) + '_output.txt', 'w') as file: for i in [0,1,2,3,4]: file.write(doc.getSentenceOrginal(sent[i]) + ' ') print ' ' ## LDA from collections import Counter ## get MAP sentences by lda topic i
from Project import Document
from collections import Counter
import numpy as np

## Input document; swap in the prompt below to choose it interactively.
# filename = raw_input("file name ")
filename = 'economist1'
doc = Document(filename + '.txt')

## Get key sentence: the first entry returned by setencesByFreqCloseness()
sent = doc.setencesByFreqCloseness()
maxSent = sent[0]
print(doc.getSentenceOrginal(maxSent))

## Run LDA (argument 5 -- presumably the topic count, confirm against
## Document.getLDA) and look up the (topic, score) pair of the key sentence.
doc.getLDA(5)
topicAndScore = doc.getTopicAndScore()
maxTopic, maxScore = topicAndScore[maxSent]
print(topicAndScore[maxSent])

## Group sentences by topic: topic -> list of sentences, in iteration order
## of topicAndScore.  The topic is element 0 of each entry.
sentByTopics = {}
for sentence in topicAndScore:
    sentByTopics.setdefault(topicAndScore[sentence][0], []).append(sentence)