def summaryGenerator(class_name, tweets, folder_mode, ranker):
    """Build an abstractive summary for one class of tweets.

    Pipeline: clean the raw tweets, generate candidate sentences from the
    word graph, load the per-class word weights, then let the ILP select
    the final sentence set (similarity threshold 0.25, length cap 200).

    Returns the list of sentences retained by the ILP.
    """
    cleaned = tweetCleaner(tweets)  # normalize raw tweets before graph building
    candidates = wg.retrieveNewSentences(cleaned, stopwords)
    # Per-class word weights live under <Extract>/<class>/<class>_weight.txt.
    weight_path = (mainDatafolder + "/" + folder_mode['Extract'] + "/" +
                   class_name + "/" + class_name + "_weight.txt")
    word_scores = createDict(weight_path)
    # The ILP picks the best subset of generated sentences as the summary.
    return wg.solveILP(candidates, word_scores, lm, stopwords, ranker,
                       intraGenSimThreshold=0.25, l_max=200)
def summaryGenerator(class_name, tweets, folder_mode, ranker):
    """Summarize *tweets* for *class_name* via word-graph generation + ILP.

    NOTE(review): this is a second definition of summaryGenerator in the
    same file; at import time it shadows the earlier one — confirm which
    variant is intended to survive.
    """
    # Step 1: clean, then generate candidate sentences from the word graph.
    clean_tweets = tweetCleaner(tweets)
    gen_sents = wg.retrieveNewSentences(clean_tweets, stopwords)

    # Step 2: per-class word weights drive sentence scoring inside the ILP.
    class_dir = mainDatafolder + "/" + folder_mode['Extract'] + "/" + class_name
    scores = createDict(class_dir + "/" + class_name + "_weight.txt")

    # Step 3: ILP selection — up to 200 tokens, pairwise similarity <= 0.25.
    chosen = wg.solveILP(gen_sents, scores, lm, stopwords, ranker,
                         intraGenSimThreshold=0.25, l_max=200)
    return chosen
#for text in v: origSentences.append(text) text=re.sub(p, '', text) text=text.strip().split('./PUNCT') for r in text: if len(r.strip())!=0: tweets.append(r.strip()+' ./PUNCT') tweets=set(tweets) #print len(tweets), tweets genSentences=WGGraph.retrieveNewSentences(tweets, english_postagger, stopwords) gengroupList.append(genSentences) print "Done with ", clustNum print gengroupList print 'Num of clusters', len(gengroupList) finalSentencesRetained=WGGraph.solveILP(gengroupList,lm, stopwords, origSentences, intraGenSimThreshold=0.2, l_max=10) txtSummary="" for sent in finalSentencesRetained: sent=sentenceCapitalize(sent) txtSummary=txtSummary+"\n"+sent #print 'Number of tweets', len(tweets) , tweets txtSummary=txtSummary.strip() writeAbstracts.write("==========="+eventfile+"~"+str(i)+":========\n"+txtSummary+"\n\n") writeAbstracts.close()
# NOTE(review): fragment (spaced-style variant of the region above it in the
# mangled source) — starts mid-logic; `text`, `tweets`, `origSentences`,
# `gengroupList`, `clustNum`, `eventfile`, `i`, and `writeAbstracts` are
# defined in an enclosing scope not visible here. Formatting reconstructed
# from a whitespace-mangled source; indentation relative to the unseen outer
# loop is a best guess — confirm against the full file.
for r in text:
    if len(r.strip()) != 0:  # skip empty chunks; re-append the PUNCT terminator
        tweets.append(r.strip() + ' ./PUNCT')
tweets = set(tweets)  # de-duplicate tweets before sentence generation
#print len(tweets), tweets
# Generate candidate sentences for this cluster and collect them per cluster.
genSentences = WGGraph.retrieveNewSentences( tweets, english_postagger, stopwords)
gengroupList.append(genSentences)
print "Done with ", clustNum
print gengroupList
print 'Num of clusters', len(gengroupList)
# ILP picks at most 10 sentences across all clusters (similarity cap 0.2).
finalSentencesRetained = WGGraph.solveILP(gengroupList, lm, stopwords, origSentences, intraGenSimThreshold=0.2, l_max=10)
# Assemble the summary text, one capitalized sentence per line.
txtSummary = ""
for sent in finalSentencesRetained:
    sent = sentenceCapitalize(sent)
    txtSummary = txtSummary + "\n" + sent
#print 'Number of tweets', len(tweets) , tweets
txtSummary = txtSummary.strip()
# Write the summary under a header naming the event file and index `i`.
writeAbstracts.write("===========" + eventfile + "~" + str(i) + ":========\n" + txtSummary + "\n\n")
writeAbstracts.close()