import sys

import summarisation

# Driver script: summarise the file named on the command line and print the
# resulting sentences in SPL output format.
if len(sys.argv) < 2:
    print("usage: python read.py filename")
    sys.exit(1)  # was quit(): sys.exit is the conventional script exit

filename = sys.argv[1]

sentences = summarisation.summarise(filename, debug_output=False)
print(summarisation.outputspl(sentences))
import os
import random

import splitAndParse
import summarisation

# NOTE(review): the head of the first statement is truncated in this chunk;
# it is the tail of a shell invocation that runs Stanford CoreNLP over
# `filename` (which is also bound in the truncated part). Reconstructed here
# as os.system(...) -- confirm against the full original file.
os.system(
    "cd stanford-corenlp && java -cp stanford-corenlp-3.2.0.jar:stanford-corenlp-3.2.0-models.jar:xom.jar:joda-time.jar:jollyday.jar -Xmx3g edu.stanford.nlp.pipeline.StanfordCoreNLP -file "
    + filename
)

# Parse the CoreNLP XML output into sentences plus coreference information.
sentences, coref = splitAndParse.splitSentencesAndParse(
    "stanford-corenlp/" + filename + ".xml"
)

# Random presentation order of the sentence indices.
# list(...) is required: on Python 3, range objects cannot be shuffled.
order = list(range(len(sentences)))
random.shuffle(order)


def cutoff_words(order, sentences, wc):
    """Select sentences in the given order until `wc` words are used.

    Sentences are consumed in `order` until the word budget is reached; the
    sentence that would cross the budget is truncated mid-sentence with
    summarisation.cut_words. The selected sentences are returned in original
    document order (ascending index), not in selection order.

    Args:
        order: iterable of indices into `sentences` (selection order).
        sentences: list of sentence objects understood by `summarisation`.
        wc: total word budget (int).

    Returns:
        List of (possibly truncated) sentences sorted by document position.
    """
    picked = []  # (original_index, sentence) pairs, for later re-sorting
    used = 0     # words consumed so far
    for idx in order:
        sentence = sentences[idx]
        n_words = summarisation.words(sentence)
        if used + n_words > wc:
            # This sentence would exceed the budget: keep only its prefix.
            picked.append((idx, summarisation.cut_words(sentence, wc - used)))
            used = wc
        else:
            picked.append((idx, sentence))
            used += n_words
        if used == wc:
            break
    picked.sort()  # restore document order
    return [text for _, text in picked]


print(summarisation.outputspl(cutoff_words(order, sentences, 200)))