Example #1
import sys

# Document (class) and ordering (module) come from this summarization project and are not shown here
def main():
	#inputs = ['ip1.txt','ip2.txt']
	#inputs = ['ip3.txt','ip4.txt']
	#inputs = ['sachin1.txt']
	#inputs = ['mal1.txt']
	inputs = ['ip5.txt','ip6.txt','ip7.txt']
	no_of_clusters = int(sys.argv[1])
	doc = Document(inputs,no_of_clusters)
	count = 0
	print "Number of Sentences :"
	print len(doc.sentences)
	#print doc.sent_no_swords
	#print len(doc.sent_no_swords)
	'''

	print "Initial cluster sentences:"

	for i in range(len(doc.clusters)):
		print doc.clusters[i][0],
	'''	

	print "Selecting sentence from each cluster..."
	doc.cluster_vector()
	doc.find_clust_similar_sent()
	#print ""
	#print "Cluster sentences:\n"
	#print doc.clust_sentences

	#print "Assigning weights to cluster sentences:"
	#doc.select_cluster_sentences()

	

	#doc.printclust_sentences()
	#doc.print_rogue_clust_sentences()
	print "Ordering...."
	# when there is only a single input file, also print the sentences in their original order
	for input_file in inputs:
		count = count + 1
	if count == 1:
		doc.print_sent_ordered()

	#ordering
	

	# precedence_ordering returns the index of the cluster sentence that should come first
	first = ordering.precedence_ordering(doc, doc.clust_sentences)

	# exchange the 1st sentence in the cluster list with the chosen first sentence
	tempv = doc.clust_sentences[0]
	doc.clust_sentences[0] = doc.clust_sentences[first]
	doc.clust_sentences[first] = tempv

	# order the cluster sentences by similarity
	ordered_sentences = ordering.similarity_ordering(doc, doc.clust_sentences)
	#print doc.clust_sentences,ordered_sentences


	for i in ordered_sentences:
		print doc.sentences[i].lstrip().capitalize(),". ",
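
# A hypothetical entry point (not part of the original example), assuming the
# snippet above lives in a script invoked as `python <script>.py <no_of_clusters>`,
# since the cluster count is read from sys.argv[1]:
if __name__ == '__main__':
	main()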
Example #2
import sys

# Document (class) and ordering (module) come from this summarization project and are not shown here
def main():
    #inputs = ['ip1.txt','ip2.txt']
    #inputs = ['ip3.txt','ip4.txt']
    #inputs = ['sachin1.txt']
    #inputs = ['mal1.txt']
    inputs = ['ip5.txt', 'ip6.txt', 'ip7.txt']
    no_of_clusters = int(sys.argv[1])
    doc = Document(inputs, no_of_clusters)
    count = 0
    print "Number of Sentences :"
    print len(doc.sentences)
    #print doc.sent_no_swords
    #print len(doc.sent_no_swords)
    '''

	print "Initial cluster sentences:"

	for i in range(len(doc.clusters)):
		print doc.clusters[i][0],
	'''

    print "Selecting sentence from each cluster..."
    doc.cluster_vector()
    doc.find_clust_similar_sent()
    #print ""
    #print "Cluster sentences:\n"
    #print doc.clust_sentences

    #print "Assigning weights to cluster sentences:"
    #doc.select_cluster_sentences()

    print "Initial Summary"
    doc.printclust_sentences()
    #doc.print_rogue_clust_sentences()
    print "Ordering...."
    # when there is only a single input file, also print the sentences in their original order
    for input_file in inputs:
        count = count + 1
    if count == 1:
        doc.print_sent_ordered()

    #ordering

    print "Summary after ordering"
    # precedence_ordering returns the index of the cluster sentence that should come first
    first = ordering.precedence_ordering(doc, doc.clust_sentences)

    # exchange the 1st sentence in the cluster list with the chosen first sentence
    tempv = doc.clust_sentences[0]
    doc.clust_sentences[0] = doc.clust_sentences[first]
    doc.clust_sentences[first] = tempv

    # order the cluster sentences by similarity
    ordered_sentences = ordering.similarity_ordering(doc, doc.clust_sentences)
    #print doc.clust_sentences,ordered_sentences

    for i in ordered_sentences:
        print doc.sentences[i].lstrip().capitalize(), ". ",
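
# The ordering module itself is not shown on this page. As a rough sketch only
# (an assumption, not the project's actual code), similarity_ordering could be a
# greedy pass: keep the sentence fixed by precedence_ordering in front, then
# repeatedly append the remaining sentence most similar to the one just placed.
# `similarity` below stands for any pairwise sentence-similarity function
# (e.g. cosine over term vectors) and is also an assumed parameter.
def greedy_similarity_ordering(similarity, sentence_ids):
    remaining = list(sentence_ids)
    ordered = [remaining.pop(0)]        # position 0 was already chosen
    while remaining:
        last = ordered[-1]
        # pick the candidate most similar to the last placed sentence
        best = max(remaining, key=lambda s: similarity(last, s))
        remaining.remove(best)
        ordered.append(best)
    return ordered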
Example #3
def summarize(inpdir, no_of_clusters, task):
	# ques_root_directory and sys_dir are module-level paths defined elsewhere in the project;
	# Document (class) and ordering (module) are project modules not shown here
	doc = Document(ques_root_directory+inpdir, no_of_clusters)
	print "Number of Sentences :"
	print len(doc.sentences)
	#print doc.sent_no_swords
	#print len(doc.sent_no_swords)

	print "Initial cluster sentences:"

	for i in range(len(doc.clusters)):
		print doc.clusters[i][0],

	#doc.printinit_clust()


	doc.cluster_vector()
	doc.find_clust_similar_sent()
	print ""
	print "Simi based cluster sentences:"
	print doc.clust_sentences
	doc.printclust_sentences()


	print "###"
	'''

	print "weight cluster sentences:"
	doc.select_cluster_sentences()
	print doc.clust_sentences
	doc.printclust_sentences()
	'''

	'''
	print "document cluster sentences:"
	doc.clust_doc_sent()
	print doc.clust_sentences
	doc.printclust_sentences()
	'''

	#doc.printclust_sentences()
	#doc.print_rogue_clust_sentences()
	print "Ordering...."

	'''
	for input_file in inputs:
		count = count +1
	if count == 1:
		doc.print_sent_ordered()
	'''

	#ordering
	

	# precedence_ordering returns the index of the cluster sentence that should come first
	first = ordering.precedence_ordering(doc, doc.clust_sentences)

	# exchange the 1st sentence in the cluster list with the chosen first sentence
	tempv = doc.clust_sentences[0]
	doc.clust_sentences[0] = doc.clust_sentences[first]
	doc.clust_sentences[first] = tempv

	# order the cluster sentences by similarity
	ordered_sentences = ordering.similarity_ordering(doc, doc.clust_sentences)
	#print doc.clust_sentences,ordered_sentences

	print ""
	print "SUMMARY of",no_of_clusters," :"

	for i in ordered_sentences:
		print doc.sentences[i],". "
	



	#doc.print_rogue_clust_sentences()
	print("writing op of task "+str(task))
	doc.write_rogue_clust_sentences(sys_dir,task)
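
# Hypothetical usage of summarize() (the real call site is not shown): the
# module-level paths ques_root_directory and sys_dir must be set beforehand,
# and the directory name / task id below are only placeholders.
#
#     summarize('topic_01/', 10, 1)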