Example #1
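Queries a Wikia Solr index for either a single document (--id) or every main-namespace page of a wiki (--wiki), then prints a short summary of each hit. The listing starts mid-way through the option-parser setup, so the imports and the preceding add_option() calls below are reconstructed from how the options are used further down.
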
                  action="store",
                  default=5,
                  help="Specifies the number of sentences to write")

(options, args) = parser.parse_args()

if options.id:
    query = 'id:%s' % (options.id)
elif options.wiki:
    query = "host:'%s' AND ns:0" % (options.wiki)
else:
    raise Exception('A wiki (passed as a host name) or a document ID is required')

conn = SolrConnection('http://search-s10.prod.wikia.net:8983/solr')

response = conn.query(
    query, fields=['html_en', 'nolang_txt', 'html', 'title', 'title_en', 'id'])
paginator = SolrPaginator(response)

summarizer = SimpleSummarizer()

for page in paginator.page_range:
    for doc in paginator.page(page).object_list:
        text = doc.get('html_en', doc.get('nolang_txt', doc.get('html')))
        title = doc.get('title_en', doc.get('title', doc['id']))
        summed = summarizer.get_summarized(text, options.num_sents)
        print "\t\t=======", title, "======="
        print "\t" + "\n\t".join(
            [sent for sent in summed if not sent.startswith('Contents')])
        print "\t\t====================================="
Example #2
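Runs a complete NLP pipeline over a single article: it prints the original text, compares the article's categories against a pickled top-5 list, builds two summaries (sentence ranking via SimpleSummarizer and phrase reduction), classifies the article with pickled Bayes, MaxEnt, and decision-tree models (both multiclass and per-tag binary), and finally extracts people's actions, anaphoric references, and interactions, logging everything to results.txt.
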
import pickle

import nltk

# project-local modules assumed by this snippet
import action
import config
import data
import interactions
import ph_reduction
import references
import utils

fp = None

def print_to_screen_and_file(text):
	# assumed helper: the original module logs every message to both
	# the console and the results file opened in run()
	print text
	fp.write(text + "\n")

def run(path):
	global fp

	# load article text
	article = data.Article(path)
	utils.load_data(article.text)

	fp = file("results.txt", "w")

	# show article text
	print_to_screen_and_file("-"*80)
	print_to_screen_and_file("Original article:\n")
	print_to_screen_and_file(article.text)
	print_to_screen_and_file("-"*80)
	
	print_to_screen_and_file("Categories:\n")
	top5 = pickle.load(open(config.TOP5_CATEGORIES, "rb"))  # list of [catname, count, tag] entries
	print_to_screen_and_file("In article: " + str(article.cats))
	print_to_screen_and_file("Top5: " + str(top5))
	ground_truth = [tag for cat, count, tag in top5 if cat in article.cats]
	print_to_screen_and_file("Present from Top5: " + str(ground_truth))
	print_to_screen_and_file("-"*80)

	# make the summary & show in console
	print_to_screen_and_file("I Summary:\n")
	
	instance = SimpleSummarizer()
	# summarize down to one third of the original number of sentences
	print_to_screen_and_file(instance.summarize(article.text, len(utils.sentences) / 3))
	print_to_screen_and_file("-"*80)

	print_to_screen_and_file("II Summary:\n")
	print_to_screen_and_file(" ".join(ph_reduction.PhraseReductor().find(utils.tagged_sentences)))
	print_to_screen_and_file("-"*80)
	
	# classification
	print_to_screen_and_file("Multiclass classification:\n")
	stemmer = nltk.stem.WordNetLemmatizer()
	words = nltk.tokenize.wordpunct_tokenize(article.text)
	feats = utils.bag_of_words(words, article.text, stemmer)
	
	classifier = pickle.load(open(config.BAYES_CLASSIFIER_FILE, 'rb'))
	b_class = classifier.classify(feats)
	print_to_screen_and_file("BayesClassifier class: " + b_class + ", is correct? " + str(b_class in ground_truth))
	
	classifier = pickle.load(open(config.MAXENT_CLASSIFIER_FILE, 'rb'))
	m_class = classifier.classify(feats)
	print_to_screen_and_file("MaxEntClassifier class: " + m_class + ", is correct? " + str(m_class in ground_truth))
	
	classifier = pickle.load(open(config.DTREE_CLASSIFIER_FILE, 'rb'))
	d_class = classifier.classify(feats)
	print_to_screen_and_file("DecisionTreeClassifier class: " + d_class + ", is correct? " + str(d_class in ground_truth))
	print_to_screen_and_file("-"*80)
	
	print_to_screen_and_file("Binary classification:\n")
	title = ["BayesClassifier: ", "MaxEntClassifier: ", "DecisionTreeClassifier: "]
	classifiers = [config.BAYES_CLASSIFIER_FILE_PATTERN, config.MAXENT_CLASSIFIER_FILE_PATTERN, config.DTREE_CLASSIFIER_FILE_PATTERN]
	tags = ["A", "B", "C", "D", "E", "OTHER"]
	for index, typename in enumerate(classifiers):
		results = {}
		accuracy = 0
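		# a prediction counts as correct when its yes/no answer matches whether the tag is in the ground truth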
		for tag in tags:
			fname = typename % tag
			classifier = pickle.load(open(fname, 'rb'))
			results[tag] = classifier.classify(feats)
			if results[tag] == "yes":
				if (tag in ground_truth): accuracy += 1
			elif results[tag] == "no":
				if (tag not in ground_truth): accuracy += 1
			
		print_to_screen_and_file(title[index] + str(results) + ", accuracy: " + str(accuracy * 100 / len(tags)) + "%")
	print_to_screen_and_file("-"*80)

	# people actions
	print_to_screen_and_file("People and their actions:\n")
	work = action.Actions().find(utils.tagged_words, utils.tagged_sentences, utils.people)
	# print the updated info with people actions
	for i, (key, value) in enumerate(work.items()):
		print_to_screen_and_file("[%d] - %s = %s"%(i+1, key, value))
	print_to_screen_and_file("-"*80)

	# anaphora
	print_to_screen_and_file("Anaphoras:\n")
	refs = references.References().find(utils.people, utils.sentences, utils.tagged_sentences)
	for ref, fullname, index in refs:
		print_to_screen_and_file("Sentence["+str(index+1)+"]: " + ref + " - "+ fullname)
	print_to_screen_and_file("-"*80)

	# interactions
	print_to_screen_and_file("People interactions:\n")
	inter = interactions.Interactor().find(refs, utils.tagged_sentences)
	for index, item in enumerate(inter):
		who, prp, what = item['who'], item['prp'], item['what']
		s = "["+str(index+1)+"]:"
		for i in xrange(len(who)):
			if prp[i] and who[i]: s += " " + who[i] + "(" + prp[i] + "), "
			elif prp[i]: s += prp[i] + ", "
			elif who[i]: s += " " + who[i] + ", "
		s += " - " + ", ".join(what)
		print_to_screen_and_file(s)

	print_to_screen_and_file("-"*80)
	print "Finished."

	fp.close()
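
# Example invocation (the article path format is assumed):
#   run("articles/sample.txt")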