Exemplo n.º 1
0
			years_test.append(year)
			plots_test.append(plots[i])
			titles_test.append(titles[i])
	dw, wc = P2.all_x_all_y(years_train, plots_train)

	# 3a. Determine Top 10 words of each decade
	top_words = get_top_words(wc, train_sample_size, 10)
	for decade in top_words:
		print decade, ':', top_words[decade].keys()

	# 3b. Test classifier without the top 100 words (and with them, for comparison)
	top_words = get_top_words(wc, train_sample_size, 100)
	correct_count_wo = [0.]*bin_num
	correct_count_w = [0.]*bin_num
	for i, plot in enumerate(plots_test):
		predicted_decade_wo, decade_probs_wo = P2.predict_decade(wc, dw, years_train, train_sample_size, plot=plot, skip_words=top_words)
		predicted_decade_w, decade_probs_w = P2.predict_decade(wc, dw, years_train, train_sample_size, plot=plot)

		actual_year = years_test[i]
		for i, decade in enumerate(decade_probs_wo):
			correct_count_wo[i] += 1. if decade[0] == actual_year else 0.
		for i, decade in enumerate(decade_probs_w):
			correct_count_w[i] += 1. if decade[0] == actual_year else 0.

	print "Accuracy on test (with words): ", correct_count_w[0]/len(plots_test)
	print "Accuracy on test (without words): ", correct_count_wo[0]/len(plots_test)