years_test.append(year) plots_test.append(plots[i]) titles_test.append(titles[i]) dw, wc = P2.all_x_all_y(years_train, plots_train) # 3a. Determine Top 10 words of each decade top_words = get_top_words(wc, train_sample_size, 10) for decade in top_words: print decade, ':', top_words[decade].keys() # 3b. Test classifer without top 100 words top_words = get_top_words(wc, train_sample_size, 100) correct_count_wo = [0.]*bin_num correct_count_w = [0.]*bin_num for i, plot in enumerate(plots_test): predicted_decade_wo, decade_probs_wo = P2.predict_decade(wc, dw, years_train, train_sample_size, plot=plot, skip_words=top_words) predicted_decade_w, decade_probs_w = P2.predict_decade(wc, dw, years_train, train_sample_size, plot=plot) actual_year = years_test[i] for i, decade in enumerate(decade_probs_wo): correct_count_wo[i] += 1. if decade[0] == actual_year else 0. for i, decade in enumerate(decade_probs_w): correct_count_w[i] += 1. if decade[0] == actual_year else 0. print "Accuracy on test (with words): ", correct_count_w[0]/len(plots_test) print "Accuracy on test (without words): ", correct_count_wo[0]/len(plots_test)