def train():
    """Train and return the subjectivity classifier.

    Loads the configured train/test split via the module-level data helper
    `d`, fits a `BaseMethod` classifier with the subjectivity feature
    configuration, and reports elapsed wall-clock time since the
    module-level `start_time`.

    Returns:
        The fitted classifier object.
    """
    # Renamed locals: the originals (`train`, `test`) shadowed this
    # function's own name, which is confusing and error-prone.
    train_set, test_set = d.get_data(General.TRAIN_SET, General.TEST_SET)
    clf = BaseMethod(train_set, **SubjectivityFeatures.CLASSIFIER)
    # Alternative: combined subjectivity + polarity classifier.
    # clf = Combined(SubjectivityFeatures.CLASSIFIER, PolarityFeatures.CLASSIFIER, train_set)
    # Single-argument print form works under both Python 2 and 3.
    print("Finished training in %.2f sec" % (time.time() - start_time))
    return clf
# NOTE(review): this section references `grid`, `t0`, and `parameters`, none of
# which are defined here — it looks like the tail of grid_search() after the
# grid has been fitted; confirm its proper indentation/placement against the
# original (pre-mangled) file.
#
# Write a human-readable grid-search report to results.txt.
# Fix: use a `with` block so the file handle is closed even if a write raises
# (the original `f = open(...)` / `f.close()` pair leaked on exception).
with open("results.txt", "w") as f:
    f.write("Generated at: " + str(strftime("%Y-%m-%d %H:%M")) + "\n")
    f.write("Performed SVM grid search in %0.3fs\n" % (time() - t0))
    f.write("Best grid search CV score: {:0.3f}\n".format(100 * grid.best_score_))
    f.write("Best parameters set:\n")
    best_parameters = grid.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        f.write("\t%s: %r\n" % (param_name, best_parameters[param_name]))

    # Collect the mean CV score of every individual "param=value" setting
    # across all grid points it appeared in.
    # NOTE(review): `grid.grid_scores_` is the pre-0.18 scikit-learn API
    # (replaced by `cv_results_`); left untouched to match the installed
    # sklearn version — confirm before upgrading.
    cat_score = defaultdict(list)
    for s in grid.grid_scores_:
        for cat in s.parameters.keys():
            cat_score[cat + "=" + str(s.parameters[cat])].append(
                s.cv_validation_scores.mean() * 100)
    f.write("\nEfficient params:\n")
    # Average the collected scores per setting, then list them sorted by name.
    cat_score = {key: sum(val) / len(val) for key, val in cat_score.items()}
    for k, v in sorted(cat_score.items(), key=itemgetter(0), reverse=True):
        f.write("\t" + k + "\t" + str(v) + "\n")

    # Per-grid-point mean/std of the CV scores, plus the parameter dict.
    f.write("\nParam scores:\n")
    for s in grid.grid_scores_:
        f.write(str.format("{0:.3f}", s.cv_validation_scores.mean() * 100) + "\t"
                + str.format("{0:.3f}", s.cv_validation_scores.std() * 100) + "\t"
                + str(s.parameters) + "\n")

if __name__ == '__main__':
    # Run the grid search over the subjectivity classifier pipeline using the
    # full dataset; column 0 holds the documents, column 1 the labels.
    train, test = d.get_data("../data/all.tsv", General.TEST_SET)
    grid_search(SubjectivityFeatures.CLASSIFIER['clf'],
                SubjectivityFeatures.CLASSIFIER['feature_union'],
                train[:, 0], train[:, 1])