def process_arguments(self):
    """Parse the command line, load both datasets and start the optimizer.

    Positional arguments, in order: the train dataset, the test dataset,
    the file the best model is saved to and, optionally, the file the
    scores of the tested models are saved to.

    Options:
        -t / --type        search type: 'grid' (default), 'random' or 'pso'.
        -i / --iterations  iteration count for pso and random search;
                           defaults to ``self.iterations``.
        -j / --jobs        number of processes used for learning
                           (default -1).

    The parsed values and the loaded datasets are forwarded to
    ``self.initialize_optimizer``.
    """
    parser = argparse.ArgumentParser(
        description='Classifier meta-parameter optimization')
    parser.add_argument('train', help='Train dataset')
    parser.add_argument('test', help='Test dataset')
    parser.add_argument('model', help='File to save best model')
    # argparse ignores ``default`` on a positional argument unless ``nargs``
    # makes that argument optional.  ``nargs='?'`` lets ``scores`` really
    # fall back to None when it is left off the command line.
    # NOTE(review): assumes initialize_optimizer accepts scores=None, as the
    # original ``default=None`` suggests -- confirm.
    parser.add_argument('scores', nargs='?', default=None,
                        help='File to save scores of tested models')
    parser.add_argument('-t', '--type', default='grid',
                        choices=['grid', 'random', 'pso'],
                        help='Search type')
    parser.add_argument('-i', '--iterations', default=self.iterations,
                        type=int,
                        help='Iterations amount for pso and random search')
    parser.add_argument('-j', '--jobs', default=-1, type=int,
                        help='Processes amount for learning')
    args = parser.parse_args()

    train_data, train_label = loadDataset(args.train)
    test_data, test_label = loadDataset(args.test)

    self.initialize_optimizer(args.type, args.model,
                              train_data, train_label,
                              test_data, test_label,
                              args.jobs, args.iterations, args.scores)
classifiers = [ ('LogisticRegression', LogisticRegression), # ('SVC', SVC), # ('NuSVC', NuSVC), ('RandomForestClassifier', RandomForestClassifier), ] for name, cl in classifiers: c = cl() print name test_classifier(c, trainData, trainLabel, testData, testLabel) def test_svc(trainData, trainLabel, testData, testLabel): c = NuSVC(nu=0.05) test_classifier(c, trainData, trainLabel, testData, testLabel) if __name__ == '__main__': import sys if len(sys.argv) < 4: print 'USAGE:\n\t' + sys.argv[0] + ' train.csv test.csv classifier.pkl' sys.exit(1) trainX, trainY = loadDataset(sys.argv[1]) testX, testY = loadDataset(sys.argv[2]) # test_svc(trainX, trainY, testX, testY) # test_bunch_of_classifiers(trainX, trainY, testX, testY) cl = RandomForestClassifier() # cl = LogisticRegression() cl = test_classifier(cl, trainX, trainY, testX, testY) joblib.dump(cl, sys.argv[3])
r.fit(trainData, trainLabel) print("Best parameters set found on development set:") print(r.best_estimator_) print("Grid scores on development set:") for params, mean_score, scores in r.grid_scores_: print("%0.3f (+/-%0.03f) for %r" % (mean_score, scores.std() / 2, params)) return r if __name__ == '__main__': import sys trainData, trainLabel = loadDataset(sys.argv[1]) testData, testLabel = loadDataset(sys.argv[2]) classifier = SVC() param_grid = [ # {'C': [50, 300, 1000, 2500, 5000], 'kernel': ['linear']}, {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']}, ] # gridSearch = gridSearch(classifier, param_grid, trainData, trainLabel) # clf = gridSearch.best_estimator_ # rs = randomizedSearch(classifier, trainData, trainLabel) # clf = rs.best_estimator_ po = pso_svc_optimization(trainData, trainLabel)