# NOTE(review): this chunk was whitespace-mangled (collapsed onto one physical
# line); statement structure restored below.  It is also truncated: the code
# that actually evaluates `classifiers` (presumably cross_val_score over each
# entry with cvf folds) is missing from the visible source.
import sys

# NOTE(review): sklearn.cross_validation was deprecated in 0.18 and removed in
# 0.20 — the modern location is sklearn.model_selection.  Left as-is to match
# the sklearn version this project pins.
from sklearn.cross_validation import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC

from util import load_validation_data

if __name__ == "__main__":
    # Get training data
    Xt, Yt, Xunused = load_validation_data()

    # Cross validation, 5-fold
    cvf = 5

    # Initialize classifiers
    classifiers = {
        "Naive Bayes": GaussianNB(),
        "Gradient Boost": GradientBoostingClassifier(),
        "Adaboost": AdaBoostClassifier(DecisionTreeClassifier(max_depth=1)),
        "Decision Tree": DecisionTreeClassifier(),
        "Extra Random Trees": ExtraTreesClassifier(n_estimators=300),
        "Logistic Regression": LogisticRegression(),
        "K-Nearest-Neighbors": KNeighborsClassifier(),
        "SGD": SGDClassifier(),
        "SVM": LinearSVC(),
    }  # NOTE(review): closing brace reconstructed — original chunk ends mid-dict
verbose=True, weightdecay=0.01) trainer.trainUntilConvergence() #trainer.trainEpochs(5) print "trained" #trainer.trainEpochs(5) # Return a functor that wraps calling predict return NeuralNetworkClassifier(trainer) if __name__ == "__main__": # First obtain our training and testing data # Training has 50K samples, Testing 100K Xt, Yt, Xv = load_validation_data() # Run Neural Network over training data classifier = classify(Xt, Yt) # Prepare validation data and predict tstdata = ClassificationDataSet(Xv.shape[1], 1, nb_classes=2) tstdata.setField('input', Xv) tstdata._convertToOneOfMany() # one output neuron per class predictions = classifier.predict(tstdata) # Write prediction to file write_test_prediction("out_nn.txt", np.array(majority))
# build neural net and train it net = buildNetwork(trndata.indim, n_hidden, trndata.outdim, outclass=SoftmaxLayer) trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01) trainer.trainUntilConvergence() #trainer.trainEpochs(5) print "trained" #trainer.trainEpochs(5) # Return a functor that wraps calling predict return NeuralNetworkClassifier(trainer) if __name__ == "__main__": # First obtain our training and testing data # Training has 50K samples, Testing 100K Xt, Yt, Xv = load_validation_data() # Run Neural Network over training data classifier = classify(Xt, Yt) # Prepare validation data and predict tstdata = ClassificationDataSet(Xv.shape[1], 1, nb_classes=2) tstdata.setField('input', Xv) tstdata._convertToOneOfMany() # one output neuron per class predictions = classifier.predict(tstdata) # Write prediction to file write_test_prediction("out_nn.txt", np.array(majority))
""" gbc = GradientBoostingClassifier(verbose=1) parameters = {'max_depth' : range(3,11),'n_estimators' : [400,500]} classifier = GridSearchCV(gbc, parameters, scoring=metric) classifier.fit(Xtrain, Ytrain) return classifier if __name__ == "__main__": # Let's take our training data and train a decision tree # on a subset. Scikit-learn provides a good module for cross- # validation. Xt, Xv, Yt, Yv = get_split_training_dataset() Classifier = train(Xt, Yt) print "Gradient Boost Classifier" suite(Yv, Classifier.predict(Xv)) # smaller feature set Xtimp, features = fclassify.get_important_data_features(Xt, Yt) Xvimp = fclassify.compress_data_to_important_features(Xv, features) ClassifierImp = train(Xtimp,Yt) print "Gradient Boosts Classiifer, 25 important features" suite(Yv, ClassifierImp.predict(Xvimp)) # save predictions on test data X, Y, validation_data = load_validation_data() predictions = Classifier.predict(validation_data) filename = 'gradient_boost_predictions.txt' write_test_prediction(filename, np.array(predictions))
predictions = classifier.predict(Xv) print "Neural Net Test Accuracy:", acc(Yv, predictions), "%" if __name__ == "__main__": if len(sys.argv) < 2: training = '../data/raw/phy_train.dat' print "Usage: $ python neural_network.py /path/to/data/file/" print "Using default data file:", training else: training = sys.argv[1] impute_data = False # load data from file, imputing data and/or removing some features if requested, # then shuffle and split into test and validation X, Y, test_data = load_validation_data() if impute_data: X = remove_features_missing_data(X) test_data = remove_features_missing_data(test_data) Xt, Xv, Yt, Yv = shuffle_split(X, Y) # get the top features, running in parallel children = [] for n_features in [20]: '''for n_features in [23, 21, 19, 17]: children.append(os.fork()) if children[-1]: continue''' X, features = get_important_data_features(X, Y, max_features=n_features)
print "Neural Net Train Accuracy:",acc(Yt, predictions),"%" predictions = classifier.predict(Xv) print "Neural Net Test Accuracy:",acc(Yv, predictions),"%" if __name__ == "__main__": if len(sys.argv) < 2: training = '../data/raw/phy_train.dat' print "Usage: $ python neural_network.py /path/to/data/file/" print "Using default data file:", training else: training = sys.argv[1] impute_data = False # load data from file, imputing data and/or removing some features if requested, # then shuffle and split into test and validation X, Y, test_data = load_validation_data() if impute_data: X = remove_features_missing_data(X) test_data = remove_features_missing_data(test_data) Xt, Xv, Yt, Yv = shuffle_split(X,Y) # get the top features, running in parallel children = [] for n_features in [20]: '''for n_features in [23, 21, 19, 17]: children.append(os.fork()) if children[-1]: continue''' X, features = get_important_data_features(X, Y, max_features=n_features) print X.shape # Do it for test data too...