def kaggle_submit():
    """Train a random forest on the full training set and dump test predictions.

    Reads the module-level ``Xtrain``, ``ytrain``, ``Xtest`` and
    ``attributes``, builds a forest with ``rf.raise_forest``, runs
    ``rf.ensemble`` over ``Xtest`` with no labels, and writes one
    ``index,prediction`` row per test example to
    ``test_set_predictions.csv`` (relative to the current working
    directory). Returns nothing.
    """
    # Same argument roles as the cross-validation call to rf.raise_forest:
    # (data, labels, forest_size, train_size, att_size).
    forest_size = 100
    train_size = Xtrain.shape[0]
    att_size = len(attributes)
    trees = rf.raise_forest(Xtrain, ytrain, forest_size, train_size, att_size)

    # None is passed in the label position, so the first element of the
    # result (the error slot) is not used here.
    _, ensemble_pred = rf.ensemble(Xtest, None, trees)

    # 'wb' is the mode the Python 2 csv module expects for output files.
    # NOTE(review): under Python 3 this would need mode 'w' with newline=''.
    with open('test_set_predictions.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        for i in xrange(Xtest.shape[0]):
            writer.writerow([str(i), str(ensemble_pred[i])])
sets = range(k) del(sets[j]) #train on k-1 sets, test on last set training_block = numpy.vstack(([partitions[i] for i in sets])) training_label_block = numpy.vstack(([label_partitions[i] for i in sets])) test_block = partitions[j] test_label_block = label_partitions[j] train_examples = zip(training_block, training_label_block.T.tolist()[0]) test_examples = zip(test_block, test_label_block.T.tolist()[0]) #cross validation for random forest print "Cross Validating Random Forest..." train_size = int(training_block.shape[0]) att_size = int(len(attributes)) forest_size = 100 [ensemble_error, ensemble_pred] = rf.ensemble(test_block, test_label_block, rf.raise_forest(training_block,training_label_block, forest_size, train_size, att_size)) error[0]+= (1.0/k) * ensemble_error #cross validation for decision tree print "Cross Validating Decision Tree..." dec_tree = id3.id3(train_examples, attributes) dec_tree_errors = 0 for i in xrange(len(test_block)): if id3.classify(dec_tree, test_block[i]) != test_label_block[i]: dec_tree_errors += 1 error[1] += (1.0/k) * (float(dec_tree_errors) / set_size) print "Cross Validating AdaBoost..." adaboost_classifier = adaboost.adaboost(train_examples, adaboost_rounds) adaboost_errors = 0 for i in xrange(len(test_block)):