Example #1
 def test_adaboost(self):
     X, Y = ada.parse_spambase_data("tiny.spam.train")
     Y2 = ada.new_label(Y)  # relabel the freshly parsed targets for boosting
     trees, weights = ada.adaboost(X, Y2, 2)
     self.assertEqual(len(trees), 2)
     self.assertEqual(len(weights), 2)
     self.assertTrue(
         isinstance(trees[0], sklearn.tree.DecisionTreeClassifier))
     x = np.array([[0, -1], [1, 0], [-1, 0]])
     y = np.array([-1, 1, 1])
     trees, weights = ada.adaboost(x, y, 1)
     h = trees[0]
     pred = h.predict(x)
     for i in range(len(y)):
         self.assertEqual(pred[i], y[i])
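The test assumes that ada.adaboost(X, Y, num_iter) returns parallel lists of fitted trees and their vote weights. A minimal sketch of a trainer satisfying that contract, assuming depth-1 DecisionTreeClassifier weak learners and labels in {-1, +1} (the internals of the real ada module are an assumption here):

import numpy as np
from sklearn.tree import DecisionTreeClassifier

def adaboost_sketch(X, y, num_iter):
    """Hypothetical trainer matching the (trees, weights) contract above."""
    n = len(y)
    d = np.ones(n) / n  # start with uniform sample weights
    trees, weights = [], []
    for _ in range(num_iter):
        h = DecisionTreeClassifier(max_depth=1)
        h.fit(X, y, sample_weight=d)
        miss = h.predict(X) != y
        err = np.sum(d * miss) / np.sum(d)
        alpha = np.log((1 - err) / max(err, 1e-16))  # vote weight of this tree
        d *= np.exp(alpha * miss)  # re-weight misclassified points upward
        d /= d.sum()
        trees.append(h)
        weights.append(alpha)
    return trees, weights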
Example #2
 def test_adaboost_predict(self):
     x = np.array([[0, -1], [1, 0], [-1, 0]])
     y = np.array([-1, 1, 1])
     trees, weights = ada.adaboost(x, y, 1)
     pred = ada.adaboost_predict(x, trees, weights)
     for i in range(len(y)):
         self.assertEqual(pred[i], y[i])
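A matching sketch of the weighted-vote predictor this test exercises; again an assumption about the module's internals, not its actual source:

import numpy as np

def adaboost_predict_sketch(X, trees, weights):
    """Sign of the alpha-weighted sum of the individual trees' votes."""
    scores = sum(alpha * tree.predict(X) for tree, alpha in zip(trees, weights))
    return np.sign(scores)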
Example #3
def main():
    filename = sys.argv[1]
    hypothesisFile = sys.argv[2]
    learning_type = sys.argv[3]

    input_data = []
    print("opening training file", filename)
    with open(filename, "r", encoding="utf8") as training_file:
        for line in training_file:
            input_data.append(line.rstrip())

    training_dataset = get_attributes(input_data)

    attribute_list = list(range(len(training_dataset[0]) - 1))  # every column except the label

    if learning_type == "dt":
        print("Calling decisionTree...")
        root = decisiontree.decision_tree(training_dataset,
                                          attribute_list,
                                          depth=5)
        print("Decision Tree Model ready..")
    elif learning_type == "ada":
        print("Calling Adaboost...")
        root = adaboost.adaboost(training_dataset, attribute_list, K=8)
        print("Adaboost Model ready..")
    else:
        print("Invalid option!")
        sys.exit()

    with open(hypothesisFile, "wb") as output_file:
        pickle.dump(root, output_file)
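The pickled hypothesis can later be restored for prediction. A minimal companion sketch; the path is hypothetical and pickle.load simply inverts the dump above:

import pickle

hypothesisFile = "hypothesis.pkl"  # hypothetical path (sys.argv[2] in main())
with open(hypothesisFile, "rb") as model_file:
    root = pickle.load(model_file)
# root is the decision tree or AdaBoost ensemble trained by main()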
Example #4
    def train(self):
        """
        AdaBoost over decision trees (max depth 10) is used: 200 boosting
        rounds are run on the training data, and the resulting trees and
        adaptive parameters are pickled to disk.
        """
        self.boostedTrees, self.adaptiveParams = adaboost(self.trainDat, 200)
        with open("proc_data/boostedTreesFinalChal.pkl", "wb") as fp1:
            pkl.dump(self.boostedTrees, fp1)
        with open("proc_data/adaptiveParamsFinalChal.pkl", "wb") as fp2:
            pkl.dump(self.adaptiveParams, fp2)
Example #5
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from adaboost import adaboost  # assumed: local module defining adaboost()

N = 200
mat = sio.loadmat('../mnist.mat')

print(mat['test_X'].shape[0])
chooseTrain = np.random.permutation(mat['train_X'].shape[0])
chooseTest = np.random.permutation(mat['test_X'].shape[0])
maxIter = 20
train_X = mat['train_X']
train_Y = mat['train_Y']
test_X = mat['test_X']
test_Y = mat['test_Y']
print(train_X.shape)
print(train_Y.shape)
print(chooseTrain.shape)
e_train, e_test, maxIter = adaboost(train_X[chooseTrain[0:10000], :],
                                    train_Y[0, chooseTrain[0:10000]],
                                    test_X[chooseTest[0:1000], :],
                                    test_Y[0, chooseTest[0:1000]], maxIter)
print(e_train)
print(e_test)

# one point per boosting iteration
t = range(maxIter)

# training error (blue dotted), test error (black solid), plus dashed
# baselines at the first-round errors
plt.plot(t, e_train, 'b:', t, e_test, 'k-',
         t, e_train[0] * np.ones(maxIter), 'b--',
         t, e_test[0] * np.ones(maxIter), 'b--')
plt.show()
Example #6
        '--test_users',
        action=readFile,
        help='Indexes in the trained model. Type = list, in json file')
    parser.add_argument(
        '-ai',
        '--available_items',
        default=slice(None),
        action=readFile,
        help='Indexes in the trained model. Type = list, in json file')
    # parser.add_argument('-bs','--batch_size', type=int, default=5000, help='Size of user batch')
    # parser.add_argument('-th','--thread', type=int, default=6, help='# of threads for multi processing')
    args = parser.parse_args()

    if args.command == "train":
        ensemble = adaboost(args.opt_data,
                            args.data,
                            args.n_iter,
                            saveTime=args.save_time,
                            modelList=args.model)  # retrain:ensemble=ensemble
        with open("data/ensemble2", "wb") as f:
            pickle.dump(ensemble, f)
    elif args.command == "test":
        recall = get_recall(args.model, args.data, args.opt_data,
                            args.recall_at, args.test_users,
                            args.available_items, args.in_out)
        print(recall)
    elif args.command == "rec":
        topidx = getRecList(args.model, args.n_rec, args.opt_data,
                            args.test_users, args.available_items)
        print(topidx.shape)
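This snippet starts midway through the argument definitions, so the parser setup is cut off. A hypothetical sketch of the scaffolding it assumes, including a readFile action that replaces a JSON file path with its parsed contents (all names beyond those visible above are guesses):

import argparse
import json

class readFile(argparse.Action):
    """Hypothetical action: load the JSON file at the given path."""
    def __call__(self, parser, namespace, values, option_string=None):
        with open(values) as f:
            setattr(namespace, self.dest, json.load(f))

parser = argparse.ArgumentParser()
parser.add_argument('command', choices=['train', 'test', 'rec'])
parser.add_argument('-tu',
                    '--test_users',
                    action=readFile,
                    help='Indexes in the trained model. Type = list, in json file')
# ...the remaining --data/--model/--n_iter style options continue as shown above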
Example #7
# =============================================================================
# ################ Main Function ################
# =============================================================================
import datetime
import warnings
import helper as hp     # assumed: module providing importCsv()
import adaboost as ada  # assumed: module providing the adaboost class

maxDepth = 20  # max tree depth for the (commented-out) decision-tree run below
fileName1 = "pa3_train_reduced_bo.csv"  # training file name
fileName2 = "pa3_valid_reduced.csv"  # validation file name
warnings.filterwarnings("error")
print("\n ------------ ImportData ------------")
trainData = hp.importCsv(fileName1)
validateData = hp.importCsv(fileName2)

for l in [1, 5, 10, 20]:
# for l in [1]:
    print("\n ------------ Adaboost-{0} ------------{1}".format(l, datetime.datetime.now()))
    adaClass = ada.adaboost(ftrNum=trainData.shape[1] - 1, depth=1, lNum=l, dataNum=trainData.shape[0])
    adaClass.runAdaboost(df=trainData)
    print(adaClass.computeFinalAccNumRate(df=trainData))

# for d, m, n in [(9, 20, 1), (9, 20, 2), (9, 20, 5), (9, 20, 10), (9, 20, 25)]:
# for d, m, n in [(9, 50, 1), (9, 50, 2), (9, 50, 5), (9, 50, 10), (9, 50, 25)]:
# for d, m, n in [(9, 10, 1), (9, 10, 2), (9, 10, 5), (9, 10, 10), (9, 10, 25)]:
#     print("\n ------------ Build Forest{0} ------------{1}".format(n, datetime.datetime.now()))
#     ftClass = ft.randomForest(treeNum=n, ftrNum=m, depth=d, dataNum=trainData.shape[0])
#     ftClass.buildRandomForest(df=trainData)
#     ftClass.predicDataResult(df=trainData)

# print("\n ------------ Build DT ------------{0}".format(datetime.datetime.now()))
# dtClass = dt.decesionTree(maxDepth, trainData.shape[0])
# root1 = root2 = cur = Node((None, None, dtClass.getLabelFromLargeData(trainData)))
# cl, cr = dtClass.getResultInfo(trainData)
Example #8
	train_size = int(training_block.shape[0])
	att_size = int(len(attributes))
	forest_size = 100
	[ensemble_error, ensemble_pred] = rf.ensemble(
		test_block, test_label_block,
		rf.raise_forest(training_block, training_label_block,
				forest_size, train_size, att_size))
	error[0] += (1.0 / k) * ensemble_error

	# cross-validation for the decision tree
	print("Cross Validating Decision Tree...")
	dec_tree = id3.id3(train_examples, attributes)
	dec_tree_errors = 0
	for i in range(len(test_block)):
		if id3.classify(dec_tree, test_block[i]) != test_label_block[i]:
			dec_tree_errors += 1
	error[1] += (1.0 / k) * (float(dec_tree_errors) / set_size)

	print("Cross Validating AdaBoost...")
	adaboost_classifier = adaboost.adaboost(train_examples, adaboost_rounds)
	adaboost_errors = 0
	for i in range(len(test_block)):
		if adaboost.classify(adaboost_classifier, test_block[i]) != test_label_block[i]:
			adaboost_errors += 1
	error[2] += (1.0 / k) * (float(adaboost_errors) / set_size)


print('Estimated accuracy of Random Forest:', (1 - error[0]))
print('Estimated accuracy of Decision Tree:', (1 - error[1]))
print('Estimated accuracy of AdaBoost:', (1 - error[2]))
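The fragment above runs inside a k-fold cross-validation loop whose top is cut off. A hypothetical sketch of the enclosing split, using the names the fragment expects (the fold logic itself is a guess):

import numpy as np

examples = list(range(100))  # placeholder dataset; the real code parses records
labels = list(np.random.randint(0, 2, size=100))

k = 10
error = [0.0, 0.0, 0.0]  # accumulators: random forest, decision tree, AdaBoost
for fold in np.array_split(np.arange(len(examples)), k):
    test_idx = set(fold.tolist())
    test_block = [examples[i] for i in fold]
    test_label_block = [labels[i] for i in fold]
    train_examples = [e for i, e in enumerate(examples) if i not in test_idx]
    set_size = len(test_block)
    # ...the cross-validation body shown above runs here...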
	
Example #9

def digits_make_classifiers_to_misclassified(X, Y, classifiers, ids_to_points):
    """
    Map each classifier to the ids of the training points it misclassifies.
    This method is specific to the format of the classifiers.
    """
    output = {key: [] for key in classifiers}
    N = len(X)
    for cf in classifiers:
        for i in range(N):
            cf_classification = cf[2](X[i])
            if cf_classification != Y[i]:
                # output[cf].append(X[i])
                output[cf].append(adaboost.key_from_value(ids_to_points, X[i]))

    return output


digits_classifiers = digits_make_classifiers(X, y)
digits_ids_to_points = adaboost.make_point_identifiers(X)
digits_classifiers_to_misclassified = digits_make_classifiers_to_misclassified(
    X, y, digits_classifiers, digits_ids_to_points)

digits_points = list(digits_ids_to_points.keys())
resulting_classifier = adaboost.adaboost(digits_points,
                                         digits_classifiers_to_misclassified,
                                         max_num_rounds=20)
print('resulting_classifier', resulting_classifier)
features_chosen = [i[0][0] for i in resulting_classifier]
print('features_chosen', features_chosen)

# Testing
# feature_test = lambda x,cutoff: x[0] > cutoff
# print test_feature(feature_test,X,y,0)
# print sum(y)
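The commented-out testing block calls a test_feature helper that is not shown. A hypothetical version consistent with the feature_test(x, cutoff) signature above:

def test_feature(feature, X, Y, cutoff):
    """Hypothetical helper: accuracy of a single thresholded feature,
    treating feature(x, cutoff) == True as the positive class."""
    correct = sum(1 for x, y in zip(X, Y) if feature(x, cutoff) == (y > 0))
    return correct / float(len(Y))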