def setUp(self):
    """Prepare the fixtures shared by the tests.

    Stores a small hand-written ``restaurant`` table of 0/1 labels together
    with two ways of splitting it, then loads ``part23_data.csv`` through
    ``dt.load_csv`` and keeps both the raw result and its two unpacked
    parts on the instance.
    """
    negatives = [0] * 6
    positives = [1] * 6
    patrons_split = [[0, 0], [1, 1, 1, 1], [1, 1, 0, 0, 0, 0]]
    food_type_split = [[0, 1], [0, 1], [0, 0, 1, 1], [0, 0, 1, 1]]
    self.restaurant = {
        'restaurants': negatives + positives,
        'split_patrons': patrons_split,
        'split_food_type': food_type_split,
    }

    loaded = dt.load_csv('part23_data.csv')
    self.dataset = loaded
    # The loaded data set unpacks into exactly two parts.
    features, classes = loaded
    self.train_features = features
    self.train_classes = classes
def a1(runs=10):
    """Print the mean training-set accuracy of ``dt.ChallengeClassifier``.

    Loads ``challenge_train.csv`` (``class_index=0``), then ``runs`` times
    builds a fresh classifier, fits it on the full data set, classifies that
    same data and counts the predictions equal to the stored classes. The
    printed value is the total number of matches divided by ``runs`` and by
    the number of samples.

    The score is measured on the very data the classifier was fitted on, so
    it is a training accuracy rather than a held-out estimate.

    Args:
        runs: Number of fit/classify repetitions to average over.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    train_features, train_classes = dt.load_csv(
        'challenge_train.csv', class_index=0)

    matches = 0
    for _ in range(runs):
        classifier = dt.ChallengeClassifier()
        classifier.fit(train_features, train_classes)
        predictions = classifier.classify(train_features)
        matches += sum(
            1
            for predicted, actual in zip(predictions, train_classes)
            if predicted == actual
        )

    # The same ``runs`` drives the loop and the divisor so the two cannot
    # drift apart.
    print(matches / runs / len(train_classes))
def a2(runs=5):
    """Print the mean training-set accuracy of ``dt.RandomForest``.

    Loads ``challenge_train.csv`` (``class_index=0``), then ``runs`` times
    builds ``dt.RandomForest(10, 5, 0.8, 0.8)``, fits it on the full data
    set, classifies that same data and counts the predictions equal to the
    stored classes. The printed value is the total number of matches divided
    by ``runs`` and by the number of samples.

    The score is measured on the very data the forest was fitted on, so it
    is a training accuracy rather than a held-out estimate.

    Args:
        runs: Number of fit/classify repetitions to average over.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    train_features, train_classes = dt.load_csv(
        'challenge_train.csv', class_index=0)

    matches = 0
    for _ in range(runs):
        # NOTE(review): the meaning of the four positional arguments is
        # defined by dt.RandomForest and is not visible here - confirm there.
        forest = dt.RandomForest(10, 5, 0.8, 0.8)
        forest.fit(train_features, train_classes)
        predictions = forest.classify(train_features)
        matches += sum(
            1
            for predicted, actual in zip(predictions, train_classes)
            if predicted == actual
        )

    # The same ``runs`` drives the loop and the divisor so the two cannot
    # drift apart.
    print(matches / runs / len(train_classes))
def setUp(self):
    """Prepare the fixtures shared by the tests.

    Stores a small hand-written ``restaurant`` table of 0/1 labels together
    with two ways of splitting it, then loads ``challenge_train.csv`` with
    ``class_index=0`` through ``dt.load_csv`` and keeps both the raw result
    and its two unpacked parts on the instance.
    """
    zeros = [0] * 6
    ones = [1] * 6
    self.restaurant = {
        'restaurants': zeros + ones,
        'split_patrons': [
            [0, 0],
            [1, 1, 1, 1],
            [1, 1, 0, 0, 0, 0],
        ],
        'split_food_type': [
            [0, 1],
            [0, 1],
            [0, 0, 1, 1],
            [0, 0, 1, 1],
        ],
    }

    # Alternative data set previously used here:
    # dt.load_csv('part23_data.csv', class_index = -1)
    loaded = dt.load_csv('challenge_train.csv', class_index=0)
    self.dataset = loaded
    # The loaded data set unpacks into exactly two parts.
    features, classes = loaded
    self.train_features = features
    self.train_classes = classes
def test_clf(params):
    """Cross-validate ``dt.ChallengeClassifier`` and print its mean accuracy.

    Loads ``challenge_train.csv`` (class index 0), splits it into 5 folds
    with ``dt.generate_k_folds`` and, for each fold, fits a fresh classifier
    on the training part and scores its predictions on the test part with
    ``dt.accuracy``. Finally prints ``params`` followed by the mean of the
    per-fold accuracies.

    Args:
        params: Mapping of keyword arguments forwarded unchanged to
            ``dt.ChallengeClassifier`` for every fold.
    """
    dataset = dt.load_csv('challenge_train.csv', 0)

    fold_accuracies = []
    for training_set, test_set in dt.generate_k_folds(dataset, 5):
        # Each fold part is indexed as [0] for the inputs and [1] for the
        # labels they are scored against.
        clf = dt.ChallengeClassifier(**params)
        clf.fit(training_set[0], training_set[1])
        preds = clf.classify(test_set[0])
        fold_accuracies.append(dt.accuracy(preds, test_set[1]))

    print(params, np.mean(fold_accuracies))
def setUp(self):
    """Prepare the fixtures for the vectorization tests.

    Creates a ``dt.Vectorization`` instance and loads ``vectorize.csv``
    through ``dt.load_csv`` (second argument ``1``), storing both on the
    test instance.
    """
    vectorizer = dt.Vectorization()
    self.vector = vectorizer

    loaded = dt.load_csv('vectorize.csv', 1)
    self.data = loaded