def test():
    """Fit a gini decision tree on a bagged iris sample and report test accuracy.

    Loads ``./data_set/iris_1_3.csv`` (the last column is used as the label),
    holds out one third of the rows, trains on the subset returned by
    ``sampling_bagging`` and prints the true labels, the predicted labels and
    the accuracy percentage on the held-out rows.
    """
    frame = pd.read_csv('./data_set/iris_1_3.csv', header=0)
    samples = np.array(frame, 'float')
    # All columns but the last are features; the last column is the label.
    X, y = samples[:, :-1], samples[:, -1]
    X_train, X_test, y_train, y_true = train_test_split(
        X, y, test_size=1 / 3., random_state=6)

    classifier = DecisionTreeClassifier(criterion='gini')
    # Sampling with replacement: the drawn rows train the tree. The
    # out-of-bag rows are returned too but are not used in this function.
    X_subset, y_subset, _out_of_bag = sampling_bagging(X_train, y_train)
    classifier.fit(X_subset, y_subset)

    print('y_true : ', y_true.tolist())
    predicted = classifier.predict(X_test)
    print('pre_lab: ', predicted.tolist())
    print('The accuracy was ', 100 * accuracy_score(y_true, predicted), '% on the test ')
def test_fit(self):
    """Fit on the fixture data and verify the learned tree node by node."""
    clf = DecisionTreeClassifier()
    clf.fit(self.X_train, self.y_train, self.feature_names)

    # Expected tree:
    #
    #                      feature:
    #                      outlook
    #                    /    |    \
    #           rainy   /  overcast \   sunny
    #                  /      |      \
    #          feature:     class:    feature:
    #           windy        True     humidity
    #          /     \                /      \
    #   False /       \ True    high /        \ normal
    #        /         \            /          \
    #     class:     class:      class:      class:
    #      True      False       False        True
    root = clf.root
    assert root.feature == 'outlook'

    branches = root.children_by_attribute
    rainy_node = branches['rainy']
    overcast_node = branches['overcast']
    sunny_node = branches['sunny']

    assert rainy_node.feature == 'windy'
    assert overcast_node.classification == True
    assert sunny_node.feature == 'humidity'

    # (parent node, attribute value leading to the leaf, expected class)
    expected_leaves = (
        (rainy_node, 'False', True),
        (rainy_node, 'True', False),
        (sunny_node, 'high', False),
        (sunny_node, 'normal', True),
    )
    for parent, attribute_value, expected_class in expected_leaves:
        leaf = parent.children_by_attribute[attribute_value]
        assert leaf.classification == expected_class
def main():
    """Fit a decision tree on ``data/play.csv`` and print its rules.

    The CSV is looked up next to this module, indexed by the ``Dia`` column;
    ``Jogar`` is the target column and every other column is a feature.
    """
    csv_file = os.path.join(os.path.dirname(__file__), 'data/play.csv')
    df = pd.read_csv(csv_file, index_col='Dia')

    is_feature_column = df.columns != 'Jogar'
    X = df.loc[:, is_feature_column]
    y = df['Jogar']

    clf = DecisionTreeClassifier()
    clf.fit(X, y)
    print(clf.rules())
def test_decision_tree_classifier_numerical_split(self): model = DecisionTreeClassifier() # feature 3, 1 -> label 1 # feature 2, 0 -> label 0 features = np.array([[3, 0], [3, 0], [3, 0], [2, 0], [2, 0], [2, 0], [1, 0], [1, 0], [1, 0], [0, 0], [0, 0]]) labels = np.array([1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0]) model.fit(features, labels) """
def test_decision_tree_classifier_exhaustive_categorical_split(self): model = DecisionTreeClassifier(categorical_feature_indeces=[0, 1]) # feature[0] 3, 1 -> label 1 # feature[0] 2, 0 -> label 0, 2 features = np.array([[3, 0], [3, 0], [3, 0], [2, 0], [2, 0], [2, 0], [1, 0], [1, 0], [1, 0], [0, 0], [0, 1]]) labels = np.array([1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 2]) model.fit(features, labels) """
def __generate_trees(self, df, n_baggings):
    '''
    Train one tree model per bagging and store it in ``self.trees``.

    For every item ``key`` yielded by iterating ``n_baggings``, a new
    ``DecisionTreeClassifier`` is fitted on ``df.iloc[n_baggings[key], :]``
    and saved as ``self.trees[key]``.
    '''
    # NOTE(review): the items of ``n_baggings`` are used both as keys into
    # ``n_baggings`` and into ``self.trees`` -- presumably a mapping from
    # bagging id to row positions; confirm against the caller.
    for bagging_key in n_baggings:
        tree = DecisionTreeClassifier(
            self.target_attribute,
            self.n_random_attributes
        )
        bagging_rows = df.iloc[n_baggings[bagging_key], :]
        tree.fit(bagging_rows)
        self.trees[bagging_key] = tree
def result():
    """Predict a diagnosis for the posted JSON payload.

    The request JSON is wrapped in ``DiagnosisRequest`` and converted to an
    array, the ``DecisionTree`` row with the highest ``id`` is loaded, and its
    stored tree is used to initialise the classifier that makes the prediction.

    Returns:
        A JSON response ``{'result': <first prediction>}``. When no
        ``DecisionTree`` row exists, a JSON error body with HTTP status 404
        instead of failing with an ``AttributeError`` on ``None``.
    """
    payload = request.get_json()
    diagnosis_data = DiagnosisRequest(payload)
    x_test = diagnosis_data.to_np_array()

    # Row with the highest id; ``first()`` yields None when the table is empty.
    tree_model = DecisionTree.query.order_by(DecisionTree.id.desc()).first()
    if tree_model is None:
        return jsonify({'error': 'No trained decision tree is available.'}), 404

    tree_classifier = DecisionTreeClassifier(initial_tree=tree_model.tree)
    prediction = tree_classifier.predict(x_test)

    # Only the first prediction is reported back to the client.
    response = {'result': prediction.tolist()[0]}
    return jsonify(response)
def test_predict(self):
    """Fit on the fixture data and check predictions for three hand-picked samples."""
    clf = DecisionTreeClassifier()
    clf.fit(self.X_train, self.y_train, self.feature_names)

    # (raw sample, expected prediction)
    cases = (
        # sunny outlook + high humidity -> don't play
        (['sunny', 'hot', 'high', False], False),
        # sunny outlook + normal humidity -> play
        (['sunny', 'hot', 'normal', False], True),
        # overcast outlook -> play
        (['overcast', 'hot', 'high', False], True),
    )
    for raw_sample, expected in cases:
        output = clf.predict(np.array(raw_sample))
        assert output == expected
def fit(self, X, y):
    """Grow ``self.trees_num`` decision trees and collect them in ``self.forest``.

    Every round calls ``self.shuffle(X, y)`` and then trains a fresh tree on
    the first ``int(len(y) * self.bootstrap)`` samples of ``X`` and ``y``.
    """
    self.forest = []
    subset_size = int(len(y) * self.bootstrap)

    for _ in range(self.trees_num):
        # NOTE(review): the return value is ignored, so ``shuffle`` presumably
        # reorders X and y in place -- confirm.
        self.shuffle(X, y)
        X_sub, y_sub = X[:subset_size], y[:subset_size]

        tree = DecisionTreeClassifier(self.features_num, self.max_depth)
        tree.fit(X_sub, y_sub)
        # Add the fitted decision tree to the forest list.
        self.forest.append(tree)
def test_decision_tree_classifier_numerical_split_hard(self):
    """The tree must learn a label that alternates with every feature value.

    Feature 0 takes the values 0..11 (each one twice); even values carry
    label 1 and odd values label 0. Feature 1 is constant.
    """
    model = DecisionTreeClassifier()

    values = range(12)
    # Two identical rows per value: [0, 0], [0, 0], [1, 0], [1, 0], ...
    features = np.array([[value, 0] for value in values for _ in range(2)])
    # 1, 1, 0, 0, 1, 1, 0, 0, ... aligned with the duplicated rows above.
    labels = np.array([1, 1, 0, 0] * 6)
    model.fit(features, labels)

    # For debugging, the fitted structure can be inspected via model._node.
    queries = np.array([[value, 0] for value in values])
    predictions = model.predict(queries)
    self.assertEqual(predictions.tolist(), [1, 0] * 6)
def visualize(model, max_depth=None):
    """Plot the decision regions of a model trained on two iris features.

    Args:
        model: ``'decision_tree'`` selects ``DecisionTreeClassifier``; any
            other value selects ``RandomForestClassifier``.
        max_depth: depth limit passed to the classifier; when ``None`` the
            classifier is built without a ``max_depth`` argument.

    The figure is written below ``figures/iris/`` and then closed.
    """
    iris_dataset = datasets.load_iris()
    # Columns 2 and 3 of the iris data (labelled below via feature_names[2:4]).
    petal_features = iris_dataset['data'][:, 2:]
    targets = iris_dataset['target']

    if model == 'decision_tree':
        estimator_cls = DecisionTreeClassifier
    else:
        estimator_cls = RandomForestClassifier

    if max_depth is None:
        # No limit on tree depth. Iris has few samples and classes, so the
        # computation stays cheap even without a limit.
        clf = estimator_cls()
    else:
        clf = estimator_cls(max_depth=max_depth)
    clf.fit(petal_features, targets)

    # Range covered by the data, widened by 1 on each side (floored at 0).
    first_axis, second_axis = petal_features[:, 0], petal_features[:, 1]
    x_min = max(0, first_axis.min() - 1)
    y_min = max(0, second_axis.min() - 1)
    x_max = first_axis.max() + 1
    y_max = second_axis.max() + 1

    # Mesh of coordinates spanning that range.
    grid_interval = 0.2
    xx, yy = np.meshgrid(np.arange(x_min, x_max, grid_interval),
                         np.arange(y_min, y_max, grid_interval))

    # Let the trained model classify every mesh point and draw the result.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    plt.contourf(xx, yy, Z.reshape(xx.shape), cmap=plt.cm.rainbow, alpha=0.4)

    # Overlay the training samples, one scatter call per class.
    for c in np.unique(targets):
        in_class = targets == c
        plt.scatter(petal_features[in_class, 0], petal_features[in_class, 1])

    feature_names = iris_dataset['feature_names']
    plt.xlabel(feature_names[2])
    plt.ylabel(feature_names[3])

    if max_depth is None:
        title = 'Max Depth : No Limitation'
        figure_path = 'figures/iris/{}_no_limit.png'.format(model)
    else:
        title = 'Max Depth : ' + str(max_depth)
        figure_path = 'figures/iris/{}_depth_{}.png'.format(model, max_depth)
    plt.title(title)
    plt.savefig(figure_path)
    plt.close()
def _compare_sklearn_dataset(self, dataset):
    """Assert that our tree matches scikit-learn's predictions on ``dataset``.

    Both models are fitted on the full dataset and asked to predict the same
    training samples; the two prediction lists must be equal.

    Args:
        dataset: an object exposing ``data`` and ``target`` (for example the
            result of ``load_iris()``). A zero-argument loader callable is
            also accepted and is called to obtain the dataset. ``None``
            falls back to ``load_iris()``, which is what this helper always
            used before.
    """
    # The argument used to be overwritten unconditionally with load_iris(),
    # so every caller silently tested iris regardless of what it passed.
    if dataset is None:
        dataset = load_iris()
    elif callable(dataset):
        dataset = dataset()

    features = dataset.data
    labels = dataset.target

    model_sklearn = DecisionTreeClassifierSklearn()
    model_sklearn.fit(features, labels)
    predictions_sklearn = model_sklearn.predict(features)

    model = DecisionTreeClassifier()
    model.fit(features, labels)
    predictions = model.predict(features)

    self.assertEqual(predictions.tolist(), predictions_sklearn.tolist())
def test_decision_tree_classifier_fit(self):
    """The tree must learn XOR of two binary features."""
    model = DecisionTreeClassifier()

    # XOR problem: the label is 1 exactly when the two features differ.
    training_rows = [
        ([0, 0], 0), ([0, 0], 0),
        ([0, 1], 1), ([0, 1], 1), ([0, 1], 1),
        ([1, 0], 1), ([1, 0], 1), ([1, 0], 1),
        ([1, 1], 0), ([1, 1], 0), ([1, 1], 0),
    ]
    features = np.array([row for row, _ in training_rows])
    labels = np.array([label for _, label in training_rows])
    model.fit(features, labels)

    # For debugging, the fitted structure can be inspected via model._node.
    queries = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    predictions = model.predict(queries)
    self.assertEqual(predictions.tolist(), [0, 1, 1, 0])
def compare_performance(trees_num, max_depth, bootstrap):
    """Benchmark a decision tree against a random forest on the digits dataset.

    For each model the training time, the prediction time and the train/test
    accuracies (30 % of the data is held out, ``random_state=0``) are printed.

    Args:
        trees_num: forwarded to ``RandomForestClassifier``.
        max_depth: forwarded to both classifiers.
        bootstrap: forwarded to ``RandomForestClassifier``.
    """
    dataset = datasets.load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset['data'], dataset['target'], test_size=0.3, random_state=0)

    def _benchmark(header, build_model):
        """Print ``header``, then build, time and score one model."""
        print(header)
        model = build_model()

        fit_started = time.time()  # training start
        model.fit(X_train, y_train)
        fit_seconds = time.time() - fit_started

        predict_started = time.time()  # inference start
        model.predict(X_test)  # result only matters for the timing
        predict_seconds = time.time() - predict_started
        print('学習時間 : {:.6f} [sec] 推論時間 : {:.6f} [sec]'.format(fit_seconds, predict_seconds))

        train_accuracy = model.accuracy_score(X_train, y_train)
        test_accuracy = model.accuracy_score(X_test, y_test)
        print('train accuracy : {:.4f} test_accuracy : {:.4f}'.format(train_accuracy, test_accuracy))

    # Decision tree
    _benchmark('##### 決定木の性能 #####',
               lambda: DecisionTreeClassifier(max_depth=max_depth))
    # Random forest
    _benchmark('##### ランダムフォレストの性能 #####',
               lambda: RandomForestClassifier(trees_num=trees_num, max_depth=max_depth, bootstrap=bootstrap))
def main():
    """Train a decision tree from a text file and answer queries interactively.

    The training file named on the command line is parsed, a
    ``DecisionTreeClassifier`` is fitted and printed, and then samples are
    read from standard input in a loop; each valid sample is classified and
    the prediction printed. Lines that ``line_to_int_list`` rejects with
    ``ValueError`` are reported and skipped.

    The loop ends quietly on Ctrl-C (as the banner promises) or when standard
    input is exhausted, instead of terminating with a traceback.
    """
    args = get_cmd_ln_arguments()
    # The row count returned by the parser is not needed here.
    num_columns, _num_rows, training_data = parse_txt_file(args.filename)
    feature_names = get_feature_names(num_columns)

    decision_tree = DecisionTreeClassifier()
    print('Training a decision tree classifier...')
    decision_tree.fit(feature_names, training_data)
    print('Decision tree classifier trained:')
    decision_tree.print()
    print()
    print(
        'Entering a loop to query the decision tree. Press ctrl-c at anytime to exit.'
    )

    prompt = 'Enter a sample ({} numbers separated by a space): '.format(
        num_columns)
    try:
        while True:
            raw_sample = input(prompt)
            try:
                sample = line_to_int_list(raw_sample)
            except ValueError:
                print(
                    'Input was not {} numbers separated by a space. Please try again. '
                    .format(num_columns))
                continue
            prediction = decision_tree.predict(sample)
            print('Prediction: {}'.format(prediction))
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or end of input: finish the prompt line and leave normally.
        print()
def createTree(data, maximumDepth, currentDepth, tree, m):
    """Recursively grow ``tree`` from ``data`` using ``m`` random features per node.

    Column 0 of ``data`` is handed to ``giniIndex``/``calcLabel``/``getInfoGain``
    as the label vector; columns 1.. are the candidate features. A node becomes
    a leaf when ``currentDepth`` reaches ``maximumDepth`` or when no sampled
    feature yields a positive gain. Otherwise rows with
    ``feature >= threshold`` go to ``tree.left`` and the rest to ``tree.right``.
    """
    print("At depth: {}".format(currentDepth))
    labels = data[:, 0]

    def _store_leaf():
        # Leaf: no feature/threshold, only the label computed from this node's rows.
        tree.insert(None, None, True, calcLabel(labels))

    if currentDepth == maximumDepth:
        # Maximum depth reached: store the prediction for the remaining rows.
        _store_leaf()
        return

    u_root = giniIndex(labels)
    gain, threshold, best_feature = 0, 0, 0
    candidate_features = sample(range(1, data.shape[1]), m)
    for position, featureIndex in enumerate(candidate_features):
        print("Calculating Gain for Feature Number:{}".format(position))
        currentGain, currentThreshold = getInfoGain(labels, data[:, featureIndex], u_root)
        if currentGain > gain:
            gain, threshold, best_feature = currentGain, currentThreshold, featureIndex

    if gain == 0:
        # No sampled feature improves on the current impurity.
        _store_leaf()
        return

    goes_left = data[:, best_feature] >= threshold
    trueExamples = data[goes_left]
    falseExamples = data[data[:, best_feature] < threshold]

    tree.insert(best_feature, threshold, False, calcLabel(labels))
    tree.left = DecisionTreeClassifier()
    tree.right = DecisionTreeClassifier()

    next_depth = currentDepth + 1
    createTree(trueExamples, maximumDepth, next_depth, tree.left, m)
    createTree(falseExamples, maximumDepth, next_depth, tree.right, m)
def createTree_adaboost(data, maximumDepth, currentDepth, tree):
    """Recursively grow ``tree`` from ``data`` for use as an AdaBoost learner.

    Columns 0-1 of ``data`` are handed together to
    ``giniIndex``/``calcLabel``/``getInfoGain`` (presumably label and sample
    weight -- confirm against those helpers); columns 2.. are the candidate
    features, all of which are evaluated at every node. A node becomes a leaf
    when ``currentDepth`` reaches ``maximumDepth`` or when no feature yields a
    positive gain. Otherwise rows with ``feature >= threshold`` go to
    ``tree.left`` and the rest to ``tree.right``.
    """
    print("At depth: {}".format(currentDepth))
    label_columns = data[:, 0:2]

    def _store_leaf():
        # Leaf: no feature/threshold, only the label computed from this node's rows.
        tree.insert(None, None, True, calcLabel(label_columns))

    if currentDepth == maximumDepth:
        _store_leaf()
        return

    u_root = giniIndex(label_columns)
    gain, threshold, best_feature = 0, 0, 0
    for featureIndex in range(2, data.shape[1]):
        # Features are reported 0-based, i.e. without the two leading columns.
        print("Calculating Gain for Feature Number: {}".format(featureIndex - 2))
        currentGain, currentThreshold = getInfoGain(label_columns, data[:, featureIndex], u_root)
        if currentGain > gain:
            gain, threshold, best_feature = currentGain, currentThreshold, featureIndex

    if gain == 0:
        # No feature improves on the current impurity.
        _store_leaf()
        return

    trueExamples = data[data[:, best_feature] >= threshold]
    falseExamples = data[data[:, best_feature] < threshold]

    tree.insert(best_feature, threshold, False, calcLabel(label_columns))
    tree.left = DecisionTreeClassifier()
    tree.right = DecisionTreeClassifier()

    next_depth = currentDepth + 1
    createTree_adaboost(trueExamples, maximumDepth, next_depth, tree.left)
    createTree_adaboost(falseExamples, maximumDepth, next_depth, tree.right)
def fit(self, X, y):
    """Train ``self.n_estimators`` trees on resampled data and keep them in ``self.forest``.

    Also records the distinct class labels of ``y`` in ``self.n_calss`` and
    their count in ``self.n_calss_num``.
    """
    self.forest = []  # collection of fitted trees
    distinct_labels = set(y)
    self.n_calss_num = len(distinct_labels)  # number of classes
    self.n_calss = list(distinct_labels)  # the class labels

    for tree_index in range(self.n_estimators):
        # Draw a random sample; self.bootstrap is the fraction of the
        # sample set to draw.
        X_subset, y_subset = sampling_with_reset(X, y, self.bootstrap)

        tree = DecisionTreeClassifier(self.max_features,
                                      self.criterion,
                                      self.max_depth,
                                      self.min_samples_split,
                                      self.min_impurity_split)
        # Announce which tree is being trained.
        print('tree_' + str(tree_index))
        tree.fit(X_subset, y_subset)
        self.forest.append(tree)
def test_benchmark_numerical():
    """Fit a tree on the numerical benchmark CSV and print the resulting tree."""
    print('\n** Numerical benchmark **')

    benchmark_csv = DATA_PATH + 'dados_benchmark_v2.csv'
    df = pd.read_csv(benchmark_csv, sep=';')

    tree_options = {'target_attribute': 'Joga', 'n_random_attributes': 4}
    dt = DecisionTreeClassifier(**tree_options)
    dt.fit(df)
    dt.print_tree()
def compare_depth():
    """Plot train/test accuracy against the depth limit for both model types.

    Uses the digits dataset (30 % held out, ``random_state=0``). The random
    forest uses the ``trees_num`` and ``bootstrap`` values returned by
    ``grid_search_RF()``. Depth limits 0..20 are evaluated and the resulting
    curves are saved to ``figures/mnist/max_depth_&_accuracy.png``.
    """
    dataset = datasets.load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset['data'], dataset['target'], test_size=0.3, random_state=0)

    # Best random-forest hyperparameters, as found by the grid search.
    trees_num, bootstrap = grid_search_RF()

    # Depth limits to examine: 0 through 20.
    depth_list = list(range(21))
    dt_train_acc_list, dt_test_acc_list = [], []
    rf_train_acc_list, rf_test_acc_list = [], []

    def _fit_and_score(model):
        """Fit ``model`` and return its (train, test) accuracies."""
        model.fit(X_train, y_train)
        return (model.accuracy_score(X_train, y_train),
                model.accuracy_score(X_test, y_test))

    for depth in tqdm(depth_list):
        print('***** max_depth = {} *****'.format(depth))

        # Decision tree
        dt_train_accuracy, dt_test_accuracy = _fit_and_score(
            DecisionTreeClassifier(max_depth=depth))
        dt_train_acc_list.append(dt_train_accuracy)
        dt_test_acc_list.append(dt_test_accuracy)
        print('決定木 train accuracy : {:.4f} test_accuracy : {:.4f}'.format(dt_train_accuracy, dt_test_accuracy))

        # Random forest
        rf_train_accuracy, rf_test_accuracy = _fit_and_score(
            RandomForestClassifier(trees_num=trees_num, max_depth=depth, bootstrap=bootstrap))
        rf_train_acc_list.append(rf_train_accuracy)
        rf_test_acc_list.append(rf_test_accuracy)
        print('ランダムフォレスト train accuracy : {:.4f} test_accuracy : {:.4f}'.format(rf_train_accuracy, rf_test_accuracy))

    # Draw the four accuracy curves.
    curves = (
        (dt_train_acc_list, 'Decision Tree - train accuracy', 'r'),
        (dt_test_acc_list, 'Decision Tree - test accuracy', 'g'),
        (rf_train_acc_list, 'Random Forest - train accuracy', 'y'),
        (rf_test_acc_list, 'Random Forest - test accuracy', 'b'),
    )
    for accuracies, label, color in curves:
        plt.plot(depth_list, accuracies, label=label, color=color)

    plt.xlabel('Max Depth')
    plt.ylabel('Accuracy')
    plt.xlim(0, 20)
    plt.xticks(np.arange(0, 21, 2))
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.title('Max Depth of Decision Trees and Accuracy')
    # Save the figure.
    plt.savefig('figures/mnist/max_depth_&_accuracy.png')
def main():
    """Compare a decision tree and a random forest on iris for several depth limits.

    For the depth limits 1, 2, 3 and "no limit" (``None``), both models are
    trained on all features (30 % held out, ``random_state=0``); training
    time, prediction time and train/test accuracy are printed, and
    ``visualize`` is called for each model to plot its two-feature decision
    regions.
    """
    dataset = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset['data'], dataset['target'], test_size=0.3, random_state=0)

    def _profile(model, timing_template, accuracy_template):
        """Fit and score ``model``, printing through the two format templates."""
        fit_started = time.time()  # training start
        model.fit(X_train, y_train)
        fit_seconds = time.time() - fit_started

        predict_started = time.time()  # inference start
        model.predict(X_test)  # result only matters for the timing
        predict_seconds = time.time() - predict_started
        print(timing_template.format(fit_seconds, predict_seconds))

        train_accuracy = model.accuracy_score(X_train, y_train)
        test_accuracy = model.accuracy_score(X_test, y_test)
        print(accuracy_template.format(train_accuracy, test_accuracy))

    # Depth limits 1 to 3, plus the unlimited case.
    for depth in (1, 2, 3, None):
        print('######### max_depth = {} #########'.format(depth))

        # With all features: accuracy, training time, inference time and
        # generalisation of each model.
        # Decision tree
        _profile(DecisionTreeClassifier(max_depth=depth),
                 '決定木 学習時間 : {:.6f} [sec] 推論時間 : {:.6f} [sec]',
                 '決定木 train accuracy : {:.4f} test_accuracy : {:.4f}')
        # Random forest
        _profile(RandomForestClassifier(max_depth=depth),
                 'ランダムフォレスト 学習時間 : {:.6f} [sec] 推論時間 : {:.6f} [sec]',
                 'ランダムフォレスト train accuracy : {:.4f} test_accuracy : {:.4f}')

        # Restrict to two features (petal length, petal width) and
        # visualise the result in two dimensions.
        for model_name in ('decision_tree', 'random_forest'):
            visualize(model_name, max_depth=depth)
def createForest(m, n, d, data):
    """Build ``n`` trees, each grown on its own bootstrap sample of ``data``.

    Args:
        m: number of features considered for a tree (forwarded to ``createTree``).
        n: number of trees.
        d: maximum depth of each tree.
        data: indexable collection of rows; each tree is trained on
            ``len(data)`` rows drawn from it with replacement.

    Returns:
        The list of grown trees.
    """
    print("Number of features, m = {}".format(m))
    print("Number of trees, n = {}".format(n))
    print("Maximum Depth, d = {}".format(d))

    row_count = len(data)
    forest = []
    for tree_number in range(n):
        print("Tree {}".format(tree_number))
        tree = DecisionTreeClassifier()
        forest.append(tree)
        # Bootstrap: as many row indices as there are rows, with replacement.
        picked_rows = np.random.choice(row_count, row_count, replace=True)
        createTree(data[picked_rows], d, 0, tree, m)
    return forest
def adaboost(data, l, maximumDepth):
    """Train ``l`` weak learners with AdaBoost-style reweighting.

    A weight column (initially uniform, ``1 / len(data)``) is inserted at
    column index 1 of a copy of ``data``. Each round grows a tree on the
    weighted data with ``createTree_adaboost``, obtains the error and the
    per-sample weight-change factors from ``errorCalc``, computes the
    learner's ``alpha`` and rescales the weights by
    ``exp(alpha * weight_change)``.

    Args:
        data: 2-D array of training rows (the caller's array is not modified;
            ``np.insert`` returns a new array).
        l: number of weak learners to train.
        maximumDepth: depth limit for every learner.

    Returns:
        ``(tree_list, alpha_list)``: the trained trees and their alphas, in
        training order.
    """
    size = len(data)
    # D is the distribution (the per-sample weights), uniform at the start.
    D = np.full(size, 1.0 / size)
    data = np.insert(data, 1, D, axis=1)  # weights become column 1

    # Keeps the error strictly inside (0, 1): an error of exactly 0 or 1
    # would otherwise divide by zero / take log(0) and yield an infinite alpha.
    error_margin = 1e-10

    tree_list = []
    alpha_list = []
    for weakLearner in range(l):
        tree = DecisionTreeClassifier()
        print("Learner No: {}".format(weakLearner))
        createTree_adaboost(data, maximumDepth, 0, tree)

        err, weightChange_list = errorCalc(tree, data, maximumDepth)
        err = min(max(err, error_margin), 1.0 - error_margin)
        alpha = np.log((1.0 - err) / err) / 2

        data[:, 1] = data[:, 1] * np.exp(alpha * np.array(weightChange_list))
        tree_list.append(tree)
        alpha_list.append(alpha)
    return tree_list, alpha_list
def test_calculate_entropy(self):
    """Entropy is 0.0 for a pure sample and 1.0 for an even two-class mix."""
    clf = DecisionTreeClassifier()

    # (class labels, expected entropy)
    cases = (
        ([True, True], 0.0),   # all positive
        ([True, False], 1.0),  # fifty-fifty mix
    )
    for class_labels, expected_entropy in cases:
        assert clf._calculate_entropy(np.array(class_labels)) == expected_entropy
def _get_base_estimator(self, **kwargs):
    """Return a new ``DecisionTreeClassifier`` built from the given keyword arguments."""
    base_estimator = DecisionTreeClassifier(**kwargs)
    return base_estimator
# Example: fit the project's decision tree on iris, print its cross-validation
# scores and render the tree to an image file.
from sklearn import datasets

try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import cross_val_score

from decision_tree import DecisionTreeClassifier

iris = datasets.load_iris()
X, Y = iris.data, iris.target

clf = DecisionTreeClassifier()
clf.fit(X, Y)

# print() with a single argument behaves the same on Python 2 and Python 3.
print(cross_val_score(clf, X, Y))
clf.draw_tree('decision_tree_example.png')
{'aspectOfHand': 'palmar'}) print('Getting unlabelled image features from Phase 1') unlabelled_features = helper_functions.get_main_features( label_feature_name, unlabelled_dataset_path) dorsal_features = {} palmar_features = {} for image in dorsal_images_list: dorsal_features[image] = label_folder_features[image] for image in palmar_images_list: palmar_features[image] = label_folder_features[image] if classifier == 'DT': decisiontree = DecisionTreeClassifier(max_depth=100) dorsal_images = list(dorsal_features.keys()) palmar_images = list(palmar_features.keys()) image_list = dorsal_images image_list.extend(palmar_images) random.shuffle(image_list) X = [] y = [0] * len(image_list) for i in range(0, len(image_list)): image = image_list[i] if image in dorsal_features: y[i] = 0 else: y[i] = 1 X.append(label_folder_features[image])
from treeFunc import createTree, treeAccuracy from decision_tree import DecisionTreeClassifier from random import sample, randint from randomForest import createForest, forestAccuracy from adaboost import adaboost, treeAccuracy_ada if __name__ == '__main__': trainData = prep.fileRead('pa3_train_reduced.csv') # Read Training Examples trainData = prep.changeData(trainData) validData = prep.fileRead('pa3_valid_reduced.csv') # Read Validation Data validData = prep.changeData(validData) ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART-1: DECISION TREE CLASSIFIER~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~## print("!!!!!Executing DECISION TREE CLASSIFIER!!!!!") maximum_Depth = 20 tree = DecisionTreeClassifier() # starttime = time.time() createTree(trainData, maximum_Depth, 0, tree, 100) # print("Total Training time {}".format(time.time() - starttime)) train_acc_list = [] valid_acc_list = [] itr_list = [] for i in range(21): itr_list.append(i) train_acc_list.append(treeAccuracy(tree, trainData, i)) valid_acc_list.append(treeAccuracy(tree, validData, i)) # plt.scatter(itr_list, train_acc_list, color = 'blue', s = 15) # blue_line, = plt.plot(itr_list, train_acc_list, color = 'blue', label = 'Training Accuracy') # plt.title("ACCURACY vs DEPTH")
from sklearn.datasets import load_iris
from decision_tree import DecisionTreeClassifier
from sklearn import tree

# Demo: train the project's decision tree and scikit-learn's on iris with the
# same depth limit and print both predictions for one hand-written sample.

# Load the iris dataset and split it into the X and y variables.
dataset = load_iris()
X = dataset.data
y = dataset.target

separator = ':::::::::::::::::::::::::::::::::::::::::::::'

print(separator)
print(f'APPROPRIATE X, y DATATYPES: {type(X)}')
print(separator)

# Create a new instance of our DecisionTreeClassifier and fit it.
clf = DecisionTreeClassifier(max_depth=5)
clf.fit(X, y)

print('')
print(':::::::::::::PREDICTIONS:::::::::::::::::::::')
print('')
print(separator)

inputs = [[1, 1.5, 5, 1.5]]
print(f'INPUTS: {inputs}')
print(f'OUR MODEL PREDICTION: {clf.predict(inputs)}')

# Reference model: scikit-learn's tree with the same depth limit.
clf2 = tree.DecisionTreeClassifier(max_depth=5)
clf2.fit(X, y)
print(f'SCIKITLEARN MODEL PREDICTION: {clf2.predict(inputs)}')
print(separator)