Exemplo n.º 1
0
def test():
    """Train a DecisionTree on a small book catalogue and evaluate test rows.

    The tree is trained on ``infolists`` and evaluated on ``testlists`` (rows
    whose first column is ``None``).  When training or evaluation raises, the
    exception message is compared with the stringified values of every test
    row; for the first matching value in a row the attribute at that column
    is replaced by ``None`` and the whole train/eval cycle is retried.

    Afterwards the (possibly masked) attribute list is printed, the prefix
    and postfix walks of the tree are handed to ``writeFile`` and the
    evaluation result is printed.

    Raises:
        Exception: the original training/evaluation error is re-raised when
            it did not cause any new attribute to be masked, since retrying
            with unchanged inputs could otherwise loop forever.
    """
    attributes = ['Author', 'Name', 'Genre', 'Year', 'Topics']
    infolists = [
        ['James Joyce', 'Ulyssess', 'Novel', 1922, 'Modernist Novel'],
        [
            'James Joyce', 'AfterTheRace', 'Short Story', 1914,
            'Realist Fiction'
        ], ['James Joyce', 'Araby', 'Short Story', 1914, 'Realist Fiction'],
        ['James Joyce', 'Encounter', 'Short Story', 1914, 'Realist Fiction'],
        ['James Joyce', 'Eveline', 'Short Story', 1904, 'Realist Fiction'],
        [
            'James Joyce', 'TheBoardingHouse', 'Short Story', 1914,
            'Realist Fiction'
        ],
        ['Mark Twain', 'ConnecticutYankee', 'Novel', 1889, 'Science Fiction'],
        ['Poe', 'CaskofAmontillado', 'Short Story', 1846, 'Horror'],
        ['Poe', 'FallHouseOfUsher', 'Short Story', 1839, 'Horror'],
        ['Poe', 'MasqueofTheRedDeath', 'Short Story', 1842, 'Horror'],
        ['Poe', 'Raven', 'Short Story', 1845, 'Horror']
    ]
    testlists = [[None, 'TwoGallants', 'Short Story', 1914, 'Realist Fiction'],
                 [None, 'Sisters', 'Short Story', 1914, 'Realist Fiction'],
                 [None, 'AnnalbelLee', 'Short Story', 1849, 'Horror'],
                 [None, 'ConnecticutYankee', 'Novel', 1889, 'Science Fiction']]

    aftereval = None
    while True:
        try:
            # A fresh tree per attempt so a failed run leaves no state behind.
            infoTree = DecisionTree()
            infoTree.train(infoTree.root, infolists, attributes)
            aftereval = infoTree.eval(testlists)
            break
        except Exception as exc:
            message = str(exc)
            masked_new_attribute = False
            for row in testlists:
                for column, value in enumerate(row):
                    if str(value) == message:
                        if attributes[column] is not None:
                            masked_new_attribute = True
                        attributes[column] = None
                        # Only the first matching column of a row is masked.
                        break
            if not masked_new_attribute:
                # Nothing changed, so retrying would repeat the same failure.
                raise

    print(attributes)

    text = 'Prefix Walk \n'
    text += prefixWalk(infoTree.root)
    text += '\nPostfix Walk \n'
    text += postfixWalk(infoTree.root)
    writeFile(text, 'author')

    print(aftereval)
Exemplo n.º 2
0
 def create_tree(self):
     """Build one decision tree from a random subset of rows and columns.

     Takes the first ``self.n_sample`` entries of a random row permutation
     and the first ``self.n_features`` entries of a random column
     permutation of ``self.dataset``; the target column is always added to
     the column selection.

     Returns:
         The object returned by ``dt.DecisionTree`` for the sampled values,
         the target's position inside the sample, the sampled column names
         and the selected column positions.
     """
     frame = self.dataset
     target_pos = frame.columns.get_loc(self.target)

     # Rows are drawn before columns, matching the original RNG call order.
     shuffled_rows = np.random.permutation(frame.shape[0])
     chosen_rows = shuffled_rows[:self.n_sample]

     shuffled_cols = np.random.permutation(frame.shape[1])
     candidate_cols = np.append(shuffled_cols[:self.n_features], target_pos)
     # The set removes the duplicate when the target was already picked.
     features_idxs = list(set(candidate_cols))

     sampled_rows = frame.iloc[chosen_rows]
     train = sampled_rows[frame.columns[features_idxs]]

     return dt.DecisionTree(
         train.values,
         train.columns.get_loc(self.target),
         train.columns.values,
         features_idxs,
     )
Exemplo n.º 3
0
    def create(jsonFilePath, dataset):
        """Create an estimator object from a JSON configuration file.

        The file is parsed and validated against ``estimatorSchema``; the
        ``'estimator'`` entry then selects the implementation by prefix
        (``'KNeighbors'`` or ``'DecisionTree'``).  The estimator parses the
        configuration, receives ``dataset`` and is returned.

        Raises:
            ValueError: with ``error.errors['estimator_config']`` when the
                file is missing, cannot be parsed or validated, or names an
                unsupported estimator.
        """
        try:
            with open(jsonFilePath) as config_file:
                try:
                    config = json.load(config_file)
                    validate(instance=config, schema=estimatorSchema)
                except (jsonschema.exceptions.ValidationError,
                        ValueError) as problem:
                    # Schema violations and parse errors are reported alike.
                    print(problem)
                    raise ValueError(error.errors['estimator_config'])

                estimator_name = config['estimator']
                if estimator_name.startswith('KNeighbors'):
                    import Knn
                    estimator = Knn.Knn(config)
                elif estimator_name.startswith('DecisionTree'):
                    import DecisionTree
                    estimator = DecisionTree.DecisionTree(config)
                else:
                    print(f'Invalid value for estimator name: {estimator_name}')
                    raise ValueError(error.errors['estimator_config'])

                estimator.parse(config)
                estimator.assign_dataset(dataset)
                return estimator
        except FileNotFoundError as problem:
            print(problem)
            raise ValueError(error.errors['estimator_config'])
Exemplo n.º 4
0
 def __init__(self, train, n_trees, sample_leaf_limits, sample_ratio,
              chara_ratio):
     """Initialise the random forest and train all of its trees.

     Every tree is built from a random choice of columns (column 0 is always
     kept) and a random choice of rows, both drawn without replacement.
     Note from the original author: the steps follow Li Hang's
     "Statistical Learning Methods".

     :param train: training set; the original notes state that the first
         column holds the class label
     :type train: pd.DataFrame
     :param n_trees: number of decision trees in the forest
     :type n_trees: int
     :param sample_leaf_limits: passed unchanged as the second argument of
         ``DT.DecisionTree``
     :param sample_ratio: lower bound of the per-tree row fraction; the
         fraction actually used is drawn uniformly from [sample_ratio, 1)
     :type sample_ratio: float
     :param chara_ratio: fraction of the columns after column 0 selected
         for each tree
     :type chara_ratio: float
     """
     self.forest = []
     total_columns = train.shape[1]
     picked_count = int(chara_ratio * (total_columns - 1))
     for tree_no in range(n_trees):
         started = time.time()

         # Choose feature columns, then put column 0 in front of them.
         feature_cols = np.random.choice(np.arange(1, train.shape[1]),
                                         picked_count,
                                         replace=False)
         subset = train.iloc[:, np.append(0, feature_cols)]

         # Row fraction for this tree, somewhere in [sample_ratio, 1).
         fraction = np.random.random_sample() * (1 - sample_ratio) + sample_ratio
         row_labels = np.random.choice(subset.index,
                                       int(fraction * subset.index.size),
                                       replace=False)
         subset = subset.loc[row_labels]

         self.forest.append(DT.DecisionTree(subset, sample_leaf_limits))
         finished = time.time()
         print('随机森林中的第%d棵树构造成功,耗时%f' % (tree_no, finished - started))
    def train(self, data, labels, bootstrapping=True):
        """Distribute the rows over the trees, then build every tree.

        Each row of ``data`` is assigned to one randomly chosen tree slot
        together with its ``(index, label)`` pair.  With ``bootstrapping``
        enabled, every slot additionally receives rows 1 up to (excluding)
        ``int(len(data) / 3)`` of a freshly shuffled copy of the data.
        Finally each entry of ``self.forest`` is replaced by a new
        ``DecisionTree.DecisionTree`` built from its slot's data and labels.

        NOTE(review): this relies on ``DataFrame.append``, which recent
        pandas releases no longer provide -- confirm the supported version.
        """
        for index, rowdata in data.iterrows():
            slot = math.floor(random.random() * self.count)
            self.data[slot] = self.data[slot].append(rowdata)
            self.labels[slot].append((index, labels[index]))

        if bootstrapping:
            extra_stop = int(len(data) / 3)
            shuffled = data
            for slot in range(self.count):
                # Each round reshuffles the previous round's ordering.
                shuffled = shuffled.sample(frac=1)
                extra_rows = shuffled.iloc[1:extra_stop, :]
                self.data[slot] = self.data[slot].append(extra_rows)
                extra_index = shuffled.index.values.astype(int)[1:extra_stop]
                for row_id in extra_index:
                    self.labels[slot].append((row_id, labels[row_id]))

        for position in range(len(self.forest)):
            # Drop the stored index column so only the labels remain.
            label_frame = pd.DataFrame(self.labels[position]).drop(0, axis=1)
            self.forest[position] = DecisionTree.DecisionTree(
                self.data[position].reset_index(drop=True),
                label_frame.squeeze())
            self.forest[position].build_tree()
Exemplo n.º 6
0
 def decisionTreeLearning(examples, attributes, parents_examples=()):
     """Recursively build a threshold-splitting decision tree.

     Args:
         examples: examples to learn from at this node.
         attributes: attributes still available for splitting.
         parents_examples: examples of the parent node, used as a fallback
             when ``examples`` is empty.

     Returns:
         Whatever ``pluralityValue`` returns, a ``DecisionTree.Leaf`` or a
         ``DecisionTree.DecisionTree`` node with both branches attached.
     """
     # No examples: most frequent classification among the parent's examples.
     if len(examples) == 0:
         return pluralityValue(parents_examples)

     # All examples share one class: return the class of the first example.
     if allSameClass(examples):
         return DecisionTree.Leaf(examples[0][dataset.target])

     # No attributes left: most frequent classification among the examples.
     if len(attributes) == 0:
         return pluralityValue(examples)

     # The module-level switch `ce` picks the attribute-selection routine.
     choose = chooseAttribute if ce == 0 else chooseAttribute2
     best_attr, threshold = choose(attributes, examples)

     node = DecisionTree.DecisionTree(best_attr, threshold,
                                      dataset.attrnames[best_attr])

     # Separate the examples based on the threshold.
     minor_examples, major_examples = splittingOnThreshold(
         best_attr, threshold, examples)

     # Recurse on both parts without the attribute that was just used.
     left_branch = decisionTreeLearning(
         minor_examples, removeAttr(best_attr, attributes), examples)
     right_branch = decisionTreeLearning(
         major_examples, removeAttr(best_attr, attributes), examples)

     node.addLeft(threshold, left_branch)
     node.addRight(threshold, right_branch)
     return node
 def decisionTreeLearning(examples, attributes, parents_examples=()):
     """Recursively build a threshold-splitting decision tree.

     Args:
         examples: examples to learn from at this node.
         attributes: attributes still available for splitting.
         parents_examples: examples of the parent node, used as a fallback
             when ``examples`` is empty.

     Returns:
         Whatever ``pluralityValue`` returns, a ``DecisionTree.Leaf`` or a
         ``DecisionTree.DecisionTree`` node with both branches attached.
     """
     # No examples: most frequent classification among the parent's examples.
     if len(examples) == 0:
         return pluralityValue(parents_examples)

     # All examples share one class: return the class of the first example.
     if allSameClass(examples):
         return DecisionTree.Leaf(examples[0][dataset.target])

     # No attributes left: most frequent classification among the examples.
     if len(attributes) == 0:
         return pluralityValue(examples)

     best_attr, threshold = chooseAttribute(attributes, examples)
     node = DecisionTree.DecisionTree(best_attr, threshold,
                                      dataset.attrnames[best_attr])

     # Separate the examples based on the threshold.
     minor_examples, major_examples = splittingOnThreshold(
         best_attr, threshold, examples)

     # Recurse on both parts without the attribute that was just used.
     left_branch = decisionTreeLearning(
         minor_examples, removeAttr(best_attr, attributes), examples)
     right_branch = decisionTreeLearning(
         major_examples, removeAttr(best_attr, attributes), examples)

     node.addLeft(threshold, left_branch)
     node.addRight(threshold, right_branch)
     return node
Exemplo n.º 8
0
def main():
    """Plot the impurity functions of a decision tree and save the figure.

    Draws the node error rate, the cross-entropy and the Gini index on one
    shared axis, writes the figure to
    ``./DecisionTree_scikit-learn_1.png`` and shows it.
    """
    print("Enter main()")

    tree = DecisionTree.DecisionTree()

    # One figure with a single subplot and a solid grid.
    figure = plt.figure()
    axis = plt.subplot(1, 1, 1)
    plt.grid(linestyle='-')

    # Plot the functions that express impurity.
    tree.plotNodeErrorFunction(figure, axis)
    tree.plotCrossEntropyFunction(figure, axis)
    tree.plotGiniIndexFunction(figure, axis)

    plt.title("purity functions (i=1)")
    plt.legend(loc="upper left")
    # Shrink the plot just enough that labels do not overlap.
    plt.tight_layout()

    # Save the figure, then display it.
    plt.savefig("./DecisionTree_scikit-learn_1.png", dpi=300)
    plt.show()

    print("Finish main()")
Exemplo n.º 9
0
 def __init__(self, T=10, M=30, bagging=False):
     """Create an ensemble holding ``T`` fresh ``DecisionTree`` objects.

     Args:
         T: number of trees in the forest; stored as ``self.t``.
         M: stored as ``self.m`` (not used inside this constructor).
         bagging: stored as ``self.bagging`` (not used inside this
             constructor).
     """
     self.t = T
     self.m = M
     self.bagging = bagging
     # A real list is required: on Python 3 ``map`` returns a lazy one-shot
     # iterator, which would be empty after the first pass over the forest
     # and supports neither indexing nor ``len()``.
     self.forest = [DecisionTree() for _ in range(T)]
     self.shape = None
     self.selected_attributes = list()
Exemplo n.º 10
0
def classify_dataset_test():
    """Check that a tree built from the iris file classifies three samples.

    A ``DT.DecisionTree`` is built from ``Dataset/iris.data``; three
    measurement vectors are classified and compared with the expected
    byte-string class names.
    """
    # Create the dataset and the tree.
    dataset = DT.Dataset("Dataset/iris.data", _delimiter=',')
    Tree = DT.DecisionTree(dataset)

    # Measurements paired with the class each one is expected to receive.
    cases = (
        ([5.4, 3.9, 1.3, 0.4], b'Iris-setosa'),
        ([6.3, 2.5, 4.9, 1.5], b'Iris-versicolor'),
        ([6.5, 3.0, 5.5, 1.8], b'Iris-virginica'),
    )

    # Load all examples first, as 15-byte strings.
    samples = [np.array(values).astype('S15') for values, _ in cases]

    # Classify every example before verifying any of them.
    predicted = [Tree.classify(sample) for sample in samples]

    for got, (_, wanted) in zip(predicted, cases):
        eq_(got, wanted)
Exemplo n.º 11
0
def test_DT2():
    """Fit a tree on ``HCTrain.csv`` and print its AUC on ``HCTest.csv``."""
    X, y = loadDataSet("HCTrain.csv")
    X_test, y_test = loadDataSet("HCTest.csv")
    tree = dt.DecisionTree(gt_privacy_p=float(1.0 / 100))
    tree.fit(X, y)
    pred1 = tree.predict(X_test)
    # A single pre-formatted string prints the same text under Python 2 and
    # Python 3; the former ``print`` statement is a SyntaxError on Python 3.
    print("AUC value %s" % (roc_auc_score(y_test, pred1),))
Exemplo n.º 12
0
def runTree(X_train, y_train, X_test, y_test, d):
    """Fit a decision tree and measure its training and test error rates.

    Args:
        X_train, y_train: training inputs and targets.
        X_test, y_test: test inputs and targets.
        d: value passed to ``fitTree`` as ``max_depth``.

    Returns:
        Tuple ``(training error rate, test error rate, tree.depth)``.
    """

    def error_rate(targets, predictions):
        # Share of positions where target and prediction are not equal.
        mismatches = sum(1 for target, guess in zip(targets, predictions)
                         if not (target == guess))
        return mismatches / float(len(targets))

    tree = dt.DecisionTree()
    # Per the original note, omitting max_depth makes fitTree use 999.
    tree.fitTree(X_train, y_train, max_depth=d)

    # Evaluate on the training set.
    pred_tree_train = tree.predict(X_train)
    error_rate_tree_train = error_rate(y_train, pred_tree_train)

    # Evaluate on the test set.
    pred_tree_test = tree.predict(X_test)
    error_rate_tree_test = error_rate(y_test, pred_tree_test)

    return error_rate_tree_train, error_rate_tree_test, tree.depth
Exemplo n.º 13
0
 def fit(self, train_data):
     """Train ``self.n_trees`` trees on per-label bootstrap samples.

     The first column of ``train_data`` is used as the label column.  For
     every tree, each label's rows are resampled with replacement to their
     original count and the pieces are concatenated; the fitted tree and its
     score on the full ``train_data`` are appended to ``self.forest`` and
     ``self.weights``.

     NOTE(review): ``random_state=20`` is fixed, so every tree receives the
     same resampled rows -- confirm this is intended.
     """
     self.train_data = train_data
     self.label_col = train_data.columns[0]
     distinct_labels = list(set(train_data[self.label_col].tolist()))

     for tree_no in range(1, self.n_trees + 1):
         print('%d tree begin fit' % tree_no)
         started = time.time()

         # Resample within each label so the label counts are preserved.
         resampled_parts = [
             train_data[train_data[self.label_col] == label].sample(
                 frac=1, replace=True, random_state=20)
             for label in distinct_labels
         ]
         train_n = pd.concat(resampled_parts)
         train_n.reset_index(drop=True, inplace=True)

         tree = DecisionTree.DecisionTree(
             min_sample_split=self.min_sample_split,
             n_features=self.n_features,
             criterion=self.criterion)
         tree.fit(train_n)
         # The score on the complete training data becomes the tree weight.
         weight = tree.score(train_data)
         self.forest.append(tree)
         self.weights.append(weight)

         finished = time.time()
         print('%d tree time cost: %f' % (tree_no, finished - started))
Exemplo n.º 14
0
def bootstrap(trees, depth, train, test, display=False):
    """Perform bootstrap aggregation with decision trees for k-class data.

    Each of the ``trees`` learners is fitted on a sample of ``train.length``
    training indices drawn with replacement.  The class probabilities that
    every tree predicts for the test set are summed and the class with the
    largest total is the ensemble prediction.

    Args:
        trees: number of decision trees to train.
        depth: forwarded to ``DecisionTree.fit`` as ``max_depth``.
        train: training partition; ``length``, ``features``, ``labels`` and
            ``metadata`` are read.
        test: test partition; ``length``, ``label_length``, ``features``,
            ``labels`` and ``metadata`` are read.
        display: when true, print the sampled indices, the per-tree and
            ensemble predictions with the true label, and the accuracy.

    Returns:
        List with the ensemble prediction for every test instance.
    """
    # One column per tree for the sampled indices and the predicted labels.
    indices = np.zeros((train.length, trees), dtype=int)
    # dtype=object keeps whole labels: ``dtype=str`` would allocate a '<U1'
    # array and silently cut every label down to its first character.
    prediction_labels = np.full((test.length, trees), '', dtype=object)
    prediction_probs = np.zeros((test.length, test.label_length), dtype=float)

    for i in range(trees):
        # Randomly sample (with replacement) the training rows to use.
        bs_sample = np.random.choice(range(0, train.length), size=train.length)
        indices.T[i] = bs_sample
        bs_features = train.features[bs_sample]
        bs_labels = train.labels[bs_sample]

        # Create and train the bootstrap decision tree.
        tree = dt.DecisionTree()
        tree.fit(bs_features, bs_labels, train.metadata, max_depth=depth)

        # Accumulate this tree's probabilities and record its labels.
        prediction_probs += tree.predict(test.features, prob=True)
        prediction_labels.T[i] = tree.predict(test.features, prob=False)

    # Vote for the predicted class using the summed probabilities.
    predictions = []
    truth = []
    correct = 0
    for i in range(test.length):
        # The class names are read from the last metadata entry.
        prediction_index = np.argmax(prediction_probs[i])
        yhat = test.metadata[-1][1][prediction_index]
        y = test.labels[i]
        predictions.append(yhat)
        truth.append(y)
        if yhat == y:
            correct += 1

    # An empty test partition would otherwise divide by zero.
    accuracy = correct / test.length if test.length else 0.0

    if display:
        # Print the training indices used by each tree.
        for i in range(train.length):
            print(','.join(map(str, indices[i])))

        # Print per-tree predictions, ensemble prediction and true label.
        print()
        for i in range(test.length):
            print(','.join(map(str, prediction_labels[i])),
                  predictions[i],
                  truth[i],
                  sep=',')

        # Print the accuracy.
        print()
        print(accuracy)

    return predictions
Exemplo n.º 15
0
 def dtree(self, event):
     """Build a decision tree from the current settings and show its view.

     The parent's status is set while the tree is generated and cleared
     again before the tree view is shown.  ``event`` is not used.
     """
     self.GetParent().setStatus("Generando Árbol...", 1)

     # Remember the two control values the tree is generated with.
     self.pbutton.actions = [self.mlC.GetValue(), self.mmC.GetValue()]

     tree = DecisionTree(
         self.db,
         self.target,
         self.labels,
         self.mlC.GetValue(),
         self.mmC.GetValue(),
     )
     viewer = treeView(self, tree)

     self.GetParent().setStatus("", 0)
     viewer.Show()
Exemplo n.º 16
0
 def setUp(self):
     """Create the tree fixture and compute its probabilities and priors."""
     print("Testing probability calculation on sample training file")
     options = {
         'training_datafile': training_datafile,
         'csv_class_column_index': 1,
         'csv_columns_for_features': [2, 3, 4, 5],
     }
     self.dt = DecisionTree.DecisionTree(**options)

     # Load the data first, then derive probabilities and class priors.
     self.dt.get_training_data()
     self.dt.calculate_first_order_probabilities()
     self.dt.calculate_class_priors()
Exemplo n.º 17
0
    def create(jsonFilePath, dataset):
        """Create an estimator object from a JSON configuration file.

        The schema is read from ``schemas/estSchema.json`` and the
        configuration at ``jsonFilePath`` is validated against it.  The
        ``'estimator'`` entry then selects the implementation module, the
        estimator receives ``dataset`` and is returned.

        Raises:
            ValueError: with ``error.errors['estimator_config']`` when a
                file is missing, the configuration cannot be parsed or
                validated, or the estimator name is not supported.
        """

        def report(exc):
            # Print the exception type and its arguments in a fixed format.
            template = "An exception of type {0} occurred. Arguments: {1!r}"
            print(template.format(type(exc).__name__, exc.args))

        try:
            with open('schemas/estSchema.json') as schema_file:
                estimatorSchema = json.load(schema_file)
        except FileNotFoundError as problem:
            report(problem)
            raise ValueError(error.errors['estimator_config'])

        try:
            with open(jsonFilePath) as config_file:
                try:
                    config = json.load(config_file)
                    validate(instance=config, schema=estimatorSchema)
                except (jsonschema.exceptions.ValidationError,
                        ValueError) as problem:
                    # Schema violations and parse errors are reported alike.
                    report(problem)
                    raise ValueError(error.errors['estimator_config'])

                estimator_name = config['estimator']
                if estimator_name.startswith('KNeighbors'):
                    import Knn
                    estimator = Knn.Knn(config)
                elif estimator_name.startswith('DecisionTree'):
                    import DecisionTree
                    estimator = DecisionTree.DecisionTree(config)
                elif estimator_name.startswith('RandomForest'):
                    import RandomForest
                    estimator = RandomForest.RandomForest(config)
                elif estimator_name in ('LinearSVC', 'LinearSVR'):
                    import SVM
                    estimator = SVM.SVM(config)
                elif estimator_name.startswith('ANN'):
                    import ANN
                    estimator = ANN.ANN(config)
                elif estimator_name == 'TripleES':
                    import TripleES
                    estimator = TripleES.TripleES(config)
                else:
                    print(f'Invalid value for estimator name: {estimator_name}')
                    raise ValueError(error.errors['estimator_config'])

                # NOTE(review): the former parse(jsonData) call is disabled
                # here -- confirm that it is no longer required.
                estimator.assign_dataset(dataset)
                return estimator
        except FileNotFoundError as problem:
            report(problem)
            raise ValueError(error.errors['estimator_config'])
Exemplo n.º 18
0
 def setUp(self):
     """Create the tree fixture and construct its decision tree classifier.

     The root node returned by ``construct_decision_tree_classifier`` is
     kept in ``self.root_node``.
     """
     print("Testing decision-tree induction on sample training file")
     options = {
         'training_datafile': training_datafile,
         'csv_class_column_index': 1,
         'csv_columns_for_features': [2, 3, 4, 5],
     }
     self.dt = DecisionTree.DecisionTree(**options)

     # Load the data, derive probabilities and priors, then build the tree.
     self.dt.get_training_data()
     self.dt.calculate_first_order_probabilities()
     self.dt.calculate_class_priors()
     self.root_node = self.dt.construct_decision_tree_classifier()
Exemplo n.º 19
0
def test_DT1():
    """Fit a tree on four 3-d points and print two predictions.

    The first query lies near the class-0 points and the second near the
    class-1 points, as the printed prefixes indicate.
    """
    X = np.array([[0, 0, 0], [0.1, 0.1, 0.1], [
                 1.0, 1.0, 1.0], [.99, .99, .99]])
    y = np.array([0, 0, 1, 1])
    tree = dt.DecisionTree(gt_privacy_p=float(1.0 / 100))
    tree.fit(X, y)
    pred1 = tree.predict(np.array([0.05, 0.05, 0.05]))
    # Pre-formatted strings print the same text under Python 2 and Python 3;
    # the former ``print`` statements are a SyntaxError on Python 3.  The
    # one-element tuple keeps sequence-like results from being unpacked.
    print("pred1 value-0: %s" % (pred1,))
    pred2 = tree.predict(np.array([0.995, 0.995, 0.995]))
    print("pred2 value-1: %s" % (pred2,))
Exemplo n.º 20
0
 def fit(self, X, Y):
     """Fit one ``DecisionTree`` per training sample on bootstrap resamples.

     Args:
         X: indexable training inputs; ``X[0]`` must have a length.
         Y: training targets, indexable with the same index array as ``X``.

     Every model is fitted on ``len(X)`` rows drawn with replacement and is
     given half the number of entries of ``X[0]`` (rounded down) as third
     argument of its ``fit``.  Fitted models go to ``self.models``.

     NOTE(review): the loop trains ``len(X)`` models, one per sample --
     confirm that this is the intended ensemble size.
     """
     N = len(X)
     # Builtin ``int``: the ``np.int`` alias was removed from NumPy and
     # raises AttributeError on current releases.
     d = int(len(X[0]) * 0.5)
     for i in range(N):
         print("Progress:", i, "of ", N)
         sel = np.random.choice(len(X), size=len(X), replace=True)
         Xb, Yb = X[sel], Y[sel]
         model = DecisionTree()
         model.fit(Xb, Yb, d)
         self.models.append(model)
Exemplo n.º 21
0
def main(argv):
    """Train an AdaBoost ensemble of decision trees and predict the test set.

    Args:
        argv: ``argv[1]`` is the number of trees, ``argv[2]`` the maximum
            tree depth, ``argv[3]`` the training file and ``argv[4]`` the
            test file.  Both files are read with ``load`` and must provide
            a ``'data'`` entry; the training file also provides
            ``['metadata']['features']``.

    Returns:
        Tuple ``(class values, ensemble predictions, true test labels)``;
        the class values are read from the last metadata entry.

    NOTE(review): when the very first tree is rejected, no predictions are
    collected and the final ``apply_along_axis`` call receives an empty
    array -- confirm how callers expect that case to be handled.
    """
    # Read the number of trees and the maximum depth of each tree.
    n_trees = int(argv[1])
    max_d = int(argv[2])

    # Training data; the file is closed as soon as it has been read.
    with open(argv[3], 'r') as train_file:
        train = load(train_file)
    meta = train['metadata']['features']
    train_data = np.array(train['data'])
    n_train = train_data.shape[0]
    K = len(meta[-1][1])

    # Test data.
    with open(argv[4], 'r') as test_file:
        test = load(test_file)
    test_data = np.array(test['data'])

    # Every training instance starts with the same weight.
    w = np.full((n_train), 1.0 / n_train)
    predictions = []
    alphas = []

    # Smallest error used in the log-ratio below, so a perfect tree gets a
    # large finite weight instead of an infinite one.
    min_err = np.finfo(float).eps

    for i in range(n_trees):
        tree = dt.DecisionTree()
        tree.fit(train_data[:, :-1],
                 train_data[:, -1],
                 meta,
                 max_d,
                 instance_weights=w)
        train_result = tree.predict(train_data[:, :-1], prob=False)
        test_result = tree.predict(test_data[:, :-1], prob=False)
        match = (train_result == train_data[:, -1]).astype(int)
        err = np.sum(w * (1 - match)) / np.sum(w)
        # Stop once a tree's weighted error reaches 1 - 1/K.
        if (err >= 1 - 1.0 / K):
            break
        predictions.append(test_result)
        # err == 0 would give alpha = inf and, through inf * 0, NaN weights.
        err = max(err, min_err)
        alpha = np.log((1 - err) / err) + np.log(K - 1)
        alphas.append(alpha)
        # Increase the weight of misclassified instances, then renormalise.
        w = w * np.exp(alpha * (1 - match))
        w = w / np.sum(w)

    predictions = np.asarray(predictions).T
    alphas = np.asarray(alphas)

    # Combine the per-tree predictions of each test instance.
    ens_prediction = np.apply_along_axis(combine_predict, 1, predictions,
                                         alphas)
    test_Y = test_data[:, -1]
    return (meta[-1][1], ens_prediction, test_Y)
Exemplo n.º 22
0
def Process_air_quality():
    """Run the model comparison and each learner on the ozone data.

    Loads the data through ``LoadData.load_ozone_data``, runs
    ``Experiments.Models_Comparison`` and then invokes the decision tree,
    AdaBoost, KNN, neural network and SVM routines with ``optimize=True``.
    """
    X, y = LoadData.load_ozone_data()

    name = "Air Pollution"
    Experiments.Models_Comparison(X, y, name)

    # Shared by all learners below.
    optimize = True

    DT.DecisionTree(X, y, title=name + " Decision Tree", optimize=optimize)
    AB.AdaBoost(X, y, title=name + " AdaBoost", optimize=optimize)
    KNN.KNN(X, y, title=name + " KNN", optimize=optimize)
    NN.NeuralNetwork(X, y, title=name + " Neural Network", optimize=optimize)
    SVM.SVM(X, y, title=name + " SVM", optimize=optimize)
Exemplo n.º 23
0
    def create_a_node(self, node_type):
        """Create the node object that belongs to ``node_type``.

        Accepted values: ``sensor_listener``, ``serial_transmit``,
        ``wheel_node``, ``arm_node``, ``voice_sys``, ``dt_sys``, ``UI_sys``,
        ``app_train_face``, ``vision_sys``, ``default_control``,
        ``marker_sys``, ``face_track``, ``roam``, ``cloud``,
        ``object_detect`` and ``emotion_detect``.

        The camera based nodes first replace ``self.cap`` with the result of
        ``open_camera``; ``dt_sys`` additionally stores the created object
        in ``self.dt_node``.

        Args:
            node_type: name of the node to create.

        Returns:
            The created object, or ``None`` for an unknown ``node_type``.
        """
        # Strings are compared with ``==``: ``is`` tests object identity and
        # only matched when the caller happened to pass an interned string.
        a_object = None
        if node_type == 'sensor_listener':
            a_object = SensorListener.SensorListener(self)
        elif node_type == 'serial_transmit':
            a_object = SerialTransmit.SerialTransmit(self)
        elif node_type == 'wheel_node':
            a_object = WheelNode.WheelNode(self)
        elif node_type == 'arm_node':
            a_object = ArmNode.ArmNode(self)
        elif node_type == 'voice_sys':
            a_object = voice.Voice(self)
        elif node_type == 'dt_sys':
            a_object = DT.DecisionTree(self)
            self.dt_node = a_object
        elif node_type == 'UI_sys':
            a_object = UI.RobotControl(self)
        elif node_type == 'app_train_face':
            self.cap = open_camera(self.cap)
            a_object = tfr.TrainFaceRecognition(self, self.cap)
        elif node_type == 'vision_sys':
            self.cap = open_camera(self.cap)
            a_object = vision.Vision(self, self.cap)
        elif node_type == 'default_control':
            a_object = defaultcontrol.DefaultControl(self)
        elif node_type == 'marker_sys':
            # The marker node opens the camera with an explicit 1280x720
            # argument.
            self.cap = open_camera(self.cap, [1280, 720])
            a_object = MarkerAPI.Marker(self, self.cap)
        elif node_type == 'face_track':
            a_object = PersonTrack.PersonTrack(self)
        elif node_type == 'roam':
            a_object = Roam.Roam(self)
        elif node_type == 'cloud':
            a_object = Cloud.PersonTrackToCloud(self)
        elif node_type == 'object_detect':
            self.cap = open_camera(self.cap)
            a_object = obj_detect.ObjectRecognition(self, self.cap)
        elif node_type == 'emotion_detect':
            self.cap = open_camera(self.cap)
            a_object = emotion_detection.EmotionDetection(self, self.cap)
        return a_object
Exemplo n.º 24
0
 def fit(self, x, y):
     """Fit ``self.max_tree`` decision trees on random samples of the data.

     ``x`` and ``y`` are stacked column-wise; for each tree a sample is
     taken with ``self.randomSample`` and split back into its last column
     (targets) and the remaining columns (features).

     Returns:
         ``self``.
     """
     stacked = np.hstack((x, y))
     for _ in range(self.max_tree):
         sample = self.randomSample(stacked)
         features = sample[:, :-1]
         targets = sample[:, -1]
         tree = de.DecisionTree(criterion=self.criterion,
                                max_depth=self.max_depth)
         # The tree is given the targets as a column vector.
         tree.fit(features, targets.reshape(len(targets), 1))
         self.forest.append(tree)
     return self
Exemplo n.º 25
0
def decision_tree_classification(X, y, test_dat):
    """Train a decision tree and write its test predictions to a CSV file.

    Args:
        X: training inputs passed to ``train``.
        y: training targets passed to ``train``.
        test_dat: test inputs; one output row is written per entry along
            its first axis.

    The predictions go to ``spam_predictions_decision_tree.csv`` under an
    ``Id,Category`` header with 1-based ids; the predictions are indexed
    as ``y_hat[i, 0]`` and converted to ``int``.
    """
    classifier = dt.DecisionTree(45)
    classifier.train(X, y)
    y_hat = classifier.predict(test_dat)

    # ``with`` closes the file even when a write or conversion fails.
    with open("spam_predictions_decision_tree.csv", 'w') as f:
        f.write("Id,Category\n")
        f.writelines(
            str(i + 1) + "," + str(int(y_hat[i, 0])) + "\n"
            for i in range(np.size(test_dat, 0)))
    print("DONE")
 def __init__(self):
     """Initialise the player state with its defaults.

     Creates a fresh ``dt.DecisionTree`` and fetches the shared ``God``
     instance; counters start at zero and the confidence value at 50.
     """
     # Identity and cards.
     self.player_id = None
     self.our_hand = []
     self.our_rule_expression = None

     # Collaborators.
     self.decision_tree = dt.DecisionTree()
     self.god_instance = God.God.get_instance()

     # Bookkeeping about the guesses made so far.
     self.num_correct = 0
     self.num_incorrect = 0
     self.num_consecutive_correct = 0
     self.card_played = None
     self.confidence_value = 50
Exemplo n.º 27
0
 def fit(self,data,data_label):
     """Fit ``self.n_trees`` decision trees on samples of the data.

     The data and labels are kept on the instance and ``self.trees`` is
     reset.  Every tree is fitted on the rows selected by the indices that
     ``self.sample()`` returns.
     """
     self.data = data
     self.data_label = data_label
     self.trees = []

     for _ in range(self.n_trees):
         picked = self.sample()
         rows = [data[position] for position in picked]
         targets = [data_label[position] for position in picked]
         tree = DecisionTree.DecisionTree(n_attribute=self.n_attribute,
                                          discretize=self.discretize)
         tree.fit(rows, targets)
         self.trees.append(tree)
Exemplo n.º 28
0
 def fit(self, data, label):
     """Fit ``self.num_trees`` trees on bootstrapped rows and random columns.

     For every tree the rows are drawn with replacement and
     ``self.num_features`` distinct columns are chosen.  Each fitted tree is
     stored in ``self.trees`` together with the columns it was trained on.
     """
     row_count, column_count = data.shape
     for _ in range(self.num_trees):
         # Rows are drawn before columns, matching the original call order.
         row_picks = np.random.randint(0, row_count, row_count)
         column_picks = np.random.choice(column_count,
                                         self.num_features,
                                         replace=False)
         subset = data[row_picks, :][:, column_picks]
         subset_labels = label[row_picks]

         tree = DecisionTree(self.max_depth, self.min_obs)
         tree.fit(subset, subset_labels)
         # Keep the column choice alongside the tree that was fitted on it.
         self.trees.append((column_picks, tree))
Exemplo n.º 29
0
def bagging_learning(sample_indices, meta, train_data, test_data, max_d):
    """Fit one tree on a resample and return its test predictions.

    Args:
        sample_indices: row indices selecting the resample from
            ``train_data``.
        meta: metadata passed to the tree's ``fit``.
        train_data: training array; the last column holds the labels.
        test_data: test array; the last column is excluded from the
            features.
        max_d: passed to ``fit`` after ``meta``.

    Returns:
        ``pd.DataFrame`` with the columns ``prediction`` and ``probs``.
    """
    resample = train_data[sample_indices]
    learner = dt.DecisionTree()
    learner.fit(resample[:, :-1], resample[:, -1], meta, max_d)

    # The class probabilities are printed before the frame is assembled.
    print(learner.predict(test_data[:, :-1], prob=True))

    labels = learner.predict(test_data[:, :-1], prob=False)
    probabilities = learner.predict(test_data[:, :-1], prob=True)
    return pd.DataFrame({"prediction": labels, "probs": probabilities})
Exemplo n.º 30
0
def main():
    """Build a decision tree from the training partition and print it.

    The command-line options supply the training and test file names and
    the ``depth`` value given to the tree.
    """
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename, True)
    test_partition = util.read_arff(opts.test_filename, False)

    # ``depth`` is None when the parsed options carry no such entry.
    depth = vars(opts).get("depth")

    # Create the DecisionTree from the training partition.
    tree = DecisionTree(train_partition, depth)
    rootnode = tree.constructsubtree(train_partition, depth, 0)

    # Print the text representation of the tree.
    tree.printtree(rootnode)