def draw_graph(self, x, y):
    """Fit an ID3 tree and a CART tree on (x, y) and export both as PDF graphs.

    Files written to the current working directory:
      * ``SVC_Tree.dot`` / ``SVC_Tree.pdf``       - ID3 tree
      * ``SVC_Tree_v1.dot`` / ``SVC_Tree_v1.pdf`` - CART tree, default labels
      * ``SVC_Tree_v2`` / ``SVC_Tree_v2.pdf``     - CART tree with feature and
        class names, filled and rounded nodes

    A text rendering of the ID3 tree is also printed.

    Args:
        x: Training samples, handed unchanged to both estimators.
        y: Training targets, handed unchanged to both estimators.

    Returns:
        True, always.
    """
    # --- ID3 tree -------------------------------------------------------
    id3_clf = Id3Estimator()
    id3_clf.fit(x, y, check_input=True)
    print(export_text(id3_clf.tree_, self.feature_names))

    export_graphviz(id3_clf.tree_, 'SVC_Tree.dot', self.feature_names)
    id3_graph = pydot.graph_from_dot_file('SVC_Tree.dot')
    id3_graph[0].write_pdf("SVC_Tree.pdf")

    # --- CART tree ------------------------------------------------------
    cart_clf = DecisionTreeClassifier()
    cart_clf = cart_clf.fit(x, y)

    # Version v1: default export.
    tree.export_graphviz(cart_clf, out_file='SVC_Tree_v1.dot')
    v1_graph = pydot.graph_from_dot_file('SVC_Tree_v1.dot')
    v1_graph[0].write_pdf("SVC_Tree_v1.pdf")

    # Version v2: labelled export. The DOT source goes straight to the file,
    # so there is no in-memory DOT string to wrap; the former
    # graphviz.Source(...) call received None and has been removed.
    tree.export_graphviz(cart_clf,
                         out_file="SVC_Tree_v2",
                         feature_names=self.feature_names,
                         class_names=self.target,
                         filled=True,
                         rounded=True,
                         special_characters=True)
    v2_graph = pydot.graph_from_dot_file('SVC_Tree_v2')
    v2_graph[0].write_pdf("SVC_Tree_v2.pdf")
    return True
Exemplo n.º 2
0
def Tree():
    """Train an ID3 tree from ``przypadki.txt`` / ``wyniki.txt`` and export it.

    Every sample line of ``przypadki.txt`` is split on single spaces; only the
    first character of the seventh field is kept. The targets are the digits
    of the first line of ``wyniki.txt`` with its last two characters removed.
    The tree is written to ``out.dot`` and printed as text.

    Returns:
        The fitted ``Id3Estimator``.
    """
    names = ["tarcza", "czy lata", "wiek", "zbroja", "hp", "level", "potwor"]

    # Mode 'rU' is rejected by Python 3.11+ (universal newlines are already
    # the default in text mode); the context manager closes the handle.
    with open('przypadki.txt', 'r') as cases_file:
        count = len(cases_file.readlines())

    x = []
    # NOTE(review): range(1, count) never reads the last line of the file -
    # confirm this is intended before changing it.
    for i in range(1, count):
        line = linecache.getline('przypadki.txt', i).split(" ")
        line[6] = str(line[6][0])
        x.append(line)
    X = np.asarray(x)
    print(X)

    # Parse the targets once and reuse them for both the array and the table.
    yd = [int(ch) for ch in linecache.getline('wyniki.txt', 1)[:-2]]
    y = np.array(yd)

    # Header row is a copy of `names`, so appending "wynik" no longer leaks
    # the target label into the feature names passed to export_graphviz.
    d = [names + ["wynik"]]
    for i, label in enumerate(yd):
        d.append(x[i] + [label])
    print(d)

    clf = Id3Estimator()
    clf.fit(X, y, check_input=True)

    export_graphviz(clf.tree_, "out.dot", names)
    print(export_text(clf.tree_, names))
    return clf
Exemplo n.º 3
0
 def cpuUsageDecisionTree(self):
     """Fit an ID3 tree on the data from ``get_data_from_csv`` and write ``out.dot``."""
     samples, labels = self.get_data_from_csv()

     estimator = Id3Estimator()
     estimator.fit(samples, labels, check_input=True)

     # Labels used for the attributes in the exported graph.
     column_labels = [
         "vm_id_map",
         "timestamp_new",
         "cpu_usage_percent",
         "admin_historic_decision_cpu",
     ]
     export_graphviz(estimator.tree_, "out.dot", column_labels)
Exemplo n.º 4
0
def id3():
  """Fit an ID3 tree on columns 3-5 of ``Task4_Data.csv`` and write ``tree.dot``.

  Column 6 is the target. The CSV is parsed once; header labels and values
  both come from that single frame.
  """
  frame = pd.read_csv('Task4_Data.csv')
  headers = frame.columns.values[3:6]

  table = frame.values
  y = table[:, 6]
  X = table[:, 3:6]

  clf = Id3Estimator()
  clf.fit(X, y, check_input=True)
  export_graphviz(clf.tree_, 'tree.dot', headers)
Exemplo n.º 5
0
def BuildTree():
    """Train an ID3 tree on 80% of ``recommend.csv`` and export it to ``lol.dot``.

    Returns:
        The fitted ``Id3Estimator``.
    """
    feature_names = [
        "danie", "na ciepło", "z mięsem", "na słodko", "kwaśne", "alkoholowe",
        "czekoladowe", "wybor"
    ]

    # Header-less, semicolon-separated file; "wybor" is the target column.
    dataset = ps.read_csv("recommend.csv",
                          sep=";",
                          names=feature_names,
                          header=None)
    Y = dataset['wybor']
    X = dataset.drop('wybor', axis=1)

    clf = Id3Estimator()

    # Only the training part of the random 80/20 split is used.
    X_train, _, Y_train, _ = train_test_split(X, Y, test_size=0.20)
    clf.fit(X_train, Y_train)

    export_graphviz(clf.tree_, "lol.dot", feature_names)
    return clf
Exemplo n.º 6
0
def BuildTree():
    """Train an ID3 tree on ``bill.csv``, then sample test images until one scores 1.

    The tree is exported to ``test.dot``. A model loaded from ``third_try.h5``
    is then applied to randomly chosen images; the loop ends on the first
    image whose first output value equals 1.

    Returns:
        ``[1, clf]`` where ``clf`` is the fitted ``Id3Estimator``.
    """
    # Column names; "bill" is the target.
    feature_names = [
        "pair", "empty_plate", "talking", "mood", "asked", "hurry", "bill"
    ]

    # Load the header-less, semicolon-separated dataset.
    dataset = ps.read_csv("bill.csv",
                          sep=";",
                          names=feature_names,
                          header=None)
    Y = dataset['bill']
    X = dataset.drop('bill', axis=1)

    # Build the decision tree on the training part of a random 80/20 split.
    clf = Id3Estimator()
    X_train, _, Y_train, _ = train_test_split(X, Y, test_size=0.20)
    clf.fit(X_train, Y_train)
    export_graphviz(clf.tree_, "test.dot", feature_names)
    model = load_model('third_try.h5')

    # NOTE(review): file names come from an absolute directory, but the image
    # is loaded from the relative "test/" folder - confirm both are the same.
    while True:
        path = random.choice(
            os.listdir("C://Users/Kinia/Desktop/sztuczna2/SI-master/test"))
        print(path)

        picture = image.load_img("test/" + path, target_size=(100, 100))
        batch = np.expand_dims(image.img_to_array(picture), axis=0)

        rslt = model.predict(batch)
        print(rslt)
        if rslt[0][0] == 1:
            return [1, clf]

        # Not a hit: report 0 and try another random image.
        print(0)
Exemplo n.º 7
0
 def generate_tree(self, max_depth):
     """Fit an ID3 tree on ``self.dataframe`` and export it as DOT and PNG.

     The first dataframe column is the target; all remaining columns are
     the attributes. Output goes to ``./output/<run_id>/tree.dot`` and
     ``./output/<run_id>/tree.png``. The fitted estimator is stored in
     ``self.estimator``.

     Args:
         max_depth: Maximum depth passed to ``Id3Estimator``.
     """
     # Local import keeps this method independent of file-level imports.
     import subprocess

     self.__print(
         "\n-------------------------------------- Modelling ------------------------------------------\n"
     )
     self.__create_output_dir()
     self.__print("\n-----DECISION TREE GENERATION-----\n")

     dot_path = './output/' + self.run_id + '/tree.dot'
     png_path = "./output/" + self.run_id + "/tree.png"
     self.__print("Output file names: " + dot_path + " " + png_path)

     self.estimator = Id3Estimator(max_depth=max_depth)

     target = self.dataframe.iloc[:, 0]
     attributes = self.dataframe.iloc[:, 1:]
     feature_names = list(attributes.columns.values)

     self.estimator = self.estimator.fit(attributes, target)
     export_graphviz(self.estimator.tree_, dot_path, feature_names)

     # Render with Graphviz. An argument list (no shell) keeps run_id from
     # being interpreted as shell syntax. -Tpng can be changed to -Tsvg,
     # -Tjpg, -Tgif etc. (see the dot man pages).
     try:
         subprocess.run(["dot", "-Tpng", dot_path, "-o", png_path],
                        check=False)
     except OSError as exc:
         # Stay best-effort like the former os.system call: a missing `dot`
         # binary is reported instead of raised.
         self.__print("Could not run Graphviz 'dot': " + str(exc))
Exemplo n.º 8
0
# Toy dataset: fit an ID3 tree and render it to hw3/ID3/basketball.png.
basketball_feature_names = ["HomeOrAway", "InTop25", "Media"]

# Six samples with three categorical attributes each.
X = np.array([
    ['Home', 'Out', '1-NBC'],
    ['Home', 'In', '1-NBC'],
    ['Away', 'Out', '2-ESPN'],
    ['Away', 'Out', '3-FOX'],
    ['Home', 'Out', '1-NBC'],
    ['Away', 'Out', '4-ABC'],
])

# One label per sample in X.
Y = np.array([
    'Win',
    'Lose',
    'Win',
    'Win',
    'Win',
    'Win',
])

clf = Id3Estimator()
clf.fit(X, Y)

# Export the tree as DOT, convert it to PNG with Graphviz, then delete the
# intermediate DOT file.
export_graphviz(clf.tree_,
                'hw3/ID3/basketball.dot',
                feature_names=basketball_feature_names)
subprocess.run(
    ['dot', '-Tpng', 'hw3/ID3/basketball.dot', '-o', 'hw3/ID3/basketball.png'])
# NOTE(review): `rm` only exists on POSIX systems; os.remove would be portable.
subprocess.run(['rm', 'hw3/ID3/basketball.dot'])
Exemplo n.º 9
0
    X, Y, test_size=test_proportion)
X_trainf10, X_test, Y_trainf10, Y_test = train_test_split(
    X, Y, test_size=test_proportion)

# Ten independent ID3 estimators, one per training split.
# NOTE(review): X_trainf1..X_trainf9 / Y_trainf1..Y_trainf9 are presumably
# created above this fragment in the same way as X_trainf10 - confirm.
estimator1 = Id3Estimator()
estimator2 = Id3Estimator()
estimator3 = Id3Estimator()
estimator4 = Id3Estimator()
estimator5 = Id3Estimator()
#
estimator6 = Id3Estimator()
estimator7 = Id3Estimator()
estimator8 = Id3Estimator()
estimator9 = Id3Estimator()
estimator10 = Id3Estimator()

# Fit each estimator on its own split.
estimator1.fit(X_trainf1, Y_trainf1)
estimator2.fit(X_trainf2, Y_trainf2)
estimator3.fit(X_trainf3, Y_trainf3)
estimator4.fit(X_trainf4, Y_trainf4)
estimator5.fit(X_trainf5, Y_trainf5)
#
estimator6.fit(X_trainf6, Y_trainf6)
estimator7.fit(X_trainf7, Y_trainf7)
estimator8.fit(X_trainf8, Y_trainf8)
estimator9.fit(X_trainf9, Y_trainf9)
estimator10.fit(X_trainf10, Y_trainf10)

# Only the first tree is exported (no feature names are supplied).
dot_data = export_graphviz(estimator1.tree_, 'random_forest_tree1.dot')

# The prediction result is not stored.
estimator1.predict(X_test)
Exemplo n.º 10
0
]].values
X[0:5]
y = df['CETIFIED'].values
y[0:5]

# Split the data: 30% held out for testing, fixed seed for reproducibility.
X_trainset, X_testset, y_trainset, y_testset = train_test_split(X,
                                                                y,
                                                                test_size=0.3,
                                                                random_state=3)

# Create the model.
irisTree = Id3Estimator()
# Bare expression: has no effect outside an interactive session.
irisTree

# Train, then predict on the held-out set.
irisTree.fit(X_trainset, y_trainset)
predTree = irisTree.predict(X_testset)

# Show the first five predictions next to the true labels.
print(predTree[0:5])
print(y_testset[0:5])

# Evaluate
print("DecisionTrees's Accuracy: ",
      metrics.accuracy_score(y_testset, predTree))

# Visualize the tree: the first four dataframe columns label the attributes.
featureNames = df.columns[0:4]
export_graphviz(irisTree.tree_, 'iris.dot', featureNames)

# In a command prompt run: dot -Tpdf iris.dot -o iris.pdf
   :align:   center

"""

from id3 import Id3Estimator, export_graphviz
import numpy as np

# Attribute names, in the column order of X.
feature_names = ["age", "gender", "sector", "degree"]

# Seventeen samples. Mixing ints and strings in one np.array yields a string
# array, so the ages are stored as text.
X = np.array([[45, "male", "private", "m"], [50, "female", "private", "m"],
              [61, "other", "public", "b"], [40, "male", "private", "none"],
              [34, "female", "private", "none"],
              [33, "male", "public", "none"], [43, "other", "private", "m"],
              [35, "male", "private", "m"], [34, "female", "private", "m"],
              [35, "male", "public", "m"], [34, "other", "public", "m"],
              [34, "other", "public", "b"], [34, "female", "public", "b"],
              [34, "male", "public", "b"], [34, "female", "private", "b"],
              [34, "male", "private", "b"], [34, "other", "private", "b"]])

# One range label per sample in X.
y = np.array([
    "(30k,38k)", "(30k,38k)", "(30k,38k)", "(13k,15k)", "(13k,15k)",
    "(13k,15k)", "(23k,30k)", "(23k,30k)", "(23k,30k)", "(15k,23k)",
    "(15k,23k)", "(15k,23k)", "(15k,23k)", "(15k,23k)", "(23k,30k)",
    "(23k,30k)", "(23k,30k)"
])

clf = Id3Estimator()
clf.fit(X, y, check_input=True)

# Write the fitted tree in Graphviz DOT format.
export_graphviz(clf.tree_, "out.dot", feature_names)
# Load the whitespace-separated feature names and de-duplicate them.
feature_list_file = "features_65.txt"
# The context manager closes the handle even if read() raises.
with open(feature_list_file, "r") as feature_list_fh:
    features = feature_list_fh.read()

feature_list = features.split()
feature_list = unique(feature_list)

if (model == 0):  # ID3 Decision tree
    estimator = Id3Estimator()
    max_estimator = Id3Estimator()  # To store max model
    m_scores = np.zeros(num_folds)
    estimator = estimator.fit(data, np.ravel(target))
    tree = export_graphviz(estimator.tree_, 'tree.dot',
                           np.asarray(feature_list).ravel())
    #tree = export_graph('tree.dot', 'tree_mine.txt')
    k_fold = StratifiedKFold(
        num_folds, shuffle=False)  # K-fold data split function from sklearn
    max_acc_model = 0

    for k, (train, test) in enumerate(k_fold.split(
            data, np.ravel(target))):  # Compute accuracy over k folds
        estimator.fit(data[train], np.ravel(target)[train])
        prediction = estimator.predict(data[test])
        #tree = export_graphviz(estimator.tree_, 'tree'+str(k)+'.dot', np.asarray(feature_list).ravel())
        yTest = np.ravel(target)[test]  # target labels - test set
        m_scores[k] = 1 - (
            np.count_nonzero(prediction.astype(int) ^ yTest.astype(int)) /
            test.size)  # Checking for errors and computing score
        if m_scores[k] > m_scores[max_acc_model]:
Exemplo n.º 13
0
])

# Leave-one-out: hold out one randomly chosen sample for testing and train on
# the rest.
n = np.random.randint(0, x.shape[0])  # index of the held-out sample
x_test = np.array([x[n]])
x_train = np.delete(x, n, 0)
y_test = np.array([y[n]])
y_train = np.delete(y, n, 0)

# Train the ID3 estimator on the remaining samples.
id3 = Id3Estimator()
id3.fit(x_train, y_train)

# Testing

y_predict = id3.predict(x_test)
# With a single test sample the result is either fully right or fully wrong.
print("Precision")
print("Input: ", x_test, "| Expected: ", y_test, "| Result: ", y_predict)
if np.array_equal(y_test, y_predict):
    print("100%")
else:
    print("0%")

# Write the tree in DOT format (`features` is defined outside this fragment).
export_graphviz(id3.tree_, 'tree_p1.dot', features)
#with open("tree_p1.dot") as f:
#    dot_graph = f.read()
#g = graphviz.Source(dot_graph)
#g.render()
#g.view()
Exemplo n.º 14
0
    'cap-shape', 'cap-surface', 'cap-color', 'bruises%3F', 'odor',
    'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
    'stalk-shape', 'stalk-surface-above-ring', 'stalk-surface-below-ring',
    'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-type',
    'veil-color', 'ring-type', 'spore-print-color', 'population', 'habitat'
]].values
X[0:5]

# Split the data: half for training, half for testing, fixed seed.
X_trainset, X_testset, y_trainset, y_testset = train_test_split(X,
                                                                y,
                                                                test_size=0.5,
                                                                random_state=3)

# Create the model.
mushroomTree = Id3Estimator()
# Bare expression: has no effect outside an interactive session.
mushroomTree

# Train, then predict on the held-out set.
mushroomTree.fit(X_trainset, y_trainset)
predTree = mushroomTree.predict(X_testset)
print(predTree[0:21])
print(y_testset[0:21])

# Evaluate
print("Accuracy del árbol de decisión: ",
      metrics.accuracy_score(y_testset, predTree))
# Visualize the tree.
# NOTE(review): this takes the first 21 dataframe columns as attribute names -
# confirm that matches the columns selected for X above.
featureNames = df.columns[0:21]
export_graphviz(mushroomTree.tree_, 'mushroom.dot', featureNames)
Exemplo n.º 15
0
import numpy as np

# Play-tennis example: fit an ID3 tree and render it as a PNG.
# NOTE(review): the list has five names but inputda has four columns; the last
# entry ('PlayTennis') looks like the target name - confirm it is harmless.
feature_names = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']
inputda = np.array([['Sunny', 'Hot', 'High', 'Weak'],
                    ['Sunny', 'Hot', 'High', 'Strong'],
                    ['Overcast', 'Hot', 'High', 'Weak'],
                    ['Rain', 'Mild', 'High', 'Weak'],
                    ['Rain', 'Cool', 'Normal', 'Weak'],
                    ['Rain', 'Cool', 'Normal', 'Strong'],
                    ['Overcast', 'Cool', 'Normal', 'Strong'],
                    ['Sunny', 'Mild', 'High', 'Weak'],
                    ['Sunny', 'Cool', 'Normal', 'Weak'],
                    ['Rain', 'Mild', 'Normal', 'Weak'],
                    ['Sunny', 'Mild', 'Normal', 'Strong'],
                    ['Overcast', 'Mild', 'High', 'Strong'],
                    ['Overcast', 'Hot', 'Normal', 'Weak'],
                    ['Rain', 'Mild', 'High', 'Strong']])

# Labels are built as a column of one-element rows ...
inputsa = np.array([['No'], ['No'], ['Yes'], ['Yes'], ['Yes'], ['No'], ['Yes'],
                    ['No'], ['Yes'], ['Yes'], ['Yes'], ['Yes'], ['Yes'],
                    ['No']])

# ... and flattened to one dimension before fitting.
inputsa = inputsa.reshape(len(inputsa), )
estimator = Id3Estimator()
estimator.fit(inputda, inputsa, check_input=False)
export_graphviz(estimator.tree_, 'tree.dot', feature_names)
#export_graphviz(estimator,out_file=dot_data,filled=True,rounded=True,special_characters=True)
#graph = pydotplus.graph_from_dot_data(dot_data_getvalue())
#graph.write_png("dtree.png")
# Convert the DOT file to PNG through the shell (requires Graphviz `dot`).
system("dot -Tpng tree.dot > ID3_Rafa_Play_Tennis.png")
# Seventeen class labels; X and feature_names are defined outside this fragment.
y = np.array([
    "Höch", "Höch", "Höch", "Niedrig", "Niedrig", "Niedrig", "M-Höch",
    "M-Höch", "M-Höch", "M-Niedrig", "M-Niedrig", "M-Niedrig", "M-Niedrig",
    "M-Niedrig", "M-Höch", "M-Höch", "M-Höch"
])

clf = Id3Estimator()
clf.fit(X, y, check_input=True)

# Print a text rendering of the fitted tree.
print(export_text(clf.tree_, feature_names))

# Export the tree as a DOT file and convert it to PDF.
# NOTE(review): dot_data is not used by any line in this fragment.
dot_data = StringIO()
#tree.export_graphviz(clf, out_file = dot_data)
export_graphviz(clf.tree_, 'Ausfallrisiko.dot', feature_names)
graph = pydot.graph_from_dot_file('Ausfallrisiko.dot')

graph[0].write_pdf("Ausfallrisiko.pdf")

# Accuracy

X = np.array([[900, 1, int(True), int(True)], [1000, 0,
                                               int(True),
                                               int(True)],
              [1220, 2, int(False), int(False)], [800, 1, int(True), 2],
              [680, 0, int(True), 2], [660, 1, int(False), 2],
              [860, 2, int(True), int(False)], [700, 1,
                                                int(True),
                                                int(True)],
              [680, 0, int(True), int(True)], [700, 1,
https://pypi.python.org/pypi/decision-tree-id3/0.1.2
"""
#from sklearn import tree
from id3 import Id3Estimator
from id3 import export_graphviz
import numpy as np
import graphviz

#           | 0     | 1         | 2
#Outlook    | Sunny | Overcast  | Rain
#Temperature| Hot   | Mild      | Cool
#Humidity   | High  | Normal    | -
#Wind       | Weak  | Strong    | -

# Attribute names, in the column order of X.
x_labels = ["Outlook", "Temperature", "Humidity", "Wind"]

# Fourteen samples, integer-encoded per the table in the comments above.
X = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [2, 1, 0, 0],
              [2, 2, 1, 0], [2, 2, 1, 1], [1, 2, 1, 1], [0, 1, 0, 0],
              [0, 2, 1, 0], [2, 1, 1, 0], [0, 1, 1, 1], [1, 1, 0, 1],
              [1, 0, 1, 0], [2, 1, 0, 1]])

# One binary label per sample in X.
Y = np.array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0])

#clf = tree.DecisionTreeClassifier()
# min_samples_split=3 is passed to the estimator as a constructor option.
clf = Id3Estimator(min_samples_split=3)
clf.fit(X, Y)
# Write the tree to decisiontree.dot.
dot_data = export_graphviz(clf.tree_, "decisiontree.dot", x_labels)
#predictions = clf.predict(X)
#for i in range(len(X)):
#    print X[i],Y[i],"->",predictions[i]
Exemplo n.º 18
0
    "exang", "oldpeak", "slope", "ca", "thal", "num"
]
# Load the dataset; rows containing a "?" value are dropped.
with open('AI/Decision_tree/cleveland.csv') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames=feaure_names)
    list_all_data = [dat for dat in reader if "?" not in dat.values()]

    # Work on a deep copy because data.pop('num') below mutates each row.
    # The last row is excluded from training.
    temp_all_data = copy.deepcopy(list_all_data[:-1])
    # The filter both removes 'num' from the row and keeps only rows whose
    # 'num' value is truthy; the remaining values form the feature vector.
    list_data = np.array([
        np.array([v for k, v in data.items()]) for data in temp_all_data
        if data.pop('num', None)
    ])
    # Targets come from the untouched rows, one single-element array each.
    list_target = np.array(
        [np.array([data['num']]) for data in list_all_data[:-1]])

# `estimator` is created outside this fragment.
estimator.fit(list_data, list_target)
export_graphviz(estimator.tree_, 'DecTree.dot', feaure_names)
# The fifth row from the end is used as the test input.
# NOTE(review): that row is also part of the training slice [:-1] - confirm.
test_temp_data = copy.deepcopy(list_all_data[-5:-4])
test_data = np.array([
    np.array([v for k, v in data.items()]) for data in test_temp_data
    if data.pop('num', None)
])

predict_data = estimator.predict(test_data)
actual_outcome = [d['num'] for d in list_all_data[-5:-4]]
# Print every attribute of the test row except the target.
print('\n\nTesting input set:\n')
for k, v in list_all_data[-5:-4][0].items():
    if (k != 'num'):
        print(f'{k}={v}')

print('\n')
print(f'Actual outcome: {actual_outcome[0]}')
Exemplo n.º 19
0
# Predict on the test data with the previously built `model`.
yPredict = model.predict(dataOnlyTest)
#print(accuracy_score(yTest,yPredict))

# Export that model's tree as DOT and render it to PNG with Graphviz.
tree.export_graphviz(model,
                     out_file='footballCARTtree.dot',
                     feature_names=fbFeatures)
call(["dot", "footballCARTtree.dot", "-Tpng", "-o", "footballCARTtree.png"])

#
#ID3
#

# Fit an ID3 tree on the full dataset and render it the same way.
estimator = Id3Estimator()
estimator.fit(fbX, fbY)
export_graphviz(estimator.tree_, 'footballID3tree.dot', fbFeatures)
call(["dot", "footballID3tree.dot", "-Tpng", "-o", "footballID3tree.png"])

#
#C4.5
#
"""
Cannot find a reliable source that implemented C4.5.
"""

#
#Task 4
#Question 2
#

#
Exemplo n.º 20
0
from id3 import Id3Estimator, export_graphviz
import numpy as np
import graphviz

# Attribute names, in the column order of x.
feature_names = ["color", "forma", "tamanio"]

# Six samples with three categorical attributes each.
x = np.array([["rojo", "cuadrado", "grande"], ["azul", "cuadrado", "grande"],
              ["rojo", "redondo", "pequenio"],
              ["verde", "cuadrado", "pequenio"], ["rojo", "redondo", "grande"],
              ["verde", "cuadrado", "grande"]])

# One "+" / "-" label per sample.
y = np.array(["+", "+", "-", "-", "+", "-"])

id3 = Id3Estimator()
id3.fit(x, y)

# Write the tree as DOT, read it back and display it through graphviz.
export_graphviz(id3.tree_, 'objetos.dot', feature_names)
with open("objetos.dot") as f:
    dot_graph = f.read()
g = graphviz.Source(dot_graph)
g.render()
g.view()
Exemplo n.º 21
0
# Training targets: everything from index `size` onwards.
y_train = data.target[size:]

# In[150]:

# Fit the ID3 estimator and predict on the test samples.
id3 = Id3Estimator()
id3.fit(x_train, y_train)
y_pred = id3.predict(x_test)

# In[151]:

# Report test size, accuracy and per-class precision/recall/F-score/support.
print('Tamaño de test: ', size)
print("Precisión:", metrics.accuracy_score(y_test, y_pred))
print(precision_recall_fscore_support(y_test, y_pred))

# In[152]:

# Write the fitted tree in DOT format.
export_graphviz(id3.tree_, 'arbol.dot', data.feature_names)

# In[153]:

with open("arbol.dot") as f:
    dot_graph = f.read()

# In[154]:

# Render and open the graph through graphviz.
g = graphviz.Source(dot_graph)
g.render()
g.view()

# In[ ]:
Exemplo n.º 22
0
# Merge the extra samples (bX, bY) into the training set.
# NOTE(review): X is extended as [X, bX] but Y as [bY, Y]; unless the two are
# meant to be ordered differently, samples and labels no longer line up.
X = np.vstack([X, bX])
Y = np.concatenate([bY, Y])

clf = Id3Estimator(min_samples_split=4,
                   prune=False,
                   is_repeating=False,
                   gain_ratio=False)
clf.fit(X, Y, check_input=True)

# Read the attribute columns of the test-case file as strings, skipping the
# header row.
J = np.genfromtxt(TESTCASE_DATASET,
                  delimiter=",",
                  skip_header=1,
                  dtype=str,
                  usecols=(tuple(range(1, trainning_data_col_upper))))

K = clf.predict(J)
# Re-read the whole file (header included) and cut out the column at
# leaf_col so the predictions can be inserted in its place.
predict_table = np.genfromtxt(TESTCASE_DATASET, delimiter=",", dtype=str)
left, right = np.hsplit(predict_table, [leaf_col])
right = np.hsplit(right, [1])[1]  # without the predicted column

# Prepend a header cell to the predictions and reassemble the table.
K = np.hstack([("Predicted" + attributes[leaf_col]), K]).reshape(-1, 1)
I = np.hstack((left, K, right))

print("Writing result (.csv) in {}".format(RESULT_DATASET))
np.savetxt(RESULT_DATASET, I, delimiter=",", fmt="%s")

# out.dot is written next to this script.
output = path.realpath(path.join(path.realpath(__file__), '..', 'out.dot'))
print("Writing dt in %s" % (output))
export_graphviz(clf.tree_, output,
                attributes[trainning_data_col_lower:trainning_data_col_upper])
Exemplo n.º 23
0
from id3 import Id3Estimator, export_graphviz
import numpy as np
import graphviz

# Attribute names, in the column order of x.
feature_names = ["No. de ejemplares", "Nivel de ventas", "Precio"]

# Fifteen samples with three categorical attributes each.
x = np.array([["<=4", "Buenas", "<=150"], [">4", "Buenas", ">150"],
              [">4", "Buenas", "<=150"], ["<=4", "Buenas", ">150"],
              [">4", "Buenas", ">150"], [">4", "Bajas", ">150"],
              ["<=4", "Bajas", ">150"], ["<=4", "Bajas", ">150"],
              [">4", "Bajas", "<=150"], ["<=4", "Bajas", "<=150"],
              ["<=4", "Promedio", "<=150"], [">4", "Promedio", "<=150"],
              ["<=4", "Promedio", ">150"], [">4", "Promedio", ">150"],
              [">4", "Promedio", "<=150"]])

# One "si" / "no" label per sample.
y = np.array([
    "si", "si", "si", "si", "si", "si", "no", "si", "si", "no", "no", "no",
    "si", "si", "no"
])

id3 = Id3Estimator()
id3.fit(x, y)

# Write the tree as DOT, read it back and display it through graphviz.
export_graphviz(id3.tree_, 'librerias.dot', feature_names)
with open("librerias.dot") as f:
    dot_graph = f.read()
g = graphviz.Source(dot_graph)
g.render()
g.view()
Exemplo n.º 24
0
Created on Wed Sep 05 01:56:33 2018

@author: Student
"""
import sklearn as sk
import pandas as pd
from id3 import Id3Estimator
from id3 import export_graphviz

# `sklearn.cross_validation` was removed from scikit-learn; the splitter now
# lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Load the header-less car-evaluation file; the last column is the target.
data = pd.read_csv('car.data.txt',
                   names=[
                       'buying', 'maint', 'doors', 'persons', 'lug_boot',
                       'safety', 'class'
                   ])

# Integer-encode every categorical column; keep the class labels for later.
data['class'], class_names = pd.factorize(data['class'])
for column in ('buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'):
    data[column], _ = pd.factorize(data[column])

X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)
estimator = Id3Estimator()
estimator.fit(X_train, y_train)
# Label tree nodes with the attribute names. The encoded class column was
# passed here before, which produced meaningless node labels.
export_graphviz(estimator.tree_, 'tree.dot', list(X.columns))

import graphviz
#dot -T p
Exemplo n.º 25
0
    feature_names = f.readlines()
feature_names = [x.strip() for x in feature_names]

X = np.array(genfromtxt(x_file, dtype=None, delimiter="~").tolist())
y = genfromtxt(y_file, dtype='i4')

if len(feature_names) == 1:
    X = X.reshape(-1, 1)

# Fit the tree and measure the elapsed time since `start` (set outside this
# fragment).
clf = Id3Estimator()
clf.fit(X, y, check_input=True)
end = datetime.now()
delta = end - start

try:
    export_graphviz(clf.tree_, dot_file, feature_names)
except Exception as exc:
    # Best-effort export: report the exception class and continue, but let
    # KeyboardInterrupt / SystemExit propagate (a bare except swallowed them).
    print("Unexpected error:", type(exc))

result = convert_dot_to_predicate(dot_file, graph_dir)

# Result row layout: [graph file name, ...predicates..., elapsed seconds].
path, filename = os.path.split(graph_dir)
result.insert(0, filename)
# NOTE(review): timedelta.seconds ignores whole days and sub-second parts.
result.append(delta.seconds)
pprint(result)

# os.path.join avoids writing to "/result.csv" when `path` is empty;
# newline='' is what the csv module expects for files it writes.
with open(os.path.join(path, "result.csv"), 'a', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(result)

# # dot -Tpng out.dot -o out.png
Exemplo n.º 26
0
# Encode the 'windy' column as 0/1.
df['windy'] = df.windy.map({'weak': 0, 'strong': 1})

print('\n+++ CSV Data Change +++')
print(df.head())

print('\n+++ Data shape +++')
print(df.shape)

data = df.values
print('\n+++ Data values +++')
print(data)

# All columns except the last are the attributes ...
data_train = data[:, :-1]
print('\n+++ Data train +++')
print(data_train)

# ... and the last column, flattened to one dimension, is the label.
data_label = data[:, -1:].flatten()
print('\n+++ Data label +++')
print(data_label)

clf = Id3Estimator()
clf.fit(data_train, data_label, check_input=True)

feature_names = ["outlook", "temperature", "humidity", "windy"]

# Print the tree as text and also write it in DOT format.
exported_text = export_text(clf.tree_, feature_names)

print(exported_text)

export_graphviz(clf.tree_, 'out.dot', feature_names)
Exemplo n.º 27
0
# Attribute columns, used both to select the data and to label the tree.
feature_columns = [
    'pontuacao_final', 'sobrevivencia', 'bonus_ultima_sobrevivencia',
    'dano_disparo', 'bonus_disparo_morte', 'colisao_dano',
    'bonus_colisao_morte', '1lugar', '2lugar', '3lugar'
]
dadosX = dados[feature_columns].values
dadosY = dados['classificacao']

# Ordered 70/30 split (no shuffling).
treinoX, testeX, treinoY, testeY = train_test_split(dadosX,
                                                    dadosY,
                                                    test_size=0.3,
                                                    shuffle=False)

modeloArvodeID3 = Id3Estimator(max_depth=3)

modeloArvodeID3.fit(treinoX, treinoY)

export_graphviz(modeloArvodeID3.tree_, 'arvoreExecutada.dot', feature_columns)

classificacoes = modeloArvodeID3.predict(testeX)

# scikit-learn metrics take (y_true, y_pred). With the arguments reversed,
# the precision and recall figures were swapped in the report.
print('Resultados Árvore de Decisão ID3 (Iterative Dichotomiser 3):')
print('Acurácia: %.4f' % accuracy_score(testeY, classificacoes))
print('Precisão: %.4f' %
      precision_score(testeY, classificacoes, average='macro'))
print('Sensibilidade: %.4f' %
      recall_score(testeY, classificacoes, average='macro'))
Exemplo n.º 28
0
# wrap in dataframe
gender_df = pd.DataFrame(gender_data)

# create transformed data frame
# insert the names of the columns that must be encoded as strings
transformed = MultiColumnLabelEncoder(
    columns=['link_color', 'sidebar_color', 'gender']).fit_transform(gender_df)

# slice the transformed dataframe into an array
# NOTE(review): DataFrame.ix was removed in pandas 1.0; this line needs
# .iloc/.loc on current pandas - confirm the intended column range first.
data = np.array(transformed.ix[:, :5])
# identify the target
target = np.array(transformed['gender'])

# Fit on the full dataset and export the tree (`names` is defined elsewhere).
estimator = Id3Estimator()
estimator.fit(data, target)
export_graphviz(estimator.tree_, 'gender_tree.dot', names)

# Fold boundary indices (not referenced by the slices below).
fold_one = 4708
fold_two = 9417
fold_three = 14126
fold_four = 18835

# Four training subsets, each built from hard-coded row ranges.
dataOne = data[4709:18836]
dataTwo = np.append(data[0:4709], data[9417:18836], axis=0)
dataThree = np.append(data[0:9417], data[14127:18836], axis=0)
dataFour = data[0:14127]

# Target subsets using the same row ranges as the data subsets.
targetOne = target[4709:18836]
targetTwo = np.append(target[0:4709], target[9417:18836], axis=0)
targetThree = np.append(target[0:9417], target[14127:18836], axis=0)
targetFour = target[0:14127]
# Load the "SA" subset of the KDD Cup 99 dataset.
bunch = fetch_kddcup99(subset="SA")

data = bunch.data
# Drop columns 1-3 before training.
data = np.delete(data, np.s_[1:4], axis=1)
target = bunch.target
# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target,
                                                    test_size=.2,
                                                    random_state=17)

estimator = Id3Estimator()
print("->Fitting ID3 classifier")
estimator.fit(X_train, y_train)

print("->Writing dot file")
export_graphviz(estimator.tree_, 'tree.dot')

print("->Calculating predictions")
pred = estimator.predict(X_test)

# Count the predictions that match the true label.
well_detected = 0
for index, val in enumerate(pred):
    if val == y_test[index]:
        well_detected += 1

# Accuracy and error rate as percentages.
percentage = well_detected / len(pred) * 100
print("predictions: ", well_detected, "/", len(pred), " = ", percentage, "%")

print("error rate: ", (100 - percentage), "%")
"""
col_names = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'label']
Exemplo n.º 30
0
    ['Overcast', 'Hot', 'Normal', 'False'],
    ['Rainy', 'Mild', 'High', 'True'],
])

# Fourteen 'Yes' / 'No' labels for the samples in X (X is built above this
# fragment).
Y = np.array([
    'No',
    'No',
    'Yes',
    'Yes',
    'Yes',
    'No',
    'Yes',
    'No',
    'Yes',
    'Yes',
    'Yes',
    'Yes',
    'Yes',
    'No',
])

clf = Id3Estimator()
clf.fit(X, Y)

# Export the tree as DOT, convert it to PNG with Graphviz, then delete the
# intermediate DOT file.
export_graphviz(clf.tree_,
                "hw3/ID3/tennis.dot",
                feature_names=tennis_feature_names)
subprocess.run(
    ['dot', '-Tpng', 'hw3/ID3/tennis.dot', '-o', 'hw3/ID3/tennis.png'])
# NOTE(review): `rm` only exists on POSIX systems; os.remove would be portable.
subprocess.run(['rm', 'hw3/ID3/tennis.dot'])