Python classify示例，tree.classify Python示例

示例#1

0

显示文件

def testClass():
    """Train, persist, reload and query the demo decision tree.

    Builds a tree from ``tree.createDataSet()``, stores it in the file
    ``myTree.train``, loads it back and prints the class predicted for the
    feature vectors ``[1, 0]`` and ``[1, 1]``.
    """
    myDat, labels = tree.createDataSet()
    # Train on a copy so ``labels`` is still intact for classify() below.
    # NOTE(review): createTree() is presumed to mutate the label list it is
    # given -- confirm against tree.createTree.
    myTree = tree.createTree(myDat, labels[:])

    # Persist the decision tree, then read it back to check the round trip.
    tree.storeTree(myTree, 'myTree.train')
    myTree2 = tree.grabTree('myTree.train')

    for testVec in ([1, 0], [1, 1]):
        result = tree.classify(myTree2, labels, testVec)
        # Formatted as one string so the line is valid on Python 2 and 3.
        print("Test  %s  result:  %s" % (testVec, result))

示例#2

0

显示文件

def classify(datapoint, tree):
    """Return the label predicted for ``datapoint`` by walking ``tree``.

    At a ``Leaf`` the label with the largest count in ``tree.labels`` is
    returned.  Otherwise the branch whose ``value`` equals
    ``datapoint[tree.feature]`` is followed recursively.  When no branch
    matches, the result is ``None``.
    """
    if not isinstance(tree, Leaf):
        observed = datapoint[tree.feature]
        for child in tree.branches:
            if child.value == observed:
                return classify(datapoint, child)
        # No branch carries this feature value.
        return None

    # Leaf: pick the (label, count) pair with the highest count.
    best_label, _ = max(tree.labels.items(), key=operator.itemgetter(1))
    return best_label

示例#3

0

显示文件

from tree import tree, classify
# One car described by six feature values, classified with the ``tree``
# object imported from the project's tree module.
car = [
    'med',
    'med',
    '4',
    'more',
    'big',
    'high',
]
predicted_class = classify(car, tree)
print(predicted_class)

from collections import Counter

# Gini impurity of a label set: start from 1 and subtract the squared
# proportion of every distinct label.
labels = ["unacc", "unacc", "acc", "acc", "good", "good"]
# Alternative label sets to experiment with:
#labels = ["unacc","unacc","unacc", "good", "vgood", "vgood"]
#labels = ["unacc", "unacc", "unacc", "unacc", "unacc", "unacc"]
label_counts = Counter(labels)
print(label_counts)

impurity = 1
total = len(labels)
for label, occurrences in label_counts.items():
    probability_of_label = occurrences / total
    impurity -= probability_of_label**2
print(impurity)

from collections import Counter
# Label fixtures: one unsplit set and two candidate ways of splitting it.
# 6 x "unacc", 4 x "good", 3 x "vgood".
unsplit_labels = ["unacc"] * 6 + ["good"] * 4 + ["vgood"] * 3

# Candidate split 1: three subsets.
split_labels_1 = [
    ["unacc"] * 6 + ["good"] * 2 + ["vgood"],
    ["good"] * 2,
    ["vgood"] * 2,
]

# Candidate split 2: two subsets.
split_labels_2 = [
    ["unacc"] * 6 + ["good"] * 4,
    ["vgood"] * 3,
]

示例#4

0

显示文件

文件： RFC3.py 项目： MurrayCode/CodeAcademyMLCourseWork

from tree import build_tree, print_tree, car_data, car_labels, classify
import random
random.seed(4)

# Feature order: price of the car, cost of maintenance, number of doors,
# number of people the car can hold, size of the trunk, safety rating.
unlabeled_point = ['high', 'vhigh', '3', 'more', 'med', 'med']

# Bagging: train 20 trees and let them vote on the unlabeled point.
predictions = []
for _ in range(20):
    # Draw a fresh sample (with replacement) for every tree.  Sampling once
    # outside the loop would train all 20 trees on the same rows and make
    # the vote meaningless.
    # NOTE(review): assumes car_data / car_labels hold at least 1000 rows.
    indices = [random.randint(0, 999) for _ in range(1000)]
    data_subset = [car_data[index] for index in indices]
    labels_subset = [car_labels[index] for index in indices]
    subset_tree = build_tree(data_subset, labels_subset)
    predictions.append(classify(unlabeled_point, subset_tree))
print(predictions)
# Majority vote: the label that occurs most often among the predictions.
final_prediction = max(predictions, key=predictions.count)
print(final_prediction)

示例#5

0

显示文件

文件： random_forest_module20.py 项目： akhilchintala/Codecademy-Data-Science-Path

# Collect the rows and labels selected by ``indices`` and print the result of
# find_best_split() for that subset.  ``indices``, ``car_data``,
# ``car_labels`` and ``find_best_split`` must already be defined when this
# fragment runs.
data_subset = []
labels_subset = []
for index in indices:
    data_subset.append(car_data[index])
    labels_subset.append(car_labels[index])
best_split = find_best_split(data_subset, labels_subset)
print(best_split)

from tree import build_tree, print_tree, car_data, car_labels, classify
import random
random.seed(4)

# Feature order: price of the car, cost of maintenance, number of doors,
# number of people the car can hold, size of the trunk, safety rating.
unlabeled_point = ['high', 'vhigh', '3', 'more', 'med', 'med']

tree_count = 20
sample_size = 1000
predictions = []
# Keep growing trees until every one of them has cast a vote.
while len(predictions) < tree_count:
    # New random sample of row positions (with replacement) per tree.
    indices = [random.randint(0, 999) for _ in range(sample_size)]
    data_subset = [car_data[position] for position in indices]
    labels_subset = [car_labels[position] for position in indices]
    subset_tree = build_tree(data_subset, labels_subset)
    vote = classify(unlabeled_point, subset_tree)
    predictions.append(vote)
print(predictions)

# The most frequent vote wins.
final_prediction = max(predictions, key=predictions.count)
print(final_prediction)

from tree import training_data, training_labels, testing_data, testing_labels, make_random_forest, make_single_tree, classify
import numpy as np
import random
# Seed both random number generators so repeated runs use the same sequence.
np.random.seed(1)
random.seed(1)
# One tree built from the full training set.
tree = make_single_tree(training_data, training_labels)
# A forest built from the same training set; 40 is passed as the first
# argument (presumably the number of trees -- confirm in make_random_forest).
forest = make_random_forest(40, training_data, training_labels)
# Counters initialised to zero; they are not updated in this fragment.
forest_correct = 0
single_tree_correct = 0
# Classify every test point with the single tree.
# NOTE(review): the fragment ends here; ``prediction`` and ``forest`` are
# not used yet.
for i in range(len(testing_data)):
    prediction = classify(testing_data[i], tree)

示例#6

0

显示文件

import treePlot
import tree

if __name__ == '__main__':
    # Load the sample data set; only the feature labels are used below.
    my_data, class_labels = tree.create_dataset()

    # Earlier experiments, kept for reference:
    # my_tree = tree.create_tree(my_data, class_labels)
    #
    # number_leafs = tree.get_number_leafs(my_tree)
    # print(number_leafs)
    #
    # tree_depth = tree.get_tree_depth(my_tree)
    # print(tree_depth)
    # treePlot.create_plot(my_tree)

    # Take tree number 0 from treePlot.retrieve_tree and classify one
    # feature vector with it.
    sample = [1, 0]
    my_tree = treePlot.retrieve_tree(0)
    print(tree.classify(my_tree, class_labels, sample))

示例#7

0

显示文件

# -*- coding:utf-8 -*-

import tree
import treePlotter

# Load the demo data set and its feature labels.
feature, labels = tree.create_data_set()
# Earlier experiments with the individual helpers, kept for reference:
# en = tree.calcShannomEnt(feature)
# print en
# print feature
# print  labels

# feature[0][-1] = "maybe"
# en2 = tree.calcShannomEnt(feature)
# print feature
# print en2

# split = tree.splitDataSet(feature,0, 0)
# print tree.splitDataSet(feature,0, 0)
# print tree.splitDataSet(feature,0, 1)

# bestFeature = tree.chooseBestFeature(feature)
# print bestFeature

# Build the decision tree and show it.
myTree = tree.create_tree(feature, labels)
print myTree

# treePlotter.createPlot()
# Reload the data set so classify() gets a fresh ``labels`` list.
# NOTE(review): presumably create_tree() modifies the labels it is given --
# confirm against tree.create_tree.
feature, labels = tree.create_data_set()
# Classify the feature vector [1, 0] and print the predicted class.
pre = tree.classify(myTree, labels, [1, 0])
print pre

示例#8

0

显示文件

# -*- coding: utf-8 -*-
import tree
import copy
# File used to store the tree and read it back.  Raw string: in a normal
# literal "\N" starts a named-Unicode escape (\N{...}), which makes this path
# a SyntaxError on Python 3.  The path text itself is unchanged.
TREE_FILE = r"F:\NatureRecognition/tree.txt"

dataset, label = tree.createDataSet()
print(label)
# Plain assignment (labels = label) is not enough here: both names would
# refer to the same object in memory, so work on an independent copy.
labels = copy.deepcopy(label)
myTree = tree.createTree(dataset, labels)
# print(myTree)
print(label)
# Classify with the untouched ``label`` list, not the copy given to createTree.
testResult = tree.classify(myTree, label, [1, 1])
print(testResult)
# Round trip: store the tree, load it again and show what came back.
tree.storeTree(myTree, TREE_FILE)
tt = tree.grabTree(TREE_FILE)
print(tt)

示例#9

0

显示文件

# Walk through the tree / treePlotter API with small inline data sets.
# Every call receives its own literal so no call can affect the next one.

# Result of calcShannonEnt for five rows that all carry the class 'yes'.
print tree.calcShannonEnt([[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'yes'],
                           [0, 1, 'yes'], [0, 1, 'yes']])

# splitDataSet called with the data set and the arguments 0 and 1
# (presumably feature index and feature value -- confirm in tree.splitDataSet).
print tree.splitDataSet(
    [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']],
    0, 1)

# Result of chooseBestFeatureToSplit for the same five rows.
print tree.chooseBestFeatureToSplit([[1, 1, 'yes'], [1, 1, 'yes'],
                                     [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']])

# Build a tree from the rows plus the two feature names.
print tree.createTree(
    [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']],
    ['No Surfacing?', 'Flippers?'])

# Hand-written tree in nested-dict form, used for the plotting helpers.
t = {'No Surfacing?': {0: 'no', 1: {'Flippers?': {0: 'no', 1: 'yes'}}}}
print treePlotter.getNumLeafs(t)
print treePlotter.getTreeDepth(t)

treePlotter.createPlot(t)

# Classify the feature vector [1, 0] with an equal hand-written tree and the
# matching feature names.
print tree.classify(
    {'No Surfacing?': {
        0: 'no',
        1: {
            'Flippers?': {
                0: 'no',
                1: 'yes'
            }
        }
    }}, ['No Surfacing?', 'Flippers?'], [1, 0])

示例#10

0

显示文件

# labels = ['no surfacing', 'filppers']
# dataset[0][-1] = 'maybe'
# shannonEnt =  tree.calcShannonEnt(dataset)
# print shannonEnt

# print tree.splitDataSet(dataset, 0, 0)
# print tree.chooseBestFeature(dataset)
# print tree.createTree(dataset, labels)
# treeplotter.createPlot()
# myTree = treeplotter.retrieveTree(0)
# print myTree
# print treeplotter.getNumLeafs(myTree)
# print treeplotter.getTreeDepth(myTree)
# treeplotter.createPlot(myTree)
# print tree.classify(myTree, labels,[1,1])
# Read the tab-separated lenses data.  The context manager closes the file,
# and the line terminator is removed so the value in the last column (used as
# the expected class below) does not carry a trailing newline.
with open('lenses.txt') as fr:
    lensesAll = [inst.rstrip("\r\n").split("\t") for inst in fr]

# Rows 0-4 are held back for the check at the bottom; the rest train the tree.
lensesTrain = lensesAll[5:]
lensesLables = ['age', 'prescript', 'astigmatic', 'tearRate']
# createTree gets a copy of the label list; the original is reused for
# classify() below.
lensesTree = tree.createTree(lensesTrain, lensesLables[:])
# treeplotter.createPlot(lensesTree)
# lensesTree =  tree.grabTree( 'Decision.txt')
# treeplotter.createPlot(lensesTree)

# Compare the predicted class with the recorded class for the held-back rows.
for sample in lensesAll[:5]:
    predicted = tree.classify(lensesTree, lensesLables, sample[0:-1])
    # Parenthesised single expression: valid on both Python 2 and 3.
    print("分类为%s, 正确为%s" % (predicted, sample[-1]))

示例#11

0

显示文件

import tree
import treeplotter

# Load the demo data set and show it.
dataset,labels = tree.createDataSet()
print(dataset)
print(labels)
# Keep a copy of the label list taken before createTree() runs; this copy is
# the one passed to classify() below.
# NOTE(review): presumably createTree() modifies ``labels`` -- confirm.
label = labels.copy()
#classlist = [example[-1] for example in dataset]
mytree = tree.createTree(dataset,labels)
print(mytree)

# Earlier plotting experiments, kept for reference:
#treeplotter.createPlot()
#print(treeplotter.getTreeDepth(mytree))
#createPlot(mytree)
#print(label)
# Classify the feature vector [1, 0], then plot the tree.
print(tree.classify(mytree,label,[1,0]))
treeplotter.createPlot(mytree)

示例#12

0

显示文件

from tree import training_data, training_labels, testing_data, testing_labels, make_random_forest, make_single_tree, classify
import numpy as np
import random
np.random.seed(1)
random.seed(1)
from collections import Counter

# Baseline: a single tree built from the training set.
tree = make_single_tree(training_data, training_labels)
single_tree_correct = 0

# Forest built from the same training set (40 is the first argument).
forest = make_random_forest(40, training_data, training_labels)
forest_correct = 0

for i in range(len(testing_data)):
    sample = testing_data[i]

    # Single-tree prediction for this test point.
    prediction = classify(sample, tree)
    expected = testing_labels[i]
    if prediction == expected:
        single_tree_correct += 1

    # Every tree in the forest votes; the most frequent vote wins.
    predictions = [classify(sample, forest_tree) for forest_tree in forest]
    forest_prediction = max(predictions, key=predictions.count)
    if forest_prediction == expected:
        forest_correct += 1

# Fraction of test points classified correctly by each model.
test_count = len(testing_data)
print(single_tree_correct / test_count)
print(forest_correct / test_count)

示例#13

0

显示文件

文件： test.py 项目： jasonber/DATA-SCIENTIST-

# tree.choose_best_feature_to_split(dataset)
#
# my_tree = tree.create_tree(dataset, labels)
#
# tree_plotter.retrieve_tree(1)

# File the tree is stored in and loaded from; defined once so both calls
# are guaranteed to use the same path.
CLASSIFIER_STORAGE = (
    "/home/zhangzhiliang/Documents/my_git/DATA-SCIENTIST-/"
    "machine_learing_algorithm/machine_learning_in_action/3_decision_tree/classifierStorage.txt"
)

my_tree = tree_plotter.retrieve_tree(0)
#
# tree_plotter.get_num_leafs(my_tree)
#
# tree_plotter.get_tree_depth(my_tree)
tree_plotter.create_plot(my_tree)

data, labels = tree.create_dataset()

# The return values are not captured or printed (interactive-session style).
tree.classify(my_tree, labels, [1, 0])

tree.classify(my_tree, labels, [1, 1])

# Store the tree object itself rather than the string literal 'my_tree'.
# NOTE(review): assumes store_tree(tree, filename) -- confirm the signature.
tree.store_tree(my_tree, CLASSIFIER_STORAGE)

tree.load_tree(CLASSIFIER_STORAGE)

# 隐形眼镜
fr = open(

示例#14

0

显示文件

文件： testTree.py 项目： SolemnJoker/ml-learn

import tree as t
import treePlotter as tp
import os

# Locate lenses.txt next to this script.  abspath() matters: when the script
# is started by a bare relative name, dirname(__file__) can be '' and plain
# string concatenation would then point at '/lenses.txt' in the root.
data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lenses.txt')
with open(data_path) as f:
    # One list of tab-separated fields per line.
    lenses = [r.strip().split('\t') for r in f]
lensesLabel = ['age','prescript','astigmatic','tearRate']
# Hand createTree a copy so ``lensesLabel`` stays complete for classify().
# NOTE(review): createTree() is presumed to mutate the label list -- confirm.
lensesTree = t.createTree(lenses, lensesLabel[:])
tp.createPlot(lensesTree)

# Print a header row, then every sample followed by its predicted class.
fmt = '%10s'
print([fmt % x for x in lensesLabel])
for lense in lenses:
    predicted = t.classify(lensesTree, lensesLabel, lense[0:-1])
    # One formatted string keeps the output identical on Python 2 and 3.
    print("%s %s" % ([fmt % x for x in lense], predicted))

示例#15

0

显示文件

# Source from Codecademy
from tree import build_tree, print_tree, car_data, car_labels, classify
import random

random.seed(4)

# Feature order: price of the car, cost of maintenance, number of doors,
# number of people the car can hold, size of the trunk, safety rating.
unlabeled_point = ['high', 'vhigh', '3', 'more', 'med', 'med']

# Train 20 trees, each on its own random sample drawn with replacement, and
# record what every tree predicts for the unlabeled point.
predictions = []
for _ in range(20):
    indices = [random.randint(0, 999) for _ in range(1000)]
    data_subset = [car_data[row] for row in indices]
    labels_subset = [car_labels[row] for row in indices]
    subset_tree = build_tree(data_subset, labels_subset)
    predictions.append(classify(unlabeled_point, subset_tree))

print(predictions)
# The final answer is the label predicted most often.
final_prediction = max(predictions, key=predictions.count)
print(final_prediction)

示例#16

0

显示文件

文件： plotMain.py 项目： realWHY/machine-learning-in-action-test

import treePlotter
import tree

myDat, labels = tree.createDataSet()
# Independent list holding the same label entries.
labelsTemp = list(labels[:])
print('00000000000000000000labels = ', labelsTemp)
#myTree = tree.createTree(myDat, labelsTemp)
#tree.storeTree(myTree,'Tree.txt')

# Load the tree from 'Tree.txt', then show the tree and the label list.
myTreeFromFile = tree.grabTree('Tree.txt')
for caption, value in (('myTreeFromFile = ', myTreeFromFile),
                       ('labels = ', labels)):
    print(caption, value)

# Classify the feature vector [1, 0] with the loaded tree.
result = tree.classify(myTreeFromFile, labels, [1, 0])
print('result = ', result)

示例#17

0

显示文件

文件： script.py 项目： Oksanatishka/codeacademy

# When considering buying a car, what factors go into making that decision?
# Each car can fall into four different classes which represent how satisfied someone would be with purchasing the car — unacc (unacceptable), acc (acceptable), good, vgood.
# Each car has 6 features:
#     - The price of the car which can be "vhigh", "high", "med", or "low".
#     - The cost of maintaining the car which can be "vhigh", "high", "med", or "low".
#     - The number of doors which can be "2", "3", "4", "5more".
#     - The number of people the car can hold which can be "2", "4", or "more".
#     - The size of the trunk which can be "small", "med", or "big".
#     - The safety rating of the car which can be "low", "med", or "high".

from tree import tree, classify, data

# Feature values of the car to classify, one per feature.
car = [
    "low",
    "low",
    "4",
    "4",
    "big",
    "high",
]
predicted_class = classify(car, tree)
print(predicted_class)

示例#18

0

显示文件

import arff
import tree
import sys

# Command line: <script> <train-file> <test-file> <m>
arg = sys.argv
# Third argument, converted to int and forwarded to tree.createTree().
m = int(arg[3])
# Context managers close both ARFF files once they have been loaded.
with open(arg[1], 'r') as trainFile:
    trainData = arff.load(trainFile)
with open(arg[2], 'r') as testFile:
    testData = arff.load(testFile)

myTree = tree.createTree(trainData['data'], trainData['attributes'], m)
tree.plotTree(myTree, trainData['attributes'])

# Predicted class for every test row, and the recorded class (last column).
prediction = [tree.classify(myTree, testData['attributes'], obs) for obs in testData['data']]
true = [obs[-1] for obs in testData['data']]
print("<Predictions for the Test Set Instances>")
n = 0  # number of correctly classified test rows
for index, (actual, predicted) in enumerate(zip(true, prediction), 1):
    if predicted == actual:
        n += 1
    # Each line is numbered with the 1-based row position, not the running
    # count of correct predictions.
    print("{}: Actual: {} Predicted: {}".format(index, actual, predicted))
print("Number of correctly classified: {} Total number of test instances: {}".format(n, len(testData['data'])))