Python DecisionTree.classify示例

编程语言: Python

命名空间/包名称: dtree

类/类型: DecisionTree

方法/功能: classify

hotexamples.com的示例: 6

Python DecisionTree.classify - 已找到6个示例。这些是从开源项目中提取的、评价最高的dtree.DecisionTree.classify真实Python示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

DecisionTree(12)

fit(6)

classify(4)

accuracy(3)

predict(3)

savePDF(2)

savePNG(2)

_count_leaf(1)

buildFromString(1)

executeTree(1)

index_to_class(1)

printState(1)

setCreditScore(1)

traverse(1)

treeToString(1)

示例#1

显示文件

def main():
    """Train a decision tree on a CSV data set and classify one sample.

    Reads ``--csv`` and ``--eval`` from the command line, fits a
    ``DecisionTree`` on a 75% sample of the rows, prints the leaf count and
    the tree's string form, classifies a single hand-written row and writes
    the tree to ``output.pdf`` and ``output.png``.

    Exits with a non-zero status (via ``parser.error``) when ``--eval`` is
    neither ``gini`` nor ``entropy``.
    """
    parser = argparse.ArgumentParser(description="csv data file path")
    # Required: without a path, pd.read_csv(None) would fail further down
    # with a much less helpful message.
    parser.add_argument("--csv",
                        type=str,
                        required=True,
                        help="The data file path")
    parser.add_argument(
        "--eval",
        type=str,
        default="gini",
        help=
        "The evaluation function, could be gini or entropy. Default using gini."
    )
    cli_args = parser.parse_args()

    if cli_args.eval not in ('gini', 'entropy'):
        # Report bad input as a failure; the previous exit(0) told the
        # calling shell that the run had succeeded.
        parser.error('The evaluation function should be gini or entropy')

    data = pd.read_csv(cli_args.csv)
    train = data.sample(frac=0.75, random_state=0)
    # Hold out exactly the rows that were not sampled. Selecting by index
    # keeps rows that happen to be duplicated inside the data set, which
    # concat(...).drop_duplicates(keep=False) would silently discard.
    # Only the disabled pruning step below consumes this split.
    test = data.drop(train.index)

    class_weights = {'setosa': 1, 'versicolor': 1, 'virginica': 1}
    tree = DecisionTree()
    # NOTE(review): --eval is validated above, but `gini` is always passed
    # here; confirm whether the chosen evaluation function should be used.
    tree.fit(train, class_weights, gini)
    # print(tree._error_rate(tree.root))
    print(tree._count_leaf(tree.root))
    # tree.prune(test, 0.0)

    print(tree.treeToString())

    # Classify one sample whose PetalLength is missing (NaN).
    data = pd.DataFrame(
        [[5.1, 3.5, np.nan, 1]],
        columns=['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'])
    print(tree.classify(data))

    tree.savePDF('output.pdf')
    tree.savePNG('output.png')

示例#2

显示文件

文件： parse-demo.py 项目： youzhonghui/sparsity-supported-decision-trees

def main():
    """Rebuild a decision tree from a text dump and score it on a CSV file.

    Reads ``--csv`` and ``--eval`` from the command line, loads the CSV,
    builds a ``DecisionTree`` from the hard-coded node listing below, prints
    the result of ``cal_metric`` for the tree's predictions against the last
    column of the data, and writes the tree to ``parse_output.pdf`` and
    ``parse_output.png``.

    Exits with a non-zero status (via ``parser.error``) when ``--eval`` is
    neither ``gini`` nor ``entropy``.
    """
    parser = argparse.ArgumentParser(description="csv data file path")
    # Required: without a path, pd.read_csv(None) would fail further down
    # with a much less helpful message.
    parser.add_argument("--csv",
                        type=str,
                        required=True,
                        help="The data file path")
    parser.add_argument(
        "--eval",
        type=str,
        default="gini",
        help=
        "The evaluation function, could be gini or entropy. Default using gini."
    )
    cli_args = parser.parse_args()

    # NOTE(review): --eval is validated here but not used anywhere below.
    if cli_args.eval not in ('gini', 'entropy'):
        # Report bad input as a failure; the previous exit(0) told the
        # calling shell that the run had succeeded.
        parser.error('The evaluation function should be gini or entropy')

    data = pd.read_csv(cli_args.csv)
    tree = DecisionTree()

    # Node listing consumed line by line by buildFromString. The leading
    # whitespace of every line is part of the literal and is kept verbatim.
    str_list = '''
    0:[CRP(mg/L)<5.5] yes=1,no=2,missing=2
	1:[白细胞总数(x10^9/L)<18.6850014] yes=3,no=4,missing=3
		3:[血小板计数(x10^9/L)<171.5] yes=7,no=8,missing=7
			7:[白细胞总数(x10^9/L)<11.8999996] yes=13,no=14,missing=13
				13:[CRP(mg/L)<1.5] yes=19,no=20,missing=20
					19:[中性粒细胞百分比(%)<52.5999985] yes=27,no=28,missing=28
						27:leaf=-0.0121546965
						28:leaf=0.0117647061
					20:[出生时体重(g)<1840] yes=29,no=30,missing=29
						29:leaf=0.0510822535
						30:leaf=0.00118343194
				14:[白细胞总数(x10^9/L)<14.71] yes=21,no=22,missing=21
					21:leaf=-0.0139534893
					22:leaf=0.00118343194
			8:[临床表现异常数<1.5] yes=15,no=16,missing=15
				15:[PCT(ng/ML)<0.375] yes=23,no=24,missing=23
					23:[中性杆状核粒细胞百分比(%)<5] yes=31,no=32,missing=31
						31:leaf=-0.146943495
						32:leaf=0.00930232555
					24:[中性粒细胞百分比(%)<41.0500031] yes=33,no=34,missing=34
						33:leaf=0.0122905029
						34:leaf=-0.00952380989
				16:[出生时体重(g)<1340] yes=25,no=26,missing=25
					25:leaf=-0.00346820801
					26:[出生时体重(g)<1670] yes=35,no=36,missing=35
						35:leaf=0.0171428584
						36:leaf=-0.00116959063
		4:[PCT(ng/ML)<0.13499999] yes=9,no=10,missing=10
			9:leaf=-0.00952380989
			10:[出生时体重(g)<2270] yes=17,no=18,missing=17
				17:leaf=0.084153004
				18:leaf=0.00118343194
	2:[CRP(mg/L)<6.5] yes=5,no=6,missing=6
		5:[白细胞总数(x10^9/L)<12.04] yes=11,no=12,missing=11
			11:leaf=0.0200000014
			12:leaf=-0.00952380989
		6:leaf=0.117241383
    '''
    # The third argument is presumably a per-class weight map -- TODO confirm
    # against DecisionTree.buildFromString.
    tree.buildFromString(str_list.split('\n'), data, {0: 1, 1: 2.5})
    # Compare the predictions with the last column of the CSV.
    print(cal_metric(tree.classify(data), data.values[:, -1]))

    tree.index_to_class = {0: '无感染', 1: '感染'}
    tree.savePDF('parse_output.pdf')
    tree.savePNG('parse_output.png')

示例#3

显示文件

    test_labels = test_data[:, 0]
    test_data = test_data[:, 1:]

    # create a map of the attributes so we can retain the original column numbers as the tree splits the data
    attributes = list(range(len(train_data[0])))

    # Do an initial run with the full training dataset
    correct = []
    p_max = 1.0
    level_max = 9
    tree = DecisionTree(train_data,
                        train_labels,
                        attributes,
                        p_threshold=p_max,
                        max_level=level_max)
    y = tree.classify(test_data)
    print("correct = {}".format(
        sum(np.asarray(y == test_labels, dtype=int)) / len(y) * 100))

    # Do 10 runs of 25-round bootstrap training varying the depth of the trees from 1 to 10 levels
    n = 25
    num_depths = 10
    bias = np.zeros(num_depths)
    variance = np.zeros(num_depths)
    accuracy = np.zeros(num_depths)
    depths = np.arange(1, num_depths + 1)
    for depth in depths:
        y = np.zeros((n, len(test_data)))
        # We are assuming that N(x) = 0, so there's no noise. This means y_star = y_t
        y_star = t = test_labels
        for i in range(n):

示例#4

显示文件

文件： dtree_test.py 项目： jabbermonkey/dtree_bias_var

    train_data = np.array(read_file('data/spect_train.txt', sep=','))
    train_labels = train_data[:,0]
    train_data = train_data[:,1:]
    test_data = np.array(read_file('data/spect_test.txt', sep=','))
    test_labels = test_data[:,0]
    test_data = test_data[:,1:]

    # create a map of the attributes so we can retain the original column numbers as the tree splits the data
    attributes = list(range(len(train_data[0])))

    # Do an initial run with the full training dataset
    correct=[]
    p_max = 1.0
    level_max = 9
    tree = DecisionTree(train_data, train_labels, attributes, p_threshold=p_max, max_level=level_max)
    y = tree.classify(test_data)
    print("correct = {}".format(sum(np.asarray(y == test_labels, dtype=int))/len(y)*100))

    # Do 10 runs of 25-round bootstrap training varying the depth of the trees from 1 to 10 levels
    n = 25
    num_depths = 10
    bias = np.zeros(num_depths)
    variance = np.zeros(num_depths)
    accuracy = np.zeros(num_depths)
    depths = np.arange(1,num_depths + 1)
    for depth in depths:
        y = np.zeros((n, len(test_data)))
        # We are assuming that N(x) = 0, so there's no noise. This means y_star = y_t
        y_star = t = test_labels
        for i in range(n):
            boot_data, boot_labels = bootstrap_replicate(train_data, train_labels)

示例#5

显示文件

文件： run.py 项目： e-dard/treepee

from dtree import DecisionTree
from id3 import information_gain

# Weather observations. Each entry holds one value per name in `keys`,
# separated by single spaces, with the target label last.
values = [
    "sunny hot high weak no",
    "sunny hot high strong no",
    "overcast hot high weak yes",
    "rain mild high weak yes",
    "rain cool normal weak yes",
    "rain cool normal strong no",
    "overcast cool normal strong yes",
    "sunny mild high weak no",
    "sunny cool normal weak yes",
    "rain mild normal weak yes",
    "sunny mild normal strong yes",
    "overcast mild high strong yes",
    "overcast hot normal weak yes",
    "rain mild high strong no",
]
keys = ["outlook", "temperature", "humidity", "wind", "target"]
# One dict per observation, mapping each column name to its value.
data = [dict(zip(keys, row.split())) for row in values]

if __name__ == '__main__':
    # Every column except the last is an attribute; the last is the target.
    tree = DecisionTree(data,
                        keys[:-1],
                        information_gain,
                        target_name=keys[-1])
    from pprint import PrettyPrinter
    pp = PrettyPrinter()
    pp.pprint(tree.tree)

    # Show each record's actual target next to the second element of the
    # pair returned by classify (presumably the predicted label -- confirm
    # against DecisionTree.classify). print is called as a function so the
    # line parses on Python 3; on Python 2 the output is unchanged.
    print([(x[0]['target'], x[1]) for x in tree.classify(data)])

示例#6

显示文件

文件： run.py 项目： e-dard/treepee

from dtree import DecisionTree
from id3 import information_gain

# Weather observations, one per line. Each line holds one value per name in
# `keys`, separated by single spaces, with the target label last.
values = """\
sunny hot high weak no
sunny hot high strong no
overcast hot high weak yes
rain mild high weak yes
rain cool normal weak yes
rain cool normal strong no
overcast cool normal strong yes
sunny mild high weak no
sunny cool normal weak yes
rain mild normal weak yes
sunny mild normal strong yes
overcast mild high strong yes
overcast hot normal weak yes
rain mild high strong no
""".splitlines()
keys = ["outlook", "temperature", "humidity", "wind", "target"]
# One dict per observation, mapping each column name to its value.
data = [dict(zip(keys, record.split())) for record in values]

if __name__ == '__main__':
    # Every column except the last is an attribute; the last is the target.
    tree = DecisionTree(data, keys[:-1], information_gain, target_name=keys[-1])
    from pprint import PrettyPrinter
    pp = PrettyPrinter()
    pp.pprint(tree.tree)

    # Show each record's actual target next to the second element of the
    # pair returned by classify (presumably the predicted label -- confirm
    # against DecisionTree.classify). The Python 2 print statement is a
    # syntax error on Python 3; the call form prints the same list on both.
    print([(x[0]['target'], x[1]) for x in tree.classify(data)])