Example #1
def main():
    #Insert input file
    file = open('example_training.csv')
    target = "action"
    data = [[]]
    for line in file:
        line = line.strip("\r\n")
        data.append(line.split(','))
    data.remove([])
    attributes = data[0]
    data.remove(attributes)
    db = createDatabase(attributes,data)
    #Run ID3
    # Note: we can remove the target attribute from the attributes list
    tree = DecisionTree.makeTree(db, attributes, target, 0)
    print "generated tree"
    tree.accept(DecisionNode.PrintTreeVisitor())
    print 'adding new examples'
    example1 = ['hurt','exposed','with_ammo','defend']
    print example1
    tree = DecisionTree.updateTree(tree, db, example1, attributes, target)
    tree.accept(DecisionNode.PrintTreeVisitor())
    example2 = ['healthy','exposed','with_ammo','run']
    print example2
    tree = DecisionTree.updateTree(tree, db, example2, attributes, target)
    tree.accept(DecisionNode.PrintTreeVisitor())
Example #2
def AdaBoost(data, labels, test_data, test_labels, values, T, printError=False):
	# Step 1: Initialize weights
	weights = np.array([1/len(data)]*len(data))

	int_labels = getIntLabels(labels)
	int_test_labels = getIntLabels(test_labels)

	# Step 2: for each iteration...
	h_ts = []
	votes = []
	for iteration in range(T):
		h_t = DecisionTree.getDecisionStump(data,labels,values,weights)
		h_ts.append(h_t)
		error, predictions = DecisionTree.getErrorAndPredictions(h_t, data, labels)
		summa = 0
		for index in range(len(predictions)):
			summa += weights[index]*predictions[index]*int_labels[index]
		e_t = .5 - (.5*summa)
		if printError:
			error, test_predictions = DecisionTree.getErrorAndPredictions(h_t, test_data, test_labels)
			summa = 0
			for index in range(len(test_predictions)):
				summa += (1/len(test_labels))*test_predictions[index]*int_test_labels[index]
			test_error = .5 - (.5*summa)
			print(iteration, e_t, test_error)
		# compute its vote
		vote_t = 0.5 * np.log((1-e_t)/e_t)
		votes.append(vote_t)
		# update the weights
		new_weights_0 = weights*np.exp(-vote_t*(int_labels*predictions))
		new_weights = new_weights_0/np.sum(new_weights_0)
		weights = new_weights
	# Step 3, return the final hypothesis:
	return h_ts, votes
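The function returns the stumps and their votes but no combined classifier. A minimal sketch (not part of the original; get_predictions is a hypothetical stand-in for however DecisionTree exposes stump predictions) of the final hypothesis H(x) = sign(sum_t vote_t * h_t(x)):

import numpy as np

def adaboost_predict(h_ts, votes, data, get_predictions):
    # get_predictions(h_t, data) is assumed to return +1/-1 stump outputs
    scores = np.zeros(len(data))
    for h_t, vote_t in zip(h_ts, votes):
        scores += vote_t * np.asarray(get_predictions(h_t, data))
    return np.sign(scores)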
Example #3
def trainNN(db, params):
	plot = False
	show_output = False

	random.shuffle(db)
	test_set = db
	train_set = db

	num_input_nodes = len(db[0])-1
	num_output_nodes = 1

	if params['num_hidden_layers'] == 1:
		model = NN1.createModel(num_input_nodes, params['num_hidden_layers'], params['num_hidden_nodes'], num_output_nodes, params['learning_rate'], list(db[0].keys()), params['output_name'])
		model = NN1.trainModelMiniBatch(model, train_set, params['batchSize'], params['output_name'], params['act_type'], params['rounds'], plot=plot)
	if params['num_hidden_layers'] == 2:
		model = NN2.createModel(num_input_nodes, params['num_hidden_layers'], params['num_hidden_nodes'], num_output_nodes, params['learning_rate'], list(db[0].keys()), params['output_name'])
		model = NN2.trainModelMiniBatch(model, train_set, params['batchSize'], params['output_name'], params['act_type'], params['rounds'], plot=plot)
	if params['num_hidden_layers'] == 3:
		model = NN3.createModel(num_input_nodes, params['num_hidden_layers'], params['num_hidden_nodes'], num_output_nodes, params['learning_rate'], list(db[0].keys()), params['output_name'])
		model = NN3.trainModelMiniBatch(model, train_set, params['batchSize'], params['output_name'], params['act_type'], params['rounds'], plot=plot)

	if params['saveModels']:
		NN3.exportModel(model, "../models/"+params['experiment_name']+'/NN/')

	rules = None
	if params['deepRED']:
		rules = RED.getRules(model, db, params['output_name'], simplify=True)
		file = open('../models/'+ params['experiment_name'] + '/NN/NN_tree.txt', "w")
		DT.getTXTReprentation(rules, file)
		file.close()
		file = open('../models/'+ params['experiment_name'] + '/NN/NN_rules.txt', "w")
		DT.getRuleReprentation(rules, file)
		file.close()

	return model, rules
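For reference, a hypothetical params dict covering the keys trainNN reads above (the values are illustrative assumptions, not from the original):

params = {
    'num_hidden_layers': 2,      # 1, 2 or 3 selects NN1/NN2/NN3
    'num_hidden_nodes': 16,
    'learning_rate': 0.01,
    'batchSize': 32,
    'act_type': 'relu',
    'rounds': 100,
    'output_name': 'action',
    'experiment_name': 'demo',
    'saveModels': False,
    'deepRED': False,
}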
Example #4
def classify_dataset_test():
    #create dataset
    filename = "Dataset/iris.data"
    dataset = DT.Dataset(filename, _delimiter=',')
    Tree = DT.DecisionTree(dataset)

    #load examples
    exemple1 = np.array([5.4, 3.9, 1.3, 0.4]).astype('S15')
    exemple2 = np.array([6.3, 2.5, 4.9, 1.5]).astype('S15')
    exemple3 = np.array([
        6.5,
        3.0,
        5.5,
        1.8,
    ]).astype('S15')

    #classify examples
    class1 = Tree.classify(exemple1)
    class2 = Tree.classify(exemple2)
    class3 = Tree.classify(exemple3)

    #verify classification
    eq_(class1, b'Iris-setosa')
    eq_(class2, b'Iris-versicolor')
    eq_(class3, b'Iris-virginica')
Example #5
    def setUp(self):
        """
        Setup internal parameters used multiple times.
        """
        # Create decision tree with leaf size of 1
        self.leaf_terminate_1 = 1
        dt_1 = DT.RegressionDecisionTree(split_type='rss',
                                         leaf_terminate=self.leaf_terminate_1)

        # Create decision tree with leaf size as 2
        self.leaf_terminate_2 = 2
        dt_2 = DT.RegressionDecisionTree(split_type='rss',
                                         leaf_terminate=self.leaf_terminate_2)

        # Make simple input data
        self.x_data_1 = np.array([[1, 4], [6, 7], [1, 4], [2, 3], [4, 5],
                                  [1, 5], [3, 6], [1, 4], [3, 1], [8, 9]])
        self.y_data_1 = np.array([5, 6, 5, 1, 6, 7, 8, 6, 4, 0])

        # Train the data
        dt_1.fit(self.x_data_1, self.y_data_1)
        dt_2.fit(self.x_data_1, self.y_data_1)

        # Get the result object
        self.result_tree_1 = dt_1.get_tree()
        self.result_tree_2 = dt_2.get_tree()
Example #6
    def setUp(self):
        """
        Setup internal parameters used multiple times.
        """
        # Create two decision trees with pure-leaf termination criteria,
        # one using gain ratio and one using the Gini index
        dt_1 = DT.ClassificationDecisionTree(
            split_type='gain_ratio',
            terminate='pure',
        )
        dt_2 = DT.ClassificationDecisionTree(
            split_type='gini',
            terminate='pure',
        )

        # Make simple input data
        self.x_data_1 = np.array([[1, 1], [2, 1], [3, 1], [4, 1], [5, 1],
                                  [6, 2], [7, 2], [8, 2], [9, 2], [10, 2]])
        self.y_data_1 = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1])

        # Train the data
        dt_1.fit(self.x_data_1, self.y_data_1)
        dt_2.fit(self.x_data_1, self.y_data_1)

        # Get the result object
        self.result_tree_1 = dt_1.get_tree()
        self.result_tree_2 = dt_2.get_tree()
Пример #7
0
    def setUp(self):
        """
        Setup internal parameters used multiple times.
        """
        # Create decision tree with leaf size of 1
        self.leaf_terminate_1 = 1
        self.dt_1 = DT.ClassificationDecisionTree(
            split_type='gini',
            terminate='leaf',
            leaf_terminate=self.leaf_terminate_1)

        # Create decision tree with leaf size as 2
        self.leaf_terminate_2 = 2
        self.dt_2 = DT.ClassificationDecisionTree(
            split_type='gini',
            terminate='leaf',
            leaf_terminate=self.leaf_terminate_2)

        # Create decision tree with leaf pure termination criteria
        self.dt_3_pure = DT.ClassificationDecisionTree(
            split_type='gini',
            terminate='pure',
        )

        # Make simple input data
        self.x_data_1 = np.array([[1, 4], [6, 7], [1, 4], [2, 3], [4, 5],
                                  [1, 5], [3, 6], [1, 4], [3, 1], [8, 9]])
        self.y_data_1 = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1])

        # Train the data
        self.dt_1.fit(self.x_data_1, self.y_data_1)
        self.dt_2.fit(self.x_data_1, self.y_data_1)
        self.dt_3_pure.fit(self.x_data_1, self.y_data_1)
Example #8
 def decisionTreeLearning(examples, attributes, parents_examples=()):
     if len(examples) == 0:
         return pluralityValue(
             parents_examples
         )  #returns the most frequent classification among the examples
     elif allSameClass(examples):
         return DecisionTree.Leaf(
             examples[0][dataset.target]
         )  #if they all have the same class, return the class of the first example
     elif len(attributes) == 0:
         return pluralityValue(
             examples
         )  #returns the most frequent classification among the examples
     else:
         mostImpAtt, threshold = chooseAttribute(attributes, examples)
         tree = DecisionTree.DecisionTree(mostImpAtt, threshold,
                                          dataset.attrnames[mostImpAtt])
         ExampleMinor, ExampleMajor = splittingOnThreshold(
             mostImpAtt, threshold, examples)  #separate based on threshold
         #do recursion and add to the tree
         branchesLeft = decisionTreeLearning(ExampleMinor,
                                             removeAttr(
                                                 mostImpAtt, attributes),
                                             examples)  #recursion
         branchesRight = decisionTreeLearning(ExampleMajor,
                                              removeAttr(
                                                  mostImpAtt, attributes),
                                              examples)  #recursion
         tree.addLeft(threshold, branchesLeft)
         tree.addRight(threshold, branchesRight)
         return tree
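pluralityValue is referenced but not shown; a minimal sketch of what the comments describe, assuming dataset.target indexes the class column as in the code above:

from collections import Counter

def pluralityValue(examples):
    # return a leaf carrying the most frequent classification
    counts = Counter(example[dataset.target] for example in examples)
    return DecisionTree.Leaf(counts.most_common(1)[0][0])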
Example #9
 def decisionTreeLearning(examples, attributes, parents_examples=()):
     if len(examples) == 0:
         return pluralityValue(
             parents_examples
         )  #returns the most frequent classification among the examples
     elif allSameClass(examples):
         return DecisionTree.Leaf(
             examples[0][dataset.target]
         )  #if they all have the same class, return the class of the first example
     elif len(attributes) == 0:
         return pluralityValue(
             examples
         )  #returns the most frequent classification among the examples
     else:
         if ce == 0:
             mostImpAtt, threshold = chooseAttribute(attributes, examples)
         else:
             mostImpAtt, threshold = chooseAttribute2(attributes, examples)
         tree = DecisionTree.DecisionTree(mostImpAtt, threshold,
                                          dataset.attrnames[mostImpAtt])
         ExampleMinor, ExampleMajor = splittingOnThreshold(
             mostImpAtt, threshold,
             examples)  #split based on the threshold
         #recurse and add to the tree
         branchesLeft = decisionTreeLearning(ExampleMinor,
                                             removeAttr(
                                                 mostImpAtt, attributes),
                                             examples)  #recursion
         branchesRight = decisionTreeLearning(ExampleMajor,
                                              removeAttr(
                                                  mostImpAtt, attributes),
                                              examples)  #recursion
         tree.addLeft(threshold, branchesLeft)
         tree.addRight(threshold, branchesRight)
         return tree
Example #10
def predict(filename):
    """
        Invoke the decision tree
    """
    tree,score=DecisionTree.build_decision_tree(filename)
    while True:
        domain=domains.get(timeout=30)
        predict=DecisionTree.predict(domain,tree)
        predicts.put([domain,predict])
Example #11
def main(argv):
    if len(argv) != 3:
        print "incorrect input format"
        return

    train_file = argv[1]
    test_file = argv[2]
    forest = train_random_forest(DT.load_data(train_file))
    test_random_forest(DT.load_data(test_file), forest)
Example #12
def binUsers(syn_pop_file):
	bnd_pop_file = bnd_syn_pop
	DecisionTree.classify(syn_pop_file, bnd_pop_file)
	
	csv_to_list = []
	with open(bnd_pop_file, 'r') as f:
		csv_to_list = [{k: int(v) for k, v in row.items()}
			for row in csv.DictReader(f, skipinitialspace=True)]
	
		return csv_to_list
Example #13
 def fit(self, X, Y):
     N = len(X)
     d = int(len(X[0]) * 0.5)  # np.int was removed in modern NumPy; use the builtin
     for i in range(N):
         print("Progress:", i, "of ", N)
         sel = np.random.choice(len(X), size=len(X), replace=True)
         Xb, Yb = X[sel], Y[sel]
         model = DecisionTree()
         model.fit(Xb, Yb, d)
         self.models.append(model)
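A matching predict step for this bagged ensemble would take a majority vote across self.models; a hedged sketch (each model's predict signature is assumed):

import numpy as np

def predict(self, X):
    # one row of votes per bootstrap tree, then a per-sample majority vote
    votes = np.array([model.predict(X) for model in self.models])
    return np.array([np.bincount(col).argmax() for col in votes.T.astype(int)])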
Example #14
    def _rec_build_random_tree(training_data_cut, rec_count):
        # increase recursion count by 1
        rec_count += 1

        # find the feature to split the data that provides greatest information gain from a random sample
        # returns tuple ((feature_name, feature_index), (fc_has_vote, sc_has_vote), (fc_has_not_vote, sc_has_not_vote))
        feature_and_votes = _find_best_sampled_feature(training_data_cut)

        # if training data falls below a preset threshold or the vote is unanimous build a Leaf node;
        # otherwise split data on feature and build a Tree node; also enforce a recursion limit
        fc_has_vote = feature_and_votes[1][0]
        sc_has_vote = feature_and_votes[1][1]
        fc_has_not_vote = feature_and_votes[2][0]
        sc_has_not_vote = feature_and_votes[2][1]

        # length of training data cut
        cut_length = len(training_data_cut)

        # build left (has feature) branch
        if cut_length < _leaf_threshold or fc_has_vote == 0 or sc_has_vote == 0 or rec_count > _rec_limit:
            # build Leaf based on votes
            left_branch = DecisionTree.Leaf((fc_has_vote, sc_has_vote))
        else:
            # split out and build Tree
            has_feature_data = []
            for tree_row in training_data_cut:
                # add 2 to feature index to skip RECORD and CLASS columns
                feature_index = feature_and_votes[0][1] + 2
                if tree_row[feature_index]:
                    has_feature_data.append(tree_row)

            # recurse into the left branch building the tree of data that has feature
            left_branch = _rec_build_random_tree(has_feature_data, rec_count)

        # build right (has not feature) branch
        if cut_length < _leaf_threshold or fc_has_not_vote == 0 or sc_has_not_vote == 0 or rec_count > _rec_limit:
            # build Leaf based on votes
            right_branch = DecisionTree.Leaf((fc_has_not_vote, sc_has_not_vote))
        else:
            # split out and build Tree
            has_not_feature_data = []
            for tree_row in training_data_cut:
                # add 2 to feature index to skip RECORD and CLASS columns
                feature_index = feature_and_votes[0][1] + 2
                if not tree_row[feature_index]:
                    has_not_feature_data.append(tree_row)

            # recurse into the right branch building the tree of data without feature
            right_branch = _rec_build_random_tree(has_not_feature_data, rec_count)

        # build tree with splitting feature name and index, and the left and right branches
        feature_name_index = feature_and_votes[0]
        random_tree = DecisionTree.Tree(feature_name_index, left_branch, right_branch)

        return random_tree
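For reference, an illustrative (made-up) value matching the tuple shape documented above: the splitting feature's name and index, then the class-vote counts on each side of the split:

# (('feature_name', feature_index), (fc_has_vote, sc_has_vote), (fc_has_not_vote, sc_has_not_vote))
example_feature_and_votes = (('vote_123', 7), (12, 5), (2, 9))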
Example #15
def buildtree(x,y, samples, min_node=1, result_cur = None):
    if type(x) != np.ndarray:
        x = np.array(x)
    if type(y) != np.ndarray:
        y = np.array(y)
    if type(samples) != np.ndarray:
        samples = np.array(samples)
    if len(samples) == 0:
        return DTme.decisionnode()
    ## transform old rank to new rank form
    if y.ndim == 2:
        # rank_old form #
        y = y.tolist()
        temp = map(rankO2New, y)
        y = np.array(temp)


    if result_cur is None:
        result_cur = MM(y[samples])

    if len(samples)<= min_node:
        return DTme.decisionnode(result=result_cur[1])
    # find best split
    best_gain = 0.0
    best_split = []
    best_sets = []
    best_sets_result = []

    N_feature = x.shape[1]
    start = datetime.now() ### test
    for feature in range(N_feature):
        # nlogn selection
        min_var, split, sets, sets_result = bestSplit(x,y,samples,feature)
        if min_var is None:
            continue
        gain = result_cur[0] - min_var
        # print "feature: ", feature, "gain: ", gain, "result_cur: ", result_cur, "min_var: ", min_var ### test
        if gain > best_gain and len(sets[0]) * len(sets[1]) > 0:
            best_gain = gain
            best_split = split
            best_sets = sets
            best_sets_result = sets_result
    duration = datetime.now() - start ### test
    print "Nsamps: ", len(samples)
    print "duration: ", duration.total_seconds()

    if best_gain > 0:
        tb = buildtree(x,y, best_sets[0], min_node = min_node, result_cur = best_sets_result[0])
        fb = buildtree(x,y, best_sets[1], min_node = min_node, result_cur = best_sets_result[1])
        return DTme.decisionnode(feature = best_split[0], value = best_split[1], result = result_cur[1],
                            tb = tb, fb = fb,
                            gain = (tb.gain+fb.gain+best_gain), size_subtree = (tb.size+fb.size))
    else:
        return DTme.decisionnode(result = result_cur[1])
Example #16
def predictInstance(input):

    instance = input[0]
    trees = input[1]
    func_predict = lambda x: DecisionTree.predict(instance, x)
    prediction = map(func_predict, trees)
    #print(prediction)
    summarizeVoting = DecisionTree.labelCounts(prediction)
    #print(summarizeVoting)
    #print(max(summarizeVoting, key = summarizeVoting.get))
    return max(summarizeVoting, key=summarizeVoting.get)
Example #17
def createPlot(inTree):
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
    plotTree.totalW = float(DecisionTree.getNumLeafs(inTree))
    plotTree.totalD = float(DecisionTree.getTreeDepth(inTree))
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = -1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.show()
Example #18
def main():

    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename, True)
    test_partition = util.read_arff(opts.test_filename, False)

    # create an instance of the DecisionTree class from the train_partition
    tree = DecisionTree(train_partition, (vars(opts)).get("depth"))
    rootnode = tree.constructsubtree(train_partition,
                                     (vars(opts)).get("depth"), 0)

    #print text representation of the DecisionTree
    tree.printtree(rootnode)
Example #19
 def fit(self, data, label):
     num_samples, total_features = data.shape
     for tree_num in range(self.num_trees):
         # print("TREE:", tree_num)
         random_rows = np.random.randint(0, num_samples, num_samples)
         random_features = np.random.choice(total_features,
                                            self.num_features,
                                            replace=False)
         random_data = data[random_rows, :][:, random_features]
         random_labels = label[random_rows]
         dt = DecisionTree(self.max_depth, self.min_obs)
         dt.fit(random_data, random_labels)
         self.trees += [(random_features, dt)]
Example #20
 def train(self, records, attributes):
     """Create subsample for each tree """
     self.tree_num = int(self.tree_num)
     for number in range(self.tree_num):
         self.forest.append(self.bootstrap(records))
     half = len(attributes) // 2  # integer division over the attribute count
     index = 1
     for tree in self.forest:
         dt = DecisionTree()
         print("Create TREE %d\n"%(index))
         attributes = self.shuffle_attributes(attributes, half)
         dt.train_random_forest(tree, attributes)
         self.get_tree.append(dt)
         index += 1
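The bootstrap helper is not shown; a minimal sketch of the step it presumably performs, sampling len(records) rows with replacement:

import random

def bootstrap(self, records):
    # draw a bootstrap sample the same size as the original record set
    return [random.choice(records) for _ in range(len(records))]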
Example #21
def main():
    training_data, features, classes = ingest_training_data(sys.argv[1])

    #print "Prior training data:"
    #print training_data

    #print training_data
    dt = DecisionTree(training_data, features, classes)

    #print "All training data:"
    #print dt.training_data
    #print "\n"
    
   # print dt.get_possible_values('mpg')
   # print dt.get_fixed_value_subset('mpg', 'OK')
   # print dt.get_class_counts(dt.training_data)
   # print dt.entropy_of_subset(dt.training_data)
   # for f in dt.features:
   #     print f + " " + str(dt.information_gain(f))
   # #dt.bin_values_median('displacement')
   # #dt.bin_values_quartile('displacement')
   # print dt.training_data[0]
   # print dt.features
   # for f in dt.features:
   #     print f + ": " + str(len(dt.get_possible_values(f)))
   # print dt.get_possible_values('displacement')
    
        
    dt.build_tree_id3()
    #display(dt) 

    testing_data, testing_features, testing_classes = ingest_training_data(sys.argv[2])
    print testing_data[0]
    bin_quartile(testing_data)
    print testing_data[0]

    

    num_test_instances = len(testing_data)
    num_correct = 0
    for t in testing_data:
        print "Actual class: " + str(t['class'])
        predicted_class = dt.classify(t)
        print "Predicted class: " + str(predicted_class)
        if predicted_class == t['class']:
            num_correct += 1

    accuracy = float(num_correct) / num_test_instances
    print "Accuracy = " + str(accuracy)
Example #22
def decisionTreeLearning_test():
    """"Function to test the general decision tree learning function."""

    #create dataset
    filename = "/home/jorge/Documents/2-Programming/AI/DecisionTree/Dataset/restaurant.txt"
    dataset = DT.Dataset(filename, _delimiter='\t')

    Tree = DT.DecisionTree(dataset)

    #show first branch of the decision tree shown on page 702
    # print(Tree.root)
    # print(Tree.leaf)

    #return tree for classify function next
    return Tree
Example #23
def lensesTest2():
    file = open('lenses.txt', 'r')
    dataSet = [inst.strip().split('\t') for inst in file.readlines()]
    labels = ['age', 'prescript', 'astigmatic', 'tearRate']  #feature labels

    print('Shannon entropy:', DecisionTree.calcShannonEnt(dataSet))
    print("Index of the best feature to split on: " + str(DecisionTree.chooseBestFeatureToSplit(dataSet)))

    myTree = DecisionTree.createTree(dataSet, labels)
    print(myTree)

    testVec = ['normal', 'no', 'presbyopic', 'myope']
    finalFeatLabels = ['tearRate', 'astigmatic', 'age', 'prescript']
    result = DecisionTree.classify(myTree, finalFeatLabels, testVec)
    print("预测结果为" + result)
Example #24
def dt_learn(dataset, attrs, parent_dist=None):
    if not dataset:
        return Dt.Leaf(parent_dist.get_most_common())
    dist = Distribution(dataset)
    if dist.is_leaf() or not attrs:
        return Dt.Leaf(dist.get_most_common())
    else:
        attr = max_gain(dataset, dist, attrs)
        tree = Dt.Node(attr)
        for v in attr.domain:
            dv = [d for d in dataset if d.x[attr.index] == v]
            child_attrs = [a for a in attrs if a != attr]
            subtree = dt_learn(dv, child_attrs, dist)
            tree.add_child(subtree, v)
        return tree
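max_gain is not shown; a hedged sketch of the attribute selection the code implies, picking the attribute whose split minimizes the weighted child entropy (equivalently, maximizes information gain). Here entropy(Distribution(...)) is an assumption about the Distribution API:

def max_gain(dataset, dist, attrs):
    def split_entropy(attr):
        total = 0.0
        for v in attr.domain:
            dv = [d for d in dataset if d.x[attr.index] == v]
            if dv:
                total += len(dv) / len(dataset) * entropy(Distribution(dv))
        return total
    # the attribute with minimal split entropy has maximal gain
    return min(attrs, key=split_entropy)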
Example #25
 def fit(self, X_train, y_train, is_continuous):
     '''
     Fit the data.
     Input: X, y
     Calls: the decision tree
     '''
     self.trees = []
     m = X_train.shape[0]
     for _ in range(self.n_trees):
         indices = np.random.choice(m, m, replace=True)
         X_train_i = X_train.iloc[indices, :]
         y_train_i = y_train.iloc[indices]
         tree = DecisionTree(self.criterion, self.max_high)
         tree.fit(X_train_i, y_train_i, is_continuous)
         self.trees.append(tree)
Example #26
def metrices(train_data, test_data, classes):
    actual_labels = []
    predict_labels = []
    for method in range(2):
        if method == 0:
            print('Decision Tree:')
            max_depth = DecisionTree.parameters(train_file)
            for sets in ['train_set', 'test_set']:
                if sets == 'train_set':
                    print('training set')
                    actual_labels = [row[-1] for row in train_data]
                    predict_labels = DecisionTree.decision_tree(
                        train_data, train_data, max_depth, 1)

                if sets == 'test_set':
                    print('testing set')
                    actual_labels = [row[-1] for row in test_data]
                    predict_labels = DecisionTree.decision_tree(
                        train_data, test_data, max_depth, 1)

                total, accuracy, conf_matrix = DecisionTree.conf_matrix(
                    actual_labels, predict_labels, classes)
                print('overall accuracy: {}'.format(accuracy))
                print_metrices(conf_matrix, total)
                continue
        else:
            print('Random Forest:')
            max_depth = len(train_data[0]) - 1
            F_features, tree_number = RandomForest.parameters(train_file)
            for sets in ['train_set', 'test_set']:
                if sets == 'train_set':
                    print('training set')
                    actual_labels = [row[-1] for row in train_data]
                    predict_labels = RandomForest.random_forest(
                        train_data, train_data, max_depth, 1, 1, tree_number,
                        F_features)
                if sets == 'test_set':
                    print('testing set')
                    actual_labels = [row[-1] for row in test_data]
                    predict_labels = RandomForest.random_forest(
                        train_data, test_data, max_depth, 1, 1, tree_number,
                        F_features)
                total, accuracy, conf_matrix = RandomForest.conf_matrix(
                    actual_labels, predict_labels, classes)
                print('overall accuracy: {}'.format(accuracy))
                print_metrices(conf_matrix, total)

    return
Example #27
    def generateRules(self, randomCharacterBeingSentSomehow):

        print"generateRules"

        reduced_mdp_list = []
        attributes = []
        self.mdp_list[-1] = [self.combineIdenticalMDPs(self.mdp_list[-1])]
        self.writeToList(self.mdp_list[-1][-1])

        training_set = self.StateActionPairs

        attr_shape = ("cube", "prism", "cuboid")
        attr_colour = ("red", "blue", "green")
        attr_size = ("small","medium","large")
        attribute_dict = [("has_shape(A,", attr_shape), ("has_colour(A, ", attr_colour), ("has_size(A, ", attr_size),
                            ("has_shape(D, ", attr_shape), ("has_colour(D, ",attr_colour), ("has_size(D, ", attr_size)]
        attribute_dict = OrderedDict(attribute_dict)
        index = 0
        names = attribute_dict.keys()
        values = attribute_dict.values()
        for name, vals in zip(names, values):
            attributes.append(Attribute(name, index, vals))
            index += 1
        self.decision_tree = DecisionTree(attributes, training_set)
        rules = self.decision_tree.getRules()
        rules = self.selectRules(rules)
        print ""
        print rules
        return rules
Example #28
def main():
    # # single data file
    file_name, target_attribute = 'datasets/bool_or.csv', 'output'
    data, attributes = read_csv(file_name, parse_attributes=True)
    train_data, test_data = train_test_split(data, test_size=0.3, shuffle=True)  # split in train and test data

    # separate data files
    # train_file, test_file, target_attribute = 'datasets/VoteTraining.csv', 'datasets/Vote.csv', 'class'
    # train_file, test_file, target_attribute = 'datasets/WeatherTraining.csv', 'datasets/Weather.csv', 'play'
    # train_file, test_file, target_attribute = 'datasets/SoybeanTraining.csv', 'datasets/Soybean.csv', 'class'
    # train_data, attributes = read_csv(train_file, parse_attributes=True)
    # test_data = read_csv(test_file)

    print("%d training sample, %d testing samples" % (len(train_data), len(test_data)))

    # remove empty rows left over from blank lines in the CSV
    while [''] in train_data:
        train_data.remove([''])
    while [''] in test_data:
        test_data.remove([''])

    # Run ID3 to generate a tree
    m_tree = DecisionTree.make_tree(train_data, attributes, target_attribute, 0)

    a, b = info(m_tree)
    print("Decision Tree contains %d judging attributes" % a)
    DrawTree.DrawTree(m_tree)

    # tree and list of attributes
    count, miss, err = test(test_data=test_data, tree=m_tree, attributes=attributes, target_attribute=target_attribute)

    print("%d total test cases, %d missed, %d wrong" % (count, miss, err))
Example #29
def main():
    print("Enter main()")
    #==========================================================================================
    # Plot the functions expressing the impurity of a decision tree:
    # node error rate, cross-entropy, and the Gini index
    #==========================================================================================
    tree = DecisionTree.DecisionTree()

    #-------------------------------
    # plot the impurity functions
    #-------------------------------
    figure = plt.figure()
    axis = plt.subplot(1,1,1)
    plt.grid(linestyle='-')
    
    tree.plotNodeErrorFunction( figure, axis )
    tree.plotCrossEntropyFunction( figure, axis )
    tree.plotGiniIndexFunction( figure, axis )

    plt.title("purity functions (i=1)")     # title
    plt.legend(loc = "upper left")          # 凡例    
    plt.tight_layout()                      # グラフ同士のラベルが重ならない程度にグラフを小さくする。

    # save & show the figure
    plt.savefig("./DecisionTree_scikit-learn_1.png", dpi=300)
    plt.show()

    print("Finish main()")
    return
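For reference (not from the original DecisionTree module), the three impurity measures plotted above, written out for a binary class probability p:

import numpy as np

def node_error(p):
    # misclassification rate: 1 - max(p, 1 - p)
    return 1.0 - np.maximum(p, 1.0 - p)

def cross_entropy(p):
    # -p*log(p) - (1 - p)*log(1 - p)
    return -(p * np.log(p) + (1.0 - p) * np.log(1.0 - p))

def gini_index(p):
    # 2*p*(1 - p)
    return 2.0 * p * (1.0 - p)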
Example #30
    def test_data_intake_regression(self):
        """
        Test the regression can intake the data in different formats and predict same result.
        """
        # Setup y data
        y_data_train = np.array([5, 6, 5, 1, 6, 7, 8, 6, 4, 0])
        y_data_true = np.array([5.3333333, 4, 0])

        y_series_train = pd.Series(y_data_train)
        y_matrix_train = np.asmatrix(y_data_train)

        # Regression tree
        regress_tree = DT.RegressionDecisionTree(split_type='rss',
                                                 leaf_terminate=1)

        # Test different inputs and assertions
        regress_tree.fit(self.x_df_train, y_series_train)
        self.assertEqual(
            list(np.round(regress_tree.predict(self.x_df_test), 6)),
            list(np.round(y_data_true, 6)))

        regress_tree.fit(self.x_matrix_train, y_matrix_train)
        self.assertEqual(
            list(np.round(regress_tree.predict(self.x_matrix_test), 6)),
            list(np.round(y_data_true, 6)))
Example #31
 def __init__(self, train, n_trees, sample_leaf_limits, sample_ratio,
              chara_ratio):
     '''
     : __init__: initialize the random forest from the parameters and train it on the training set
     : note: the implementation follows the steps in Li Hang's "Statistical Learning Methods"
     : param train: training set; the first column holds the class labels
     : type train: pd.DataFrame
     : param n_trees: number of decision trees in the random forest
     : type n_trees: int
     : param sample_leaf_limits: leaf-size limit handed to each decision tree
     : param sample_ratio: minimum fraction of samples drawn for each tree, in [0,1]
     : type sample_ratio: float
     : param chara_ratio: fraction of features drawn for each tree, in [0,1]
     : type chara_ratio: float
     '''
     self.forest = []
     fn = int(chara_ratio * (train.shape[1] - 1))
     for n in range(n_trees):
         temp1 = time.time()
         sf = np.random.choice(np.arange(1, train.shape[1]),
                               fn,
                               replace=False)
         sf = np.append(0, sf)
         train_n = train.iloc[:, sf]
         p = np.random.random_sample() * (1 - sample_ratio) + sample_ratio
         train_n = train_n.loc[np.random.choice(train_n.index,
                                                int(p * train_n.index.size),
                                                replace=False)]
         tree = DT.DecisionTree(train_n, sample_leaf_limits)
         self.forest.append(tree)
         temp2 = time.time()
         print('Tree %d of the random forest built successfully in %f s' % (n, temp2 - temp1))
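Hypothetical usage, assuming the enclosing class is named RandomForest and df is a pd.DataFrame whose first column holds the class labels, as the docstring states:

# forest = RandomForest(train=df, n_trees=10, sample_leaf_limits=5,
#                       sample_ratio=0.8, chara_ratio=0.7)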
Example #32
 def __init__(self, T=10, M=30, bagging=False):
     self.t = T
     self.m = M
     self.bagging = bagging
     self.forest = map(lambda i: DecisionTree(), range(T))
     self.shape = None
     self.selected_attributes = list()
Example #33
def main():
    #Insert input file
    file = open('dt-data.txt')
    #class attribute
    target = "Enjoy"
    data = [[]]
    for line in file:
        #cleaning the data
        line = line.strip("\r\n;:0123456789\t").replace(" ","")
        data.append(line.split(',') )
    data.remove([])
    attributes = data[0]
    #attributes = attributes.strip("()")
    attributes = [x.strip("()") for x in attributes]
    #need to pass only the non-target attributes, without the attribute header row and that one empty line
    data.remove(data[0])
    data.remove(data[0])
    #Run ID3
    print "Generated decision tree"
    tree = DecisionTree.makeTree(data, attributes, target, 0)
    pp = pprint.PrettyPrinter(indent = 4, depth = 14)
    pp.pprint(tree)
    #Generate IF THEN rules
    print "if - then rules"
    pre = ""
    rulegen(tree, pre)
Example #34
def evaluate_tree(decisionTree, test_set, all_classes):
    print("Evaluating Tree")
    list_tuples = []
    for t in range(len(test_set)):
        string = dt.evaluate_data(test_set[t], decisionTree)
        if string is None:
            print(
                "test #" + str(t) +
                " result: Unable to evaluate data, too much repetition on training set"
            )
        else:
            print("test #" + str(t) + " result: " +
                  "(Verdadeiro / Classificado) (" + test_set[t]["Class"] +
                  " / " + string + ")")
            tup = (test_set[t]["Class"], string)
            list_tuples.append(tup)

    if len(all_classes) == 2:
        precision, recall, f1 = performance_binary(list_tuples, all_classes)
        print("performance_binary:")
        print("precision:", str(precision))
        print("recall:", str(recall))
        print("f1:", str(f1))
    else:
        perf = performance_multiclass(list_tuples, all_classes)
        print("performance_multiclass:")
        print(perf)
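performance_binary is not shown; a hedged sketch of the metrics it presumably computes from the (actual, predicted) tuples, taking all_classes[0] as the positive class (an assumption):

def performance_binary_sketch(list_tuples, all_classes):
    pos = all_classes[0]  # assumed positive class
    tp = sum(1 for a, p in list_tuples if a == pos and p == pos)
    fp = sum(1 for a, p in list_tuples if a != pos and p == pos)
    fn = sum(1 for a, p in list_tuples if a == pos and p != pos)
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return precision, recall, f1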
Example #35
def evaluate_forest(forest, test_set, all_classes):
    list_tuples = []
    for t in range(len(test_set)):
        string = dt.evaluate_forest(test_set[t], forest, all_classes)
        if string is None:
            pass
        else:
            tup = (test_set[t]["Class"], string)
            list_tuples.append(tup)

    pesos = {}
    str_resultado = "Votos :"
    for opcao in all_classes:
        pesos[opcao] = 0
        for t in list_tuples:
            if t[1] == opcao:
                pesos[opcao] += 1
        str_resultado += " | " + opcao + ": " + str(pesos[opcao])

    if len(all_classes) == 2:
        precision, recall, f1 = performance_binary(list_tuples, all_classes)
        return {
            "precision": precision,
            "recall": recall,
            "f1Score": f1,
        }
    else:
        perf = performance_multiclass(list_tuples, all_classes)
        return perf
Example #36
def main():
    #Insert input file
    #file = open('train.csv')
    file = open('OutsideTraining.csv')
    target = "outside"
    data = [[]]
    for line in file:
        line = line.strip("\r\n")
        data.append(line.split(','))

    data.remove([])

    attributes = data[0]
    data.remove(attributes)
    #Run ID3
    tree = DecisionTree.makeTree(data, attributes, target, 0)
    print tree
    print "generated decision tree"
    #Generate program
    file = open('program.py', 'w')
    file.write("import Node\n\n")
    #open input file
    file.write("data = [[]]\n")
    """
    IMPORTANT: Change this file path to change testing data 
    """
    file.write("f = open('Outside.csv')\n")
    #gather data
    file.write("for line in f:\n\tline = line.strip(\"\\r\\n\")\n\tdata.append(line.split(','))\n")
    file.write("data.remove([])\n")
    #input dictionary tree
    file.write("tree = %s\n" % str(tree))
    file.write("attributes = %s\n" % str(attributes))
    file.write("count = 0\n")
    file.write("for entry in data:\n")
    file.write("\tcount += 1\n")
    #copy dictionary
    file.write("\ttempDict = tree.copy()\n")
    file.write("\tresult = \"\"\n")
    #generate actual tree
    file.write("\twhile(isinstance(tempDict, dict)):\n")
    file.write("\t\troot = Node.Node(tempDict.keys()[0], tempDict[tempDict.keys()[0]])\n")
    file.write("\t\ttempDict = tempDict[tempDict.keys()[0]]\n")
    #this must be attribute
    file.write("\t\tindex = attributes.index(root.value)\n")
    file.write("\t\tvalue = entry[index]\n")
    #ensure that key exists
    file.write("\t\tif(value in tempDict.keys()):\n")
    file.write("\t\t\tchild = Node.Node(value, tempDict[value])\n")
    file.write("\t\t\tresult = tempDict[value]\n")
    file.write("\t\t\ttempDict = tempDict[value]\n")
    #otherwise, break
    file.write("\t\telse:\n")
    file.write("\t\t\tprint \"can't process input %s\" % count\n")
    file.write("\t\t\tresult = \"?\"\n")
    file.write("\t\t\tbreak\n")
    #print solutions 
    file.write("\tprint (\"entry%s = %s\" % (count, result))\n")
    print "written program"
Example #37
def run(searchForOptimal, basepath, filepath):
	sc = buildContext()

	trainingData, testData = loadData(sc, basepath, filepath)

	if searchForOptimal:
		optimalRandomForestModel = RandomForest.trainOptimalModel(trainingData, testData)
		Evaluation.evaluate(optimalRandomForestModel, testData, logMessage=True)

		optimalDecisionTreeModel = DecisionTree.trainOptimalModel(trainingData, testData)
		Evaluation.evaluate(optimalDecisionTreeModel, testData, logMessage=True)
	else:
		randomForestModel = RandomForest.trainModel(trainingData)
		Evaluation.evaluate(randomForestModel, testData, logMessage=True)

		decisionTreeModel = DecisionTree.trainModel(trainingData)
		Evaluation.evaluate(decisionTreeModel, testData, logMessage=True)
Example #38
    def __init__(self, file):

        fin = open(file, "r")  # training set file
        lines = [line.strip() for line in fin.readlines()]
        lines.reverse()
        attributes = [attrib.strip() for attrib in lines.pop().split(",")]
        targetAttrib = attributes[-1]
        lines.reverse()
        
        # creating data dictionary
        data = []
        for line in lines:
            data.append(dict(zip(attributes, [datum.strip() for datum in line.split(",")])))

        # creating a decision tree based on training set
        self.tree = DecisionTree.createDecisionTree(data, attributes, targetAttrib, ID3.gain)
Example #39
def randomForest(data, attributes, attributesType, target, depth, recursion, treeNum, epsilon = 0):
    # for each of the treeNum trees: bootstrap the data, sample
    # sqrt(len(attributes)) of the attributes, and grow a decision tree
    trees = []
    attrNum = math.sqrt(len(attributes))
    epsilonPerTree = float(epsilon)/treeNum
    for i in range(0,treeNum):
        newData = genDataset(data)
        attrAndType = genAttrbutes(attributes, attributesType, attrNum, target)
        newAttributes = attrAndType['attr']
        newAttrTypes = attrAndType['type']
        newDataset = genNewDataset(newData, newAttributes, attributes)
        tree = DecisionTree.makeTree(newDataset, newAttributes, newAttrTypes, target, depth, recursion, epsilonPerTree)
        trees.append(tree)
    return trees
Example #40
    def classify(self, customers):
        # creating test data
            
        table = []
        for i in range(len(customers)):
            collection = {}
            collection['Waiting'] = fuzzifyWaiting(customers[i][0])
            collection['Meal'] = fuzzifyMeal(customers[i][1])
            collection['Distance'] = fuzzifyDistance(customers[i][2])
            table.append(collection)

        # classifying test data
        classification = DecisionTree.classify(self.tree,table)
        table1=[]
        for item in classification:
            table1.append(item)

        return table1
Example #41
def main():
    file = open('pokemonTraining.csv')
    target = "class"
    data = [[]]
    for line in file:
        line = line.strip("\r\n")
        data.append(line.split(','))
    data.remove([])
    attributes = data[0]
    data.remove(attributes)
    tree = DecisionTree.makeTree(data, attributes, target, 0)
    print "generated decision tree"
    file = open('program.py', 'w')
    file.write("import Node\n\n")
    file.write("data = [[]]\n")
    file.write("f = open('pokemon.csv')\n")
    file.write("output = open('result', 'w')")
    file.write("first_line = f.readline()")
    file.write("second_line = f.readline()")
    file.write("for line in f:\n\tline = line.strip(\"\\r\\n\")\n\tdata.append(line.split(','))\n")
    file.write("data.remove([])\n")
    file.write("tree = %s\n" % str(tree))
    file.write("attributes = %s\n" % str(attributes))
    file.write("count = 0\n")
    file.write("for entry in data:\n")
    file.write("\tcount += 1\n")
    file.write("\ttempDict = tree.copy()\n")
    file.write("\tresult = \"\"\n")
    file.write("\twhile(isinstance(tempDict, dict)):\n")
    file.write("\t\troot = Node.Node(tempDict.keys()[0], tempDict[tempDict.keys()[0]])\n")
    file.write("\t\ttempDict = tempDict[tempDict.keys()[0]]\n")
    file.write("\t\tindex = attributes.index(root.value)\n")
    file.write("\t\tvalue = entry[index]\n")
    file.write("\t\tif(value in tempDict.keys()):\n")
    file.write("\t\t\tchild = Node.Node(value, tempDict[value])\n")
    file.write("\t\t\tresult = tempDict[value]\n")
    file.write("\t\t\ttempDict = tempDict[value]\n")
    file.write("\t\telse:\n")
    file.write("\t\t\tprint \"can't process input %s\" % count\n")
    file.write("\t\t\tresult = \"?\"\n")
    file.write("\t\t\tbreak\n")
    file.write("\toutput.write(\"%s\" % (result))\n")
    print "written program"
Example #42
class LearningModule:
    def __init__(self):
        self.mdp_list = []
        self.success_config = []
        self.decision_tree = None
        self.StateActionPairs= []

        src_error = rospy.Service('LMErrorHasOccured',LMErrorHasOccured, self.errorHandle)
        src_rules = rospy.Service('LMGenerateRules', LMGenerateRules, self.generateRules)
        srv_state = rospy.Service('LMInitialise', LMInitialise, self.initialise_mdp)
        srv_state = rospy.Service('LMNewBlocks', LMNewBlocks, self.newBlocks)
        srv_action = rospy.Service('LMStateActionTaken', LMStateActionTaken, self.onPolicyLearning)

        # initialise
        self.mdp_list.append([])


    def initialise_lists(self):
        self.success_config.append([])

    def initialise_mdp(self, state):
        try:
            blocks = []
            for prop in state.initial_state.block_properties:
                blocks.append(Block(prop.label, prop.shape, prop.colour, prop.size))
            start_config = state.initial_state.configuration.config
            startingState = State(0, start_config)
            self.initialise_lists()
            self.success_config[-1].append(startingState)
            label = len(self.mdp_list[-1])
            print ""
            print label
            print ""
            mdp = MDP(label, blocks)
            mdp.statelist.append(startingState)
            mdp.initMDP(startingState)
            self.mdp_list[-1].append(mdp)
            print "MDP initialised"
            return True
        except:
            return False

    def newBlocks(self, blockSet):
        try:
            # combine MDPS
            self.mdp_list[-1] = self.combineIdenticalMDPs(self.mdp_list[-1])
            # add combined MDPs state action pairs to the list!
            self.writeToList(self.mdp_list[-1][-1])
            # start new layer
            self.new_layer()
            return True
        except:
            return False

    def new_layer(self):
        try:
            self.mdp_list.append([])
            return True
        except:
            return False


    def combineIdenticalMDPs(self, mdp_list):
        print "combining"
        sum_distance = [[0.0 for i in range(0,len(mdp_list[0].getStateList()))] for j in range(0,len(mdp_list[0].getStateList()))]
        weighted_average = [[0.0 for i in range(0,len(mdp_list[0].getStateList()))] for j in range(0,len(mdp_list[0].getStateList()))]
        for mdp in mdp_list:
            for i, row in enumerate(mdp.getDistanceMatrix()):
                for j, distance in enumerate(row):
                    if distance > 0:
                        sum_distance[i][j] += 1/distance
        for mdp in mdp_list:
            for i, row in enumerate(mdp.getDistanceMatrix()):
                for j, distance in enumerate(row):
                    if distance > 0.0 and sum_distance[i][j] > 0.0:
                        weight = (1/distance)/(sum_distance[i][j])
                        weighted_average[i][j] += weight*mdp.getQMatrix()[i][j]

        newMDP = deepcopy(mdp_list[0])
        newMDP.setQMatrix(weighted_average)
        return newMDP

    def findState(self, config, mdp):
        for state in mdp.getStateList():
            if state.getConfiguration() == config:
                return state

    def errorHandle(self, action_chosen):
        # try:
        print "errr"
        action_chosen = action_chosen.action_chosen
        actionableBlock = int(re.findall('\d+$', action_chosen.actionableBlock)[0])
        destinationBlock = int(re.findall('\d+$', action_chosen.destinationBlock)[0])
        action_block = actionableBlock
        dest_block = destinationBlock
        action_chosen = None
        for action in self.mdp_list[-1][-1].getErrorState().getActions():
            if action.getActionableBlock() == action_block:
                if action.getDestinationBlock() == dest_block:
                    action_chosen = action

        self.mdp_list[-1][-1].onPolicyLearning(action_chosen)
        error_config = self.mdp_list[-1][-1].getErrorState()

        print self.success_config[-1]

        self.mdp_list[-1][-1].simulation(error_config, self.success_config[-1])
        return True
        # except:
        #     print "OMGMMMM"
        #     return False

    def onPolicyLearning(self, action):
        # try:
        """ This will be the callback function"""
        actionableBlock = int(re.findall('\d+$',action.action_chosen.actionableBlock)[0])
        if(re.findall('tab',action.action_chosen.destinationBlock)):
            print "###############TABLE################"
            destinationBlock = None
        else:
            destinationBlock = int(re.findall('\d+$',action.action_chosen.destinationBlock)[0])

        action_chosen = None

        for action in self.mdp_list[-1][-1].errorstate.actions:
            print action.actionableBlock
            print action.destinationBlock
            if(actionableBlock == action.actionableBlock) and (destinationBlock == action.destinationBlock):
                action_chosen = action


        self.mdp_list[-1][-1].onPolicyLearning(action_chosen)
        config = self.mdp_list[-1][-1].getErrorState()
        self.success_config[-1].append(config)
        return True

    def writeToList(self, mdp):
        blocks = mdp.getBlocks()
        for state in mdp.getStateList():
            for action in state.getActions():
                action_block = action.getActionableBlock()
                dest_block = action.getDestinationBlock()
                if dest_block is None:
                    example = (blocks[action_block].getShape(), blocks[action_block].getColour(), blocks[action_block].getSize(),
                               mdp.getQMatrix()[state.getLabel()][action.getNextStateAddr()])
                else:
                    example = (blocks[action_block].getShape(),
                               blocks[action_block].getColour(),blocks[action_block].getSize(), blocks[dest_block].getShape(),
                               blocks[dest_block].getColour(),blocks[dest_block].getSize(),
                               mdp.getQMatrix()[state.getLabel()][action.getNextStateAddr()])
                self.StateActionPairs.append(example)
        return

    def generateRules(self, randomCharacterBeingSentSomehow):

        print"generateRules"

        reduced_mdp_list = []
        attributes = []
        self.mdp_list[-1] = [self.combineIdenticalMDPs(self.mdp_list[-1])]
        self.writeToList(self.mdp_list[-1][-1])

        training_set = self.StateActionPairs

        attr_shape = ("cube", "prism", "cuboid")
        attr_colour = ("red", "blue", "green")
        attr_size = ("small","medium","large")
        attribute_dict = [("has_shape(A,", attr_shape), ("has_colour(A, ", attr_colour), ("has_size(A, ", attr_size),
                            ("has_shape(D, ", attr_shape), ("has_colour(D, ",attr_colour), ("has_size(D, ", attr_size)]
        attribute_dict = OrderedDict(attribute_dict)
        index = 0
        names = attribute_dict.keys()
        values = attribute_dict.values()
        for name, vals in zip(names, values):
            attributes.append(Attribute(name, index, vals))
            index += 1
        self.decision_tree = DecisionTree(attributes, training_set)
        rules = self.decision_tree.getRules()
        rules = self.selectRules(rules)
        print ""
        print rules
        return rules

    def selectRules(self, rules):
        """ Select the best rules """
        """ Think about doing it using SVM"""
        rules = sorted(rules, key=operator.itemgetter(-1))
        q_val = []
        for index, rule in enumerate(rules):
            q_val.append([index, rule[-1]])
        whitened = whiten(q_val)
        centroids,_ = kmeans(whitened, 3, thresh = 1,iter = 100)
        ids,_= vq(whitened, centroids)
        key = ids[-1]
        indices = []
        for index, keys in enumerate(ids):
            if key == keys:
                indices.append(index)
        valid_rules = []
        for index in indices:
            valid_rules.append(rules[index][0])
        return self.parseRules(valid_rules)

    def parseRules(self, rules):
        valid_rules = []
        for rule in rules:
            sentence = ""
            for segment in rule:
                sentence = sentence + segment + ", "
            sentence = sentence[:-2]
            valid_rules.append(sentence)
        return Rules(rule = valid_rules)

    def reduceMDP(self,errorconfig, stack_config, start_config, blocks):
        mdp_list = []
        for i in range(0, len(errorconfig)):
            mdp_list.append(MDP(i, blocks))
            startingState = State(0, start_config)
            mdp_list[i].statelist.append(startingState)
            mdp_list[i].initMDP(startingState)
            errorstate = self.findState(errorconfig[i], mdp_list[i])
            stackstate = []
            for j in range(0,len(stack_config)):
                stackstate.append(self.findState(stack_config[j], mdp_list[i]))
            mdp_list[i].simulation(errorstate, stackstate)
            mdp_list[i].updateDistanceMatrix(errorstate)

        reduced_mdp = self.combineIdenticalMDPs(mdp_list)
        return reduced_mdp
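The weighting in combineIdenticalMDPs may be easier to see without the MDP plumbing; a minimal sketch of the same arithmetic on plain nested lists, averaging each Q[i][j] across MDPs with weights proportional to 1/distance:

def inverse_distance_average(q_matrices, d_matrices):
    n = len(q_matrices[0])
    avg = [[0.0] * n for _ in range(n)]
    for i in range(n):
        for j in range(n):
            total = sum(1.0 / d[i][j] for d in d_matrices if d[i][j] > 0)
            if total > 0:
                for q, d in zip(q_matrices, d_matrices):
                    if d[i][j] > 0:
                        avg[i][j] += (1.0 / d[i][j]) / total * q[i][j]
    return avg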
Example #43
    "-v", "--validate", type="string", dest="validate", default="bcan.validate", help="Validation Data File"
)
parser.add_option("-s", "--stopping_parameter", type="int", dest="stop", default=1, help="Stopping Parameter")
(options, args) = parser.parse_args()

print "Loading\n...'%s' as training set,\n...'%s' as test set,\n...'%s' as validation set,\n...stopping parameter %d..." % (
    options.train,
    options.test,
    options.validate,
    options.stop,
)

training_set = load_data(options.train)
test_set = load_data(options.test)
validation_set = load_data(options.validate)
s_para = options.stop

# dt = DecisionTree(training_set, validation_set, stopping_parameter=s_para)
dt = DecisionTree(training_set, stopping_parameter=1)
print "test set: {} training_error : {} validation_error: {}".format(
    dt.prediction_error(test_set), dt.training_error(), dt.prediction_error(validation_set)
)
print dt.count_nodes()

dt = dt.post_prune(validation_set)
print "test set: {} training_error : {} validation_error: {}".format(
    dt.prediction_error(test_set), dt.training_error(), dt.prediction_error(validation_set)
)
print dt.count_nodes()
print dt.print_tree()
Example #44
def train_r(records, attributes, sqm, depth):
    """Recursive call of the train function.

    Use the given records and train a tree of maximal given depth,
    using sqm attributes among all the possible given attributes.

    The recursion stops when all records are the same, or have the same
    label, or the maximal depth is reached."""

    if records.label_monotone or records.monotone or depth == 0:
        return Decision(records.mode)

    chosen_attributes = []
    attributes_with_no_split = 0

    # this loop ensures that we select attributes with distinct values.
    while len(chosen_attributes) == attributes_with_no_split:
        # select randomly sqm elements
        chosen_attributes = [attributes[randint(0, len(attributes)-1)] for i in xrange(sqm)]
        # repeat selection as long as at least one feature appears twice
        while len(list(set(chosen_attributes))) != len(chosen_attributes):
            chosen_attributes = [attributes[randint(0, len(attributes)-1)] for i in xrange(sqm)]

        best_gain = -1
        former_best = None
        best_split = None

        best_index = None
        best_range = None
        is_numerical = None

        attributes_with_no_split = 0
        for criteria in chosen_attributes:
            splits = generate_splits( records, criteria )
            if len(splits) == 0:
                # there are no splits when all values of the feature
                # are the same
                attributes_with_no_split += 1

            for s in splits:
                gain = s.gain
                if best_gain < gain:
                    former_best = best_split
                    best_split = s
                    best_gain = gain
                    if former_best is not None:
                        del former_best
            del splits

    s = best_split
    decision_tree = DecisionTree( s.feature_index, s.feature_range, s.is_numerical )

    if s.left.size == 0 or s.right.size == 0:
        del s
        return Decision( records.mode )

    depth -= 1
    decision_tree.right = train_r( s.right, attributes, sqm, depth )
    decision_tree.left  = train_r( s.left, attributes, sqm, depth )
    del s

    return decision_tree
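A typical invocation (an assumption based on the name sqm, which in random forests is usually the floor of the square root of the attribute count):

# tree = train_r(records, attributes, sqm=int(len(attributes) ** 0.5), depth=10)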
Example #45
def learn(dataset, pruneFlag, maxDepth):
    tree = DecisionTree.makeTree(dataset, 9, [0, 1, 2, 3, 4, 5, 6, 7, 8], -1, maxDepth)
    return tree
Example #46
from optparse import OptionParser
from Loaders import *
from DecisionTree import *
	
# Options
parser = OptionParser()
parser.add_option("-r", "--train", type="string", dest="train", default="bcan.train", help="Training Data File")
parser.add_option("-t", "--test", type="string", dest="test", default="bcan.test", help="Test Data File")
parser.add_option("-v", "--validate", type="string", dest="validate", default="bcan.validate", help="Validation Data File")
parser.add_option("-s", "--stopping_parameter", type="int", dest="stop", default=1, help="Stopping Parameter")
(options, args) = parser.parse_args()

print "Loading\n...'%s' as training set,\n...'%s' as test set,\n...'%s' as validation set,\n...stopping parameter %d..." % (options.train, options.test, options.validate, options.stop)

training_set = load_data(options.train)
test_set = load_data(options.test)
validation_set = load_data(options.validate)
s_para = options.stop

dt = DecisionTree(training_set, stopping_parameter=s_para)
print dt.prediction_error(test_set)
print dt.training_error()
print dt.count_nodes()
Example #47
def id3(trainfilename,testfilename,originalValuefilename):
	trainingFile = open(trainfilename)
	"""
	IMPORTANT: Change this variable to change the target attribute
	"""
	target_attribute = "Close"
	data = [[]]
	for line in trainingFile:
		line = line.strip("\r\n")
		data.append(line.split(','))
	data.remove([])
	attributes = data[0]
	data.remove(attributes)
	#Run ID3
	tree = DecisionTree.makeTree(data, attributes, target_attribute, 0)
	#print "generated decision tree"

	data = [[]]
	testFile = open(testfilename)
	for line in testFile:
		line = line.strip("\r\n")
		data.append(line.split(','))
	data.remove([])
	#tree = str(tree)
	#tree = "%s\n" % str(tree)
	attributes = ['Open', 'High', 'Low', 'Close']
	prediction = []
	count = 0

	for entry in data:
		count += 1
		tempDict = tree.copy()
		result = ""
		while(isinstance(tempDict, dict)):
			root = Node.Node(tempDict.keys()[0], tempDict[tempDict.keys()[0]])
			tempDict = tempDict[tempDict.keys()[0]]
			index = attributes.index(root.value)
			value = entry[index]
			if(value in tempDict.keys()):
				child = Node.Node(value, tempDict[value])
				result = tempDict[value]
				tempDict = tempDict[value]
			else:
				result = recheck.some_func(value,trainfilename,testfilename)
				break
		#print ("entry%s = %s" % (count, result))
		prediction.append(result)
	#showinfo("ID3 Algorithm","Predictions are done"+str(prediction[0]))


	# convert the string predictions to floats for plotting
	predicted_2 = [float(p) for p in prediction]
	#showinfo("ID3 Algorithm","Predictions are done"+str(predicted_2[0]))
	open_values = gettingOriginalOpenValues(originalValuefilename)
	original_close_values = gettingOriginalCloseValues(originalValuefilename)
	#print open_values
	#print original_close_values
	# print predicted_2
	# plotting
	plt.title("Results for given dataset using ID3 Algorithm")
	plt.plot(open_values,predicted_2,'g.',markersize=np.sqrt(150.),label ='ID3 Prediction')
	plt.plot(open_values,original_close_values,'b.',markersize=np.sqrt(100.),label = 'Original Values')
	plt.legend(loc='upper left')
	plt.xlabel("Open Values")
	plt.ylabel("Close Values")
	plt.grid()
	#plt.show()
	fig = plt.gcf()
	fig.set_size_inches(8, 4)
	ax=plt.subplot(111)
	# Shrink current axis's height by 10% on the bottom
	box = ax.get_position()
	ax.set_position([box.x0, box.y0 + box.height * 0.1,box.width, box.height * 0.9])

	# Put a legend below current axis
	ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.09),fancybox=True, shadow=True, ncol=5,fontsize="10")
	fig.savefig('test_result_id3.jpg', dpi=100)
	#showinfo("Naive Bayes Algorithm","Plotting Completed")'''


	x = Image.open(r"E:\4.2\Final Year Project\Code\Complete Project\test_result_id3.jpg")
	y = ImageTk.PhotoImage(x)

	label6 = Label(image=y)
	label6.image = y
	label6.place(x=50, y=290)

	#Generate program
	'''
	file = open('program.py', 'w')
	file.write("import Node\n\n")
	file.write("import recheck\n\n")
	#open input file
	file.write("data = [[]]\n")
	"""
	IMPORTANT: Change this file path to change testing data
	"""
	file.write("f = open('AAPLTest.csv')\n")
	#gather data
	file.write("for line in f:\n\tline = line.strip(\"\\r\\n\")\n\tdata.append(line.split(','))\n")
	file.write("data.remove([])\n")
	#input dictionary tree
	file.write("tree = %s\n" % str(tree))
	file.write("attributes = %s\n" % str(attributes))
	file.write("prediction = []\n")
	file.write("count = 0\n")
	file.write("for entry in data:\n")
	file.write("\tcount += 1\n")
	#copy dictionary
	file.write("\ttempDict = tree.copy()\n")
	file.write("\tresult = \"\"\n")
	#generate actual tree
	file.write("\twhile(isinstance(tempDict, dict)):\n")
	file.write("\t\troot = Node.Node(tempDict.keys()[0], tempDict[tempDict.keys()[0]])\n")
	file.write("\t\ttempDict = tempDict[tempDict.keys()[0]]\n")
	#this must be attribute
	file.write("\t\tindex = attributes.index(root.value)\n")
	file.write("\t\tvalue = entry[index]\n")
	#ensure that key exists
	file.write("\t\tif(value in tempDict.keys()):\n")
	file.write("\t\t\tchild = Node.Node(value, tempDict[value])\n")
	file.write("\t\t\tresult = tempDict[value]\n")
	file.write("\t\t\ttempDict = tempDict[value]\n")
	#otherwise, break
	file.write("\t\telse:\n")


	#file.write("\t\t\t#print \"can't process input %s\" % count\n")
	file.write("\t\t\tresult = recheck.some_func(value)\n")
	file.write("\t\t\tbreak\n")
	#print solutions
	file.write("\t#print (\"entry%s = %s\" % (count, result))\n")
	file.write("\tprediction.append(result)\n")
	print "written program"
	'''

	result=accuracy_calculation(original_close_values,predicted_2)
	return result
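
The dictionary walk above is duplicated verbatim in completeExexution below, so it could be factored into a single helper. A sketch under the same assumptions (Python 2, nested-dict tree, recheck fallback for unseen attribute values):

def classify(tree, entry, attributes, trainfilename, testfilename):
	# walk the nested-dict tree until a leaf (non-dict) value is reached
	node = tree
	while isinstance(node, dict):
		attribute = node.keys()[0]  # Python 2: keys() returns a list
		node = node[attribute]
		value = entry[attributes.index(attribute)]
		if value in node:
			node = node[value]
		else:
			# unseen attribute value: fall back to recheck, as the original does
			return recheck.some_func(value, trainfilename, testfilename)
	return node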
def completeExexution(trainfilename,testfilename,originalValuefilename):

	#id3 algorithm
	trainingFile = open(trainfilename)
	target_attribute = "Close"

	data = [[]]
	for line in trainingFile:
		line = line.strip("\r\n")
		data.append(line.split(','))
	data.remove([])

	attributes = data[0]
	data.remove(attributes)
	#Run ID3
	tree = DecisionTree.makeTree(data, attributes, target_attribute, 0)
	#print "generated decision tree"

	data = [[]]
	testFile = open(testfilename)
	for line in testFile:
		line = line.strip("\r\n")
		data.append(line.split(','))
	data.remove([])
	#tree = str(tree)
	#tree = "%s\n" % str(tree)
	attributes = ['Open', 'High', 'Low', 'Close']
	prediction = []
	count = 0

	for entry in data:
		count += 1
		tempDict = tree.copy()
		result = ""
		while(isinstance(tempDict, dict)):
			root = Node.Node(tempDict.keys()[0], tempDict[tempDict.keys()[0]])
			tempDict = tempDict[tempDict.keys()[0]]
			index = attributes.index(root.value)
			value = entry[index]
			if(value in tempDict.keys()):
				child = Node.Node(value, tempDict[value])
				result = tempDict[value]
				tempDict = tempDict[value]
			else:
				result = recheck.some_func(value,trainfilename,testfilename)
				break

		prediction.append(result)

	# convert the string predictions to floats for plotting
	predicted_2 = [float(p) for p in prediction]

	#naive bayes algorithm
	trainingdataset = loadTrainCsv(trainfilename)
	testdataset = loadTestCsv(testfilename)
	summaries = summarizeByClass(trainingdataset)
	naive_predictions = getPredictions(summaries, testdataset)
	predicted_1=naive_predictions


	open_values = gettingOriginalOpenValues(originalValuefilename)
	original_close_values = gettingOriginalCloseValues(originalValuefilename)

	#print "Naive Predictions"+str(predicted_1)
	#print "ID3"+str(predicted_2)

	plt.title("Results for given dataset using ID3 & Naive Bayes Algorithm")
	plt.plot(open_values,predicted_1,'r.',markersize=np.sqrt(150.),label ='Naive Bayes Prediction')
	plt.plot(open_values,predicted_2,'g.',markersize=np.sqrt(150.),label ='ID3 Prediction')
	plt.plot(open_values,original_close_values,'b.',markersize=np.sqrt(100.),label = 'Original Values')
	plt.legend(loc='upper left')
	plt.xlabel("Open Values")
	plt.ylabel("Close Values")
	plt.grid()
	#plt.show()
	fig = plt.gcf()
	fig.set_size_inches(8, 4)
	ax=plt.subplot(111)
	# Shrink current axis's height by 10% on the bottom
	box = ax.get_position()
	ax.set_position([box.x0, box.y0 + box.height * 0.1,box.width, box.height * 0.9])

	# Put a legend below current axis
	ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.09),fancybox=True, shadow=True, ncol=5,fontsize="10")
	fig.savefig('test_result_id3_naivebayes.jpg', dpi=100)
	#showinfo("Naive Bayes Algorithm","Plotting Completed")'''


	x = Image.open(r"E:\4.2\Final Year Project\Code\Complete Project\test_result_id3_naivebayes.jpg")
	y = ImageTk.PhotoImage(x)

	label6 = Label(image=y)
	label6.image = y
	label6.place(x=50, y=290)

	result=accuracy_calculation(original_close_values,predicted_1,predicted_2)
	return result
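
summarizeByClass and getPredictions are not defined in this snippet; in the common from-scratch Gaussian Naive Bayes pattern they compute per-class feature statistics and then score each test row with the Gaussian density, returning the most likely class. A hedged sketch of the summarizing step only (the function names and the rows-end-with-label layout are assumptions):

import math
from collections import defaultdict

def mean(values):
	return sum(values) / float(len(values))

def stdev(values):
	m = mean(values)
	return math.sqrt(sum((x - m) ** 2 for x in values) / (len(values) - 1))

def summarize_by_class(dataset):
	# dataset rows: [f1, ..., fn, label]; per class, per feature -> (mean, stdev)
	grouped = defaultdict(list)
	for row in dataset:
		grouped[row[-1]].append(row[:-1])
	return {label: [(mean(col), stdev(col)) for col in zip(*rows)]
			for label, rows in grouped.items()}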
Example #49
__author__ = 'Aman'

import DataPrepare
import DataSpecific
import DecisionTree

'''
tree is the root of the DecisionTree obtained by training the data in the data file
'''
tree = DecisionTree.trainData()
'''
Updating the attribute list with values obtained from converting continuous variables.
'''
DataSpecific.attribute_list.update(tree.dictIntervalContVar)

'''
Convert continuous variables from the query/test data to discrete variables using the intervals used in the decision tree.
'''
def cleanRow(row):
    row = DataPrepare.cleanRow(row)

    dictContVar = tree.dictIntervalContVar

    for entry in dictContVar.keys():
        listIntervals = dictContVar[entry]
        for i in range(0, len(listIntervals) - 1):
            if row[entry] == '?':
                break
            if row[entry] > listIntervals[i] and row[entry] <= listIntervals[i + 1]:
                row[entry] = i
                break
    return row
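
For illustration, here is how one value would be binned with a hypothetical interval list (the cut points below are made up for the example):

intervals = [0, 18, 35, 60]  # buckets (0, 18], (18, 35], (35, 60]
value = 40
for i in range(len(intervals) - 1):
    if intervals[i] < value <= intervals[i + 1]:
        print(i)  # prints 2: 40 falls in (35, 60]
        break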
Example #50
#!/bin/env python
# -*- coding: utf-8 -*-
import ThinFatData
import DecisionTree 


data = ThinFatData.createDataSet(num=100000,version=2)
DecisionTree.testFromDF(data)
# imports assumed by the class below but not shown in the original snippet
# (MDP, State and Attribute come from the project's own modules):
from collections import OrderedDict
from copy import deepcopy
import operator
from scipy.cluster.vq import whiten, kmeans, vq

class LearningModule:
    def __init__(self):
        self.mdp_list = []
        self.success_config = []
        self.decision_tree = None
        self.StateActionPairs= []
        # initialise
        self.mdp_list.append([])

    def initialiseAttributes(self):
        binary_values = ("true", "false")
        attributes = ["on(b0, table)", "on(b0, b1)", "on(b0,b2)",
        "on(b1, table)", "on(b1, b0)", "on(b1, b2)",
        "on(b2, table)", "on(b2, b0)", "on(b2,b1)",
        "has_shape(b0, prism)", "has_shape(b1, prism)", "has_shape(b2, prism)",
        "has_shape(b0, cube)", "has_shape(b1, cube)", "has_shape(b2, cube)",
        "has_shape(b0, cuboid)", "has_shape(b1, cuboid)", "has_shape(b2, cuboid)",
        "has_colour(b0, red)", "has_colour(b1, red)", "has_colour(b2, red)",
        "has_colour(b0, blue)", "has_colour(b1, blue)", "has_colour(b2, blue)",
        "has_colour(b0, green)", "has_colour(b1, green)", "has_colour(b2, green)",
        "has_size(b0, small)", "has_size(b1, small)", "has_size(b2, small)",
        "has_size(b0, medium)", "has_size(b1, medium)", "has_size(b2, medium)",
        "has_size(b0, large)", "has_size(b1, large)", "has_size(b2, large)",
        "move(b0, table)", "move(b0, b1)", "move(b0, b2)",
        "move(b1, table)", "move(b1, b0)", "move(b1, b2)",
        "move(b2, table)", "move(b2, b0)", "move(b2, b1)"]
        attribute_dict = []
        for attribute in attributes:
            attribute_dict.append((attribute,binary_values))
        attribute_dict = OrderedDict(attribute_dict)
        index = 0
        names = attribute_dict.keys()
        values = attribute_dict.values()
        attributes = []
        for name, vals in zip(names, values):
            attributes.append(Attribute(name, index, vals))
            index += 1
        return attributes

    def initialise_mdp(self, blocks):
        start_config = [-1,-1,-1]
        startingState = State(0, blocks, start_config)
        self.initialise_lists()
        self.success_config[-1].append(startingState)
        label = len(self.mdp_list[-1])
        mdp = MDP(label, blocks)
        mdp.statelist.append(startingState)
        mdp.initMDP(startingState)
        self.mdp_list[-1].append(mdp)

    def new_layer(self):
        try:
            self.mdp_list.append([])
            return True
        except Exception:
            return False

    def initialise_lists(self):
        self.success_config.append([])

    def combineIdenticalMDPs(self, mdp_list):
        print "combining"
        sum_distance = [[0.0 for i in range(0,len(mdp_list[0].getStateList()))] for j in range(0,len(mdp_list[0].getStateList()))]
        weighted_average = [[0.0 for i in range(0,len(mdp_list[0].getStateList()))] for j in range(0,len(mdp_list[0].getStateList()))]
        for mdp in mdp_list:
            for i, row in enumerate(mdp.getDistanceMatrix()):
                for j, distance in enumerate(row):
                    if distance > 0:
                        sum_distance[i][j] += 1/distance
        for mdp in mdp_list:
            for i, row in enumerate(mdp.getDistanceMatrix()):
                for j, distance in enumerate(row):
                    if distance > 0.0 and sum_distance[i][j] > 0.0:
                        weight = (1/distance)/(sum_distance[i][j])
                        weighted_average[i][j] += weight*mdp.getQMatrix()[i][j]

        newMDP = deepcopy(mdp_list[0])
        newMDP.setQMatrix(weighted_average)
        return newMDP

    def findState(self, config, mdp):
        for state in mdp.getStateList():
            if state.getConfiguration() == config:
                return state

    def errorHandle(self, error_config, success_config, attributes):
        success_states = []
        for config in success_config:
            success_states.append(self.findState(config, self.mdp_list[-1][-1]))
        error_state = self.findState(error_config, self.mdp_list[-1][-1])
        self.mdp_list[-1][-1].simulation(error_state, self.success_config[-1], attributes)

    def writeToList(self, mdp):
        blocks = mdp.getBlocks()
        for state in mdp.getStateList():
            for action in state.getActions():
                action_block = action.getActionableBlock()
                dest_block = action.getDestinationBlock()
                if dest_block is None:
                    example = (blocks[action_block].getShape(), blocks[action_block].getColour(), blocks[action_block].getSize(),
                               mdp.getQMatrix()[state.getLabel()][action.getNextStateAddr()])
                else:
                    example = (blocks[action_block].getShape(),
                               blocks[action_block].getColour(),blocks[action_block].getSize(), blocks[dest_block].getShape(),
                               blocks[dest_block].getColour(),blocks[dest_block].getSize(),
                               mdp.getQMatrix()[state.getLabel()][action.getNextStateAddr()])
                self.StateActionPairs.append(example)
        return

    def generateRules(self):
        reduced_mdp_list = []
        attributes = []
        self.mdp_list[-1] = [self.combineIdenticalMDPs(self.mdp_list[-1])]
        self.writeToList(self.mdp_list[-1][-1])
        training_set = self.StateActionPairs

        attr_shape = ("cube", "prism", "cuboid")
        attr_colour = ("red", "blue", "green")
        attr_size = ("small","medium","large")
        attribute_dict = [("has_shape(A,", attr_shape), ("has_colour(A,", attr_colour), ("has_size(A,", attr_size),
                            ("has_shape(D,", attr_shape), ("has_colour(D,",attr_colour), ("has_size(D,", attr_size)]
        attribute_dict = OrderedDict(attribute_dict)
        index = 0
        names = attribute_dict.keys()
        values = attribute_dict.values()
        for name, vals in zip(names, values):
            attributes.append(Attribute(name, index, vals))
            index += 1
        self.decision_tree = DecisionTree(attributes, training_set)
        rules = self.decision_tree.getRules()
        for rule in rules:
            print rule
        # rules = self.selectRules(rules)

    def selectRules(self, rules):
        """ Select the best rules """
        """ Think about doing it using SVM"""
        print rules
        for rule in rules:
            print rule
        print "\n"
        rules = sorted(rules, key=operator.itemgetter(-1))
        q_val = []
        for index, rule in enumerate(rules):
            q_val.append([index, rule[-1]])
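        # cluster rules by (rank, Q-value): whiten normalizes the features,
        # k-means groups the rules, and ids[-1] picks the cluster containing
        # the highest-Q rule (the list was sorted ascending by Q above)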
        whitened = whiten(q_val)
        centroids,_ = kmeans(whitened, 3, thresh = 1,iter = 100)
        ids,_= vq(whitened, centroids)
        key = ids[-1]
        indices = []
        for index, keys in enumerate(ids):
            if key == keys:
                indices.append(index)
        valid_rules = []
        for index in indices:
            valid_rules.append(rules[index][0])
        return self.parseRules(valid_rules)

    def parseRules(self, rules):
        valid_rules = []
        for rule in rules:
            sentence = ""
            for segment in rule:
                sentence = sentence + segment + ", "
            sentence = sentence[:-2]
            valid_rules.append(sentence)
        return valid_rules

    def reduceMDP(self,errorconfig, stack_config, start_config, blocks):
        mdp_list = []
        attributes = self.initialiseAttributes()
        for i in range(0, len(errorconfig)):
            mdp_list.append(MDP(i, blocks))
            startingState = State(0, blocks, start_config)  # State presumably takes (label, blocks, config), as in initialise_mdp above
            mdp_list[i].statelist.append(startingState)
            mdp_list[i].initMDP(startingState)
            errorstate = self.findState(errorconfig[i], mdp_list[i])
            stackstate = []
            for j in range(0,len(stack_config)):
                stackstate.append(self.findState(stack_config[j], mdp_list[i]))
            mdp_list[i].simulation(errorstate, stackstate, attributes)
            mdp_list[i].updateDistanceMatrix(errorstate)

        reduced_mdp = self.combineIdenticalMDPs(mdp_list)
        return reduced_mdp
Example #52
# parts() is assumed to split data into n folds; the original snippet begins mid-function
def parts(data, n):
  # split data into n roughly equal folds for cross-validation
  parts = [data[i::n] for i in range(n)]
  return parts

avg_error = []
tsets = parts(training_set, 10)
pg, prog = ProgressBar(), 1.0
for s_para in s_paras:
  op = prog/s_len
  ip = 1.0
  test_error = []
  for p in tsets:
    train = []
    for q in tsets:
      if p != q:
        train += q
    test = p
    dt = DecisionTree(train, stopping_parameter=s_para)
    test_error.append(dt.prediction_error(test)[1])
    pg.update(-(1-ip/10)/s_len+op, "Stopping Para %d" % s_para)
    ip += 1
  #print test_error
  avg_error.append((s_para, float(sum(test_error))/len(test_error)))
  prog += 1
  
top = min(avg_error, key=lambda pair: pair[1])
print avg_error
print "Stopping Parameter and Error: %d, %f" % (top[0], top[1])

# Build decision tree with "best" parameter trained on original
print "Using these values to get training and test error"
dt = DecisionTree(training_set, stopping_parameter=top[0])
print dt.training_error()
def main():
    row1 = 0
    count = 0
    accuracy = 0
    orig_op = []
    print "Training Naive-Bayes ..."
    tic = time.clock()

    file1 = open('CreditTraining.csv')
    finattr = "class"
    base1 = [[]]
    baseT1 = [[]]
    basedata = [[]]
    basedata1 = [[]]
    basedata2 = [[]]
    for line in file1:
        line = line.strip("\r\n")
        basedata.append(line.split(','))
        
    basedata.remove([])
    #print " base main "+str(basedata)
    parameters = basedata[0]
    basedata.remove(parameters)
   
    #import pudb
    #pudb.set_trace() 
    #base1, A2 = DecisionTree.getContinuous(basedata, parameters, parameters[1])
    #base1, A3 = DecisionTree.getContinuous(base1, parameters, parameters[2])
    #base1, A8 = DecisionTree.getContinuous(base1, parameters, parameters[7])
    #base1, A11 = DecisionTree.getContinuous(base1, parameters, parameters[10])
    #base1, A14 = DecisionTree.getContinuous(base1, parameters, parameters[13])
    #base1, A15 = DecisionTree.getContinuous(base1, parameters, parameters[14])
    
    #print " base final " + str(base1)
    #print " A2 " + str(A2)
    #print " A3" + str(A3)
    #print " A8 " + str(A8)
    #print " A11 " + str(A11)
    #print " A14 " + str(A14)
    #Run ID3
    
    tree = DecisionTree.ID(basedata, parameters, finattr)
    #print "generated decision tree"+ str(tree)
    f = open('Credit.csv')
    for line in f:
        line = line.strip("\r\n")
        basedata1.append(line.split(','))
    basedata1.remove([])
    #import pudb
    #pudb.set_trace()
    '''baseT1= DecisionTree.getContinuousTest(basedata1, parameters, parameters[1],A2)
    #print "based="+str(basedata1)
    #print "baset="+str(baseT1)
    baseT1= DecisionTree.getContinuousTest(baseT1, parameters, parameters[2],A3)
    baseT1= DecisionTree.getContinuousTest(baseT1, parameters, parameters[7],A8)
    #import pudb
    #pudb.set_trace()
    baseT1= DecisionTree.getContinuousTest(baseT1, parameters, parameters[10],A11)
    baseT1= DecisionTree.getContinuousTest(baseT1, parameters, parameters[13],A14)
    baseT1= DecisionTree.getContinuousTest(baseT1, parameters, parameters[14],A15)'''
    #print " baseT1 " + str(baseT1)
    #import pudb
    #pudb.set_trace()
    for entry in basedata1:
        row1 += 1
        train_data = tree.copy()
        output = ""
        #import pudb
        #pudb.set_trace()
        while(isinstance(train_data, dict)):
            root = Node.Node(train_data.keys()[0], train_data[train_data.keys()[0]])
            
            train_data = train_data[train_data.keys()[0]]
            index = parameters.index(root.X)
            value = entry[index]
            if(value in train_data.keys()):
                Node.Node(value, train_data[value])
                output = train_data[value]
                train_data = train_data[value]
            else:
                #print " value break at " + str(value)
                
                #print "can't process input %s" % count
                output = DecisionTree.freq_check(parameters, basedata, parameters[15])
                break
                
        orig_op.append(output)
        #print ("row%s = %s" % (row1, output))

    #print "written program"
    
    f1 = open('classcredit.csv')
    for line in f1:
        line = line.strip("\r\n")
        basedata2.append(line)
    basedata2.remove([])
    i = 0
    for ent in basedata2:
        #print "orig_op[i] " + str(orig_op[i]) + "ent = " + ent
        if (ent == orig_op[i]):
            count += 1
        i += 1
    accuracy = 100.0 * count / len(basedata2)

    print "Calculated accuracy for the testing data = %d/%d (%.2f%%)" % (count, len(basedata2), accuracy)