def testTree():
    """
    Manual smoke check for the split-finding helpers.

    Builds a small nine-row DataFrame with columns 'A', 'B' and 'C', then
    prints the result of ``tree.find_best_label_new`` for feature 'A'
    against target 'B', followed by the result of
    ``tree.find_best_feature_and_label_for_split`` on target 'B' in
    regression mode. Nothing is asserted; the output is meant to be
    inspected by eye.

    :return: void
    """
    data = {'A': [1,2,6,7,8,9,3,4,5], 'C': [1,0,1,0,1,0,1,0,1], 'B': [1,1,0,0,0,0,1,1,1]}
    df = pd.DataFrame(data)
    # Single-argument parenthesized print emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(tree.find_best_label_new(df, 'A', 'B'))
    print('best feature and label')
    print(tree.find_best_feature_and_label_for_split(df, 'B', regression=True))
def branch_node(node, df, threshold, Y, regression=False):
    """
    Recursively grow the tree below ``node``, or close it off as a leaf.

    The best (feature, label) split is looked up on the rows belonging to
    this node. If a feature is returned and the node's level is still below
    ``threshold``, the node is split and both children are branched in turn.
    Otherwise a prediction and its error are computed and stored on the node
    via ``node.leaf``. Progress is reported with debug prints throughout.

    :param node: Node object defined in Stats
    :param df: The dataframe being used by the tree
    :param threshold: max branching depth (compared against node.level)
    :param Y: Feature to predict
    :param regression: when true, leaves predict the mean of Y and are scored
        with mystats.compute_MSE; otherwise leaves predict a 0/1 class and
        are scored with mystats.binary_error
    :return: void
    """
    print('Branching Level : ' + str(node.level))
    data = node.get_node_data(df)
    print('Length of data ' + str(len(data)) + ' len df: ' + str(len(df)))
    feature, label = mytree.find_best_feature_and_label_for_split(data, Y, regression)
    print('feature: {} label: {}'.format(feature, label))

    can_split = feature is not None and node.level < threshold
    if can_split:
        # The split is given the full-frame column, not the node subset.
        # NOTE(review): the returned arrays look like per-row membership
        # masks (they are summed below to report side sizes) - confirm.
        left_rows, right_rows = node.split(feature, df[feature], label)
        print(' A : {} B: {}'.format(sum(left_rows), sum(right_rows)))
        node.add_left(left_rows)
        node.add_right(right_rows)
        branch_node(node.left, df, threshold, Y, regression)
        branch_node(node.right, df, threshold, Y, regression)
        return

    # Leaf: no usable split was found, or the depth limit has been reached.
    if regression:
        print(str(feature) +'is fueaturea ' + str(label) + str(node.presence))
        targets = data[Y]
        predict = float(sum(targets))/len(targets)
        error = mystats.compute_MSE(predict, list(targets))
    else:
        prob = mystats.binary_probability(data, Y)
        print('PROBABILITY ' + str(prob))
        # Predict class 1 when the probability reaches one half.
        predict = 1 if prob >= .5 else 0
        error = mystats.binary_error(data, Y, predict)
    node.leaf(predict, error)