def main(col_names=None):
    """Build a regression tree from a CSV file and report on it.

    The CSV file name is read from ``sys.argv[1]``. Every cell that parses
    as a float is stored as a float; every other cell is kept as the
    original string. The tree is built with
    ``regression_tree.buildtree(data, min_gain=0.005, min_samples=5)`` and
    printed, followed by its depth as reported by ``dtree_build.max_depth``.

    Optional extra command-line arguments (only their presence matters):

    * a second argument draws the tree to ``<csv file name>.jpg`` using
      ``regression_draw``;
    * a third argument also prints the tree in JSON form and writes it to
      ``<csv file name>.json`` using ``dtree_to_json``.

    Finally, three hard-coded course records are classified with
    ``dtree_build.classify`` and the results printed.

    Args:
        col_names: Passed through unchanged to ``regression_tree.printtree``
            and ``dtree_to_json.dtree_to_jsontree``. Defaults to ``None``.

    Returns:
        None. If no input file name was supplied, a usage message is
        printed and the function returns immediately.
    """
    if len(sys.argv) < 2:  # input file name should be specified
        print("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]

    data = []
    # newline='' is what the csv module asks for, so that it can handle
    # line endings (including ones embedded in quoted fields) itself.
    with open(csv_file_name, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            record = []
            for attribute in row:
                # csv cells are always strings, so a failed conversion can
                # only surface as ValueError; anything else should propagate.
                try:
                    record.append(float(attribute))
                except ValueError:
                    record.append(attribute)
            data.append(record)

    print("Total number of records = ", len(data))
    tree = regression_tree.buildtree(data, min_gain=0.005, min_samples=5)

    regression_tree.printtree(tree, '', col_names)

    max_tree_depth = dtree_build.max_depth(tree)
    print("max number of questions=" + str(max_tree_depth))

    if len(sys.argv) > 2:  # draw option specified
        # Imported lazily so the drawing module is only needed on request.
        import regression_draw
        regression_draw.drawtree(tree, jpeg=csv_file_name + '.jpg')

    if len(sys.argv) > 3:  # create json file for d3.js visualization
        import json
        import dtree_to_json
        json_tree = dtree_to_json.dtree_to_jsontree(tree, col_names)
        print(json_tree)

        # create json data for d3.js interactive visualization
        with open(csv_file_name + ".json", "w") as write_file:
            json.dump(json_tree, write_file)

    # Each record is defined once and used for both the label and the
    # classify() call, so the printed text cannot drift from the data.
    sample_courses = [
        ['teaching', 'not minority', 'female', 'english', 50, 30, 'lower', 7, 4],
        ['teaching', 'not minority', 'male', 'english', 40, 30, 'lower', 6, 4],
        ['teaching', 'not minority', 'male', 'english', 70, 30, 'lower', 4, 4],
    ]
    for course in sample_courses:
        print(f"course {course} is: ", dtree_build.classify(course, tree))
# Example no. 2
# 0
def main(col_names=None):
    """Build a decision tree from a CSV file, print it and optionally export it.

    The CSV file name comes from ``sys.argv[1]``; each row is stored as a
    list of the raw string cells. The tree is built with
    ``dtree_build.buildtree(data, min_gain=0.01, min_samples=5)``, printed
    with ``dtree_build.printtree`` and its depth reported via
    ``dtree_build.max_depth``.

    A second command-line argument (the "draw" option) is accepted but has
    no effect in this version. When a third argument is present, the tree is
    converted with ``dtree_to_json.dtree_to_jsontree``, printed, and written
    to ``<csv file name>.json``.

    Args:
        col_names: Forwarded unchanged to ``dtree_build.printtree`` and
            ``dtree_to_json.dtree_to_jsontree``. Defaults to ``None``.

    Returns:
        None. Without an input file name a usage message is printed and
        nothing else happens.
    """
    # The input file name is mandatory.
    if len(sys.argv) <= 1:
        print ("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]

    # Load every record as a plain list of strings.
    with open(csv_file_name) as csvfile:
        data = [list(record) for record in csv.reader(csvfile, delimiter=',')]

    print("Total number of records = ",len(data))

    tree = dtree_build.buildtree(data, min_gain=0.01, min_samples=5)
    dtree_build.printtree(tree, '', col_names)

    depth = dtree_build.max_depth(tree)
    print("max number of questions=" + str(depth))

    # Drawing is disabled here: a "draw" argument is tolerated but ignored.

    # JSON export for the d3.js visualization needs a third argument.
    if not len(sys.argv) > 3:
        return

    import json
    import dtree_to_json

    json_tree = dtree_to_json.dtree_to_jsontree(tree, col_names)
    print(json_tree)

    # Persist the JSON data used by the d3.js interactive visualization.
    json_path = csv_file_name + ".json"
    with open(json_path, "w") as write_file:
        json.dump(json_tree, write_file)
import dtree_build
import sys

if __name__ == "__main__":
    # Toy data set: every row is [size, color, fruit].
    fruits = [
        [4, 'red', 'apple'],
        [4, 'green', 'apple'],
        [1, 'red', 'cherry'],
        [1, 'green', 'grape'],
        [5, 'red', 'apple'],
    ]

    tree = dtree_build.buildtree(fruits)
    dtree_build.printtree(tree, '', ["size", "color"])

    # Classify a few unseen [size, color] samples; each label is paired
    # with the sample it describes.
    queries = (
        ("fruit [2, 'red'] is: ", [2, 'red']),
        ("fruit [4.5, 'red'] is: ", [4.5, 'red']),
        ("fruit [1.4, 'green'] is: ", [1.4, 'green']),
    )
    for label, sample in queries:
        print(label, dtree_build.classify(sample, tree))

    depth = dtree_build.max_depth(tree)
    print("max number of questions=" + str(depth))

    # Any extra command-line argument turns on drawing; the drawing module
    # is imported only then.
    draw_requested = len(sys.argv) > 1
    if draw_requested:
        import dtree_draw
        dtree_draw.drawtree(tree, jpeg='fruits_dt.jpg')