def testTennis(self):
    """Test the entire program on the tennis data set.

    Loads and enhances 'tennis.csv', fits a decision tree on it, and checks
    that the tree reproduces every training label, both when predicting one
    row at a time and when predicting the whole data set in a single call.
    """
    tennis = mu.enhance_data(mu.extract_data('tennis.csv'))
    dt = decisionTree.DecisionTree(tennis['feature_dict'],
                                   tennis['feature_names'])
    dt.fit(tennis['data'], tennis['target'])

    # Row-by-row: predict() takes a list of rows and returns a list of labels.
    for x, y in zip(tennis['data'], tennis['target']):
        self.assertEqual(dt.predict([x]), [y])

    # Whole training set in one call.
    self.assertEqual(dt.predict(tennis['data']), tennis['target'])
if __name__ == '__main__':
    # Command-line entry point: load a data set and print the result of
    # k_fold_eval on it.
    parser = argparse.ArgumentParser()
    parser.add_argument("train_file",
                        help="Name of file with training data",
                        type=str)
    parser.add_argument("-k", help="number of folds", type=int, default=5)
    parser.add_argument(
        "--ibm",
        help="Flag to indicate that input is IBM data, else plain CSV",
        action="store_true")
    parser.add_argument("--y_col",
                        help="name of column containing target",
                        type=str)
    args = parser.parse_args()

    if args.ibm:
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code -- only pass files from a trusted source.
        data = joblib.load(args.train_file)
    elif args.y_col:
        # Explicit target column requested on the command line.
        data = mlUtil.extract_data(fileName=args.train_file,
                                   targetInfo=args.y_col)
    else:
        data = mlUtil.extract_data(args.train_file)

    # NOTE(review): the original indentation was lost; enhance_data is assumed
    # to apply to the data from every loader above -- confirm.
    data = mlUtil.enhance_data(data)

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(k_fold_eval(data, args.k))
#calculate the average for each value v_precision = sum(v_precisions)/k v_recall = sum(v_recalls)/k v_accuracy = sum(v_accuracys)/k result_dict["test_precision"] = v_precision result_dict["test_recall"] = v_recall result_dict["test_accuracy"] = v_accuracy t_precision = sum(t_precisions)/k t_recall = sum(t_recalls)/k t_accuracy = sum(t_accuracys)/k result_dict["train_precision"] = t_precision result_dict["train_recall"] = t_recall result_dict["train_accuracy"] = t_accuracy return result_dict if __name__ == '__main__': #data = joblib.load("tgmc_stripReal_subset.pkl") data = mlUtil.extract_data("nursery.csv") data = mlUtil.enhance_data(data) print k_fold_eval(data, 5)
print indent+"+-"+val+'-- <'+root.attribute+'>' print indent+"{" for k in root.children.keys(): printTree(root.children[k],k,indentNum+1) print indent+"}" if __name__ == '__main__': #parse the command line arguments parser = argparse.ArgumentParser() parser.add_argument("train_file", help="Name of file with training data", type=str) parser.add_argument("--y_col", help="name of column containing target", type=str) parser.add_argument("--ibm", help="Flag to indicate that input is IBM data, else plain CSV", action="store_true") args = parser.parse_args() #for you to add is logic for handling the --y_col flag if given (for tennis, for example) if args.ibm: data = joblib.load(args.train_file) else: data = mlUtil.extract_data(args.train_file) data = mlUtil.enhance_data(data) #will need some args in constructor tree = DecisionTree('***YOU ADD ARGUMENTS HERE***') tree.fit(data['data'], data['target']) #pritnTree(tree.clf) #test on training data tree.predict(data['data'])
def testSelAttRest(self):
    """Check that selectAttribute returns 4 for the restaurant data set."""
    dt = decisionTree.DecisionTree()
    rest = mu.extract_data('restaurant.csv')
    attrib = dt.selectAttribute(rest['data'], rest['target'])
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(attrib, 4)
def testSelAttTenn(self):
    """Check that selectAttribute returns 0 for the tennis data set."""
    dt = decisionTree.DecisionTree()
    tennis = mu.extract_data('tennis.csv')
    attrib = dt.selectAttribute(tennis['data'], tennis['target'])
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(attrib, 0)
if __name__ == '__main__':
    # Command-line entry point: load a data set, fit a decision tree on it and
    # run the tree on its own training data.
    parser = argparse.ArgumentParser()
    parser.add_argument("train_file",
                        help="Name of file with training data",
                        type=str)
    parser.add_argument("--y_col",
                        help="name of column containing target",
                        type=str)
    parser.add_argument(
        "--ibm",
        help="Flag to indicate that input is IBM data, else plain CSV",
        action="store_true")
    args = parser.parse_args()

    if args.ibm:
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code -- only pass files from a trusted source.
        data = joblib.load(args.train_file)
    elif args.y_col:
        # Honour --y_col instead of silently ignoring it.
        # NOTE(review): assumes mlUtil.extract_data accepts the fileName /
        # targetInfo keywords -- confirm against mlUtil.
        data = mlUtil.extract_data(fileName=args.train_file,
                                   targetInfo=args.y_col)
    else:
        data = mlUtil.extract_data(args.train_file)

    # NOTE(review): the original indentation was lost; enhance_data is assumed
    # to apply to the data from every loader above -- confirm.
    data = mlUtil.enhance_data(data)

    # TODO: the constructor still needs its real arguments.
    tree = DecisionTree('***YOU ADD ARGUMENTS HERE***')
    tree.fit(data['data'], data['target'])
    # printTree(tree.clf)

    # test on training data (the predictions are currently discarded)
    tree.predict(data['data'])
def printTree(root, val='Tree', indentNum=0):
    """Print the decision tree rooted at ``root`` in a nested, indented format.

    Usage: printTree(rootNode)

    Args:
        root: node to print. It must provide ``is_leaf()``; leaves are read
            through ``value``, other nodes through ``attribute`` and
            ``children`` (a mapping whose values are child nodes).
        val: label printed in front of the node; recursive calls pass the
            child's key in its parent's ``children`` mapping.
        indentNum: nesting depth; each level is indented by one tab.
    """
    indent = "\t" * indentNum
    prefix = indent + "+-" + str(val) + '-- '

    if root.is_leaf():
        # str() keeps non-string labels from raising TypeError.
        print(prefix + str(root.value))
        return

    print(prefix + '<' + str(root.attribute) + '>')
    print(indent + "{")
    for key, child in root.children.items():
        printTree(child, key, indentNum + 1)
    print(indent + "}")


if __name__ == '__main__':
    # Demo run on a hard-coded data set; no command-line arguments are parsed.
    data = mlUtil.extract_data("lymphography.csv")
    data = mlUtil.enhance_data(data)
    tree = DecisionTree(attrib_d=data['feature_dict'],
                        attribs=data['feature_names'],
                        default_v="default")
    tree.fit(data['data'], data['target'])
    printTree(tree.clf)

    # test on training data: print the true labels, then the predictions
    print(data['target'])
    print(tree.predict(data['data']))