def test_set(train_X: DataFrame, train_y: Series, test_X: DataFrame, test_y: Series, print_model: bool):
    """Train, prune and evaluate a fresh DecisionTree on one train/test split.

    Args:
        train_X: Training features; its column labels (``list(train_X)``)
            are handed to ``DecisionTree.train`` as the attribute names.
        train_y: Training labels; also reused as the pruning data.
        test_X: Features passed to ``DecisionTree.test``.
        test_y: Labels passed to ``DecisionTree.test``.
        print_model: When true, print the tree after training and again
            after pruning.

    Returns:
        None. Results are reported through ``print`` and
        ``DecisionTree.test(..., display=True)``.
    """

    def show(title):
        # Dump the current state of the tree under a heading, if requested.
        if print_model:
            print(title)
            print(model)

    model = DecisionTree()
    feature_names = list(train_X)
    model.train(train_X, train_y, feature_names)
    show("\n### Model:")

    # Pruning is done against the same data the tree was trained on.
    model.prune(train_X, train_y)
    show("\n### Pruned Model:")

    print("\n### Test:")
    model.test(test_X, test_y, display=True)
def do_work(csv_file, cancer_value, save_file):
    """Load a CSV, threshold its class values, train a DecisionTree and save it.

    Args:
        csv_file: Forwarded unchanged to ``load_data``.
        cancer_value: Threshold; every class value ``x`` is converted with
            the predicate ``x > cancer_value``.
        save_file: Path of the file the trained model is written to.

    Returns:
        A ``(dt, data, headers)`` tuple: the trained tree plus the two
        values returned by ``load_data``.
    """
    data, headers = load_data(csv_file)
    X, y, X_headers = split_class(data, headers)

    print("converting cancer values")
    # The return value is ignored, so convert_values is relied on to
    # update ``y`` itself -- presumably in place; confirm against the helper.
    convert_values(lambda x: x > cancer_value, y)

    dt = DecisionTree()
    print("creating decision tree")
    dt.train(X, y, X_headers)

    print("saving decision tree")
    # NOTE(review): text mode is kept from the original; confirm that
    # DecisionTree.save writes str rather than bytes.
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open(save_file, "w") as modelfile:
        dt.save(modelfile)

    print("done")
    return dt, data, headers
# Demo script: label the columns, split 80/20, train, test, then round-trip
# the model through a file.
balancedata.columns = cols
y = balancedata["class"]
# Every column after the first is used as a feature.
X = balancedata[balancedata.columns[1:]]

# The first 80% of the rows are for training, the remainder for testing.
trainlen = int(len(X) * 0.8)
train_X = X[:trainlen]
test_X = X[trainlen:]
train_y = y[:trainlen]
test_y = y[trainlen:]

print("A little taste of the training data.")
print(train_X[:10])
print(train_y[:10])

# Train the model using the basic features of DecisionTree.
# Only the training slice is used here: fitting on the full X/y would let
# the model see the very rows it is evaluated on below.
dt = DecisionTree()
dt.train(train_X, train_y, cols[1:])

print("The model looks like:")
print(dt)

print("Testing it out.")
dt.test(test_X, test_y, display=True)

# Demonstrate saving and loading the model.
with open("whatever.model", "wb") as modelfile:
    dt.save(modelfile)

with open("whatever.model", "rb") as modelfile:
    dt2 = DecisionTree(load_from=modelfile)

print(dt2)
# Small space-separated training table, one record per line; the last column
# ("like") is the class to learn.
df = pd.read_csv(StringIO("""cheese sauce spicy vegetables like
mozza hllnds yes no no
gouda tomato no no yes
mozza tomato yes no yes
jarls bbq no no no
mozza bbq yes yes no
gouda tomato yes yes yes
jarls hllnds yes yes yes
mozza tomato no yes yes
mozza bbq yes no maybe"""), sep=" ")

X = df.drop('like', axis=1)
y = df['like']
cols = X.columns.values

dt = DecisionTree()
dt.train(X, y, cols)
print(dt.model)

# Rows to classify. Some values ("mozzla", "maybe" for vegetables) do not
# occur in the training table -- NOTE(review): presumably deliberate, to
# exercise unseen attribute values; confirm.
pred = pd.read_csv(StringIO("""cheese sauce spicy vegetables
mozza hllnds yes no
jarls hllnds yes no
mozzla tomato no yes
jarls tomato no no
jarls bbq no maybe"""), sep=" ")

# Show each input row next to its predicted class.
print(pd.concat([pred, dt.predict(pred)], axis=1))

# One expected label per row of ``pred``.
dt.test(pred, np.array(["no", "no", "no", "maybe", "yes"]), display=True)
# NOTE(review): the next two statements are the tail of a function whose
# `def` line lies outside this view; they are left untouched and their
# original indentation cannot be recovered from the collapsed line.
all_rows = all_rows + idx
return [x for x in set(all_rows)]

# Load the raw table, drop the rows reported by find_invalid_rows, then
# renumber the index (reset_index adds the old index as an "index" column,
# which is dropped again straight away -- assumes a pandas DataFrame).
data, headers = load_data("test_data.csv")
data2 = data.drop(
    find_invalid_rows(data)).reset_index().drop(columns=['index'])

# Each conv call is made for its effect only (its return value is
# discarded) -- presumably it rewrites the named column of data2 using the
# given function; confirm against conv's definition.
print("converting chemicals")
conv(to_lmh, 'Toxic_Chem', data2, headers)
print("converting lung cancer")
# The lung cancer column is converted with the predicate x > 60.
conv(lambda x: x > 60, 'Lung_Cancer', data2, headers)
print("converting population density")
conv(to_lmh2, 'Population_Density', data2, headers)
print(data2)
print(len(data2))

# Separate features from the class, then split by position: the first 80%
# of the rows for training, the rest for testing.
X, y, X_headers = split_class(data2, headers)
trainlen = int(len(X) * 0.8)
train_X = X[:trainlen]
train_y = y[:trainlen]
# test_X / test_y are prepared here but not used within the visible lines.
test_X = X[trainlen:]
test_y = y[trainlen:]

print("training 80% of the data")
dt = DecisionTree()
dt.train(train_X, train_y, X_headers)
# Report which attribute the tree's root node splits on.
print("Primary split attribute: %s" % (dt.root_node.split_name))