示例#1
0
def test_set(train_X: DataFrame, train_y: Series, test_X: DataFrame, test_y: Series, print_model: bool):
    """Train, prune, and evaluate a DecisionTree, printing progress to stdout.

    The tree is trained on ``train_X``/``train_y`` using the column labels of
    ``train_X`` as the third ``train`` argument, pruned against the same
    training data, and finally run against ``test_X``/``test_y`` with
    ``display=True``.

    Args:
        train_X: Training feature table.
        train_y: Training class values.
        test_X: Feature table passed to ``tree.test``.
        test_y: Class values passed to ``tree.test``.
        print_model: When true, print the tree both before and after pruning.
    """
    tree = DecisionTree()

    def _show(heading):
        # Dump the current state of the tree only when the caller asked for it.
        if print_model:
            print(heading)
            print(tree)

    # Iterating a DataFrame yields its column labels.
    column_labels = list(train_X)
    tree.train(train_X, train_y, column_labels)
    _show("\n### Model:")

    tree.prune(train_X, train_y)
    _show("\n### Pruned Model:")

    print("\n### Test:")
    tree.test(test_X, test_y, display=True)
示例#2
0
def do_work(csv_file, cancer_value, save_file):
    """Train a DecisionTree from a CSV file and save the model to disk.

    Args:
        csv_file: Path handed to ``load_data``.
        cancer_value: Threshold; each class value ``x`` is converted with
            ``x > cancer_value``.
        save_file: Path the trained model is written to.

    Returns:
        A ``(dt, data, headers)`` tuple: the trained tree plus the data and
        headers returned by ``load_data``.
    """
    data, headers = load_data(csv_file)
    X, y, X_headers = split_class(data, headers)

    print("converting cancer values")
    # The return value is ignored, so convert_values presumably rewrites
    # ``y`` in place -- confirm against its definition.
    convert_values(lambda x: x > cancer_value, y)

    dt = DecisionTree()
    print("creating decision tree")
    dt.train(X, y, X_headers)

    print("saving decision tree")
    # The ``with`` block closes the file on exit; no explicit close() needed.
    # NOTE(review): opened in text mode; if dt.save writes bytes (e.g. a
    # pickle) this should be "wb" -- confirm against DecisionTree.save.
    with open(save_file, "w") as modelfile:
        dt.save(modelfile)

    print("done")
    return dt, data, headers
示例#3
0
# The first column ("class") is the target; every remaining column is a feature.
balancedata.columns = cols
y = balancedata["class"]
X = balancedata[balancedata.columns[1:]]

# Positional 80/20 split: the first 80% of rows train, the rest are held out.
trainlen = int(len(X) * 0.8)
train_X = X[:trainlen]
test_X = X[trainlen:]
train_y = y[:trainlen]
test_y = y[trainlen:]

print("A little taste of the training data.")
print(train_X[:10])
print(train_y[:10])

# Train the model using the basic features of DecisionTree.
# Fit on the training slice only, so the evaluation below runs on rows the
# model has never seen (fitting on all of X, y would leak the test rows).
dt = DecisionTree()
dt.train(train_X, train_y, cols[1:])
print("The model looks like:")
print(dt)
print("Testing it out.")

dt.test(test_X, test_y, display=True)

# Demonstrate saving and loading the model.
with open("whatever.model", "wb") as modelfile:
    dt.save(modelfile)
with open("whatever.model", "rb") as modelfile:
    dt2 = DecisionTree(load_from=modelfile)
    print(dt2)
示例#4
0
# Toy space-separated training table; the last column, "like", is the class.
train_csv = StringIO("""cheese sauce spicy vegetables like
mozza hllnds yes no no
gouda tomato no no yes
mozza tomato yes no yes
jarls bbq no no no
mozza bbq yes yes no
gouda tomato yes yes yes
jarls hllnds yes yes yes
mozza tomato no yes yes
mozza bbq yes no maybe""")
df = pd.read_csv(train_csv, sep=" ")

# Separate the class column from the feature columns.
y = df['like']
X = df.drop(columns='like')
cols = X.columns.values

dt = DecisionTree()
dt.train(X, y, cols)
print(dt.model)

# Rows to classify. Some values ("mozzla", vegetables "maybe") do not occur
# in the training table.
pred_csv = StringIO("""cheese sauce spicy vegetables
mozza hllnds yes no
jarls hllnds yes no
mozzla tomato no yes
jarls tomato no no
jarls bbq no maybe""")
pred = pd.read_csv(pred_csv, sep=" ")

# Show each input row side by side with what the tree predicts for it.
predicted = dt.predict(pred)
print(pd.concat([pred, predicted], axis="columns"))

# Score the same rows against hand-written expected labels.
expected = np.array(["no", "no", "no", "maybe", "yes"])
dt.test(pred, expected, display=True)
示例#5
0
            all_rows = all_rows + idx
    return [x for x in set(all_rows)]


# Load the raw table, drop the rows reported by find_invalid_rows, and
# renumber the index (the old index column is discarded).
data, headers = load_data("test_data.csv")
invalid_rows = find_invalid_rows(data)
data2 = data.drop(invalid_rows).reset_index().drop(columns=['index'])

# Convert three columns in order, announcing each step before it runs.
for _message, _converter, _column in (
    ("converting chemicals", to_lmh, 'Toxic_Chem'),
    ("converting lung cancer", lambda x: x > 60, 'Lung_Cancer'),
    ("converting population density", to_lmh2, 'Population_Density'),
):
    print(_message)
    conv(_converter, _column, data2, headers)

print(data2)
print(len(data2))

X, y, X_headers = split_class(data2, headers)

# Positional 80/20 split: the first 80% of rows train, the rest are held out.
trainlen = int(len(X) * 0.8)
train_X, test_X = X[:trainlen], X[trainlen:]
train_y, test_y = y[:trainlen], y[trainlen:]

print("training 80% of the data")
dt = DecisionTree()
dt.train(train_X, train_y, X_headers)
split_attribute = dt.root_node.split_name
print("Primary split attribute: %s" % (split_attribute))