示例#1
0
        # print key,':',numpy.mean(a),numpy.std(a),len(vals)
    return summary_vals


# Load phase: build a DataTable from FILENAME and report its size.
print "Loading data"
dt = DataTable(FILENAME)
# NOTE(review): presumably keeps only a 10% portion of the rows, with the
# boolean selecting which side of the split is kept -- confirm against
# DataTable.split, which is not visible here.
dt.split(0.10, True)
# Python 2 print statement: the trailing comma suppresses the newline, so the
# label is presumably meant to share a line with whatever printInfo() prints.
print "Table size:",
dt.printInfo()

# Transform phase: add the "size" and "brand" columns, then shuffle the table.
print "Transforming"
# NOTE(review): presumably apply(func, source_col, dest_col, flag) stores
# func applied to the "name" column in the destination column; the meaning
# of the trailing False is not visible here -- confirm against DataTable.apply.
dt.apply(getSize, "name", "size", False)
dt.apply(getBrand, "name", "brand", False)
# The shuffle happens once, before the train/test copies are taken further
# down, so both copies start from the same shuffled table.
dt.shuffle()

# Training table: an independent copy of dt, split in place.
# NOTE(review): split(0.75, True) here and split(0.75, False) below look like
# the two complementary sides of a 75/25 split -- confirm against
# DataTable.split, which is not visible here.
dt_train = dt.copy()
dt_train.split(0.75, True)
# Trailing comma (Python 2) keeps the label on the same output line as the
# text printed next.
print "Training Table size:",
dt_train.printInfo()

# Test table: a second copy of the same dt, split with the opposite flag.
dt_test = dt.copy()
dt_test.split(0.75, False)
print "Test Table size:",
dt_test.printInfo()

# Summarize "price" over the category columns plus "size" and "brand",
# using the training table only.
# NOTE(review): the structure of the returned summary is defined by
# DataTable.summarize, which is not visible here.
summary = dt_train.summarize(category_cols + ["size", "brand"], "price")

# Pull the "price" column and the data for the same column list out of the
# training table; presumably these are the targets and inputs for the
# training step announced below -- verify against the code that follows.
labels = dt_train.getCol("price")
features = dt_train.getData(category_cols + ["size", "brand"])

print "Training"