示例#1
0
        key = "|".join([data[c][i] for c in category_cols])
        summary_vals[key].append(data["price"][i])
    for key, vals in summary_vals.iteritems():
        a = numpy.array(vals)
        # print key,':',numpy.mean(a),numpy.std(a),len(vals)
    return summary_vals


# Driver script (uses Python 2 print statements): load the table, add two
# columns computed from "name", build train/test copies, and summarize the
# training copy.
print "Loading data"
dt = DataTable(FILENAME)
# NOTE(review): presumably keeps a 10% portion of the rows; the meaning of
# the boolean flag is not visible here -- confirm against DataTable.split.
dt.split(0.10, True)
# The trailing comma suppresses the newline of a Python 2 print statement, so
# anything printInfo() prints continues on the same line as the label.
print "Table size:",
dt.printInfo()

print "Transforming"
# NOTE(review): presumably runs getSize / getBrand over the "name" column and
# stores the results in "size" / "brand"; the purpose of the trailing False
# argument is not shown here -- confirm against DataTable.apply.
dt.apply(getSize, "name", "size", False)
dt.apply(getBrand, "name", "brand", False)
# Shuffle once, before the copies below are taken.
dt.shuffle()

# Both copies are split with the same 0.75 fraction but opposite flags
# (True here, False for the test copy). Presumably these select complementary
# 75% / 25% portions of the same rows -- TODO confirm in DataTable.split.
dt_train = dt.copy()
dt_train.split(0.75, True)
print "Training Table size:",
dt_train.printInfo()

dt_test = dt.copy()
dt_test.split(0.75, False)
print "Test Table size:",
dt_test.printInfo()

# Summarize "price" on the training copy only, keyed by category_cols plus
# the "size" and "brand" columns.
summary = dt_train.summarize(category_cols + ["size", "brand"], "price")