# NOTE(review): this line appears to have lost its original line breaks and
# indentation. As written, everything after the first '#' is a comment, so the
# only live code is the `for` loop that builds `a = numpy.array(vals)`.
# The text below describes the statements as they read in order; confirm the
# intended layout against the original file before re-flowing it.
#
# 1. Tail of a definition whose header is not visible here: loops over
#    `summary_vals.iteritems()`, builds a numpy array `a` from each `vals`
#    (`a` is only referenced by the disabled debug print of mean/std/len),
#    then returns `summary_vals` unchanged by this loop.
# 2. Driver script (Python 2 `print` statement syntax):
#    - builds `dt = DataTable(FILENAME)` and calls `dt.split(0.10, True)`
#      -- presumably keeps a 10% sample; TODO confirm split()'s semantics.
#    - `print "<label>", x.printInfo()` prints the label followed by whatever
#      printInfo() returns -- NOTE(review): if printInfo() prints and returns
#      None, the output will contain a stray "None"; verify.
#    - `dt.apply(getSize, "name", "size", False)` and the getBrand equivalent
#      -- presumably derive "size"/"brand" columns from "name"; the meaning of
#      the trailing False is not visible here.
#    - shuffles `dt`, then takes two copies and calls split(0.75, True) on one
#      and split(0.75, False) on the other -- presumably complementary
#      train/test partitions; TODO confirm.
#    - from the training copy: `summary` via summarize(...) over
#      category_cols + ["size", "brand"] against "price", `labels` from the
#      "price" column, and `features` from the same column list.
for key, vals in summary_vals.iteritems(): a = numpy.array(vals) # print key,':',numpy.mean(a),numpy.std(a),len(vals) return summary_vals print "Loading data" dt = DataTable(FILENAME) dt.split(0.10, True) print "Table size:", dt.printInfo() print "Transforming" dt.apply(getSize, "name", "size", False) dt.apply(getBrand, "name", "brand", False) dt.shuffle() dt_train = dt.copy() dt_train.split(0.75, True) print "Training Table size:", dt_train.printInfo() dt_test = dt.copy() dt_test.split(0.75, False) print "Test Table size:", dt_test.printInfo() summary = dt_train.summarize(category_cols + ["size", "brand"], "price") labels = dt_train.getCol("price") features = dt_train.getData(category_cols + ["size", "brand"])