示例#1
0
文件: test_ml.py 项目: datascibox/DDF
    def testKmeans(self):
        """Check the k-means model type, its centers, and predict().

        The model is built from ``self.mtcars`` with arguments ``2, 5, 10``;
        the test expects two centers whose columns match the DDF's column
        names (presumably the first argument is the cluster count -- confirm
        against ``ml.kmeans``).
        """
        fitted = ml.kmeans(self.mtcars, 2, 5, 10)
        self.assertIsInstance(fitted, ml.KMeansModel)

        # Centers come back as a pandas frame with one row per center.
        self.assertIsInstance(fitted.centers, pd.DataFrame)
        self.assertEqual(len(fitted.centers), 2)
        center_columns = fitted.centers.columns.tolist()
        self.assertItemsEqual(center_columns, self.mtcars.colnames)

        # An input with as many values as the DDF has columns yields a float.
        full_width_point = range(self.mtcars.ncol)
        predicted = fitted.predict(full_width_point)
        self.assertIsInstance(predicted, float)

        # A three-value input is expected to fail with Py4JJavaError.
        with self.assertRaises(Py4JJavaError):
            fitted.predict([0, 1, 2])
示例#2
0
    def testKmeans(self):
        """Exercise ``ml.kmeans`` on ``self.mtcars`` and validate the result.

        Verifies the returned model's class, that ``centers`` is a pandas
        DataFrame with two rows and the DDF's column names, that ``predict``
        returns a float for an input with ``ncol`` values, and that a
        three-value input raises ``Py4JJavaError``.
        """
        km = ml.kmeans(self.mtcars, 2, 5, 10)
        self.assertIsInstance(km, ml.KMeansModel)
        self.assertIsInstance(km.centers, pd.DataFrame)
        self.assertEqual(len(km.centers), 2)

        actual_columns = km.centers.columns.tolist()
        expected_columns = self.mtcars.colnames
        self.assertItemsEqual(actual_columns, expected_columns)

        cluster = km.predict(range(self.mtcars.ncol))
        self.assertIsInstance(cluster, float)

        # Callable form of assertRaises: predict([0, 1, 2]) must raise.
        self.assertRaises(Py4JJavaError, km.predict, [0, 1, 2])
示例#3
0
# Walk through basic DDF inspection calls and then the ML helpers.
# `ddf`, `ml` and `dm` are expected to be defined earlier in the script.

# Column names and dimensions.
print('Columns: ' + ', '.join(ddf.colnames))

print('Number of columns: {}'.format(ddf.cols))
print('Number of rows: {}'.format(ddf.rows))

# Summary of the DDF.
print(ddf.summary())

# head(2) -- presumably the first two rows; confirm against the DDF API.
print(ddf.head(2))

# Aggregate sum(mpg) and min(hp); the second list ('vs', 'am') is presumably
# the group-by columns -- confirm against the DDF API.
print(ddf.aggregate(['sum(mpg)', 'min(hp)'], ['vs', 'am']))

print(ddf.five_nums())

print(ddf.sample(3))

# Kmeans
km = ml.kmeans(ddf)
clu = km.predict(range(0, ddf.ncol))
# Use the call form of print like every other print in this script; the
# bare `print clu` statement is a SyntaxError on Python 3, while
# `print(clu)` with a single argument behaves the same on Python 2.
print(clu)

# Linear Regression
lr = ml.linear_regression_gd(ddf)
# NOTE(review): the return value is discarded here, unlike ddf.summary()
# above, which is printed -- confirm that the model's summary() prints itself.
lr.summary()

# Logistic Regression
lr = ml.logistic_regression_gd(ddf)
lr.summary()

# Shut the manager down once the demo is finished.
dm.shutdown()