def testKmeans(self):
    """Train a k-means model on the mtcars fixture and check its basic contract.

    Verifies the model type, that the centers come back as a two-row pandas
    DataFrame whose columns match the fixture's column names, that predicting
    on a point with one value per column yields a float, and that predicting
    on a three-value point raises Py4JJavaError.
    """
    # NOTE(review): the positional arguments are presumably
    # (centers, runs, max_iters) -- confirm against ml.kmeans.
    fitted = ml.kmeans(self.mtcars, 2, 5, 10)
    self.assertIsInstance(fitted, ml.KMeansModel)

    # The cluster centers are exposed as a DataFrame with one row per center.
    centers = fitted.centers
    self.assertIsInstance(centers, pd.DataFrame)
    self.assertEqual(len(centers), 2)
    self.assertItemsEqual(centers.columns.tolist(), self.mtcars.colnames)

    # A point with exactly one value per column is accepted.
    full_point = range(self.mtcars.ncol)
    prediction = fitted.predict(full_point)
    self.assertIsInstance(prediction, float)

    # A three-value point is expected to be rejected on the Java side.
    short_point = [0, 1, 2]
    with self.assertRaises(Py4JJavaError):
        fitted.predict(short_point)
# Walk through the DDF API on `ddf`: inspect it, then fit a few models.
# Everything runs inside try/finally so that dm.shutdown() is called even
# when one of the calls below raises.
try:
    # Print the column names, the column/row counts, and the results of the
    # inspection helpers.
    # NOTE(review): this script reads both ddf.cols/ddf.rows and ddf.ncol;
    # confirm that all of these attributes exist in the targeted DDF version.
    print('Columns: ' + ', '.join(ddf.colnames))
    print('Number of columns: {}'.format(ddf.cols))
    print('Number of rows: {}'.format(ddf.rows))
    print(ddf.summary())
    print(ddf.head(2))
    print(ddf.aggregate(['sum(mpg)', 'min(hp)'], ['vs', 'am']))
    print(ddf.five_nums())
    print(ddf.sample(3))

    # Kmeans
    km = ml.kmeans(ddf)
    clu = km.predict(range(0, ddf.ncol))
    # Call form of print: consistent with the prints above, valid on
    # Python 3, and identical output to `print clu` on Python 2.
    print(clu)

    # Linear Regression
    lr = ml.linear_regression_gd(ddf)
    lr.summary()

    # Logistic Regression
    lr = ml.logistic_regression_gd(ddf)
    lr.summary()
finally:
    dm.shutdown()