示例#1
0
 def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||"):
     """Train a k-means clustering model.

     Each element of ``rdd`` is mapped through ``_convert_to_vector`` and the
     result is handed to the "trainKMeansModel" MLlib function together with
     ``k``, ``maxIterations``, ``runs`` and ``initializationMode``.

     :return: a KMeansModel built from the trained model's cluster centers,
         each converted with ``toArray()``.
     """
     vectors = rdd.map(_convert_to_vector)
     # Request caching of the converted data (cache=True) before training.
     java_rdd = _to_java_object_rdd(vectors, cache=True)
     java_model = callMLlibFunc(
         "trainKMeansModel", java_rdd, k, maxIterations, runs, initializationMode)
     java_centers = callJavaFunc(rdd.context, java_model.clusterCenters)
     return KMeansModel([center.toArray() for center in java_centers])
示例#2
0
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """Validate the input, default the initial weights, and train a model.

    :param train_func: callable invoked as ``train_func(java_rdd, weights)``;
        it must return a ``(weights, intercept)`` pair.
    :param modelClass: callable invoked as ``modelClass(weights, intercept)``;
        its result is returned.
    :param data: dataset whose ``first()`` element must be a LabeledPoint.
    :param initial_weights: starting weights. When None or empty, a list of
        zeros with one entry per feature of the first record is used.
    :raises ValueError: if the first record is not a LabeledPoint.
    """
    first = data.first()
    if not isinstance(first, LabeledPoint):
        # Wrap in a 1-tuple: a bare tuple record would otherwise be unpacked
        # as %-format arguments and raise TypeError instead of this error.
        raise ValueError("data should be an RDD of LabeledPoint, but got %s" % (first,))

    # Decide whether weights were supplied without evaluating their truth
    # value, which is ambiguous for multi-element NumPy arrays.
    if initial_weights is None:
        weights_missing = True
    else:
        try:
            weights_missing = len(initial_weights) == 0
        except TypeError:
            # Unsized values keep the previous plain-truthiness behaviour.
            weights_missing = not initial_weights
    if weights_missing:
        # Reuse the record fetched above instead of calling first() again.
        initial_weights = [0.0] * len(first.features)

    weights, intercept = train_func(_to_java_object_rdd(data, cache=True),
                                    _convert_to_vector(initial_weights))
    return modelClass(weights, intercept)
示例#3
0
 def _prepare(cls, ratings):
     """Convert ``ratings`` for the Java side via ``_to_java_object_rdd``.

     The first element decides how the data is handled: a Rating is passed
     through unchanged, a tuple or list causes every element to be mapped to
     ``Rating(*element)``, and anything else is rejected.

     :raises ValueError: if the first element is neither a Rating nor a
         tuple/list.
     """
     assert isinstance(ratings, RDD), "ratings should be RDD"
     sample = ratings.first()
     if isinstance(sample, Rating):
         return _to_java_object_rdd(ratings, True)
     if not isinstance(sample, (tuple, list)):
         raise ValueError("rating should be RDD of Rating or tuple/list")
     as_ratings = ratings.map(lambda row: Rating(*row))
     return _to_java_object_rdd(as_ratings, True)
示例#4
0
 def _prepare(cls, ratings):
     """Return ``ratings`` converted through ``_to_java_object_rdd``.

     Only the first element is inspected. If it is already a Rating the data
     is used as-is; if it is a tuple or list, each element is turned into a
     Rating by argument unpacking first.

     :raises ValueError: when the first element is of any other type.
     """
     assert isinstance(ratings, RDD), "ratings should be RDD"
     head = ratings.first()
     needs_conversion = not isinstance(head, Rating)
     if needs_conversion and not isinstance(head, (tuple, list)):
         raise ValueError("rating should be RDD of Rating or tuple/list")
     if needs_conversion:
         ratings = ratings.map(lambda fields: Rating(*fields))
     return _to_java_object_rdd(ratings, True)
示例#5
0
 def test_to_java_object_rdd(self):  # SPARK-6660
     """Check that converting a uniform random RDD keeps all 10 elements."""
     # Plain int seed: the Python 2 long literal ``0L`` is a SyntaxError on
     # Python 3, and ``0`` is accepted by both.
     data = RandomRDDs.uniformRDD(self.sc, 10, 5, seed=0)
     self.assertEqual(_to_java_object_rdd(data).count(), 10)
示例#6
0
文件: tests.py 项目: HodaAlemi/spark
 def test_to_java_object_rdd(self):  # SPARK-6660
     """Check that ``_to_java_object_rdd`` preserves the element count."""
     expected_count = 10
     uniform = RandomRDDs.uniformRDD(self.sc, expected_count, 5, seed=0)
     java_rdd = _to_java_object_rdd(uniform)
     self.assertEqual(java_rdd.count(), expected_count)
示例#7
0
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """Default the initial weights if needed, train, and wrap the result.

    :param train_func: callable invoked as ``train_func(java_rdd, weights)``;
        it must return a ``(weights, intercept)`` pair.
    :param modelClass: callable invoked as ``modelClass(weights, intercept)``;
        its result is returned.
    :param data: dataset passed to ``_to_java_object_rdd``; its first record's
        ``features`` length sizes the default weights.
    :param initial_weights: starting weights. When None or empty, a list of
        zeros with one entry per feature of the first record is used.
    """
    # Avoid ``initial_weights or ...``: the truth value of a multi-element
    # NumPy array is ambiguous and raises ValueError.
    if initial_weights is None:
        weights_missing = True
    else:
        try:
            weights_missing = len(initial_weights) == 0
        except TypeError:
            # Unsized values keep the previous plain-truthiness behaviour.
            weights_missing = not initial_weights
    if weights_missing:
        initial_weights = [0.0] * len(data.first().features)

    weights, intercept = train_func(_to_java_object_rdd(data, cache=True),
                                    _convert_to_vector(initial_weights))
    return modelClass(weights, intercept)
示例#8
0
文件: regression.py 项目: BViki/spark
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """Train through ``train_func`` and build a ``modelClass`` from the result.

    :param train_func: callable invoked as ``train_func(java_rdd, weights)``;
        it must return a ``(weights, intercept)`` pair.
    :param modelClass: callable invoked as ``modelClass(weights, intercept)``;
        its result is returned.
    :param data: dataset passed to ``_to_java_object_rdd``; its first record's
        ``features`` length sizes the default weights.
    :param initial_weights: starting weights. When None or empty, zeros are
        used, one per feature of the first record.
    """
    # Check for "not supplied" explicitly; evaluating a multi-element NumPy
    # array in a boolean context (as ``x or default`` does) raises ValueError.
    if initial_weights is None:
        use_default = True
    else:
        try:
            use_default = len(initial_weights) == 0
        except TypeError:
            # No len(): fall back to truthiness, as the old expression did.
            use_default = not initial_weights
    if use_default:
        initial_weights = [0.0] * len(data.first().features)

    weights, intercept = train_func(_to_java_object_rdd(data, cache=True),
                                    _convert_to_vector(initial_weights))
    return modelClass(weights, intercept)