def parseHashPoint(point, numBuckets):
    """Create a LabeledPoint for this observation using hashing.

    Every raw feature is paired with its column position before hashing, so
    the hash function receives ``(column_index, value)`` tuples rather than
    bare values.

    Args:
        point (str): A comma separated string where the first value is the label and the rest are
            features.
        numBuckets (int): The number of buckets to hash to.

    Returns:
        LabeledPoint: A LabeledPoint with a label (0.0 or 1.0) and a SparseVector of hashed
            features.
    """
    # Split once: the first field is the label, the remaining fields are features.
    fields = point.split(",")
    label = fields[0]

    # Tag each feature with its column position: [(0, f0), (1, f1), ...].
    keyed_features = list(enumerate(fields[1:]))

    # NOTE(review): the third positional argument (True) is kept exactly as in
    # the original; its meaning is defined by hashFunction, which is not
    # visible here -- confirm it is intended for production use.
    hashed = hashFunction(numBuckets, keyed_features, True)

    # Indices and values must be listed in the same order. Sorting only the
    # keys while taking values in raw dict order would pair values with the
    # wrong buckets, so derive the values from the sorted keys.
    indices = sorted(hashed.keys())
    values = [hashed[index] for index in indices]
    features = SparseVector(numBuckets, indices, values)

    return LabeledPoint(label, features)
示例#2
0
	def f(champ):
		"""Build one extended copy of ``partialVect`` per strided slot of ``champ``.

		The slots visited are ``champ``, ``champ + stride``, ``champ + 2 * stride``,
		... for as long as the slot is below ``len(partialVect)``, where ``stride``
		is ``max(champions) + 1``. Each produced vector holds the entries of
		``partialVect`` plus one extra entry with value ``sign`` at that slot.

		``champions``, ``partialVect``, ``sign``, ``numpy`` and ``SparseVector``
		are taken from the enclosing scope.

		Args:
			champ: Offset of the first slot (presumably an integer champion id --
				confirm against the caller).

		Returns:
			list: The newly built SparseVectors, in the order the slots were visited.
		"""
		# Neither value changes inside the loop, so compute each once instead of
		# re-evaluating max()/len() on every iteration.
		size = len(partialVect)
		stride = max(champions) + 1

		newVects = []
		slot = champ
		# NOTE(review): as in the original, a non-positive stride with
		# champ < size never terminates -- assumes champion ids are >= 0.
		while slot < size:
			newVect = SparseVector(size, partialVect.indices, partialVect.values)
			# The extra entry is appended to the arrays after construction; the
			# index is added at the end and is not re-sorted.
			newVect.indices = numpy.append(newVect.indices, [slot])
			newVect.values = numpy.append(newVect.values, [sign])
			newVects.append(newVect)
			slot += stride

		return newVects