def parseHashPoint(point, numBuckets):
    """Create a LabeledPoint for this observation using hashing.

    The first comma-separated field is taken as the label; every remaining
    field is paired with its zero-based position and the resulting
    ``(position, value)`` tuples are passed to ``hashFunction``.

    Args:
        point (str): A comma separated string where the first value is the
            label and the rest are features.
        numBuckets (int): The number of buckets to hash to; also used as the
            size of the resulting SparseVector.

    Returns:
        LabeledPoint: A LabeledPoint whose label is ``float`` of the first
            field and whose features are a SparseVector of hashed features.

    Raises:
        ValueError: If the first field cannot be converted to ``float``.
    """
    fields = point.split(",")
    label = float(fields[0])

    # Key each raw feature by its column position: (0, v0), (1, v1), ...
    keyed_features = list(enumerate(fields[1:]))

    # hashFunction is defined outside this block; it is expected to return a
    # mapping of bucket index -> value (the code below relies on .items()).
    hashed = hashFunction(numBuckets, keyed_features, True)

    # Sort indices and values *together* so every value stays attached to its
    # own bucket index; sorting only the keys would scramble the pairing.
    ordered = sorted(hashed.items())
    indices = [bucket for bucket, _ in ordered]
    values = [value for _, value in ordered]

    features = SparseVector(numBuckets, indices, values)
    return LabeledPoint(label, features)
def f(champ):
    """Build one extended copy of ``partialVect`` per strided slot of ``champ``.

    Slots are ``champ``, ``champ + stride``, ``champ + 2 * stride``, ... for
    as long as the slot is below ``len(partialVect)``, where ``stride`` is
    ``max(champions) + 1``. For each slot a new SparseVector is created from
    ``partialVect``'s indices and values, then the slot is appended to its
    indices and ``sign`` is appended to its values.

    ``champions``, ``partialVect`` and ``sign`` are not parameters; they are
    resolved from the surrounding scope.

    Args:
        champ: Base slot offset to start from.

    Returns:
        list: The newly created SparseVectors, in increasing slot order
            (empty when ``champ`` is not below ``len(partialVect)``).
    """
    stride = max(champions) + 1
    size = len(partialVect)

    expanded = []
    step = 0
    slot = champ
    while slot < size:
        candidate = SparseVector(size, partialVect.indices, partialVect.values)
        # NOTE(review): the new slot is appended at the end of the index
        # array rather than inserted in sorted position - confirm that the
        # consumers of these vectors do not require ordered indices.
        candidate.indices = numpy.append(candidate.indices, [slot])
        candidate.values = numpy.append(candidate.values, [sign])
        expanded.append(candidate)

        step += 1
        slot = champ + step * stride
    return expanded