def Isotonic_Regression(filename, sc):
    """Train an isotonic regression model on a text file and print its test MSE.

    Each input line is split on ',' and every field is converted to float; a
    constant weight of 1.0 is appended, so a two-column "label,feature" line
    becomes a (label, feature, weight) tuple.
    NOTE(review): assumes exactly two comma-separated values per line -- confirm
    against the data file.

    Args:
        filename: Path of the input file. When empty or None, the bundled sample
            data path is used instead (previously the argument was always
            overwritten by that path and therefore ignored).
        sc: Object providing ``textFile(path)`` (a SparkContext in practice).
    """
    if not filename:
        filename = "/Users/Jacob/SparkService/data/sample_isotonic_regression_data.txt"

    data = sc.textFile(filename)

    # Create label, feature, weight tuples from input data with weight set to default value 1.0.
    parsedData = data.map(lambda line: tuple(float(x) for x in line.split(',')) + (1.0,))

    # Split data into training (60%) and test (40%) sets; 11 is the fixed seed.
    training, test = parsedData.randomSplit([0.6, 0.4], 11)

    # Create isotonic regression model from training data.
    # The `isotonic` argument of train() is left at its default here.
    model = IsotonicRegression.train(training)

    # Create tuples of predicted and real labels (p[1] is the feature, p[0] the label).
    predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0]))

    # Calculate mean squared error between predicted and real labels.
    meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean()
    print("Mean Squared Error = " + str(meanSquaredError))

    # Save and load model (left disabled, as in the original)
    #model.save(sc, "target/tmp/myIsotonicRegressionModel")
    #sameModel = IsotonicRegressionModel.load(sc, "target/tmp/myIsotonicRegressionModel")
# Example #2
0
def main():
    """Run two isotonic regression demos and print their results.

    First fits a model on the records returned by ``get_records()`` and prints
    sample predictions and metrics; then runs the libsvm sample-data example
    (train/test split, mean squared error, model save and load).
    """
    records = get_records()
    records.cache()
    _fit_and_report_on_records(records)

    # NOTE(review): get_records() takes no context argument, so it presumably
    # relies on a SparkContext created elsewhere; constructing another one here
    # may fail if that context is still active -- confirm.
    sc = SparkContext(appName="PythonIsotonicRegressionExample")
    _run_libsvm_example(sc)


def _fit_and_report_on_records(records):
    """Fit an isotonic model on ``records`` and print predictions and metrics."""
    mappings = [get_mapping(records, i) for i in range(2, 10)]
    for m in mappings:
        print(m)
    cat_len = sum(map(len, mappings))

    # Build (label, feature, weight) tuples with the weight fixed at 1.0.
    parsed_data = records.map(lambda r: (
        extract_label(r), extract_sum_feature(r, cat_len, mappings), 1.0))
    model = IsotonicRegression.train(parsed_data)

    print(parsed_data.first())

    # Pair the true label (index 0) with the prediction for the feature
    # (index 1). Index 2 is the constant weight and must not be fed to
    # predict(), which is what the previous (p[1], predict(p[2])) did.
    true_vs_predicted = parsed_data.map(lambda p: (p[0], model.predict(p[1])))
    print("Isotonic Regression: " + str(true_vs_predicted.take(5)))

    calculate_print_metrics("Isotonic Regression", true_vs_predicted)


def _run_libsvm_example(sc):
    """Train on the libsvm sample file, print the test MSE, then save and reload the model."""
    # Load and parse the data
    def parsePoint(labeledData):
        return (labeledData.label, labeledData.features[0], 1.0)

    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_isotonic_regression_libsvm_data.txt")

    # Create label, feature, weight tuples from input data with weight set to default value 1.0.
    parsedData = data.map(parsePoint)

    # Split data into training (60%) and test (40%) sets; 11 is the fixed seed.
    training, test = parsedData.randomSplit([0.6, 0.4], 11)

    # Create isotonic regression model from training data.
    # The `isotonic` argument of train() is left at its default here.
    model = IsotonicRegression.train(training)

    # Create tuples of predicted and real labels.
    predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0]))

    # Calculate mean squared error between predicted and real labels.
    meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean()
    print("Mean Squared Error = " + str(meanSquaredError))

    # Save and load model. This is done exactly once: the original repeated the
    # whole example and saved to the same path a second time.
    model.save(sc, "target/tmp/myIsotonicRegressionModel")
    sameModel = IsotonicRegressionModel.load(sc, "target/tmp/myIsotonicRegressionModel")