def Isotonic_Regression(filename, sc):
    """Train an isotonic regression model on a text file and report its test MSE.

    Every input line is split on commas and each field is converted to float;
    a constant weight of 1.0 is appended, so a two-column ``label,feature``
    file yields ``(label, feature, weight)`` tuples. The data is split 60/40
    into training and test sets with a fixed seed, the model is trained on the
    training part, and the mean squared error on the test part is printed.
    The trained model is not persisted.

    Args:
        filename: Path of the data file handed to ``sc.textFile``. When it is
            empty or ``None``, the hard-coded sample data path is used.
        sc: Context object providing ``textFile`` (presumably a SparkContext).

    Returns:
        The mean squared error between predicted and real labels on the
        test split.
    """
    # Fall back to the sample data only when the caller gave no path, so the
    # ``filename`` argument is actually honoured.
    if not filename:
        filename = "/Users/Jacob/SparkService/data/sample_isotonic_regression_data.txt"

    data = sc.textFile(filename)

    # Create label, feature, weight tuples from input data with the weight
    # set to the default value 1.0.
    parsedData = data.map(
        lambda line: tuple([float(x) for x in line.split(',')]) + (1.0,))

    # Split data into training (60%) and test (40%) sets; seed 11 keeps the
    # split reproducible between runs.
    training, test = parsedData.randomSplit([0.6, 0.4], 11)

    # Create the isotonic regression model from the training data using the
    # default settings of train().
    model = IsotonicRegression.train(training)

    # Create tuples of (predicted label, real label); p[1] is the feature and
    # p[0] the label of each test tuple.
    predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0]))

    # Calculate mean squared error between predicted and real labels.
    meanSquaredError = predictionAndLabel.map(
        lambda pl: math.pow((pl[0] - pl[1]), 2)).mean()
    print("Mean Squared Error = " + str(meanSquaredError))

    return meanSquaredError
def main():
    """Fit an isotonic regression on the loaded records and report its metrics.

    Loads the records, builds one mapping per index in 2..9 via
    ``get_mapping``, converts every record into a ``(label, feature, weight)``
    tuple with weight 1.0, trains the model on all of them, and passes
    ``(true label, predicted label)`` pairs to ``calculate_print_metrics``.
    """
    records = get_records()
    # The records are reused by every get_mapping call and by the map below.
    records.cache()

    # One mapping per index 2..9 (presumably categorical columns - confirm
    # against get_mapping).
    mappings = [get_mapping(records, i) for i in range(2, 10)]
    for m in mappings:
        print(m)

    # Combined size of all mappings, required by extract_sum_feature.
    cat_len = sum(map(len, mappings))

    # (label, feature, weight) tuples; every record gets weight 1.0.
    parsed_data = records.map(lambda r: (extract_label(r),
                                         extract_sum_feature(r, cat_len, mappings),
                                         1.0))
    model = IsotonicRegression.train(parsed_data)

    first = parsed_data.first()
    print(first)

    # p[0] is the true label and p[1] the feature; p[2] is only the constant
    # weight and must not be used as the prediction input.
    true_vs_predicted = parsed_data.map(lambda p: (p[0], model.predict(p[1])))
    print("Isotonic Regression: " + str(true_vs_predicted.take(5)))

    calculate_print_metrics("Isotonic Regression", true_vs_predicted)
sc = SparkContext(appName="PythonIsotonicRegressionExample")

# $example on$
# Turn one loaded point into a (label, feature, weight) tuple.
def parsePoint(labeledData):
    # Only the first feature is used; every point gets the weight 1.0.
    label = labeledData.label
    feature = labeledData.features[0]
    return (label, feature, 1.0)

# Read the LibSVM-formatted sample data.
data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_isotonic_regression_libsvm_data.txt")

# Convert the loaded points into (label, feature, weight) tuples.
parsedData = data.map(parsePoint)

# 60% of the tuples go to training and 40% to testing; the seed is fixed at 11.
training, test = parsedData.randomSplit(weights=[0.6, 0.4], seed=11)

# Fit the isotonic regression model on the training tuples, using the
# default settings of train().
model = IsotonicRegression.train(training)

# Pair the prediction for each test feature with the real label.
predictionAndLabel = test.map(lambda point: (model.predict(point[1]), point[0]))

# Average the squared differences between prediction and label.
meanSquaredError = predictionAndLabel.map(
    lambda pair: math.pow(pair[0] - pair[1], 2)
).mean()
print("Mean Squared Error = " + str(meanSquaredError))

# Write the model out, then read it back from the same location.
model.save(sc, "target/tmp/myIsotonicRegressionModel")
sameModel = IsotonicRegressionModel.load(sc, "target/tmp/myIsotonicRegressionModel")
# $example off$
# Parse the sample data, fit an isotonic regression, evaluate and persist it.
def parsePoint(labeledData):
    """Return a (label, first feature, weight) tuple with the weight fixed at 1.0."""
    return (
        labeledData.label,
        labeledData.features[0],
        1.0,
    )

# `sc` is expected to exist already when this code runs.
data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_isotonic_regression_libsvm_data.txt")

# Each loaded point becomes a (label, feature, weight) tuple.
parsedData = data.map(parsePoint)

# Training gets 60% of the tuples, testing 40%; 11 is the split seed.
training, test = parsedData.randomSplit([0.6, 0.4], seed=11)

# Train with the default settings of IsotonicRegression.train().
model = IsotonicRegression.train(training)

# (predicted label, real label) for every test tuple.
predictionAndLabel = test.map(lambda t: (model.predict(t[1]), t[0]))

# Squared error per pair, then the mean over all pairs.
squaredErrors = predictionAndLabel.map(lambda pr: math.pow((pr[0] - pr[1]), 2))
meanSquaredError = squaredErrors.mean()
print("Mean Squared Error = " + str(meanSquaredError))

# Store the trained model and load it again from the same path.
model.save(sc, "target/tmp/myIsotonicRegressionModel")
sameModel = IsotonicRegressionModel.load(sc, "target/tmp/myIsotonicRegressionModel")
# $example off$