from pyspark.ml.regression import IsotonicRegression


def isotonic_regression(trainingDataFrame):
    """Fit an isotonic regression model and return it together with its
    piecewise boundaries and predictions."""
    iso = IsotonicRegression()
    isoModel = iso.fit(trainingDataFrame)
    result = {}
    result["model"] = isoModel
    result["boundaries"] = isoModel.boundaries
    result["predictions"] = isoModel.predictions
    return result
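# A minimal usage sketch for the helper above. It assumes an active
# SparkSession named `spark`; the column names match IsotonicRegression's
# defaults (`features` as a vector column, `label` as a double).
from pyspark.ml.linalg import Vectors

training = spark.createDataFrame(
    [(0.0, Vectors.dense(1.0)), (1.0, Vectors.dense(2.0)), (3.0, Vectors.dense(3.0))],
    ["label", "features"])
fitted = isotonic_regression(training)
print(fitted["boundaries"], fitted["predictions"])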
def model_define(self):
    """Returns a model with the hyperparameters inputted in
    :func:`get_parameters`

    Returns:
        (pyspark.ml.regression.IsotonicRegression): Isotonic Regression model
    """
    return IsotonicRegression()
from pyspark.ml.regression import IsotonicRegression


def isotonicRegression(df, conf):
    """Isotonic Regression training.

    Input : training DataFrame (df), configuration dict (conf)
    Output: fitted isotonic regression model (model)
    """
    feature_col = conf["params"].get("featuresCol", "features")
    label_col = conf["params"].get("labelCol", "label")
    pred_col = conf["params"].get("predictionCol", "prediction")
    isoton = conf["params"].get("isotonic", True)
    feature_index = conf["params"].get("featureIndex", 0)
    ir = IsotonicRegression(featuresCol=feature_col, labelCol=label_col,
                            predictionCol=pred_col, isotonic=isoton,
                            featureIndex=feature_index)
    model = ir.fit(df)
    return model
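# A sketch of the configuration dict this function expects. The keys mirror
# the .get() calls above; every key is optional and falls back to the default
# shown. `df` is assumed to be an existing training DataFrame.
conf = {
    "params": {
        "featuresCol": "features",
        "labelCol": "label",
        "predictionCol": "prediction",
        "isotonic": True,    # False fits an antitonic (decreasing) function
        "featureIndex": 0,   # index used when featuresCol is a vector column
    }
}
model = isotonicRegression(df, conf)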
def calibrate(self, df_to_calibrate):
    # Make initial prediction on calibration data set
    self.predict(df_to_calibrate, 'calibration', False, False, False)
    print('[ {0} ] : Calibrating model'.format(datetime.utcnow()))
    # Convert initial probability to input feature
    pred_df_cal = VectorAssembler(inputCols=['prob_deact'],
                                  outputCol='prob_feature',
                                  handleInvalid='keep') \
        .transform(self.pred_df)
    # Fit calibration function on results
    ir = IsotonicRegression(
        labelCol='deactivated',
        predictionCol='prob_deact_cal',
        featuresCol='prob_feature')
    ir_model = ir.fit(pred_df_cal)
    self.ir_model = ir_model
from pyspark.ml.regression import IsotonicRegression
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder


def Isoton_Regression(df, conf):
    """Input : df [spark.dataframe], conf [configuration (params and whether
    to use a cross validator)]
    Output: Isotonic Regression Model
    """
    # isoton = conf["params"].get("isoton")
    # feature_index = conf["params"].get("featureIndex")
    ir = IsotonicRegression()
    if conf["crossval"].get("crossval") == True:
        grid = ParamGridBuilder().build()
        evaluator = RegressionEvaluator(metricName="r2")
        cv = CrossValidator(estimator=ir, estimatorParamMaps=grid,
                            evaluator=evaluator, parallelism=2)
        # was cv.fit(training): `training` is undefined here, fit on df instead
        irmodel = cv.fit(df)
    if conf["crossval"].get("crossval") == False:
        irmodel = ir.fit(df)
    return irmodel
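# A sketch of the conf dict this helper reads; only conf["crossval"]["crossval"]
# is consulted. `df` is assumed to be an existing training DataFrame.
conf = {
    "params": {},
    "crossval": {"crossval": True},  # False skips CrossValidator entirely
}
model = Isoton_Regression(df, conf)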
from pyspark.ml.feature import StringIndexer, VectorAssembler
from pyspark.ml.regression import IsotonicRegression
from pyspark.ml.evaluation import RegressionEvaluator


def isotonicRegression(df, label, features, adjust):
    """Returns the RMSE and the predictions from the isotonic regression
    model applied to the dataframe with the specified feature columns."""
    ## Columns with non-numerical values are adjusted
    for col in adjust:
        indexer = StringIndexer(inputCol=col, outputCol="{}_num".format(col))
        features.append("{}_num".format(col))
        df = indexer.fit(df).transform(df)
    ## Features vector configured from dataframe for model processing
    assembler = VectorAssembler(inputCols=features, outputCol="features")
    assembled = assembler.transform(df)
    ir = IsotonicRegression(featuresCol='features', labelCol=label)
    irModel = ir.fit(assembled)
    predictions = irModel.transform(assembled)
    ## Evaluator required for RMSE estimation
    evaluator = RegressionEvaluator(labelCol=label, metricName="rmse")
    rmse = evaluator.evaluate(predictions)
    result = {
        "RMSE": rmse,
        "predictions": [r["prediction"]
                        for r in predictions.select("prediction").collect()]
    }
    return result
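# A hedged call sketch; the column names below are hypothetical. Note that
# IsotonicRegression fits on a single feature (featureIndex, default 0), so
# only the first assembled column actually drives the fit.
result = isotonicRegression(df, label="price",
                            features=["sqft"], adjust=["city"])
print("RMSE:", result["RMSE"])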
# $example on$
from pyspark.ml.regression import IsotonicRegression
# $example off$
from pyspark.sql import SparkSession

"""
An example demonstrating isotonic regression.
Run with:
  bin/spark-submit examples/src/main/python/ml/isotonic_regression_example.py
"""

if __name__ == "__main__":
    spark = SparkSession\
        .builder\
        .appName("IsotonicRegressionExample")\
        .getOrCreate()

    # $example on$
    # Loads data.
    dataset = spark.read.format("libsvm")\
        .load("data/mllib/sample_isotonic_regression_libsvm_data.txt")

    # Trains an isotonic regression model.
    model = IsotonicRegression().fit(dataset)
    print("Boundaries in increasing order: " + str(model.boundaries))
    print("Predictions associated with the boundaries: " + str(model.predictions))

    # Makes predictions.
    model.transform(dataset).show()
    # $example off$

    spark.stop()
def binomialSparkIsoRegression(self):
    # Spark ML estimators fit on a single DataFrame carrying both the features
    # and label columns (not separate X/y as in scikit-learn); this assumes
    # self.Xtrain already includes the label column.
    regr = IsotonicRegression()
    model = regr.fit(self.Xtrain)
    return model
def scalarSparkIsoRegression(self):
    regr = IsotonicRegression()
    model = regr.fit(self.train)
    return model
from pyspark.ml.regression import (RandomForestRegressor, GBTRegressor,
                                   GeneralizedLinearRegression, IsotonicRegression)
from pyspark.ml.evaluation import RegressionEvaluator

# Splitting into train and test sets
# X_train, X_test = X.randomSplit([0.8, 0.2])
xtr = X_train.toPandas()
xtr.to_csv('X_train.csv')
xtst = X_test.toPandas()
xtst.to_csv('X_test.csv')

rf = RandomForestRegressor(labelCol='label', featuresCol="scaledFeatures",
                           numTrees=500)
xgb = GBTRegressor(labelCol='label', featuresCol="features", maxIter=100)
glr = GeneralizedLinearRegression(family="gamma", link="inverse",
                                  maxIter=100, regParam=0.3)
iso = IsotonicRegression()

# Only the random forest is fit and evaluated below; the other estimators
# are defined but unused here.
model = rf.fit(X_train)
train_pred = model.transform(X_train)
trpred = train_pred.toPandas()
trpred.to_csv('train_pred_rf.csv')
pred = model.transform(X_test)
tstpred = pred.toPandas()
tstpred.to_csv('test_pred_rf.csv')

evaluator = RegressionEvaluator(labelCol="label", predictionCol="prediction",
                                metricName="rmse")
rmse = evaluator.evaluate(pred)
print("Root Mean Squared Error (RMSE) on test data for RF = %g" % rmse)
# DBTITLE 1,Isotonic Regression Model
from pyspark.sql.functions import col
from pyspark.ml.feature import RFormula
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.regression import IsotonicRegression
from pyspark.ml.pipeline import Pipeline

# When neu is included in the numerator it dominates, so it is down-weighted
# in the denominator instead.
labelPredDF1 = labelPredDF.withColumn(
    "proportion",
    col("pos") / (col("neg") + col("neu") / 3 + .000000001))

trainDF, testDF = labelPredDF1.randomSplit([.8, .2], seed=42)

rf = RFormula(formula="label ~ neg + pos + neu + compound")
lr = LogisticRegression(fitIntercept=True)
ir = IsotonicRegression(featuresCol='proportion',
                        predictionCol='prediction', isotonic=True)
pipeline = Pipeline(stages=[ir])
pipelineModel = pipeline.fit(trainDF)
testPredDF = pipelineModel.transform(testDF)

# COMMAND ----------

display(testPredDF)

# COMMAND ----------

# DBTITLE 1,Logistic Regression Model
from pyspark.ml.feature import RFormula
from pyspark.ml.classification import LogisticRegression
plot_lr_model()

# ## Exercises

# In the following exercises we use *isotonic regression* to fit a monotonic
# function to the data.

# (1) Import the `IsotonicRegression` class from the regression module.

from pyspark.ml.regression import IsotonicRegression

# (2) Create an instance of the `IsotonicRegression` class. Use the same
# features and label that we used for our linear regression model.

ir = IsotonicRegression(featuresCol="features", labelCol="duration")
print(ir.explainParams())

# (3) Fit the isotonic regression model on the train data. It may take a
# minute for the fit to complete. Note that this will produce an instance of
# the `IsotonicRegressionModel` class.

ir_model = ir.fit(train)
type(ir_model)

# (4) The model parameters are available in the `boundaries` and `predictions`
# attributes of the isotonic regression model. Print these attributes.

ir_model.boundaries
ir_model.predictions
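# A possible continuation sketch: apply the fitted model to held-out data.
# `test` is assumed to exist with the same schema as `train`.
pred = ir_model.transform(test)
pred.select("duration", "prediction").show(5)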