Example #1
from pyspark.ml.regression import IsotonicRegression

def isotonic_regression(trainingDataFrame):
    # Fit an isotonic regression model and return it together with its
    # piecewise-constant boundaries and the predictions at those boundaries.
    iso = IsotonicRegression()
    isoModel = iso.fit(trainingDataFrame)
    result = {}
    result["model"] = isoModel
    result["boundaries"] = isoModel.boundaries
    result["predictions"] = isoModel.predictions
    return result
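A minimal usage sketch for the helper above, assuming an active SparkSession named spark and a toy DataFrame with the default label and features columns (both the session name and the data are assumptions for illustration):

from pyspark.ml.linalg import Vectors

# Hypothetical toy data: the label should be (roughly) non-decreasing in the feature.
train_df = spark.createDataFrame(
    [(0.1, Vectors.dense(1.0)),
     (0.4, Vectors.dense(2.0)),
     (0.9, Vectors.dense(3.0))],
    ["label", "features"])

out = isotonic_regression(train_df)
print(out["boundaries"], out["predictions"])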
Example #2
    def model_define(self):
        """Returns a model with the hyperparameters set in :func:
        `get_parameters`

        Returns:
            (pyspark.ml.regression.IsotonicRegression):
                Isotonic Regression model
        """
        return IsotonicRegression()
Example #3
def isotonicRegression(df, conf):
  """ Isotonic Regression training
        Input  : - training DataFrame (df)
                 - configuration dict with a "params" entry (conf)
        Output : - fitted isotonic regression model (model)
  """
  feature_col = conf["params"].get("featuresCol", "features")
  label_col = conf["params"].get("labelCol", "label")
  pred_col = conf["params"].get("predictionCol", "prediction")
  isoton = conf["params"].get("isotonic", True)
  feature_index = conf["params"].get("featureIndex", 0)

  ir = IsotonicRegression(featuresCol=feature_col, labelCol=label_col,
                          predictionCol=pred_col, isotonic=isoton,
                          featureIndex=feature_index)

  model = ir.fit(df)
  return model
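A hedged example of the conf dict the function above reads; the keys mirror the defaults it falls back to, and the call assumes df already contains the features and label columns:

conf = {
    "params": {
        "featuresCol": "features",
        "labelCol": "label",
        "predictionCol": "prediction",
        "isotonic": True,      # fit a non-decreasing function
        "featureIndex": 0      # feature index used when featuresCol is a vector
    }
}
model = isotonicRegression(df, conf)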
Example #4
    def calibrate(self, df_to_calibrate):
        # Make initial prediction on calibration data set
        self.predict(df_to_calibrate, 'calibration', False, False, False)
        print('[ {0} ] : Calibrating model'.format(datetime.utcnow()))

        # Convert initial probability to input feature
        pred_df_cal = VectorAssembler(inputCols=['prob_deact'], outputCol='prob_feature', handleInvalid='keep') \
            .transform(self.pred_df)

        # Fit calibration function on results
        ir = IsotonicRegression(
            labelCol='deactivated',
            predictionCol='prob_deact_cal',
            featuresCol='prob_feature'
        )
        ir_model = ir.fit(pred_df_cal)
        self.ir_model = ir_model
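Once fitted, the calibrator above would typically be applied with transform; a minimal sketch of a companion method, assuming the column and attribute names used in the snippet and that VectorAssembler is imported as before (the method name apply_calibration is hypothetical):

    def apply_calibration(self, df_to_score):
        # Rebuild the 'prob_feature' column the calibrator was trained on
        assembled = VectorAssembler(inputCols=['prob_deact'], outputCol='prob_feature',
                                    handleInvalid='keep').transform(df_to_score)
        # transform adds the calibrated probability column 'prob_deact_cal'
        return self.ir_model.transform(assembled)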
Example #5
    def Isoton_Regression(df, conf):
        """input  : df [spark.dataframe], conf [configuration (params and whether to use the cross validator)]
           output : Isotonic Regression Model"""
        #        isoton = conf["params"].get("isoton")
        #        feature_index = conf["params"].get("featureIndex")

        ir = IsotonicRegression()

        if conf["crossval"].get("crossval") == True:
            grid = ParamGridBuilder().build()
            evaluator = RegressionEvaluator(metricName="r2")
            cv = CrossValidator(estimator=ir,
                                estimatorParamMaps=grid,
                                evaluator=evaluator,
                                parallelism=2)
            irmodel = cv.fit(df)
        else:
            irmodel = ir.fit(df)

        return irmodel
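A hedged example of the conf dict this function reads, assuming df already has the default features and label columns and that the function is reachable at module scope:

conf = {"crossval": {"crossval": True}, "params": {}}
irmodel = Isoton_Regression(df, conf)  # model selected by CrossValidator (3 folds by default) on r2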
Example #6
def isotonicRegression(df, label, features, adjust):
    """ This function returns the RMSE and the predictions from the isotonic
        regression model fitted on the dataframe with the specified feature columns """
    ## Columns with non numerical values are adjusted
    for col in adjust:
        indexer=StringIndexer(inputCol=col,outputCol="{}_num".format(col)) 
        features.append("{}_num".format(col))
        df=indexer.fit(df).transform(df)
    ## Features vector configured from dataframe for model processing
    assembler = VectorAssembler(inputCols=features, outputCol="features")
    assembled = assembler.transform(df)
    ir = IsotonicRegression(featuresCol ='features', labelCol=label)
    irModel=ir.fit(assembled)
    predictions = irModel.transform(assembled)
    ## Evaluator required for rmse estimation
    evaluator = RegressionEvaluator(labelCol=label, metricName="rmse")
    rmse = evaluator.evaluate(predictions)
    result = {
        "RMSE": rmse,
        "predictions": [r["prediction"] for r in predictions.select("prediction").collect()]
    }
    return result
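A minimal call sketch for the function above; the column names are purely illustrative, and adjust lists the string-typed columns to be indexed before the feature vector is assembled:

result = isotonicRegression(df, label="label", features=["x1", "x2"], adjust=["category"])
print(result["RMSE"])
print(result["predictions"][:10])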
Example #7
# $example on$
from pyspark.ml.regression import IsotonicRegression
# $example off$
from pyspark.sql import SparkSession

"""
An example demonstrating isotonic regression.
Run with:
  bin/spark-submit examples/src/main/python/ml/isotonic_regression_example.py
"""

if __name__ == "__main__":
    spark = SparkSession\
        .builder\
        .appName("IsotonicRegressionExample")\
        .getOrCreate()

    # $example on$
    # Loads data.
    dataset = spark.read.format("libsvm")\
        .load("data/mllib/sample_isotonic_regression_libsvm_data.txt")

    # Trains an isotonic regression model.
    model = IsotonicRegression().fit(dataset)
    print("Boundaries in increasing order: " + str(model.boundaries))
    print("Predictions associated with the boundaries: " + str(model.predictions))

    # Makes predictions.
    model.transform(dataset).show()
    # $example off$

    spark.stop()
Example #8
    def binomialSparkIsoRegression(self):
        regr = IsotonicRegression()
        model = regr.fit(self.Xtrain, self.Ytrain)
        return model
Example #9
    def scalarSparkIsoRegression(self):
        regr = IsotonicRegression()
        model = regr.fit(self.train)
        return model
Example #10
    #Splitting into train and test sets
    #X_train, X_test = X.randomSplit([0.8, 0.2])
    xtr = X_train.toPandas()
    xtr.to_csv('X_train.csv')
    xtst = X_test.toPandas()
    xtst.to_csv('X_test.csv')

    rf = RandomForestRegressor(labelCol='label',
                               featuresCol="scaledFeatures",
                               numTrees=500)
    xgb = GBTRegressor(labelCol='label', featuresCol="features", maxIter=100)
    glr = GeneralizedLinearRegression(family="gamma",
                                      link="inverse",
                                      maxIter=100,
                                      regParam=0.3)
    iso = IsotonicRegression()

    model = rf.fit(X_train)
    train_pred = model.transform(X_train)
    trpred = train_pred.toPandas()
    trpred.to_csv('train_pred_rf.csv')

    pred = model.transform(X_test)
    tstpred = pred.toPandas()
    tstpred.to_csv('test_pred_rf.csv')

    evaluator = RegressionEvaluator(labelCol="label",
                                    predictionCol="prediction",
                                    metricName="rmse")
    rmse = evaluator.evaluate(pred)
    print("Root Mean Squared Error (RMSE) on test data for RF = %g" % rmse)
Example #11
# DBTITLE 1,Isotonic Regression Model
from pyspark.ml.feature import RFormula
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.regression import IsotonicRegression
from pyspark.ml.pipeline import Pipeline

labelPredDF1 = labelPredDF.withColumn(
    "proportion",
    (col("pos")) / (col("neg") + col("neu") / 3 +
                    .000000001))  # Including neu in the numerator would dominate the ratio
trainDF, testDF = labelPredDF1.randomSplit([.8, .2], seed=42)

rf = RFormula(formula="label ~ neg + pos + neu + compound")
lr = LogisticRegression(fitIntercept=True)
ir = IsotonicRegression(featuresCol='proportion',
                        predictionCol='prediction',
                        isotonic=True)
pipeline = Pipeline(stages=[ir])

pipelineModel = pipeline.fit(trainDF)
testPredDF = pipelineModel.transform(testDF)

# COMMAND ----------

display(testPredDF)
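
A hedged follow-up cell that scores the calibrated predictions; it assumes testPredDF carries the default label and prediction columns produced by the pipeline above:

# COMMAND ----------

from pyspark.ml.evaluation import RegressionEvaluator

evaluator = RegressionEvaluator(labelCol="label", predictionCol="prediction",
                                metricName="rmse")
print("Isotonic calibration RMSE:", evaluator.evaluate(testPredDF))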

# COMMAND ----------

# DBTITLE 1,Logistic Regression Model
from pyspark.ml.feature import RFormula
from pyspark.ml.classification import LogisticRegression
Example #12
plot_lr_model()

# ## Exercises

# In the following exercises we use *isotonic regression* to fit a monotonic
# function to the data.

# (1)  Import the `IsotonicRegression` class from the regression module.

from pyspark.ml.regression import IsotonicRegression

# (2)  Create an instance of the `IsotonicRegression` class.  Use the same
# features and label that we used for our linear regression model.

ir = IsotonicRegression(featuresCol="features", labelCol="duration")
print(ir.explainParams())

# (3)  Fit the isotonic regression model on the train data.  It may take a
# minute for the fit to complete.  Note that this will produce an instance of
# the `IsotonicRegressionModel` class.

ir_model = ir.fit(train)
type(ir_model)

# (4)  The model parameters are available in the `boundaries` and `predictions`
# attributes of the isotonic regression model.  Print these attributes.

print(ir_model.boundaries)
print(ir_model.predictions)
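
As a hedged follow-up beyond the original exercises, the fitted model can generate predictions with transform, mirroring the Spark example earlier on this page; train is the DataFrame from step (3):

# (5)  Optional extra step: apply the model to produce monotonic predictions.
ir_model.transform(train).select("duration", "prediction").show(5)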