def linearRegression(df, conf):
    """Fit a Spark ML linear regression, optionally with hyper-parameter tuning.

    Args:
        df: Training data handed to ``fit`` (a Spark DataFrame).
        conf: Configuration mapping with two optional entries:

            ``"params"``
                Estimator settings. Recognised keys and their fallback
                values are listed in ``defaults`` below. Every setting whose
                resolved value is not ``None`` is forwarded to
                ``LinearRegression``.
                NOTE(review): ``loss`` and ``epsilon`` are forwarded as
                constructor keywords; confirm the deployed Spark version
                accepts them.
            ``"tuning"``
                ``None`` / empty for a plain fit, otherwise a mapping with
                ``"method"`` (``"crossval"`` or ``"trainvalsplit"``,
                case-insensitive), ``"paramGrids"`` (param name or ``Param``
                -> list of candidate values) and ``"methodParam"`` (number
                of folds for cross validation, train ratio for
                train/validation split; omitted -> Spark's own default).

    Returns:
        The fitted model: the result of ``LinearRegression.fit`` when no
        tuning is configured, otherwise the result of
        ``CrossValidator.fit`` / ``TrainValidationSplit.fit``.

    Raises:
        ValueError: If tuning is requested with a missing or unknown method.
    """
    params = conf.get("params") or {}

    # Fallback values used when a setting is absent from conf["params"].
    defaults = {
        "featuresCol": "features",
        "labelCol": "label",
        "predictionCol": "prediction",
        "maxIter": 100,
        "regParam": 0.0,
        "elasticNetParam": 0.0,
        "tol": 1e-6,
        "fitIntercept": True,
        "standardization": True,
        "solver": "auto",
        "weightCol": None,
        "aggregationDepth": 2,
        "loss": "squaredError",
        "epsilon": 1.35,
    }
    resolved = {name: params.get(name, default) for name, default in defaults.items()}
    # Leave unset (None) settings out entirely instead of passing an explicit
    # None keyword to the estimator (e.g. weightCol when no weights are used).
    lr_kwargs = {name: value for name, value in resolved.items() if value is not None}

    lr = LinearRegression(**lr_kwargs)
    print("maxIter : ", lr.getMaxIter())
    print("regParam : ", lr.getRegParam())
    print("aggrDepth : ", lr.getAggregationDepth())

    # No tuning configured (missing, None or empty): plain fit.
    tuning = conf.get("tuning")
    if not tuning:
        return lr.fit(df)

    method = (tuning.get("method") or "").lower()
    if method not in ("crossval", "trainvalsplit"):
        raise ValueError(
            "Unsupported tuning method %r; expected 'crossval' or 'trainvalsplit'"
            % (tuning.get("method"),)
        )

    # Build the search grid. Config files carry parameter *names*, while
    # addGrid needs the estimator's Param objects, so names are resolved here.
    builder = ParamGridBuilder()
    for key, values in (tuning.get("paramGrids") or {}).items():
        param = lr.getParam(key) if isinstance(key, str) else key
        builder.addGrid(param, values)
    grid = builder.build()

    # The evaluator must read the same label/prediction columns the
    # estimator was configured with.
    evaluator = RegressionEvaluator(
        labelCol=lr.getLabelCol(),
        predictionCol=lr.getPredictionCol(),
    )

    tuner_kwargs = {
        "estimator": lr,
        "estimatorParamMaps": grid,
        "evaluator": evaluator,
    }
    method_param = tuning.get("methodParam")

    if method == "crossval":
        # Cross validation: methodParam is the number of folds.
        if method_param is not None:
            tuner_kwargs["numFolds"] = method_param
        tuner = CrossValidator(**tuner_kwargs)
    else:
        # Train/validation split: methodParam is the training ratio.
        if method_param is not None:
            tuner_kwargs["trainRatio"] = method_param
        tuner = TrainValidationSplit(**tuner_kwargs)

    return tuner.fit(df)