# In[109]:
# Evaluate the regressor on the `val` dataset; the result object exposes
# the scored rows through its `predictions` attribute (read in the next cell).
predResults = regressor.evaluate(val)

# In[110]:
# Keep only the predictions table from the evaluation result.
predResults = predResults.predictions

# In[111]:
# Persist the model to "lrModel", replacing any copy saved by an earlier run.
regressor.write().overwrite().save("lrModel")

# In[112]:
# Replace the "prediction" column with its rounded value.
# NOTE(review): `func` is presumably pyspark.sql.functions -- confirm against
# the imports at the top of the notebook.
predResults = predResults.withColumn(
    "prediction",
    func.round("prediction"),
)
# Debug preview, intentionally left disabled:
#predResults.show(2)

# In[113]:
##### Linear Regression Ends ######
# Evaluate the regressor on the held-out test split.
pred = regressor.evaluate(test_data)

# Predict the model: show the scored test rows, then score and show the
# validation set.
pred.predictions.show()
predictions = regressor.transform(valid_finalized_data)
predictions.show()

# Row count per "quality" value in the source dataset.
dataset.groupby("quality").count().show()

# ################################################################################################################
# Export the trained model and create a zip file for ease of download.
# NOTE(review): these imports sit mid-script in the original; they are kept
# here because the file header is not visible from this section.
import shutil
from pyspark.ml.regression import LinearRegressionModel

# Write the model directory, overwriting the output of any previous run.
regressor.write().overwrite().save("cs643")

# Zip the model directory. Using base_dir="cs643" keeps the "cs643/" prefix
# inside the archive, which is why the reload path below is "test/cs643".
path_drv = shutil.make_archive(
    "cs643",
    format='zip',
    base_dir="cs643",
)

# Round-trip check: unpack into "test", reload the model, and score the
# validation set again with the reloaded copy.
shutil.unpack_archive(
    "cs643.zip",
    "test",
    format='zip',
)
loadedRegressor = LinearRegressionModel.load("test/cs643")
predictions = loadedRegressor.transform(valid_finalized_data)
print(loadedRegressor.numFeatures)
predictions.show()

# ################################################################################################################
# run some quick evaluations
# Preview the prepared validation set.
valid_data_final.show()

# Split training data into 80% and 20%.
# NOTE(review): no seed is passed to randomSplit, so the split presumably
# differs from run to run -- confirm whether reproducibility matters here.
train_data, test_data = data_final.randomSplit([0.8, 0.2])

# Train using training data. The features come from the 'Attributes' column
# and the label is the dataset column at index 11. `regressor` ends up bound
# to the fitted model, exactly as in the two-step original.
regressor = LinearRegression(
    featuresCol='Attributes',
    labelCol=dataset.columns[11],
).fit(train_data)

# Evaluate on the held-out 20% and show the scored rows.
pred = regressor.evaluate(test_data)
pred.predictions.show()

# Predict the model on the validation set.
predictions = regressor.transform(valid_data_final)
predictions.show()

# Save the model so that we can export it for later use.
regressor.write().overwrite().save("trained-model")

# Zip the saved model directory. base_dir keeps the "trained-model/" prefix
# inside the archive, which is why the reload path below ends in
# "trained-model-sample/trained-model".
path_drv = shutil.make_archive(
    base_name="trained-model",
    format='zip',
    base_dir="trained-model",
)

# Round-trip check: unpack the archive elsewhere, reload the model, and score
# the validation set with the reloaded copy.
shutil.unpack_archive(
    filename="trained-model.zip",
    extract_dir="trained-model-sample",
    format='zip',
)
loadedRegressor = LinearRegressionModel.load("trained-model-sample/trained-model")
predictions = loadedRegressor.transform(valid_data_final)
print(loadedRegressor.numFeatures)
predictions.show()

# Release the Spark session once all work is done.
spark.stop()