# Filter stop words: reads the "words" column and writes the result to "terms"
stop_word_filter = StopWordsRemover(inputCol="words", outputCol="terms")
wrangled = stop_word_filter.transform(wrangled)

# Apply the hashing trick: hash each list of terms into a term-frequency
# vector with 1024 features, stored in the "hash" column
term_hasher = HashingTF(inputCol="terms", outputCol="hash", numFeatures=1024)
wrangled = term_hasher.transform(wrangled)

# Convert the hashed term frequencies to TF-IDF, stored in "features".
# NOTE(review): the IDF weights are fitted on every row of `wrangled`, i.e.
# before the train/test split further down, so rows that end up in the test
# set contribute to the document frequencies — confirm this is acceptable.
idf_model = IDF(inputCol="hash", outputCol="features").fit(wrangled)
sms = idf_model.transform(wrangled)

# View the first four records, without truncating long column values
sms.show(4, truncate=False)

# Split the data into training (80%) and testing (20%) sets; the fixed seed
# keeps the split reproducible between runs
split_weights = [0.8, 0.2]
sms_train, sms_test = sms.randomSplit(split_weights, seed=13)

# Fit a regularised Logistic Regression model to the training data.
# No column names are passed, so the estimator uses its default
# feature/label columns.
classifier = LogisticRegression(regParam=0.2)
logistic = classifier.fit(sms_train)

# Make predictions on the testing data
prediction = logistic.transform(sms_test)

# Create a confusion matrix: count rows for each (known label, predicted
# label) pair and print the resulting table
confusion_counts = prediction.groupBy("label", "prediction").count()
confusion_counts.show()

# Find accuracy and weighted precision using one multiclass evaluator
multi_evaluator = MulticlassClassificationEvaluator()

# The metric is selected per call through a param map passed to evaluate(),
# so the same evaluator instance can be reused for other metrics
accuracy_params = {multi_evaluator.metricName: "accuracy"}
accuracy = multi_evaluator.evaluate(prediction, accuracy_params)
weighted_precision = multi_evaluator.evaluate(