Example #1
from pyspark.mllib.evaluation import MulticlassMetrics
from IPython.display import display, HTML


# displayConfusionTable is a boolean flag expected to be defined elsewhere in the notebook
def runMetrics(labeledDataRDD, *args):
    html = '<table width=100%><tr><th>Model</th><th>Accuracy</th><th>Precision</th><th>Recall</th></tr>'
    confusionHtml = '<p>Confusion Tables for each Model</p>'
    for model in args:
        label = model.__class__.__name__
        predictionAndLabels = model.predict(
            labeledDataRDD.map(lambda lp: lp.features))
        metrics = MulticlassMetrics(\
            predictionAndLabels.zip(labeledDataRDD.map(lambda lp: lp.label)).map(lambda t: (float(t[0]),float(t[1])))\
        )
        # note: the "Accuracy" column is populated with the weighted F1 score
        html += '<tr><td>{0}</td><td>{1:.2f}%</td><td>{2:.2f}%</td><td>{3:.2f}%</td></tr>'\
            .format(label, metrics.weightedFMeasure(beta=1.0)*100, metrics.weightedPrecision*100, metrics.weightedRecall*100)

        if (displayConfusionTable):
            confusionMatrix = metrics.call("confusionMatrix")
            confusionMatrixArray = confusionMatrix.toArray()
            #labels = metrics.call("labels")
            confusionHtml += "<p>" + label + "<p>"
            confusionHtml += "<table>"
            for row in confusionMatrixArray:
                confusionHtml += "<tr>"
                for cell in row:
                    confusionHtml += "<td>" + str(cell) + "</td>"
                confusionHtml += "</tr>"
            confusionHtml += "</table>"

    html += '</table>'

    if (displayConfusionTable):
        html += confusionHtml

    display(HTML(html))
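
# --- Hypothetical usage sketch (not part of the original example) ---
# runMetrics expects an RDD of LabeledPoint plus one or more trained mllib models;
# the SparkContext, toy dataset and model choices below are illustrative only.
from pyspark import SparkContext
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.classification import LogisticRegressionWithLBFGS, NaiveBayes

sc = SparkContext.getOrCreate()
labeledDataRDD = sc.parallelize([
    LabeledPoint(0.0, [0.0, 1.0]),
    LabeledPoint(0.0, [0.5, 1.5]),
    LabeledPoint(1.0, [3.0, 0.0]),
    LabeledPoint(1.0, [3.5, 0.5]),
])

displayConfusionTable = True  # global flag read inside runMetrics
logitModel = LogisticRegressionWithLBFGS.train(labeledDataRDD, iterations=10)
nbModel = NaiveBayes.train(labeledDataRDD)
runMetrics(labeledDataRDD, logitModel, nbModel)
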
Example #2
def runMetrics(labeledDataRDD, *args):
    html='<table width=100%><tr><th>Model</th><th>Accuracy</th><th>Precision</th><th>Recall</th></tr>'
    confusionHtml = '<p>Confusion Tables for each Model</p>'
    for model in args:
        label= model.__class__.__name__
        predictionAndLabels = model.predict(labeledDataRDD.map(lambda lp: lp.features))
        metrics = MulticlassMetrics(\
            predictionAndLabels.zip(labeledDataRDD.map(lambda lp: lp.label)).map(lambda t: (float(t[0]),float(t[1])))\
        )
        html+='<tr><td>{0}</td><td>{1:.2f}%</td><td>{2:.2f}%</td><td>{3:.2f}%</td></tr>'\
            .format(label,metrics.weightedFMeasure(beta=1.0)*100, metrics.weightedPrecision*100,metrics.weightedRecall*100 )

        if ( displayConfusionTable ):
            confusionMatrix = metrics.call("confusionMatrix")
            confusionMatrixArray = confusionMatrix.toArray()
            #labels = metrics.call("labels")
            confusionHtml += "<p>" + label + "<p>"
            confusionHtml += "<table>"
            for row in confusionMatrixArray:
                confusionHtml += "<tr>"
                for cell in row:
                    confusionHtml+="<td>" + str(cell) + "</td>"
                confusionHtml += "</tr>"
            confusionHtml += "</table>"
        
    html+='</table>'
    
    if ( displayConfusionTable ):
        html+=confusionHtml
    
    display(HTML(html))
Example #3
def runMetrics(labeledDataRDD, *args):
    html = '<table width=100%><tr><th>Model</th><th>Accuracy</th><th>Precision</th><th>Recall</th></tr>'
    confusionHtml = '<p>Confusion Tables for each Model</p>'
    for model in args:
        label = model.__class__.__name__
        predictionAndLabels = model.predict(
            labeledDataRDD.map(lambda lp: lp.features))
        metrics = MulticlassMetrics(\
            predictionAndLabels.zip(labeledDataRDD.map(lambda lp: lp.label)).map(lambda t: (float(t[0]),float(t[1])))\
        )
        html+='<tr><td>{0}</td><td>{1:.2f}%</td><td>{2:.2f}%</td><td>{3:.2f}%</td></tr>'\
            .format(label,metrics.weightedFMeasure(beta=1.0)*100, metrics.weightedPrecision*100,metrics.weightedRecall*100 )

        if (displayConfusionTable):
            #get labels from RDD
            handler = getTrainingHandler()  # project-specific helper assumed to be defined elsewhere
            classLabels = labeledDataRDD.map(lambda t: t.label).distinct().map(
                lambda l: handler.getClassLabel(l)).collect()
            confusionMatrix = metrics.call("confusionMatrix")
            confusionMatrixArray = confusionMatrix.toArray()
            #labels = metrics.call("labels")
            confusionHtml += "<p>" + label + "<p>"
            confusionHtml += "<table>"
            confusionHtml += "<tr><td></td>"
            for classLabel in classLabels:
                confusionHtml += "<td>" + str(classLabel) + "</td>"
            confusionHtml += "</tr>"

            for i, row in enumerate(confusionMatrixArray):
                confusionHtml += "<tr>"
                confusionHtml += "<td>" + classLabels[i] + "</td>"
                for j, cell in enumerate(row):
                    confusionHtml += "<td style='text-align:center'>" + (
                        "<b>" if (i == j) else "") + str(cell) + ("</b>" if
                                                                  (i == j) else
                                                                  "") + "</td>"
                confusionHtml += "</tr>"
            confusionHtml += "</table>"

    html += '</table>'

    if (displayConfusionTable):
        html += confusionHtml

    display(HTML(html))
Example #4
# doRender is a method on a display/renderer class; Configuration, training and
# self._addHTML are assumed to be provided elsewhere in the surrounding project.
def doRender(self, handlerId):
    html='<div class="pd_save"><table width=100%><tr><th>Model</th><th>Accuracy</th><th>Precision</th><th>Recall</th></tr>'
    confusionHtml = '<p>Confusion Tables for each Model</p>'
    for modelName,model in Configuration.getModels():
        label= model.__class__.__name__
        labeledDataRDD, sqlTableName = Configuration.getLabeledData(self.entity)
        predictionAndLabels = model.predict(labeledDataRDD.map(lambda lp: lp.features))
        metrics = MulticlassMetrics(\
            predictionAndLabels.zip(labeledDataRDD.map(lambda lp: lp.label)).map(lambda t: (float(t[0]),float(t[1])))\
        )
        html+='<tr><td>{0}</td><td>{1:.2f}%</td><td>{2:.2f}%</td><td>{3:.2f}%</td></tr>'\
            .format(label,metrics.weightedFMeasure(beta=1.0)*100, metrics.weightedPrecision*100,metrics.weightedRecall*100 )
        displayConfusionTable = True
        if ( displayConfusionTable ):
            #get labels from RDD
            handler=training.getTrainingHandler()
            classLabels = labeledDataRDD.map(lambda t: t.label).distinct().map(lambda l: handler.getClassLabel(l)).collect()
            confusionMatrix = metrics.call("confusionMatrix")
            confusionMatrixArray = confusionMatrix.toArray()
            #labels = metrics.call("labels")
            confusionHtml += "<p>" + label + "</p>"
            confusionHtml += "<table>"
            confusionHtml+="<tr><td></td>"
            for classLabel in classLabels:
                confusionHtml+="<td>" + str(classLabel) + "</td>"
            confusionHtml+="</tr>"

            for i, row in enumerate(confusionMatrixArray):
                confusionHtml += "<tr>"
                confusionHtml += "<td>" + classLabels[i] + "</td>"
                for j, cell in enumerate(row):
                    confusionHtml+="<td style='text-align:center'>" + ("<b>" if (i==j) else "") + str(cell) + ("</b>" if (i==j) else "") + "</td>"
                confusionHtml += "</tr>"
            confusionHtml += "</table>"

    html+='</table></div>'

    if ( displayConfusionTable ):
        html+=confusionHtml

    self._addHTML(html)
Example #5
# cvModel uses the best model found from the Cross Validation
# Evaluate best model
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

f1 = MulticlassClassificationEvaluator(labelCol='label', predictionCol='prediction', metricName='f1')
print('f1:', f1.evaluate(rfpredictions))
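
# The cell above assumes `rfpredictions` (a DataFrame with 'label' and 'prediction'
# columns) and `cvModel` already exist. A minimal sketch of how they might be
# produced with a cross-validated random forest; `trainData` and `testData` are
# hypothetical DataFrames with 'label' and 'features' columns.
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

rf = RandomForestClassifier(labelCol="label", featuresCol="features")
paramGrid = ParamGridBuilder().addGrid(rf.numTrees, [20, 50]).build()
cv = CrossValidator(estimator=rf,
                    estimatorParamMaps=paramGrid,
                    evaluator=MulticlassClassificationEvaluator(metricName="f1"),
                    numFolds=3)
cvModel = cv.fit(trainData)                  # keeps the best model found by cross validation
rfpredictions = cvModel.transform(testData)  # adds the 'prediction' column used above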


# Confusion Matrix

import pandas as pd
from pyspark.mllib.evaluation import MulticlassMetrics
predictionAndLabels = rfpredictions.select('label', 'prediction')
metrics = MulticlassMetrics(predictionAndLabels.rdd.map(lambda x: tuple(map(float, x))))

confusion_matrix = metrics.confusionMatrix().toArray()
labels = [int(l) for l in metrics.call('labels')]
confusion_matrix = pd.DataFrame(confusion_matrix , index=labels, columns=labels)


confusion_matrix  # displayed as a pandas DataFrame when run in a notebook cell


# Model metrics by class

# **Precision** is the ratio of correctly predicted positive observations to the total predicted positive observations. (Precision = TP/(TP+FP))
#
# **Recall (Sensitivity)** is the ratio of correctly predicted positive observations to all observations in the actual class. (Recall = TP/(TP+FN))
#
# **F1 score** is the harmonic mean of Precision and Recall, so it takes both false positives and false negatives into account. (F1 Score = 2*(Recall * Precision) / (Recall + Precision))
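
# A sketch of the per-class numbers behind these definitions, computed from the same
# MulticlassMetrics object and label list built above (added for illustration):
per_class_metrics = pd.DataFrame(
    [(l,
      metrics.precision(float(l)),
      metrics.recall(float(l)),
      metrics.fMeasure(float(l), beta=1.0)) for l in labels],
    columns=['label', 'precision', 'recall', 'f1'])
per_class_metrics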