Exemplo n.º 1
0
 def test_onevsrest(self):
     """Check that OneVsRest and its fitted model survive a save/load round trip.

     Fits a OneVsRest wrapper around LogisticRegression on a small
     three-label DataFrame, saves both the estimator and the fitted model
     under a temporary directory, reloads them, and compares each reloaded
     object with its original via ``self._compare_pipelines``.
     """
     # Local import so this method does not depend on the file's import block.
     import shutil

     temp_path = tempfile.mkdtemp()
     try:
         df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                          (1.0, Vectors.sparse(2, [], [])),
                                          (2.0, Vectors.dense(0.5, 0.5))] * 10,
                                         ["label", "features"])
         lr = LogisticRegression(maxIter=5, regParam=0.01)
         ovr = OneVsRest(classifier=lr)
         model = ovr.fit(df)

         # Round-trip the (unfitted) estimator.
         ovrPath = temp_path + "/ovr"
         ovr.save(ovrPath)
         loadedOvr = OneVsRest.load(ovrPath)
         self._compare_pipelines(ovr, loadedOvr)

         # Round-trip the fitted model.
         modelPath = temp_path + "/ovrModel"
         model.save(modelPath)
         loadedModel = OneVsRestModel.load(modelPath)
         self._compare_pipelines(model, loadedModel)
     finally:
         # mkdtemp() never cleans up after itself; remove the directory even
         # when an assertion above fails. Cleanup is best-effort so that a
         # removal problem cannot mask the real test outcome.
         shutil.rmtree(temp_path, ignore_errors=True)
Exemplo n.º 2
0
 def test_save_load(self):
     """Check that OneVsRest and its fitted model survive a save/load round trip.

     The reloaded estimator must report the same features column, label
     column and classifier uid as the original. The reloaded model must
     contain the same number of sub-models as the original, with matching
     uids in the same order.
     """
     # Local import so this method does not depend on the file's import block.
     import shutil

     temp_path = tempfile.mkdtemp()
     try:
         sqlContext = SQLContext(self.sc)
         df = sqlContext.createDataFrame(
             [
                 (0.0, Vectors.dense(1.0, 0.8)),
                 (1.0, Vectors.sparse(2, [], [])),
                 (2.0, Vectors.dense(0.5, 0.5)),
             ],
             ["label", "features"],
         )
         lr = LogisticRegression(maxIter=5, regParam=0.01)
         ovr = OneVsRest(classifier=lr)
         model = ovr.fit(df)

         # Round-trip the (unfitted) estimator.
         ovrPath = temp_path + "/ovr"
         ovr.save(ovrPath)
         loadedOvr = OneVsRest.load(ovrPath)
         self.assertEqual(loadedOvr.getFeaturesCol(), ovr.getFeaturesCol())
         self.assertEqual(loadedOvr.getLabelCol(), ovr.getLabelCol())
         self.assertEqual(loadedOvr.getClassifier().uid, ovr.getClassifier().uid)

         # Round-trip the fitted model.
         modelPath = temp_path + "/ovrModel"
         model.save(modelPath)
         loadedModel = OneVsRestModel.load(modelPath)
         # zip() stops at the shorter sequence, so without this check a
         # reloaded model that lost sub-models would still pass the loop.
         self.assertEqual(len(model.models), len(loadedModel.models))
         for m, n in zip(model.models, loadedModel.models):
             self.assertEqual(m.uid, n.uid)
     finally:
         # mkdtemp() never cleans up after itself; remove the directory even
         # when an assertion above fails. Cleanup is best-effort so that a
         # removal problem cannot mask the real test outcome.
         shutil.rmtree(temp_path, ignore_errors=True)
Exemplo n.º 3
0
# Evaluate a saved OneVsRest model on labelled test images.
#
# Images are read from the "tl0" .. "tl4" sub-directories of imageDir; every
# row read from "tl<N>" is tagged with the integer label N.
imageDir = "/media/prateek/2A8BEA421AC55874/PAIH/work/resized/"

labelZeroDf = readImages(imageDir + "tl0").withColumn("label", lit(0))
labelOneDf = readImages(imageDir + "tl1").withColumn("label", lit(1))
labelTwoDf = readImages(imageDir + "tl2").withColumn("label", lit(2))
labelThreeDf = readImages(imageDir + "tl3").withColumn("label", lit(3))
labelFourDf = readImages(imageDir + "tl4").withColumn("label", lit(4))
finalTestDf = labelZeroDf.unionAll(labelOneDf).unionAll(labelTwoDf).unionAll(
    labelThreeDf).unionAll(labelFourDf)
testSize = finalTestDf.count()

# Turn each image into a feature vector in the "features" column.
featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDf)

model_nb = OneVsRestModel.load(imageDir + 'model-naive-bayes-new')
predictions = model_nb.transform(featureVector)
# Pass the row count so show() prints every prediction instead of its
# default number of rows.
predictions.show(predictions.count())

# Columns are spelled out so both evaluators below are configured alike.
evaluator = MulticlassClassificationEvaluator(labelCol="label",
                                              predictionCol="prediction",
                                              metricName="accuracy")
accuracy = evaluator.evaluate(predictions)
print("Test Data set accuracy with Naive Bayes for " + str(testSize) +
      " images = " + str(accuracy) + " and error " + str(1 - accuracy))

evaluator = MulticlassClassificationEvaluator(labelCol="label",
                                              predictionCol="prediction",
                                              metricName="weightedPrecision")
weightedPrecision = evaluator.evaluate(predictions)
print("Test Data set weightedPrecision with Naive Bayes  for " +
      str(testSize) + " images = " + str(weightedPrecision))
Exemplo n.º 4
0
    "header", "true").load(imageDir + "test25.csv")
# Convert the label CSV rows into (image, label) records with an integer label.
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
# Attach the label to each image by matching the image's file name against the
# CSV "image" column; the duplicate key column is dropped after the join.
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)

# Drop the driver-side names of the intermediates now that featureVector has
# been derived from them. The joined DataFrame is bound as `tmptestX` above,
# so that is the name deleted here.
del tmpTestDf
del tmpTestRDD
del tmptestX
del csvTestTmp
del csvTestRDD
del csvTest
del finalTestDataFrame

model_dc = OneVsRestModel.load(imageDir + 'model-decision-tree-classifier')

predictions = model_dc.transform(featureVector)
predictions.show()

print('***Transform complete***')
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Evaluate once and reuse the value: each evaluate() call runs a Spark job.
accuracy = evaluator.evaluate(predictions)
print("Test Data set accuracy with decision tree classifier for 25 images = " +
      str(accuracy) + " and error " + str(1 - accuracy))
Exemplo n.º 5
0
# Base location of the test images and the label CSV read by this script.
# Alternative local location, kept for switching away from HDFS:
#imageDir = "/media/prateek/2A8BEA421AC55874/PAIH/work"
# NOTE(review): this value ends with "/" while the paths built from it start
# with "/" (e.g. imageDir + "/test5"), which yields "paih//test5" -- confirm
# the filesystem tolerates the doubled separator.
imageDir = "hdfs://192.168.65.188:8020/paih/"

def getFileName(filePath):
    """Return the base name of *filePath* truncated at its first dot.

    Only the last path component is considered, and everything from the
    first "." onward is discarded, so "dir/img.01.png" yields "img" and a
    name beginning with "." yields an empty string.
    """
    stem, _, _ = os.path.basename(filePath).partition(".")
    return stem

# Prepare Test Data
tmpTestDf = readImages(imageDir + "/test5")
# Re-shape every image row so it also carries the bare file name, which is the
# key used to join against the label CSV.
tmpTestRDD = tmpTestDf.rdd.map(lambda x : Row(filepath = x[0], image = x[1], fileName = getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
csvTestTmp = spark.read.format("csv").option("header", "true").load(imageDir + "/test5.csv")
# The label is converted from the CSV value to an int.
csvTestRDD = csvTestTmp.rdd.map(lambda x : Row(image = x[0], label = int(x[1])))
csvTest = csvTestRDD.toDF()
# Attach labels to images; the duplicate key column is dropped after the join.
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image, 'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",outputCol="features", modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
model_dc = OneVsRestModel.load('hdfs://192.168.65.188:8020/paih/model-dicision-tree-classifier')
predictions = model_dc.transform(featureVector)
# Persisted because the predictions are used by more than one action below.
predictions.persist()
predictions.select("filePath", "prediction").show(truncate=False)

predictionAndLabels = predictions.select("prediction", "label")

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Evaluate once and reuse the value: each evaluate() call runs a Spark job.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Decision tree Classifier = " + str(accuracy) + " and error " + str(1 - accuracy))
Exemplo n.º 6
0
# Prepare Test Data
tmpTestDf = readImages(imageDir + "/test5")
# Re-shape every image row so it also carries the bare file name, which is the
# key used to join against the label CSV.
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "/test5.csv")
# The label is converted from the CSV value to an int.
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
# Attach labels to images; the duplicate key column is dropped after the join.
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)

# The model is loaded from the same base location as the test data. The
# original referenced `imgDir`, a name not bound anywhere in this script.
model_gbt = OneVsRestModel.load(imageDir +
                                '/model-gradiant-boosted-tre-classifier')

predictions = model_gbt.transform(featureVector)

print('***Transform complete***')
predictionAndLabels = predictions.select("prediction", "label")

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Evaluate once and reuse the value: each evaluate() call runs a Spark job.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with gradiant boosted tree classifier = " +
      str(accuracy) + " and error " + str(1 - accuracy))