Example #1
def runTests(activationsPath, indexLabelMappingPath, sourcePath):

   labels = utility.getLabelStrings(indexLabelMappingPath)
   activations = utility.arrayFromFile(activationsPath)

   if not sourcePath.endswith('/'):
      sourcePath += '/'

   clusterGroups = loadClusters(sourcePath) # returns a list of all previously generated cluster groups

   mixedClusterResultsSimple = []
   perLabelClusterResultsSimple = []

   for clusters in clusterGroups:
      logger.info("Evaluating clusters at " + clusters['path'])

      if clusters['type'] == 'perLabel':
         [confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong] = runTest(clusters['data'], activations, labels, True) # per-label clusters
         perLabelClusterResultsSimple.append([clusters['k'], meanAveragePrecision, overallCorrect, overallWrong])
         saveConfusionMatrix(confusionMatrix, clusters['path'] + "confusion_test.npy")
         utility.plotConfusionMatrix(confusionMatrix, labels, clusters['path'] + "confusion_test.pdf")
      else:
         [confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong] = runTest(clusters['data'], activations, labels, False) # mixed clusters
         mixedClusterResultsSimple.append([clusters['k'], meanAveragePrecision, overallCorrect, overallWrong])
         saveConfusionMatrix(confusionMatrix, clusters['path'] + "confusion_test.npy")
         utility.plotConfusionMatrix(confusionMatrix, labels, clusters['path'] + "confusion_test.pdf")

   overviewPerLabel = np.array(perLabelClusterResultsSimple)
   overviewMixed = np.array(mixedClusterResultsSimple)

   saveOverview(overviewPerLabel, sourcePath + 'perLabel_')
   saveOverview(overviewMixed, sourcePath + 'result_mixed_')

   utility.plotKMeansOverview(overviewPerLabel, sourcePath + 'perLabel_result.pdf', True)
   utility.plotKMeansOverview(overviewMixed, sourcePath + 'result_mixed_result.pdf', True)
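
For reference, the loop above implies a specific shape for the cluster groups returned by loadClusters. The keys below are inferred from the dictionary accesses in runTests (clusters['type'], ['k'], ['data'], ['path']) and are not confirmed by the source; treat this as a minimal sketch only:

clusterData = None  # placeholder for whatever structure runTest consumes
clusterGroups = [
    {
        'type': 'perLabel',   # any other value takes the mixed branch
        'k': 10,              # number of clusters in this group
        'data': clusterData,
        'path': 'clusters/perLabel_10/',  # output directory, must end with '/'
    },
]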
Example #2
def runTests(training, test, labels, targetPath, nn):
   global MAX_K

   if not targetPath.endswith('/'):
      targetPath += '/'
   if not os.path.exists(os.path.dirname(targetPath)):
      os.makedirs(os.path.dirname(targetPath))
   labelCount = len(labels)
   MAX_K = training.shape[0] // labelCount # integer division: MAX_K is used as a loop bound and slice index below
   neurons = training.shape[1] - labelCount
   if nn is None:
      nn = knn.getKNearestNeighbours(training, test, MAX_K, neurons)
      with open(targetPath + "nn.npy", "wb") as outputFile: # np.save writes binary data
         np.save(outputFile, nn)

   # group neighbour rows by the one-hot ground-truth label in their first labelCount columns
   nnByLabel = [nn[nn[:, labelIndex] == 1] for labelIndex in range(labelCount)]

   allLabels = training[:,neurons:]

   sumPerLabel = np.sum(allLabels, axis=0)
   # weight each label inversely to its frequency to compensate for class imbalance
   factorPerLabel = np.max(sumPerLabel) / sumPerLabel

   kCounter = 1
   results = []
   while kCounter < MAX_K:
      logger.info("Evaluating K = " + str(kCounter) + ":")
      overallCorrect = 0
      overallWrong = 0
      meanAveragePrecision = 0
      confusionMatrix = np.zeros((labelCount,labelCount))

      for labelIndex, values in enumerate(nnByLabel):
         confusion = np.zeros((1,len(labels)))
         activationCounter = 0
         while activationCounter < values.shape[0]:
            currentActivation = values[activationCounter]
            searchedLabel = np.argwhere(currentActivation[:labelCount] == 1)[0, 0] # index of the one-hot target label
            nIndices = currentActivation[labelCount:labelCount+kCounter].astype(int)
            neighbourLabels = training[nIndices][:,neurons:]

            neighbourLabelsSum = np.sum(neighbourLabels, axis=0)
            neighbourLabelsSum = neighbourLabelsSum * factorPerLabel

            sortedNeighbourLabels = np.argsort(neighbourLabelsSum)[::-1]

            if sortedNeighbourLabels[0] == searchedLabel:
               overallCorrect += 1
            else:
               overallWrong += 1

            confusion[0, sortedNeighbourLabels[0]] += 1

            averagePrecision = 0
            relevant = 0

            for idx, value in enumerate(sortedNeighbourLabels):
               indicator = 0
               if value == searchedLabel and neighbourLabelsSum[value] != 0:
                  indicator = 1
                  relevant += 1

               precision = float(relevant) / (idx + 1)
               averagePrecision += (precision * indicator)

            if relevant != 0:
               averagePrecision = float(averagePrecision) / relevant
            meanAveragePrecision += averagePrecision
            activationCounter += 1

         confusionMatrix[labelIndex,:] = confusion

      meanAveragePrecision = float(meanAveragePrecision) / test.shape[0]
      logger.info(' Accuracy: ' + str(float(overallCorrect) / (overallWrong + overallCorrect)))
      logger.info(' Mean average precision: ' + str(meanAveragePrecision))

      currentTargetPath = targetPath + str(kCounter) + "/"
      if not os.path.exists(os.path.dirname(currentTargetPath)):
         os.makedirs(os.path.dirname(currentTargetPath))

      logger.info('Saving confusion matrix ' + currentTargetPath + "confusion.npy")
      with open(currentTargetPath + "confusion.npy", "wb") as outputFile:
         np.save(outputFile, confusionMatrix)

      results.append([kCounter, meanAveragePrecision, overallCorrect, overallWrong])
      utility.plotConfusionMatrix(confusionMatrix, labels, currentTargetPath + "confusion.pdf")

      kCounter += 1

   results = np.array(results)

   logger.info('Writing file ' + targetPath + "overview.csv")
   np.savetxt(targetPath + "overview.csv", results, delimiter=',')

   utility.plotKMeansOverview(results, targetPath + "overview.pdf", False)
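
The slicing in runTests implies a row layout for the array returned by knn.getKNearestNeighbours: the first labelCount columns hold the one-hot ground-truth label of a test sample, followed by the indices of the MAX_K nearest training rows, closest first. This layout is inferred from the code, not documented in the source; a minimal sketch:

import numpy as np

labelCount = 3
# assumed row layout: [ one-hot label | indices of the nearest training rows ]
row = np.array([0, 1, 0,             # true label is index 1
                42, 7, 13, 99, 5])   # neighbour indices, closest first
searchedLabel = np.argwhere(row[:labelCount] == 1)[0, 0]      # -> 1
kCounter = 2
nIndices = row[labelCount:labelCount + kCounter].astype(int)  # -> [42, 7]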
Example #3
      logger.info("4) Target path for evaluation results.")
   sys.exit()

   labelIndexInfoPath = sys.argv[1]
   labelIndexMappingPath = sys.argv[2]
   logFilePath = sys.argv[3]
   evaluationTargetPath = sys.argv[4]
   if not evaluationTargetPath.endswith('/'):
      evaluationTargetPath += '/'

   if not os.path.exists(os.path.dirname(evaluationTargetPath)) and os.path.dirname(evaluationTargetPath) != '':
      os.makedirs(os.path.dirname(evaluationTargetPath))


   plotTrainingLossAndAccuracy(logFilePath, evaluationTargetPath)

   labels = utility.getLabelStrings(labelIndexMappingPath)
   [confusionMatrix, meanAveragePrecision, overallCorrect, overallWrong] = testNeuralNet(labels, labelIndexInfoPath, evaluationTargetPath)

   overviewPath = evaluationTargetPath + "overview.csv"
   logger.info('Writing file ' + overviewPath)
   np.savetxt(overviewPath, np.array([0, meanAveragePrecision, overallCorrect, overallWrong]), delimiter=',')

   confusionMatrixPath = evaluationTargetPath + 'confusionMatrix.npy'
   logger.info('Writing file ' + confusionMatrixPath)
   with open(confusionMatrixPath, "wb") as outputFile:
      np.save(outputFile, confusionMatrix)

   utility.plotConfusionMatrix(confusionMatrix, labels, evaluationTargetPath + 'confusionMatrix.pdf')
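
The truncated snippet above evidently prints a usage message (arguments 1 through 4) before exiting, so the sys.argv reads imply an invocation of the following form; the script name is hypothetical:

# hypothetical invocation; the four positional arguments map to sys.argv[1..4]
# python test_net.py <labelIndexInfoPath> <labelIndexMappingPath> <logFilePath> <evaluationTargetPath>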
Example #4
def runTests(training, test, labels, targetPath, nn):
    global MAX_K

    if not targetPath.endswith('/'):
        targetPath += '/'
    if not os.path.exists(os.path.dirname(targetPath)):
        os.makedirs(os.path.dirname(targetPath))
    labelCount = len(labels)
    MAX_K = training.shape[0] // labelCount  # integer division: MAX_K is used as a loop bound and slice index below
    neurons = training.shape[1] - labelCount
    if nn is None:
        nn = knn.getKNearestNeighbours(training, test, MAX_K, neurons)
        with open(targetPath + "nn.npy", "wb") as outputFile:  # np.save writes binary data
            np.save(outputFile, nn)

    # group neighbour rows by the one-hot ground-truth label in their first labelCount columns
    nnByLabel = [nn[nn[:, labelIndex] == 1] for labelIndex in range(labelCount)]

    allLabels = training[:, neurons:]

    sumPerLabel = np.sum(allLabels, axis=0)
    # weight each label inversely to its frequency to compensate for class imbalance
    factorPerLabel = np.max(sumPerLabel) / sumPerLabel

    kCounter = 1
    results = []
    while kCounter < MAX_K:
        logger.info("Evaluating K = " + str(kCounter) + ":")
        overallCorrect = 0
        overallWrong = 0
        meanAveragePrecision = 0
        confusionMatrix = np.zeros((labelCount, labelCount))

        for labelIndex, values in enumerate(nnByLabel):
            confusion = np.zeros((1, len(labels)))
            activationCounter = 0
            while activationCounter < values.shape[0]:
                currentActivation = values[activationCounter]
                # index of the one-hot target label
                searchedLabel = np.argwhere(
                    currentActivation[:labelCount] == 1)[0, 0]
                nIndices = currentActivation[labelCount:labelCount +
                                             kCounter].astype(int)
                neighbourLabels = training[nIndices][:, neurons:]

                neighbourLabelsSum = np.sum(neighbourLabels, axis=0)
                neighbourLabelsSum = neighbourLabelsSum * factorPerLabel

                sortedNeighbourLabels = np.argsort(neighbourLabelsSum)[::-1]

                if sortedNeighbourLabels[0] == searchedLabel:
                    overallCorrect += 1
                else:
                    overallWrong += 1

                confusion[0, sortedNeighbourLabels[0]] += 1

                averagePrecision = 0
                relevant = 0

                for idx, value in enumerate(sortedNeighbourLabels):
                    indicator = 0
                    if (value == searchedLabel
                            and neighbourLabelsSum[value] != 0):
                        indicator = 1
                        relevant += 1

                    precision = float(relevant) / (idx + 1)
                    averagePrecision += (precision * indicator)

                if relevant != 0:
                    averagePrecision = float(averagePrecision) / relevant
                meanAveragePrecision += averagePrecision
                activationCounter += 1

            confusionMatrix[labelIndex, :] = confusion

        meanAveragePrecision = float(meanAveragePrecision) / test.shape[0]
        logger.info(
            ' Accuracy: ' +
            str(float(overallCorrect) / (overallWrong + overallCorrect)))
        logger.info(' Mean average precision: ' + str(meanAveragePrecision))

        currentTargetPath = targetPath + str(kCounter) + "/"
        if not os.path.exists(os.path.dirname(currentTargetPath)):
            os.makedirs(os.path.dirname(currentTargetPath))

        logger.info('Saving confusion matrix ' + currentTargetPath +
                    "confusion.npy")
        with open(currentTargetPath + "confusion.npy", "wb") as outputFile:
            np.save(outputFile, confusionMatrix)

        results.append(
            [kCounter, meanAveragePrecision, overallCorrect, overallWrong])
        utility.plotConfusionMatrix(confusionMatrix, labels,
                                    currentTargetPath + "confusion.pdf")

        kCounter += 1

    results = np.array(results)

    logger.info('Writing file ' + targetPath + "overview.csv")
    np.savetxt(targetPath + "overview.csv", results, delimiter=',')

    utility.plotKMeansOverview(results, targetPath + "overview.pdf", False)
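
Both copies of runTests compute average precision with the same inner loop. A self-contained restatement of that logic can help verify it; the function name and example values below are hypothetical, and with a single ground-truth label the result reduces to 1/rank of the correct label:

import numpy as np

def average_precision(ranked_labels, searched_label, scores):
    # mirrors the loop above: accumulate precision at each rank where the
    # searched label appears, skipping labels whose weighted sum is zero
    relevant = 0
    ap = 0.0
    for idx, label in enumerate(ranked_labels):
        indicator = 0
        if label == searched_label and scores[label] != 0:
            indicator = 1
            relevant += 1
        ap += (float(relevant) / (idx + 1)) * indicator
    return ap / relevant if relevant else 0.0

# the searched label 2 sits at rank 3 (1-based), so AP = 1/3
print(average_precision([4, 0, 2, 1, 3], 2, np.array([5., 1., 3., 0., 9.])))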
Example #5
def testBayes(training, test, labels, targetPath):

    if not targetPath.endswith('/'):
        targetPath += '/'
    if not os.path.exists(os.path.dirname(targetPath)):
        os.makedirs(os.path.dirname(targetPath))

    labelCount = len(labels)
    neurons = training.shape[1] - labelCount

    # shift by the absolute global minimum so no activation is negative before Bayes training
    absoluteMin = min(np.min(training[:, 0:neurons]), np.min(test[:, 0:neurons]))

    training[:, 0:neurons] = training[:, 0:neurons] + np.abs(absoluteMin)
    test[:, 0:neurons] = test[:, 0:neurons] + np.abs(absoluteMin)

    PLog = tb.trainBayes(training, labelCount)

    overallCorrect = 0
    overallWrong = 0
    meanAveragePrecision = 0

    featuresByLabel = utility.splitTestFeaturesByLabel(test, len(labels))

    confusionMatrix = np.zeros((labelCount, labelCount))

    for labelIndex, activations in enumerate(featuresByLabel):
        counter = 0
        while counter < activations.shape[0]:
            currentActivation = activations[counter, 0:neurons]
            searchedLabel = np.argmax(activations[counter, neurons:])

            # per-label score: activation-weighted sum of the trained log-probabilities
            predictions = np.sum(currentActivation * PLog, axis=1)

            if np.argmax(predictions) == searchedLabel:
                overallCorrect += 1
            else:
                overallWrong += 1

            confusionMatrix[labelIndex, np.argmax(predictions)] += 1
            averagePrecision = 0
            relevant = 0

            predictedLabelsSorted = np.argsort(predictions)[::-1]

            for idx, value in enumerate(predictedLabelsSorted):
                indicator = 0
                if value == searchedLabel:
                    indicator = 1
                    relevant += 1

                precision = float(relevant) / (idx + 1)
                averagePrecision += (precision * indicator)

            if relevant != 0:
                averagePrecision = float(averagePrecision) / relevant
            meanAveragePrecision += averagePrecision

            counter += 1

    meanAveragePrecision = float(meanAveragePrecision) / test.shape[0]

    logger.info(' Accuracy: ' +
                str(float(overallCorrect) / (overallWrong + overallCorrect)))
    logger.info(' Mean average precision: ' + str(meanAveragePrecision))

    utility.plotConfusionMatrix(confusionMatrix, labels,
                                targetPath + "confusion.pdf")

    results = [0, meanAveragePrecision, overallCorrect, overallWrong]
    logger.info('Writing file ' + targetPath + "overview.csv")
    np.savetxt(targetPath + "overview.csv", results, delimiter=',')
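
The prediction step implies that tb.trainBayes returns PLog as a (labelCount x neurons) matrix of log-probabilities: broadcasting multiplies the activation vector into every label row, and the row sums are the per-label scores. tb.trainBayes itself is not shown in the source, so the values below are placeholders:

import numpy as np

labelCount, neurons = 3, 4
PLog = np.log(np.random.rand(labelCount, neurons))  # placeholder log-probabilities
currentActivation = np.array([0.5, 0.0, 1.2, 0.3])

scores = np.sum(currentActivation * PLog, axis=1)   # broadcast over label rows
predicted = np.argmax(scores)                       # index of the best-scoring label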
Example #6
    evaluationTargetPath = sys.argv[4]
    if not evaluationTargetPath.endswith('/'):
        evaluationTargetPath += '/'

    if not os.path.exists(os.path.dirname(evaluationTargetPath)
                          ) and os.path.dirname(evaluationTargetPath) != '':
        os.makedirs(os.path.dirname(evaluationTargetPath))

    plotTrainingLossAndAccuracy(logFilePath, evaluationTargetPath)

    labels = utility.getLabelStrings(labelIndexMappingPath)
    [confusionMatrix, meanAveragePrecision, overallCorrect,
     overallWrong] = testNeuralNet(labels, labelIndexInfoPath,
                                   evaluationTargetPath)

    overviewPath = evaluationTargetPath + "overview.csv"
    logger.info('Writing file ' + overviewPath)
    np.savetxt(overviewPath,
               np.array(
                   [0, meanAveragePrecision, overallCorrect, overallWrong]),
               delimiter=',')

    confusionMatrixPath = evaluationTargetPath + 'confusionMatrix.npy'
    logger.info('Writing file ' + confusionMatrixPath)
    with open(confusionMatrixPath, "wb") as outputFile:
        np.save(outputFile, confusionMatrix)

    utility.plotConfusionMatrix(confusionMatrix, labels,
                                evaluationTargetPath + 'confusionMatrix.pdf')
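
All of these snippets write result rows to overview.csv in the same order. The column meaning below is inferred from the appends and np.savetxt calls, not documented in the source:

# inferred overview.csv row layout:
# [k (0 when no K applies), meanAveragePrecision, overallCorrect, overallWrong]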
Example #7
def testBayes(training, test, labels, targetPath):

   if not targetPath.endswith('/'):
      targetPath += '/'
   if not os.path.exists(os.path.dirname(targetPath)):
      os.makedirs(os.path.dirname(targetPath))

   labelCount = len(labels)
   neurons = training.shape[1] - labelCount

   # shift by the absolute global minimum so no activation is negative before Bayes training
   absoluteMin = min(np.min(training[:,0:neurons]), np.min(test[:,0:neurons]))

   training[:,0:neurons] = training[:,0:neurons] + np.abs(absoluteMin)
   test[:,0:neurons] = test[:,0:neurons] + np.abs(absoluteMin)

   PLog = tb.trainBayes(training, labelCount)

   overallCorrect = 0
   overallWrong = 0
   meanAveragePrecision = 0

   featuresByLabel = utility.splitTestFeaturesByLabel(test, len(labels))

   confusionMatrix = np.zeros((labelCount,labelCount))

   for labelIndex, activations in enumerate(featuresByLabel):
      counter = 0
      while counter < activations.shape[0]:
         currentActivation = activations[counter,0:neurons]
         searchedLabel = np.argmax(activations[counter,neurons:])

         # per-label score: activation-weighted sum of the trained log-probabilities
         predictions = np.sum(currentActivation * PLog, axis=1)

         if np.argmax(predictions) == searchedLabel:
            overallCorrect += 1
         else:
            overallWrong += 1

         confusionMatrix[labelIndex,np.argmax(predictions)] += 1
         averagePrecision = 0
         relevant = 0

         predictedLabelsSorted = np.argsort(predictions)[::-1]

         for idx, value in enumerate(predictedLabelsSorted):
            indicator = 0
            if value == searchedLabel:
               indicator = 1
               relevant += 1

            precision = float(relevant) / (idx + 1)
            averagePrecision += (precision * indicator)

         if relevant != 0:
            averagePrecision = float(averagePrecision) / relevant
         meanAveragePrecision += averagePrecision

         counter += 1

   meanAveragePrecision = float(meanAveragePrecision) / test.shape[0]

   logger.info(' Accuracy: ' + str(float(overallCorrect) / (overallWrong + overallCorrect)))
   logger.info(' Mean average precision: ' + str(meanAveragePrecision))

   utility.plotConfusionMatrix(confusionMatrix, labels, targetPath + "confusion.pdf")

   results = [0, meanAveragePrecision, overallCorrect, overallWrong]
   logger.info('Writing file ' + targetPath + "overview.csv")
   np.savetxt(targetPath + "overview.csv", results, delimiter=',')
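
One caveat worth noting for the overview files written from plain lists: np.savetxt writes a 1-D input as a single column, one value per line, so the delimiter has no effect and overview.csv gets four rows rather than one comma-separated row. Reshaping to 2-D keeps the four values on one line; a minimal illustration with placeholder numbers:

import numpy as np

results = [0, 0.73, 812, 188]
np.savetxt("column.csv", results, delimiter=',')                         # four lines
np.savetxt("row.csv", np.array(results).reshape(1, -1), delimiter=',')   # one line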