# assumed imports: module names follow how they are referenced in the function body
import numpy

import evaluation
import prepareFeatureSets
import realdata


def evalOneFold(foldId):
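     # trains cost-sensitive models on this fold's training data, selects the best
     # feature subset / model, and returns its performance on the fold's test data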
     
     definedFeatureCosts = realdata.getFeaturesCosts(dataName)
         
     trainData, trainLabels, unlabeledData, testData, testLabels = realdata.loadSubset(dataName, None, foldId, imputationMethod)
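     # trainData/testData are this fold's labeled splits; unlabeledData is an optional
     # pool that is only used when USE_UNLABELED_DATA is set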
     
     if USE_UNLABELED_DATA:
         assert(unlabeledData.shape[0] > 0)
     else:
         unlabeledData = numpy.zeros((0, trainData.shape[1]))
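         # zero-row placeholder so the stacking below still works without unlabeled data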
     
     allData = numpy.vstack((trainData, unlabeledData))
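     # labeled and unlabeled covariates stacked together; the cost vector must
     # supply one entry per feature column (checked below)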
     
     assert(definedFeatureCosts.shape[0] == allData.shape[1])
     
     print("training data size = ", trainData.shape[0])
     print("unlabeled data size = ", unlabeledData.shape[0])
     print("test data size = ", testData.shape[0])
     
     print("*****************************")
     print("foldId = ", foldId)
     print("*****************************")
     
     if FULL_MODEL:
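         # full-model variant: keep every available feature and train a single model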
         bestFixedFeatures = numpy.arange(trainData.shape[1])
         bestModel, misclassificationCosts, totalCostEstimate = prepareFeatureSets.getPredictionModelsAndCosts(trainData, trainLabels, bestFixedFeatures, definedFeatureCosts, falsePositiveCost, targetRecall, useTargetRecall = True, falseNegativeCost = None, classificationModelName = classificationModelName)
         
     else:
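         # otherwise derive an ordered sequence of candidate feature subsets and
         # later keep the one with the lowest estimated total cost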
         if USE_L1:
             allFeatureSetsInOrder, _ = prepareFeatureSets.getAllFeatureSetsInOrderWithL1LogReg(trainData, trainLabels, unlabeledData, None, definedFeatureCosts)
         else:
             # greedy construction of the feature-set sequence is not available in this code path
             raise NotImplementedError("only the L1-based feature-set ordering (USE_L1) is supported")
             # allFeatureSetsInOrder, allEstimatedTotalCosts = prepareFeatureSets.getAllFeatureSetsInOrderWithGreedyMethod(trainData, trainLabels, unlabeledData, misclassificationCosts, definedFeatureCosts)
         
     
         print("GET ALL PREDICTION MODELS AND DETERMINE FALSE NEGATIVE COSTS:")
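         # train one prediction model per candidate feature subset and estimate each subset's total cost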
         allPredictionModels, allMisclassificationCosts, allEstimatedTotalCosts = prepareFeatureSets.getAllPredictionModelsAndCosts(trainData, trainLabels, allFeatureSetsInOrder, definedFeatureCosts, falsePositiveCost, targetRecall, useTargetRecall = True, falseNegativeCost = None, classificationModelName = classificationModelName)
         
         bestModelId = numpy.argmin(allEstimatedTotalCosts)
         bestModel = allPredictionModels[bestModelId]
         misclassificationCosts = allMisclassificationCosts[bestModelId]
         bestFixedFeatures = allFeatureSetsInOrder[bestModelId]
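     # evaluate the selected model and its fixed feature subset on the held-out test split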
    
     return evaluation.getOverallPerformance_fixedCovariateSet(bestModel, testData, testLabels, definedFeatureCosts, misclassificationCosts, bestFixedFeatures, targetRecall)
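
# A minimal usage sketch, assuming a 5-fold split (e.g. "pima_5foldCV") and that the
# module-level settings referenced above (dataName, imputationMethod, falsePositiveCost,
# targetRecall, classificationModelName, USE_UNLABELED_DATA, USE_L1, FULL_MODEL) are
# already defined:
#
#     foldResults = [evalOneFold(foldId) for foldId in range(5)]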
Example #2
    # only used to compare with the results from previous work on Diabetes data
    assert (dataName == "pima_5foldCV")
    ALL_FALSE_POSITIVE_COSTS = [400, 800]
elif COST_TYPE == "asymmetricCost":
    ALL_FALSE_POSITIVE_COSTS = constants.allFalsePositiveCosts
else:
    assert (False)

DYNAMIC = "dynamic"
STATIC = "static"
FULL_MODEL = "fullModel"
allVariations = [FULL_MODEL, DYNAMIC, STATIC]
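# identifiers for the three evaluation variants handled by this script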

startTimeTotal = time.time()

definedFeatureCosts = realdata.getFeaturesCosts(dataName)

trainedModelsFilenameNonLinearL1 = dataName + "_" + classificationModelName + "_nonLinearL1"
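# filename prefix for the cached non-linear L1 artifacts, read below from constants.MODEL_FOLDERNAME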

with open(
        constants.MODEL_FOLDERNAME + trainedModelsFilenameNonLinearL1 +
        "_models", "rb") as f:
    allPredictionModelsNonLinearL1_allFolds = pickle.load(f)
with open(
        constants.MODEL_FOLDERNAME + trainedModelsFilenameNonLinearL1 +
        "_probs", "rb") as f:
    allTrainingTrueProbsAllModelsNonLinearL1_allFolds = pickle.load(f)
with open(
        constants.MODEL_FOLDERNAME + trainedModelsFilenameNonLinearL1 +
        "_features", "rb") as f:
    allFeatureArraysInOrderNonLinearL1_allFolds = pickle.load(f)
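# each *_allFolds pickle appears to hold one entry per cross-validation fold: the trained
# models, their training-set probability estimates, and the ordered feature index arrays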