import constants
import realdata
import sklearn.metrics

# Dataset selection: the alternatives are kept commented out, the active one is assigned last.
# dataName = "breastcancer_5foldCV"
# dataName = "pima_5foldCV"
# dataName = "pyhsioNetWithMissing_5foldCV"
dataName = "heartDiseaseWithMissing_5foldCV"

# NOTE(review): `experimentHelper`, `numpy` and `evaluation` are used below but are
# not among the imports visible at the top of this file -- confirm they are imported.

# The recorder is sized by the number of false-positive cost settings that are swept below.
resultsRecorder = experimentHelper.ResultsRecorder(len(constants.allFalsePositiveCosts))
readyForSaving = False

definedFeatureCosts = realdata.getFeaturesCosts(dataName)

for falsePositiveCost in constants.allFalsePositiveCosts:

    # Asymmetric setting: the false-negative cost is a fixed multiple of the false-positive cost.
    falseNegativeCost = falsePositiveCost * constants.FN_TO_FP_RATIO

    # Integer table with one row per test fold and two columns (presumably the best
    # lambda id and tree id -- it is not written to in the visible code; confirm).
    # The builtin `int` is used as dtype because the `numpy.int` alias was removed
    # in NumPy 1.24; both denote the same dtype.
    allBestSettings = numpy.zeros((constants.NUMBER_OF_FOLDS, 2), dtype=int)

    for testFoldId in range(constants.NUMBER_OF_FOLDS):

        print("*************************** GREEDY MISER *******************************************")

        # Parameter selection for the current test fold under the current cost pair.
        bestLambdaId, bestTreeId = evaluation.getBestParametersForGreedyMiser_asymmetric(
            dataName, definedFeatureCosts, testFoldId, falsePositiveCost, falseNegativeCost)
       
# Example #2
0
    if variationName == DYNAMIC:
        infoStr += "_" + densityRegressionModelName

        if USE_UNLABELED_DATA:
            infoStr += "_withUnlabeledData"
        else:
            infoStr += "_noUnlabeledData"

    if variationName == FULL_MODEL:
        infoStr = FULL_MODEL
    else:
        infoStr += "_" + FEATURE_SELECTION_METHOD

    infoStr += "_" + classificationModelName

    resultsRecorder = experimentHelper.ResultsRecorder(
        len(ALL_FALSE_POSITIVE_COSTS))

    for costId in range(len(ALL_FALSE_POSITIVE_COSTS)):

        falsePositiveCost = ALL_FALSE_POSITIVE_COSTS[costId]

        if COST_TYPE == "symmetricCost":
            falseNegativeCost = falsePositiveCost
            assert (dataName == "pima_5foldCV")
            correctClassificationCost = -50.0  # in order to align to the setting in (Ji and Carin, 2007; Dulac-Arnold et al., 2012) for Diabetes.

            misclassificationCosts = numpy.zeros((2, 2))
            misclassificationCosts[0, 1] = falsePositiveCost
            misclassificationCosts[1, 0] = falseNegativeCost
            misclassificationCosts[0, 0] = correctClassificationCost
            misclassificationCosts[1, 1] = correctClassificationCost
dataName = "breastcancer_5foldCV"

# sameClassCost is -50.0 for the pima dataset (in order to compare to the
# Li and Carin work) and 0.0 for every other dataset.
# assert(misclassificationCostsSymmetric == 400 or misclassificationCostsSymmetric == 800)
sameClassCost = -50.0 if dataName == "pima_5foldCV" else 0.0

NUMBER_OF_FOLDS = 5

# Symmetric misclassification costs to sweep: 100.0, 200.0, ..., 1000.0.
allMisclassificationCostsSymmetric = list(map(float, range(100, 1001, 100)))

# The recorder is sized by the number of symmetric misclassification cost settings.
resultsRecorder = experimentHelper.ResultsRecorder(
    len(allMisclassificationCostsSymmetric))
# Flag starts out False; it is not updated anywhere in the visible code.
readyForSaving = False

for misclassificationCostsSymmetric in allMisclassificationCostsSymmetric:
    
    validTotalCostsAllFolds = numpy.zeros(NUMBER_OF_FOLDS)
    validFeatureCostsAllFolds = numpy.zeros(NUMBER_OF_FOLDS)
    validMisClassificationCostsAllFolds = numpy.zeros(NUMBER_OF_FOLDS)
    validAccuracyAllFolds = numpy.zeros(NUMBER_OF_FOLDS)
    
    allTestFoldAvgBestTotalCostsValidResult = numpy.zeros(NUMBER_OF_FOLDS) # for analysis only
     
    allBestSettings = numpy.zeros((NUMBER_OF_FOLDS, 2), dtype = numpy.int)
     
    for testFoldId in range(NUMBER_OF_FOLDS):