# Example no. 1
def classifyNewSample(tree, testData, fileName, nominalColumns):
    """Classify every row of ``testData`` with the tree and score the result.

    For each row the matching leaf node is obtained from
    ``getRelevantLeafNode`` and its ``leafValues`` is read as a mapping of
    class label -> occurrence count. The label with the highest count is the
    prediction for that row.

    Args:
        tree: Decision tree root, passed unchanged to ``getRelevantLeafNode``.
        testData: Iterable of rows; the last element of each row is used as
            the expected label.
        fileName: Passed unchanged to ``writeResult``.
        nominalColumns: Passed unchanged to ``getRelevantLeafNode``.

    Returns:
        The value returned by ``computeStats`` for the list of
        ``[predictedLabel, expectedLabel]`` pairs (one pair per row).
    """
    predictionsPlusExpectedValues = []

    for row in testData:
        leaf = getRelevantLeafNode(tree, row, nominalColumns).leafValues

        # Majority vote over the leaf's label counts. Comparing raw counts
        # selects the same label as comparing count / total, but cannot
        # divide by zero. A leaf with exactly one label needs no special
        # case: its only label wins the vote. On a tie the label that the
        # mapping yields first is kept (strict ">" comparison).
        predictedLabel = None
        bestCount = 0
        for label, count in leaf.items():
            if count > bestCount:
                bestCount = count
                predictedLabel = label

        # NOTE(review): the lines that stored the per-row result were missing
        # from the original; the [prediction, expected] layout is
        # reconstructed from the surrounding comments -- confirm against
        # writeResult / computeStats.
        predictionsPlusExpectedValues.append([predictedLabel, row[-1]])

    writeResult(predictionsPlusExpectedValues, fileName)
    return computeStats(predictionsPlusExpectedValues)
# Example no. 2
def classifyNewSample(tree, testData, fileName, nominalColumns):
    """Predict a class label for each test row and compute statistics.

    The leaf node for a row comes from ``getRelevantLeafNode``; its
    ``leafValues`` is read as a mapping of class label -> occurrence count,
    and the most frequent label becomes the prediction.

    Args:
        tree: Decision tree root, forwarded to ``getRelevantLeafNode``.
        testData: Iterable of rows. NOTE(review): the last element of each
            row is assumed to be the expected label -- the line reading it
            was missing from the original; confirm.
        fileName: Forwarded to ``writeResult``.
        nominalColumns: Forwarded to ``getRelevantLeafNode``.

    Returns:
        The value returned by ``computeStats`` for the collected
        ``[predictedLabel, expectedLabel]`` pairs.
    """
    predictionsPlusExpectedValues = []

    for row in testData:
        leaf = getRelevantLeafNode(tree, row, nominalColumns).leafValues

        # Pick the label with the largest count. This is equivalent to
        # picking the largest count / total probability, without the
        # division (and so without a ZeroDivisionError when all counts are
        # zero). A single-label leaf is covered by the same loop. Ties keep
        # the first label yielded by the mapping because the comparison is
        # strict.
        predictedLabel = None
        bestCount = 0
        for label, count in leaf.items():
            if count > bestCount:
                bestCount = count
                predictedLabel = label

        # One [prediction, expected] pair per row.
        # NOTE(review): reconstructed -- the original lines that built and
        # stored this pair were missing; verify the layout expected by
        # writeResult / computeStats.
        predictionsPlusExpectedValues.append([predictedLabel, row[-1]])

    writeResult(predictionsPlusExpectedValues, fileName)
    return computeStats(predictionsPlusExpectedValues)
def classifyNewSample(tree, testData, depth, fileName, nominalColumns):
    """Classify each test row by walking the tree, then score the result.

    For every row the tree is descended from ``tree`` until a node whose
    ``leafValues`` is not ``None`` is reached. ``leafValues`` is read as a
    mapping of class label -> occurrence count and the most frequent label
    is the prediction.

    Args:
        tree: Root node. Nodes are accessed through ``col``, ``criteria``,
            ``leftBranch``, ``rightBranch`` and ``leafValues``.
        testData: Iterable of rows. NOTE(review): the last element of each
            row is assumed to be the expected label -- the line reading it
            was missing from the original; confirm.
        depth: Tree depth; ``0`` means the root itself is used as the leaf.
            Also forwarded to ``writeResult``.
        fileName: Forwarded to ``writeResult``.
        nominalColumns: Columns compared by equality; all other columns are
            compared numerically after conversion with ``float``.

    Returns:
        The value returned by ``computeStats`` for the collected
        ``[predictedLabel, expectedLabel]`` pairs.
    """
    predictionsPlusExpectedValues = []

    for row in testData:
        currentNode = tree
        leaf = None

        if depth == 0:
            # Special case: a depth-0 tree is just its root leaf.
            leaf = tree.leafValues
        else:
            # Descend until a node carrying leaf values is reached.
            # NOTE(review): the "else" lines were missing from the original;
            # right-on-match / left-otherwise follows the order of the
            # surviving assignments -- confirm against the tree builder.
            while leaf is None:
                if currentNode.col not in nominalColumns:
                    # Continuous column: numeric threshold test.
                    if float(row[currentNode.col]) <= float(currentNode.criteria):
                        currentNode = currentNode.rightBranch
                    else:
                        currentNode = currentNode.leftBranch
                else:
                    # Nominal column: equality test.
                    if row[currentNode.col] == currentNode.criteria:
                        currentNode = currentNode.rightBranch
                    else:
                        currentNode = currentNode.leftBranch
                leaf = currentNode.leafValues

        # Majority vote over the label counts. Comparing counts selects the
        # same label as comparing count / total, without a possible division
        # by zero. A single-label leaf is handled by the same loop. Ties
        # keep the first label yielded by the mapping (strict ">").
        predictedLabel = None
        bestCount = 0
        for label, count in leaf.items():
            if count > bestCount:
                bestCount = count
                predictedLabel = label

        # One [prediction, expected] pair per row.
        # NOTE(review): reconstructed from the surviving comments; verify the
        # layout expected by writeResult / computeStats.
        predictionsPlusExpectedValues.append([predictedLabel, row[-1]])

    writeResult(predictionsPlusExpectedValues, depth, fileName)
    return computeStats(predictionsPlusExpectedValues)