def classifyNewSample(tree, testData, fileName, nominalColumns):
    """Classify every row of testData with the tree and report the results.

    For each row the matching leaf is looked up with getRelevantLeafNode and
    the label with the highest count in that leaf's ``leafValues`` mapping
    (label -> number of occurrences) becomes the prediction.

    Args:
        tree: Root of the decision tree; handed to getRelevantLeafNode.
        testData: Iterable of rows. The last element of each row is taken
            as the expected class label.
        fileName: Passed to writeResult together with the predictions.
        nominalColumns: Handed to getRelevantLeafNode unchanged.

    Returns:
        The value computeStats returns for the list of
        ``[predictedLabel, expectedLabel]`` pairs (one pair per row).
    """
    predictionsPlusExpectedValues = []

    for row in testData:
        leaf = getRelevantLeafNode(tree, row, nominalColumns).leafValues

        if len(leaf) == 1:
            # Exactly one label in the leaf: use that key itself.
            # leaf.keys() is a list only on Python 2; on Python 3 it is a
            # view object whose str() is "dict_keys([...])", which would be
            # recorded as the prediction instead of the label.
            predictedLabel = next(iter(leaf))
        else:
            # Several labels (or none): pick the most frequent one.
            # Dividing every count by the same total does not change which
            # label wins, so the raw counts are compared directly; this also
            # avoids a division by zero when all counts are 0.
            # Ties go to the label met first while iterating the dict,
            # because only a strictly larger count replaces the current best.
            # An empty leaf leaves the prediction as None.
            predictedLabel = None
            bestCount = 0
            for label in leaf:
                if leaf[label] > bestCount:
                    bestCount = leaf[label]
                    predictedLabel = label

        # Pair the prediction (as a string) with the expected label, which
        # is the last element of the test row.
        predictionsPlusExpectedValues.append(
            [str(predictedLabel), row[len(row) - 1]])

    writeResult(predictionsPlusExpectedValues, fileName)
    return computeStats(predictionsPlusExpectedValues)
def classifyNewSample(tree, testData, fileName, nominalColumns):
    """Predict a class label for each test row and summarise the outcome.

    The leaf reached by a row is obtained from getRelevantLeafNode; its
    ``leafValues`` mapping (label -> occurrence count) decides the
    prediction: the label with the largest count wins.

    Args:
        tree: Root of the decision tree; forwarded to getRelevantLeafNode.
        testData: Iterable of rows whose last element is the expected label.
        fileName: Forwarded to writeResult along with the predictions.
        nominalColumns: Forwarded to getRelevantLeafNode.

    Returns:
        The result of computeStats applied to the list of
        ``[predictedLabel, expectedLabel]`` pairs.
    """
    predictionsPlusExpectedValues = []

    for row in testData:
        leaf = getRelevantLeafNode(tree, row, nominalColumns).leafValues
        predictedLabel = None

        if len(leaf) == 1:
            # A single label: take the key directly. On Python 3
            # leaf.keys() is a view, not a list, so stringifying it would
            # produce "dict_keys([...])" rather than the label.
            predictedLabel = next(iter(leaf))
        else:
            # Choose the label with the highest count. Comparing counts is
            # equivalent to comparing count/total and cannot divide by zero.
            # Only a strictly greater count replaces the current best, so a
            # tie is resolved in favour of the label iterated first; an
            # empty leaf keeps the prediction at None.
            highestCount = 0
            for label in leaf:
                count = leaf[label]
                if count > highestCount:
                    highestCount = count
                    predictedLabel = label

        # Record [prediction, expected]; the expected label is the last
        # element of the row.
        expectedLabel = row[len(row) - 1]
        predictionsPlusExpectedValues.append(
            [str(predictedLabel), expectedLabel])

    writeResult(predictionsPlusExpectedValues, fileName)
    return computeStats(predictionsPlusExpectedValues)
def classifyNewSample(tree, testData, depth, fileName, nominalColumns):
    """Classify every row of testData by walking the tree, then report.

    Each row is routed from the root down to a node whose ``leafValues``
    is not None. The label with the highest count in that mapping
    (label -> number of occurrences) becomes the prediction.

    Args:
        tree: Root node. Nodes are read through the attributes ``col``,
            ``criteria``, ``leftBranch``, ``rightBranch`` and ``leafValues``.
        testData: Iterable of rows, indexable by a node's ``col``. The last
            element of each row is taken as the expected class label.
        depth: When 0 the root's ``leafValues`` is used directly without
            any traversal. Also passed to writeResult.
        fileName: Passed to writeResult.
        nominalColumns: Container of column identifiers compared by
            equality; all other columns are compared numerically.

    Returns:
        The value computeStats returns for the list of
        ``[predictedLabel, expectedLabel]`` pairs (one pair per row).
    """
    predictionsPlusExpectedValues = []

    for row in testData:
        if depth == 0:
            # Special case: the tree is read as a single leaf.
            leaf = tree.leafValues
        else:
            # Walk down until a node carrying leaf values is reached.
            currentNode = tree
            leaf = None
            while leaf is None:
                column = currentNode.col
                if column not in nominalColumns:
                    # Continuous column: numeric threshold test.
                    takeRight = (float(row[column])
                                 <= float(currentNode.criteria))
                else:
                    # Nominal column: exact match test.
                    takeRight = row[column] == currentNode.criteria

                if takeRight:
                    currentNode = currentNode.rightBranch
                else:
                    currentNode = currentNode.leftBranch
                leaf = currentNode.leafValues

        if len(leaf) == 1:
            # Exactly one label in the leaf: use that key itself.
            # leaf.keys() is a list only on Python 2; on Python 3 it is a
            # view object whose str() is "dict_keys([...])", which would be
            # recorded as the prediction instead of the label.
            predictedLabel = next(iter(leaf))
        else:
            # Pick the most frequent label. Comparing raw counts gives the
            # same winner as comparing count/total and avoids a division by
            # zero when all counts are 0. Ties go to the label met first
            # while iterating the dict, because only a strictly larger
            # count replaces the current best. An empty leaf leaves the
            # prediction as None.
            predictedLabel = None
            bestCount = 0
            for label in leaf:
                if leaf[label] > bestCount:
                    bestCount = leaf[label]
                    predictedLabel = label

        # Pair the prediction (as a string) with the expected label, which
        # is the last element of the test row.
        predictionsPlusExpectedValues.append(
            [str(predictedLabel), row[len(row) - 1]])

    writeResult(predictionsPlusExpectedValues, depth, fileName)
    return computeStats(predictionsPlusExpectedValues)