def getEntropy(attribute, examples):
    """Return the expected information content of splitting *examples*
    on *attribute*: sum over each attribute value of
    (fraction of examples with that value) * info([p(+), p(-)]).

    attribute -- name of the attribute to evaluate
    examples  -- iterable of Example objects with .attributes and .label
    """
    entropy = 0
    total = len(examples)
    # TODO: can be optimized further (single pass over examples)
    for value in Attribute.getAttributes()[attribute]:
        positives = 0
        negatives = 0
        # Count positive/negative labels among examples matching this value.
        for ex in examples:
            if ex.attributes[attribute] == value:
                if ex.label == True:
                    positives += 1
                else:
                    negatives += 1
        matched = float(positives + negatives)
        if matched != 0:
            pPos = float(positives) / matched
            pNeg = float(negatives) / matched
            # Weight this value's information content by its frequency.
            entropy += (matched / total) * getInfoConent([pPos, pNeg])
    return entropy
def decisionListLearning(examples):
    """Recursively learn a decision list from *examples*.

    An empty example set yields a trivial terminal ListNode that always
    predicts False.  Raises DListCreationErr when no test yields a pure
    (all-positive or all-negative) subset.
    """
    # Base case: no examples left -> trivial node predicting 'No'/False.
    if not examples:
        return ListNode({}, {}, {}, False)
    # Find a test such that the matching subset is all +ve or all -ve.
    selectedAttributes, isSelectedIncl, selectedPureLabel, selectedOp = \
        findTest(examples, Attribute.getAttributes())
    if not len(selectedAttributes):
        raise DListCreationErr('couldn\'t find attributes for even split.')
    node = ListNode(selectedAttributes, isSelectedIncl, selectedOp,
                    selectedPureLabel)
    # The tail of the list is learned from the examples this test
    # did not consume.
    remaining = filterExamples(examples, selectedAttributes,
                               isSelectedIncl, selectedOp)
    node.setNextNode(decisionListLearning(remaining))
    return node
# NOTE(review): a byte-for-byte behavioral duplicate of decisionListLearning
# was defined here, silently shadowing the identical definition above (the
# only difference was a cosmetic space after `raise`).  The redundant
# redefinition has been removed; the earlier definition remains in effect.
def decisionTreeLearning(examples, attribs, default):
    """ID3-style recursive decision-tree induction.

    examples -- training Example objects
    attribs  -- attribute names still available for splitting
    default  -- label to predict when no examples remain

    Returns the root TreeNode of the learned (sub)tree.
    """
    if len(examples) == 0:
        # No data left: fall back to the majority label computed
        # previously by the parent call.
        return TreeNode(None, default)
    if checkIfSameClass(examples):
        # All remaining examples share one label: emit a leaf with it.
        return TreeNode(None, examples[0].label)
    if len(attribs) == 0:
        # Attributes exhausted: predict the majority label.
        return TreeNode(None, majorityValue(examples))
    best = chooseAttribute(attribs, examples)
    tree = TreeNode(best)
    fallback = majorityValue(examples)
    # Grow one branch per possible value of the chosen attribute.
    for value in Attribute.getAttributes()[best]:
        subset = filterExamples(best, value, examples)
        branch = decisionTreeLearning(subset,
                                      filterAttributes(attribs, best),
                                      fallback)
        tree.addSubtree(value, branch)
    return tree
def main(): examples = [] examples.append(Example({'alt':True, 'bar':False, 'fri':False, 'hun':True,\ 'pat':'some', 'price':'$$$', 'rain':False,\ 'res':True, 'type':'french', 'est':'0-10'},\ True)) examples.append(Example({'alt':True, 'bar':False, 'fri':False, 'hun':True,\ 'pat':'full', 'price':'$', 'rain':False,\ 'res':False, 'type':'thai', 'est':'30-60'},\ False)) examples.append(Example({'alt':False, 'bar':True, 'fri':False, 'hun':False,\ 'pat':'some', 'price':'$', 'rain':False,\ 'res':False, 'type':'burger', 'est':'0-10'},\ True)) examples.append(Example({'alt':True, 'bar':False, 'fri':True, 'hun':True,\ 'pat':'full', 'price':'$', 'rain':True,\ 'res':False, 'type':'thai', 'est':'10-30'},\ True)) examples.append(Example({'alt':True, 'bar':False, 'fri':True, 'hun':False,\ 'pat':'full', 'price':'$$$', 'rain':False,\ 'res':True, 'type':'french', 'est':'>60'},\ False)) examples.append(Example({'alt':False, 'bar':True, 'fri':False, 'hun':True,\ 'pat':'some', 'price':'$$', 'rain':True,\ 'res':True, 'type':'italian', 'est':'0-10'},\ True)) examples.append(Example({'alt':False, 'bar':True, 'fri':False, 'hun':False,\ 'pat':'none', 'price':'$', 'rain':True,\ 'res':False, 'type':'burger', 'est':'0-10'},\ False)) examples.append(Example({'alt':False, 'bar':False, 'fri':False, 'hun':True,\ 'pat':'some', 'price':'$$', 'rain':True,\ 'res':True, 'type':'thai', 'est':'0-10'},\ True)) examples.append(Example({'alt':False, 'bar':True, 'fri':True, 'hun':False,\ 'pat':'full', 'price':'$', 'rain':True,\ 'res':False, 'type':'burger', 'est':'>60'},\ False)) examples.append(Example({'alt':True, 'bar':True, 'fri':True, 'hun':True,\ 'pat':'full', 'price':'$$$', 'rain':False,\ 'res':True, 'type':'italian', 'est':'10-30'},\ False)) examples.append(Example({'alt':False, 'bar':False, 'fri':False, 'hun':False,\ 'pat':'none', 'price':'$', 'rain':False,\ 'res':False, 'type':'thai', 'est':'0-10'},\ False)) examples.append(Example({'alt':True, 'bar':True, 'fri':True, 'hun':True,\ 'pat':'full', 
'price':'$', 'rain':False,\ 'res':False, 'type':'burger', 'est':'30-60'},\ True)) rootNode = decisionTreeLearning(examples, Attribute.getAttributes().keys(),\ False) printDecisionTree(rootNode) print predictFromDecisionTree(rootNode, {'alt':True, 'bar':True, 'fri':True, 'hun':True,\ 'pat':'full', 'price':'$', 'rain':False,\ 'res':False, 'type':'burger', 'est':'30-60'}) print '**** prediction accuracy ****' trainAccuracy = getAccuracy(examples, rootNode) print 'training set error: ' + str(100-trainAccuracy) + '%'