def _CPARdfs(self, objPNDatacopy, lstAntecedent, lstConsequent):
    """Depth first search for constructing a new rule.

    Recursively extends *lstAntecedent* with near-best-gain attributes of
    *objPNDatacopy* until no attribute clears the minimum gain threshold
    (or the antecedent reaches ``MAX_RULE_SIZE``); at that leaf a rule
    ``antecedent -> consequent`` is emitted with its Laplace accuracy.

    :param objPNDatacopy: working copy of the PN data for this branch.
    :param lstAntecedent: attribute indices accumulated along this path.
    :param lstConsequent: class-label list forming the rule consequent.
    """
    # Stop growing the rule once the antecedent hits the configured cap.
    blnMaxExceeded = len(lstAntecedent) == self.MAX_RULE_SIZE

    # Get all attributes whose gain is within 1.0 - dblGainSimilarityRatio
    # of the maximum gain.
    objPNDatacopy.recalculateGains()
    dblGain, lstAttributes = objPNDatacopy.getBestGainAttributes(
        self.dblGainSimilarityRatio)

    if dblGain > self.dblMinGainThreshold and not blnMaxExceeded:
        # Branch on every near-best attribute and search one level deeper.
        # NOTE: an attribute may no longer be within the similarity window
        # after deeper searches mutate the data copy.
        for intAttribute in lstAttributes:
            lstAntecedentCopy = deepcopy(lstAntecedent)
            lstAntecedentCopy.append(intAttribute)
            objPNDatacopyTemp = objPNDatacopy.copyPrimes()
            # Zero the A' entry so this attribute cannot be picked again.
            objPNDatacopyTemp.lstAprime[intAttribute] = 0
            objPNDatacopyTemp.removeExamplesNotSatisfying(lstAntecedentCopy)
            self._CPARdfs(objPNDatacopyTemp, lstAntecedentCopy, lstConsequent)
    elif lstAntecedent:
        # Leaf of the search: emit the accumulated rule and decay the
        # weights of the examples it covers.
        lstAntecedent.sort()
        dblLaPlaceAccuracy = self.objPNData.getLaPlaceAccuracy(
            lstAntecedent, lstConsequent)
        objRule = AssociationRule(lstAntecedent, lstConsequent,
                                  {"laplace": dblLaPlaceAccuracy})
        objPNDatacopy.updateWeights(objRule, self.dblDecayFactor)
        self.addRule(objRule)
    else:
        log("Empty antecedent.")
def train(self, samples, maxRuleSize=9999, mineOnlyClass=None):
    """Train with CPAR on the sample set, returning an AssociationRuleSet.

    :param samples: training samples handed to ``PNData``.
    :param maxRuleSize: maximum antecedent length for a mined rule.
    :param mineOnlyClass: when given, mine rules only for this class.
    :returns: the mined rules as an ``AssociationRuleSet``, remapped from
        feature indices back to features.
    """
    self.MAX_RULE_SIZE = maxRuleSize
    self.objPNData = PNData(samples)
    self.lstRules = []
    classes = self.objPNData.getClassList()
    log("Dataset has %d classes over %d samples." % (len(classes),
                                                     len(samples)))
    for current_class in classes:
        # Optionally restrict mining to a single requested class.
        if mineOnlyClass is not None and current_class != mineOnlyClass:
            continue
        log("Processing class %s" % (current_class))
        self.objPNData.setCurrentClass(current_class)
        # Mine until the remaining total weight drops below the configured
        # fraction of the class's starting weight.
        dblMinTotalWeight = (self.dblTotalWeightFactor *
                             self.objPNData.getTotalWeight())
        lstAntecedent = []
        while self.objPNData.getTotalWeight() > dblMinTotalWeight:
            self.objPNData.refreshPNAData()
            # Stop early when no attribute can clear the gain threshold.
            if self.objPNData.noValidGainsinPNarray(self.dblMinGainThreshold):
                break
            self._CPARdfs(self.objPNData.copyPrimes(), lstAntecedent,
                          [current_class])
    self.removeDuplicateRules()
    arset = AssociationRuleSet()
    arset.extend(self.lstRules)
    arset.set_target_accuracy("laplace")
    return self.remap_index_to_feature(arset, samples)
def train(self, samples, maxRuleSize=9999, mineOnlyClass=None):
    """Train with CPAR on the sample set, returning an AssociationRuleSet."""
    self.MAX_RULE_SIZE = maxRuleSize
    self.objPNData = PNData(samples)
    self.lstRules = []
    classes = self.objPNData.getClassList()
    log("Dataset has %d classes over %d samples." % (len(classes),
                                                     len(samples)))
    for current_class in classes:
        # Skip every class other than the requested one, if any was given.
        if mineOnlyClass != None and current_class != mineOnlyClass:
            continue
        log("Processing class %s" % (current_class))
        self.objPNData.setCurrentClass(current_class)
        dblMinTotalWeight = (self.dblTotalWeightFactor *
                             self.objPNData.getTotalWeight())
        lstAntecedent = []
        # Keep searching for rules while enough example weight remains.
        while self.objPNData.getTotalWeight() > dblMinTotalWeight:
            self.objPNData.refreshPNAData()
            if self.objPNData.noValidGainsinPNarray(self.dblMinGainThreshold):
                break
            self._CPARdfs(self.objPNData.copyPrimes(), lstAntecedent,
                          [current_class])
    trules = len(self.lstRules)
    self.removeDuplicateRules()
    result = AssociationRuleSet()
    result.extend(self.lstRules)
    result.set_target_accuracy("laplace")
    return self.remap_index_to_feature(result, samples)
def _CPARdfs(self, objPNDatacopy, lstAntecedent, lstConsequent):
    """Depth first search for constructing a new rule."""
    # True once the antecedent has reached the configured maximum rule
    # size; no further attributes are added past this point.
    blnMaxExceeded = False
    if (len(lstAntecedent) == self.MAX_RULE_SIZE):
        blnMaxExceeded = True
    'GET ALL ATTRIBUTES WITHIN 1.0 - dblGainSimilarityRatio of max'
    # Refresh gains on the working copy, then collect every attribute
    # whose gain is within the similarity window of the best gain.
    objPNDatacopy.recalculateGains()
    [dblGain, lstAttributes
     ] = objPNDatacopy.getBestGainAttributes(self.dblGainSimilarityRatio)
    if dblGain > self.dblMinGainThreshold and not blnMaxExceeded:
        # Recurse: extend the antecedent with each candidate attribute.
        for intAttribute in lstAttributes:
            """ This is a check to see if this attribute still within 1%...it may not after the depth first search """
            lstAntecedentCopy = deepcopy(lstAntecedent)
            lstAntecedentCopy.append(intAttribute)
            objPNDatacopyTemp = objPNDatacopy.copyPrimes()
            # Zero the A' count so this attribute is not chosen again on
            # this branch.
            objPNDatacopyTemp.lstAprime[intAttribute] = 0
            objPNDatacopyTemp.removeExamplesNotSatisfying(
                lstAntecedentCopy)
            self._CPARdfs(objPNDatacopyTemp, lstAntecedentCopy,
                          lstConsequent)
    else:
        if len(lstAntecedent) > 0:
            # Leaf: record the rule with its Laplace accuracy and decay
            # the weights of the examples it covers.
            lstAntecedent.sort()
            dblLaPlaceAccuracy = self.objPNData.getLaPlaceAccuracy(
                lstAntecedent, lstConsequent)
            objRule = AssociationRule(lstAntecedent, lstConsequent,
                                      {"laplace": dblLaPlaceAccuracy})
            objPNDatacopy.updateWeights(objRule, self.dblDecayFactor)
            self.addRule(objRule)
        else:
            log("Empty antecedent.")
        pass