Example #1
    def testinducer(self):
        # A good example of a very poorly written and uninspired unit test
        # Regression tests should take care of this
        data = orange.ExampleTable("zoo")
        rules = orange.AssociationRulesInducer(data,
                                               support=0.5,
                                               confidence=0.9)
        rules2 = orange.AssociationRulesInducer(support=0.5,
                                                confidence=0.9)(data)
        self.assertEqual(rules, rules2)

        data = orange.ExampleTable("iris")
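        # iris contains continuous attributes, which the basic inducer rejects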
        self.assertRaises(TypeError, orange.AssociationRulesInducer, data)
Example #2
 def generateRules(self):
     self.error()
     self.warning(0)
     if self.dataset:
         if self.useSparseAlgorithm and not self.datasetIsSparse:
             self.warning(
                 0,
                 "Using algorithm for sparse data, but data does not appear to be sparse!"
             )
         try:
             # Lower the support threshold step by step from 1.0 toward the
             # requested minimum until enough rules have been induced.
             num_steps = 20
             for i in range(num_steps):
                 if i == num_steps - 1:
                     build_support = self.minSupport / 100.
                 else:
                     build_support = 1 - float(i) / num_steps * (1 - self.minSupport / 100.)
                 if self.useSparseAlgorithm:
                     rules = orange.AssociationRulesSparseInducer(
                         self.dataset,
                         support=build_support,
                         confidence=self.minConfidence / 100.,
                         storeExamples=True)
                 else:
                     rules = orange.AssociationRulesInducer(
                         self.dataset,
                         support=build_support,
                         confidence=self.minConfidence / 100.,
                         classificationRules=self.classificationRules,
                         storeExamples=True)
                 if len(rules) >= self.maxRules:
                     break
             self.send("Association Rules", rules)
         except orange.KernelException as errValue:
             self.error(str(errValue))
             self.send("Association Rules", None)
Example #3
    def startMining(self, var, sup):
        data = orange.ExampleTable("data/finalData.csv")
        # was 47; keep only the first 44 columns
        data = data.select(range(44))

        minSupport = float(sup)

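        # max_item_sets raises the limit on how many itemsets the inducer may
        # generate, a safeguard against combinatorial blow-up on wide data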
        rules = orange.AssociationRulesInducer(data,
                                               support=minSupport,
                                               max_item_sets=30000000)

        print "%i rules with support higher than or equal to %5.3f found." % (
            len(rules), minSupport)

        orngAssoc.printRules(rules[:10], ["support", "confidence"])
Example #4
 def findItemsets(self):
     self.error()
     if self.dataset:
         try:
             if self.useSparseAlgorithm:
                 self.itemsets = orange.AssociationRulesSparseInducer(
                     support=self.minSupport / 100.,
                     storeExamples=True).getItemsets(self.dataset)
             else:
                 self.itemsets = orange.AssociationRulesInducer(
                     support=self.minSupport / 100.,
                     storeExamples=True).getItemsets(self.dataset)
             self.send("Itemsets", (self.dataset, self.itemsets))
         except Exception as errValue:
             errValue = str(errValue)
             if "non-discrete attributes" in errValue and not self.useSparseAlgorithm:
                 errValue += "\nTry using the algorithm for sparse data"
             self.error(str(errValue))
             self.send("Itemsets", None)
Example #5
 def generateRules(self):
     self.error()
     if self.dataset:
         try:
             num_steps = 20
             for i in range(num_steps):
                 if i == num_steps - 1:
                     build_support = self.minSupport / 100.
                 else:
                     build_support = 1 - float(i) / num_steps * (1 - self.minSupport / 100.)
                 if self.useSparseAlgorithm:
                     rules = orange.AssociationRulesSparseInducer(
                         self.dataset,
                         support=build_support,
                         confidence=self.minConfidence / 100.,
                         storeExamples=True)
                 else:
                     rules = orange.AssociationRulesInducer(
                         self.dataset,
                         support=build_support,
                         confidence=self.minConfidence / 100.,
                         classificationRules=self.classificationRules,
                         storeExamples=True)
                 if len(rules) >= self.maxRules:
                     break
             self.send("Association Rules", rules)
         except orange.KernelException as errValue:
             self.error(str(errValue))
             self.send("Association Rules", None)
     else:
         self.send("Association Rules", None)
Example #6
	def startMining(self, var):
		data = orange.ExampleTable("data/finalData.csv") 
		# was 47; keep only the first 44 columns
		data = data.select(range(44))

		minSupport = 0.4

		rules = orange.AssociationRulesInducer(data, support=minSupport, max_item_sets=30000000)
		orig_stdout = sys.stdout

		# Redirect stdout to a results file while the rules are printed
		f = open('results/{}_assocrules.txt'.format(var), 'w')
		sys.stdout = f
		print "%i rules with support higher than or equal to %5.3f found." % (len(rules), minSupport)
		orngAssoc.printRules(rules[:10], ["support", "confidence"])

		sys.stdout = orig_stdout

		f.close()
Example #7
            self.supp_min = min(supps)
            self.supp_max = max(supps)
            del supps

            confs = [rule.confidence for rule in self.rules]
            self.conf_min = min(confs)
            self.conf_max = max(confs)
            del confs

            self.checkScale()
        else:
            self.supp_min, self.supp_max = self.conf_min, self.conf_max = 0., 1.

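        # Keep the overall support/confidence ranges and reset the view to them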
        self.supp_allmin, self.supp_allmax = self.supp_min, self.supp_max
        self.conf_allmin, self.conf_allmax = self.conf_min, self.conf_max
        self.rezoom(self.supp_allmin, self.supp_allmax, self.conf_allmin,
                    self.conf_allmax)

if __name__ == "__main__":
    a = QApplication(sys.argv)
    ow = OWAssociationRulesViewer()

    dataset = orange.ExampleTable('../../doc/datasets/car.tab')
    rules = orange.AssociationRulesInducer(dataset,
                                           minSupport=0.3,
                                           maxItemSets=15000)
    ow.arules(rules)

    ow.show()
    a.exec_()
    ow.saveSettings()
Example #8
import orange

data = orange.ExampleTable("lenses")

rules = orange.AssociationRulesInducer(data, support=0.3, storeExamples=True)
rule = rules[0]

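# With storeExamples=True each rule keeps the data (rule.examples) together with
# the indices of examples matching its left side (matchLeft) and both sides (matchBoth)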
print
print "Rule: ", rule
print

print rule
print "Match left: "
print "\n".join(str(rule.examples[i]) for i in rule.matchLeft)
print "\nMatch both: "
print "\n".join(str(rule.examples[i]) for i in rule.matchBoth)

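# getItemsets returns the frequent itemsets found in the data rather than rules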
inducer = orange.AssociationRulesInducer(support=0.3, storeExamples=True)
itemsets = inducer.getItemsets(data)
print itemsets[8]
Example #9
# Description: Association rule sorting and filtering
# Category:    description
# Uses:        imports-85
# Classes:     orngAssoc.build, Preprocessor_discretize, EquiNDiscretization
# Referenced:  assoc.htm

import orange, orngAssoc

data = orange.ExampleTable("imports-85")
data = orange.Preprocessor_discretize(data, \
  method=orange.EquiNDiscretization(numberOfIntervals=3))
data = data.select(range(10))

rules = orange.AssociationRulesInducer(data, support=0.4)

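# Sort the rules by confidence (ties broken by support) and print the n most confident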
n = 5
print "%i most confident rules:" % (n)
orngAssoc.sort(rules, ["confidence", "support"])
orngAssoc.printRules(rules[0:n], ['confidence', 'support', 'lift'])

conf = 0.8
lift = 1.1
print "\nRules with confidence>%5.3f and lift>%5.3f" % (conf, lift)
rulesC = rules.filter(lambda x: x.confidence > conf and x.lift > lift)
orngAssoc.sort(rulesC, ['confidence'])
orngAssoc.printRules(rulesC, ['confidence', 'support', 'lift'])
Example #10
import orange

data = orange.ExampleTable("lenses")

print "\nAssociation rules"
rules = orange.AssociationRulesInducer(data, support=0.3)
for r in rules:
    print "%5.3f  %5.3f  %s" % (r.support, r.confidence, r)

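# With classificationRules=1 only rules whose right-hand side is the class attribute are induced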
print "\nClassification rules"
rules = orange.AssociationRulesInducer(data,
                                       support=0.3,
                                       classificationRules=1)
for r in rules:
    print "%5.3f  %5.3f  %s" % (r.support, r.confidence, r)
Example #11
    def __call__(self, data, targetClass, max_rules=0):
        '''Returns the Apriori-C classifier.'''

        data_discretized = False
        # If any of the attributes are continuous, discretize them
        if data.domain.hasContinuousAttributes():
            original_data = data
            data_discretized = True
            new_domain = []
            discretize = orange.EntropyDiscretization(forceAttribute=True)
            for attribute in data.domain.attributes:
                if attribute.varType == orange.VarTypes.Continuous:
                    d_attribute = discretize(attribute, data)
                    # An attribute is irrelevant if it is discretized into a single interval
                    #                        if len(d_attribute.getValueFrom.transformer.points) > 0:
                    new_domain.append(d_attribute)
                else:
                    new_domain.append(attribute)
            data = original_data.select(new_domain +
                                        [original_data.domain.classVar])

        self.data = data
        self.rulesSD = []

        # build association classification rules
        rules = orange.AssociationRulesInducer(data,
                                               support=self.minSup,
                                               classificationRules=1,
                                               maxItemSets=10000000)

        #_______________________________ post-processing step 1
        # select rules that classify in the target class
        right = orange.Example(
            data.domain,
            [orange.Value(orange.VarTypes.Discrete, orange.ValueTypes.DK)] *
            len(data.domain))
        right.setclass(targetClass)
        rules = rules.filter(lambda rule: rule.right == right)

        # select rules with confidence >= minConfidence
        rules = rules.filter(lambda rule: rule.confidence >= self.minConf)

        #________________________________ post processing step 2
        # weighted covering
        self.data.addMetaAttribute(
            self.weightID)  # set weights of all examples to 1
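        # Weighted covering: repeatedly take the best remaining rule (by weighted
        # relative accuracy), drop rules too similar to it, and down-weight the
        # examples it covers, until all examples are covered or quality drops to 0.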
        bestRuleWRacc = 100
        while (len(rules) > 0 and self.uncoveredExamples() > 0
               and bestRuleWRacc > 0
               and (max_rules == 0 or len(self.rulesSD) < max_rules)):
            (bestRule, bestRuleWRacc) = self.findBestRule(rules)
            rules.remove(bestRule)
            self.removeSimilarRules(bestRule, rules)
            self.decreaseExampleWeights(bestRule)
            self.rulesSD.append(bestRule)

        #____________________________ transform rules to SD format
        beam = []
        targetClassRule = SDRule(data, targetClass, conditions=[], g=1)

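        # Turn each selected association rule into an SDRule: every attribute with
        # a concrete (non-"don't care") value on the left side becomes an equality condition.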
        for r in self.rulesSD:
            cond = []
            for i in range(len(r.left)):
                if not orange.Value.is_DC(r.left[i]):
                    cond.append(
                        orange.ValueFilter_discrete(
                            position=i,
                            values=[
                                orange.Value(data.domain.attributes[i],
                                             r.left[i])
                            ]))
            rSD = SDRule(data, targetClass, cond)
            beam.append(rSD)

        if data_discretized:
            targetClassRule = SDRule(original_data,
                                     targetClass,
                                     conditions=[],
                                     g=1)
            # change beam so the rules apply to original data
            beam = [rule.getUndiscretized(original_data) for rule in beam]
        else:
            targetClassRule = SDRule(data, targetClass, conditions=[], g=1)

        return SDRules(beam, targetClassRule, "Apriori-SD")