Пример #1
0
    def filterAndStore(self, oldRule=None):
        """Apply the rule's filter to the data and cache the resulting statistics.

        Stores the covered examples, the classifier learned on them, the rule
        complexity (number of filter conditions), both string renderings of
        the rule, and the coverage statistics (TP/FP, support, confidence,
        quality, class distribution).

        oldRule -- optional rule this one was derived from.  When it is given
            and its complexity is non-zero, ``P`` and ``N`` are taken from the
            sizes of its TP/FP lists; otherwise they are counted over the
            whole data set.

        When the filter covers no examples every statistic is zeroed.
        ``confidence`` is zeroed as well so that it is defined on both paths;
        ``P`` and ``N`` are not assigned on that path.
        """
        self.examples = self.filter(self.data)            # set examples
        self.classifier = self.learner(self.examples)     # set classifier
        self.complexity = len(self.filter.conditions)     # set rule complexity
        self.ruleString = self.ruleToString()
        self.orderedRuleString = self.orderedRuleToString()

        n_classes = len(self.data.domain.classVar.values)
        n_covered = len(self.examples)

        if n_covered == 0:
            # Nothing is covered: keep a plain list of zeros as the
            # distribution and zero every statistic.
            self.classDistribution = [0.0] * n_classes
            self.TP = []
            self.FP = []
            self.quality = 0        # set rule quality: generalization quotient
            self.support = 0        # set rule support
            self.confidence = 0     # previously left undefined on this path
            self.TPlen = 0
            self.FPlen = 0
            self.N_examples = 0
            self.NTC = 0
            self.refinement_quality = 0.0
            self.selection_quality = 0.0
            self.weighted_selection_quality = 0.0
            return

        # Relative class frequencies among the covered examples.  The counts
        # are floats, so the division below is a true division.
        counts = [0.0] * n_classes
        for example in self.examples:
            counts[int(example.getclass())] += 1
        self.classDistribution = orange.Distribution(
            [count / n_covered for count in counts])      # set distribution

        # List comprehensions (not filter()) so len() works on the results
        # regardless of whether filter() is lazy.
        self.TP = [e for e in self.examples
                   if e.getclass() == self.targetClass]   # true positives
        self.FP = [e for e in self.examples
                   if e.getclass() != self.targetClass]   # false positives
        self.N_examples = len(self.data)
        self.NTC = int(orange.getClassDistribution(self.data)[self.targetClass])

        if oldRule is None or oldRule.complexity == 0:
            class_var = self.data.domain.classVar
            self.P = len(self.data.filter({class_var: self.targetClass}))
            self.N = len(self.data.filter({class_var: self.targetClass}, negate=1))
        else:
            self.P = len(oldRule.TP)
            self.N = len(oldRule.FP)

        self.TPlen = float(len(self.TP))
        self.FPlen = float(len(self.FP))

        # NOTE(review): these helpers presumably set refinement_quality,
        # selection_quality and weighted_selection_quality -- confirm.
        self.calculateRefinementQuality()
        self.calculateSelectionQuality()
        self.calculateWeightedSelectionQuality()

        # set rule quality: generalization quotient TP / (FP + g)
        self.quality = self.TPlen / (len(self.FP) + self.g)
        self.support = float(n_covered) / len(self.data)  # set rule support
        self.confidence = self.TPlen / n_covered
Пример #2
0
def listOfClusters(root):
    """Return the list that listOfClusters0 fills while walking from *root*."""
    clusters = []
    listOfClusters0(root, clusters)
    return clusters


# Prune the tree rooted at `root`; 1.4 is passed as prune()'s second argument
# (presumably a height/distance threshold -- confirm against prune()).
prune(root, 1.4)

# Print every example of every cluster returned by listOfClusters(root),
# each cluster preceded by a numbered header.
for n, cluster in enumerate(listOfClusters(root)):
    print "\n\n*** Cluster %i ***\n" % n
    for ex in cluster:
        print ex

# For each cluster print its class distribution on a single line, one
# "<class value>: <number>" pair per entry of the distribution.
for cluster in listOfClusters(root):
    dist = orange.getClassDistribution(cluster)
    for e, d in enumerate(dist):
        # The trailing comma keeps the Python 2 print on the same line.
        print "%s: %3.0f   " % (data.domain.classVar.values[e], d),
    # Bare print ends the line for this cluster.
    print


def listOfClustersT0(cluster, alist):
    """Append an orange.ExampleTable for every branch-less node under *cluster*.

    Nodes that have branches are descended into recursively; a node without
    branches is converted with orange.ExampleTable and appended to *alist*.
    """
    if cluster.branches:
        for child in cluster.branches:
            listOfClustersT0(child, alist)
        return
    alist.append(orange.ExampleTable(cluster))


def listOfClustersT(root):
    # As written this only creates an empty local list and returns None.
    # NOTE(review): the body looks cut off here -- the statements that
    # follow at column 0 belong to a different script; confirm against the
    # original source before relying on this definition.
    l = []
# Clear the explicit random generator and set a fixed seed instead, then
# call indices2 five times on the same data and print each result
# (presumably to show the effect of a fixed randseed -- confirm).
indices2.randomGenerator = None
indices2.randseed = 42
for i in range(5):
    print indices2(data)


# Set p0 to a fraction (0.25) rather than a whole number.
print "\nIndices with p0 set as probability (not 'a number of')"
indices2.p0 = 0.25
print indices2(data)

# Stratified selection: select examples by the produced indices and print
# the normalized class distributions of the full data and of the selection.
print "\n... with stratification"
indices2.stratified = indices2.Stratified
ind = indices2(data)
print ind
data2 = data.select(ind)
od = orange.getClassDistribution(data)
sd = orange.getClassDistribution(data2)
od.normalize()
sd.normalize()
print od
print sd

# Same steps with stratification switched off.  Note that indices2 is
# called twice here: the first result is only printed, the second is used.
print "\n... and without stratification"
indices2.stratified = indices2.NotStratified
print indices2(data)
ind = indices2(data)
print ind
data2 = data.select(ind)
od = orange.getClassDistribution(data)
sd = orange.getClassDistribution(data2)
od.normalize()
Пример #4
0
# Seed Python's RNG so the random values assigned below are reproducible.
random.seed(0)

data = orange.ExampleTable("lenses")

id = -42
# Note that this is wrong: the id should be obtained with
#     id = orange.newmetaid()
# A fixed id is used here only so that the script gives the same output
# each time it is run.

# Store a random value under `id` on every example.
for example in data:
    example[id] = orange.Value(random.random())

print data[0]

# Class distribution without and with `id` as the second argument
# (presumably the second call weights examples by that meta value -- confirm).
print orange.getClassDistribution(data)
print orange.getClassDistribution(data, id)

# Register a float variable named "w" for `id` in the domain.
w = orange.FloatVariable("w")
data.domain.addmeta(id, w)

print data[0]

# Read the same entry by id, by variable object and by variable name.
print data[0][id]
print data[0][w]
print data[0]["w"]

# Assign through `id` using an orange.Value, a string and a float.
data[0][id] = orange.Value(w, 2.0)
data[0][id] = "2.0"
data[0][id] = 2.0
Пример #5
0
            listOfClusters0(branch, alist)

def listOfClusters(root):
    """Return the list that listOfClusters0 fills while walking from *root*."""
    clusters = []
    listOfClusters0(root, clusters)
    return clusters

# Prune the tree rooted at `root`; 1.4 is passed as prune()'s second argument
# (presumably a height/distance threshold -- confirm against prune()).
prune(root, 1.4)

# Print every example of every cluster returned by listOfClusters(root),
# each cluster preceded by a numbered header.
for n, cluster in enumerate(listOfClusters(root)):
    print "\n\n*** Cluster %i ***\n" % n
    for ex in cluster:
        print ex

# For each cluster print its class distribution on a single line, one
# "<class value>: <number>" pair per entry of the distribution.
for cluster in listOfClusters(root):
    dist = orange.getClassDistribution(cluster)
    for e, d in enumerate(dist):
        # The trailing comma keeps the Python 2 print on the same line.
        print "%s: %3.0f   " % (data.domain.classVar.values[e], d),
    # Bare print ends the line for this cluster.
    print

def listOfClustersT0(cluster, alist):
    """Append an orange.ExampleTable for every branch-less node under *cluster*.

    Nodes that have branches are descended into recursively; a node without
    branches is converted with orange.ExampleTable and appended to *alist*.
    """
    if cluster.branches:
        for child in cluster.branches:
            listOfClustersT0(child, alist)
        return
    alist.append(orange.ExampleTable(cluster))

def listOfClustersT(root):
    """Return the list that listOfClustersT0 fills while walking from *root*."""
    tables = []
    listOfClustersT0(root, tables)
    return tables
Пример #6
0
# Clear the explicit random generator and set a fixed seed instead, then
# call indices2 five times on the same data and print each result
# (presumably to show the effect of a fixed randseed -- confirm).
print "\nIndices with randseed"
indices2.randomGenerator = None
indices2.randseed = 42
for i in range(5):
    print indices2(data)

# Set p0 to a fraction (0.25) rather than a whole number.
print "\nIndices with p0 set as probability (not 'a number of')"
indices2.p0 = 0.25
print indices2(data)

# Stratified selection: select examples by the produced indices and print
# the normalized class distributions of the full data and of the selection.
print "\n... with stratification"
indices2.stratified = indices2.Stratified
ind = indices2(data)
print ind
data2 = data.select(ind)
od = orange.getClassDistribution(data)
sd = orange.getClassDistribution(data2)
od.normalize()
sd.normalize()
print od
print sd

# Same steps with stratification switched off.  Note that indices2 is
# called twice here: the first result is only printed, the second is used.
print "\n... and without stratification"
indices2.stratified = indices2.NotStratified
print indices2(data)
ind = indices2(data)
print ind
data2 = data.select(ind)
od = orange.getClassDistribution(data)
sd = orange.getClassDistribution(data2)
od.normalize()