Exemplo n.º 1
0
 def checkAttRelevance(self, data, margin):
     """Print attribute scores before and after feature subset selection.

     First lists the score and name of every entry returned by
     ``orngFSS.attMeasure(data)``, then does the same for the data set
     returned by ``Fss.filtering(self, data, margin)``.

     :param data: data set exposing ``domain.attributes``.
     :param margin: forwarded unchanged to ``Fss.filtering``
         (presumably a relevance threshold -- confirm against Fss.filtering).
     """
     attr_count = len(data.domain.attributes)
     print("Before feature subset selection (%d attributes):" % attr_count)
     for scored in orngFSS.attMeasure(data):
         # Each entry is indexed as (name, score); the score is printed first.
         print("%5.3f %s" % (scored[1], scored[0]))

     print("\nRelevance of best attributes")
     reduced = Fss.filtering(self, data, margin)
     for scored in orngFSS.attMeasure(reduced):
         print("%5.3f %s" % (scored[1], scored[0]))
Exemplo n.º 2
0
def cforange_score_estimation(input_dict):
    """Score the attributes of a data set with Orange's ReliefF measure.

    :param input_dict: mapping with the keys ``'dataset'`` (the data passed
        to ``orngFSS.attMeasure``), ``'k'`` and ``'m'`` (both converted with
        ``int`` and passed to ``orange.MeasureAttribute_relief``).
    :return: dict with the single key ``'results'`` holding whatever
        ``orngFSS.attMeasure`` returned.
    """
    # Imported lazily, as in the original, so the module loads without Orange.
    import orange
    import orngFSS

    data = input_dict['dataset']
    relief = orange.MeasureAttribute_relief(k=int(input_dict['k']),
                                            m=int(input_dict['m']))
    return {'results': orngFSS.attMeasure(data, relief)}
Exemplo n.º 3
0
def feature_selection(data, classify, k=5, m=100):
    '''
    Perform feature selection using Orange.

    For more details see `orange feature selection <http://orange.biolab.si/doc/modules/orngFSS.htm>`_ and
    `orange measure attribute <http://orange.biolab.si/doc/reference/MeasureAttribute.htm>`_

    The default measure is ReliefF (MeasureAttribute_relief in Orange).

    :param data: data from :meth:`perform_experiments`.
    :param classify: function for classifying runs.
    :param k: the number of neighbors for each example (default 5).
    :param m: number of examples to use. Set to -1 to use all (default 100).
    :rtype: list of ``(name, score)`` tuples with uncertainty names and
            ReliefF attribute scores, ordered by descending score (ties are
            ordered by descending name).

    Orange provides other metrics for feature selection:

    * Information Gain
    * Gain ratio
    * Gini index
    * Relevance of attributes
    * Costs

    If you want to use any of these instead of ReliefF, use the code
    supplied here as a template, but modify the measure. That is, replace::

        measure = orange.MeasureAttribute_relief(k=k, m=m)

    with the measure of choice. See the links above for more details.

    '''
    data = build_orange_data(data, classify)

    info("executing feature selection")
    measure = orange.MeasureAttribute_relief(k=k, m=m)
    scores = orngFSS.attMeasure(data, measure)

    # Entries are indexed as (name, score). Sorting on (score, name) in
    # reverse gives the same order as the former swap / sort / swap-back,
    # without reusing the parameter name ``m`` as a loop variable.
    ranked = sorted(scores, key=lambda entry: (entry[1], entry[0]),
                    reverse=True)
    return [(entry[0], entry[1]) for entry in ranked]
Exemplo n.º 4
0
def report_relevance(data):
  """Print one "score name" line per entry returned by orngFSS.attMeasure(data)."""
  for scored in orngFSS.attMeasure(data):
    # Entries are indexed as (name, score); the score is printed first.
    print("%5.3f %s" % (scored[1], scored[0]))
Exemplo n.º 5
0
# Description: Ranking and selection of best N attributes
# Category:    preprocessing
# Uses:        voting
# Referenced:  orngFSS.htm
# Classes:     orngFSS.attMeasure, orngFSS.bestNAtts

import orange, orngFSS
data = orange.ExampleTable("voting")

# Score the attributes with orngFSS.attMeasure (no explicit measure given)
# and show the first three entries of the returned list.
print('Attribute scores for best three attributes:')
ma = orngFSS.attMeasure(data)
for scored in ma[:3]:
  print("%5.3f %s" % (scored[1], scored[0]))

# Ask orngFSS for the n best attributes based on the same scores.
n = 3
best = orngFSS.bestNAtts(ma, n)
print('\nBest %d attributes:' % n)
for attr in best:
  print(attr)
Exemplo n.º 6
0
def report_relevance(data):
    """Print the score followed by the name of every measured attribute.

    :param data: data set handed to ``orngFSS.attMeasure``.
    """
    scores = orngFSS.attMeasure(data)
    for entry in scores:
        name, relevance = entry[0], entry[1]
        print("%5.3f %s" % (relevance, name))
Exemplo n.º 7
0
# Description: Ranking of attributes with two different measures (Relief and gain ratio)
# Category:    preprocessing
# Uses:        voting.tab
# Referenced:  orngFSS.htm
# Classes:     orngFSS.attMeasure, MeasureAttribute_gainRatio

import orange, orngFSS
data = orange.ExampleTable("voting")

print('Relief GainRt Attribute')
ma_def = orngFSS.attMeasure(data)
gainRatio = orange.MeasureAttribute_gainRatio()
ma_gr = orngFSS.attMeasure(data, gainRatio)

# Each list is ranked by its own measure, so position i in ma_gr need not be
# the attribute at position i in ma_def. Look the gain ratio up by name so
# both numbers on a row belong to the attribute that is printed.
gr_by_name = dict(ma_gr)
# Slicing (rather than range(5)) also copes with fewer than five attributes.
for name, relief_score in ma_def[:5]:
    print("%5.3f  %5.3f  %s" % (relief_score, gr_by_name[name], name))
Exemplo n.º 8
0
 def __call__(self, data, weight=None):
   """Train a Bayes model on the attributes chosen by orngFSS.

   Scores the attributes with ``orngFSS.attMeasure``, reduces ``data`` with
   ``orngFSS.selectBestNAtts`` using ``self.N``, fits ``orange.BayesLearner``
   on the reduced data and wraps the model in ``BayesFSS_Classifier``.
   ``weight`` is accepted but not used here.
   """
   scores = orngFSS.attMeasure(data)
   reduced = orngFSS.selectBestNAtts(data, scores, self.N)
   bayes = orange.BayesLearner(reduced)
   return BayesFSS_Classifier(classifier=bayes, N=self.N, name=self.name)
Exemplo n.º 9
0
# Description: Ranking of attributes with two different measures (Relief and gain ratio)
# Category:    preprocessing
# Uses:        voting.tab
# Referenced:  orngFSS.htm
# Classes:     orngFSS.attMeasure, MeasureAttribute_gainRatio

import orange, orngFSS
data = orange.ExampleTable("voting")

print('Relief GainRt Attribute')
ma_def = orngFSS.attMeasure(data)
gainRatio = orange.MeasureAttribute_gainRatio()
ma_gr  = orngFSS.attMeasure(data, gainRatio)

# ma_def and ma_gr are each ranked by their own measure, so pairing them by
# index can print a gain ratio that belongs to a different attribute than
# the one named on the row. Index the gain-ratio scores by attribute name.
gain_ratio_of = dict(ma_gr)
# Take at most five rows; a slice does not fail on smaller domains.
for attr_name, relief in ma_def[:5]:
  print("%5.3f  %5.3f  %s" % (relief, gain_ratio_of[attr_name], attr_name))
Exemplo n.º 10
0
 def __call__(self, data, weight=None):
   """Build a BayesFSS_Classifier from the best self.N attributes of data.

   The attribute scores come from ``orngFSS.attMeasure``; the filtered data
   from ``orngFSS.selectBestNAtts`` is what ``orange.BayesLearner`` is
   trained on. The ``weight`` argument is not used in this method.
   """
   ranking = orngFSS.attMeasure(data)
   subset = orngFSS.selectBestNAtts(data, ranking, self.N)
   learned = orange.BayesLearner(subset)
   return BayesFSS_Classifier(classifier=learned, N=self.N, name=self.name)
Exemplo n.º 11
0
# Description: Demonstrates the use of attribute evaluation
# Category:    feature scoring, FSS
# Classes:     MeasureAttribute_Distance, MeasureAttribute_MDL
# Uses:        zoo.tab

import orange
import orngEvalAttr
import orngCI
import orngFSS
data = orange.ExampleTable("../datasets/zoo")

print('Distance(1-D)  MDL    Attribute')

distance = orngEvalAttr.MeasureAttribute_Distance()
ma_d = orngFSS.attMeasure(data, distance)

mdl = orngEvalAttr.MeasureAttribute_MDL()
ma_mdl = orngFSS.attMeasure(data, mdl)

# The two lists are ranked independently, so the i-th MDL entry is not
# necessarily the attribute named in the i-th distance entry. Look the MDL
# score up by attribute name to keep every row about a single attribute.
mdl_by_name = dict(ma_mdl)
# A slice shows up to five rows without failing on smaller domains.
for name, dist_score in ma_d[:5]:
    print("%5.3f          %5.3f  %s" % (dist_score, mdl_by_name[name], name))
Exemplo n.º 12
0
def selectAttributes(data, attrContOrder, attrDiscOrder, projections = None):
    if data.domain.classVar == None or data.domain.classVar.varType != orange.VarTypes.Discrete:
        return ([attr.name for attr in data.domain.attributes], [], 0)

    shown = [data.domain.classVar.name]; hidden = []; maxIndex = 0    # initialize outputs

    # # both are RELIEF
    if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF":
        attrVals = orngFSS.attMeasure(data, orange.MeasureAttribute_relief())
        s,h = getTopAttrs(attrVals, 0.95)
        return (shown + s, hidden + h, 0)

    # # both are NONE
    elif attrContOrder == "None" and attrDiscOrder == "None":
        for item in data.domain.attributes:    shown.append(item.name)
        return (shown, hidden, 0)


    # disc and cont attribute list
    discAttrs = []; contAttrs = []
    for attr in data.domain.attributes:
        if attr.varType == orange.VarTypes.Continuous: contAttrs.append(attr.name)
        elif attr.varType == orange.VarTypes.Discrete: discAttrs.append(attr.name)
        

    ###############################
    # sort continuous attributes
    if attrContOrder == "None":
        shown += contAttrs
    elif attrContOrder in ["ReliefF", "Fisher discriminant", "Signal to Noise", "Signal to Noise For Each Class"]:
        if attrContOrder == "ReliefF":               measure = orange.MeasureAttribute_relief(k=10, m=50)
        elif attrContOrder == "Fisher discriminant": measure = MeasureFisherDiscriminant()
        elif attrContOrder == "Signal to Noise":     measure = S2NMeasure()
        else:                                        measure = S2NMeasureMix()

        dataNew = data.select(contAttrs + [data.domain.classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s,h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h
    else:
        print "Unknown value for attribute order: ", attrContOrder

    # ###############################
    # sort discrete attributes
    if attrDiscOrder == "None":
        shown += discAttrs
    elif attrDiscOrder == "GainRatio" or attrDiscOrder == "Gini" or attrDiscOrder == "ReliefF":
        if attrDiscOrder == "GainRatio":   measure = orange.MeasureAttribute_gainRatio()
        elif attrDiscOrder == "Gini":       measure = orange.MeasureAttribute_gini()
        else:                               measure = orange.MeasureAttribute_relief()

        dataNew = data.select(discAttrs + [data.domain.classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s,h = getTopAttrs(attrVals, 0.95)
        shown += s; hidden += h

    elif attrDiscOrder == "Oblivious decision graphs":
            #shown.append(data.domain.classVar.name)
            attrs = getFunctionalList(data)
            for item in attrs:
                shown.append(item)
            for attr in data.domain.attributes:
                if attr.name not in shown and attr.varType == orange.VarTypes.Discrete:
                    hidden.append(attr.name)
    else:
        print "Unknown value for attribute order: ", attrDiscOrder

    return (shown, hidden, maxIndex)
Exemplo n.º 13
0
def selectAttributes(data, attrContOrder, attrDiscOrder, projections=None):
    if data.domain.classVar == None or data.domain.classVar.varType != orange.VarTypes.Discrete:
        return ([attr.name for attr in data.domain.attributes], [], 0)

    shown = [data.domain.classVar.name]
    hidden = []
    maxIndex = 0  # initialize outputs

    # # both are RELIEF
    if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF":
        attrVals = orngFSS.attMeasure(data, orange.MeasureAttribute_relief())
        s, h = getTopAttrs(attrVals, 0.95)
        return (shown + s, hidden + h, 0)

    # # both are NONE
    elif attrContOrder == "None" and attrDiscOrder == "None":
        for item in data.domain.attributes:
            shown.append(item.name)
        return (shown, hidden, 0)

    # disc and cont attribute list
    discAttrs = []
    contAttrs = []
    for attr in data.domain.attributes:
        if attr.varType == orange.VarTypes.Continuous:
            contAttrs.append(attr.name)
        elif attr.varType == orange.VarTypes.Discrete:
            discAttrs.append(attr.name)

    ###############################
    # sort continuous attributes
    if attrContOrder == "None":
        shown += contAttrs
    elif attrContOrder in [
            "ReliefF", "Fisher discriminant", "Signal to Noise",
            "Signal to Noise For Each Class"
    ]:
        if attrContOrder == "ReliefF":
            measure = orange.MeasureAttribute_relief(k=10, m=50)
        elif attrContOrder == "Fisher discriminant":
            measure = MeasureFisherDiscriminant()
        elif attrContOrder == "Signal to Noise":
            measure = S2NMeasure()
        else:
            measure = S2NMeasureMix()

        dataNew = data.select(contAttrs + [data.domain.classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s, h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h
    else:
        print "Unknown value for attribute order: ", attrContOrder

    # ###############################
    # sort discrete attributes
    if attrDiscOrder == "None":
        shown += discAttrs
    elif attrDiscOrder == "GainRatio" or attrDiscOrder == "Gini" or attrDiscOrder == "ReliefF":
        if attrDiscOrder == "GainRatio":
            measure = orange.MeasureAttribute_gainRatio()
        elif attrDiscOrder == "Gini":
            measure = orange.MeasureAttribute_gini()
        else:
            measure = orange.MeasureAttribute_relief()

        dataNew = data.select(discAttrs + [data.domain.classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s, h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h

    elif attrDiscOrder == "Oblivious decision graphs":
        #shown.append(data.domain.classVar.name)
        attrs = getFunctionalList(data)
        for item in attrs:
            shown.append(item)
        for attr in data.domain.attributes:
            if attr.name not in shown and attr.varType == orange.VarTypes.Discrete:
                hidden.append(attr.name)
    else:
        print "Unknown value for attribute order: ", attrDiscOrder

    return (shown, hidden, maxIndex)
# Description: Demonstrates the use of attribute evaluation
# Category:    feature scoring, FSS
# Classes:     MeasureAttribute_Distance, MeasureAttribute_MDL
# Uses:        zoo.tab

import orange
import orngEvalAttr
import orngCI
import orngFSS
data = orange.ExampleTable("../datasets/zoo")

print('Distance(1-D)  MDL    Attribute')

distance = orngEvalAttr.MeasureAttribute_Distance()
ma_d  = orngFSS.attMeasure(data, distance)

mdl = orngEvalAttr.MeasureAttribute_MDL()
ma_mdl = orngFSS.attMeasure(data, mdl)

# ma_d and ma_mdl are ranked separately, so matching them by position can
# put an MDL score next to the wrong attribute name. Map name -> MDL score
# and read it per attribute instead.
mdl_of = dict(ma_mdl)
# Up to five rows; slicing avoids an IndexError on small domains.
for attr_name, d_score in ma_d[:5]:
  print("%5.3f          %5.3f  %s" % (d_score, mdl_of[attr_name], attr_name))