示例#1
0
	def _makeInstance(self, i, instances=None):
		"""
		Builds an Instance from a mapping of attribute name to raw value.

		Parameter(s):

			'i' -- mapping (supports iteritems()) from attribute name to value
			'instances' -- optional dataset to attach the new Instance to

		Return:

			the populated Instance
		"""
		row = Instance(len(i))
		if instances is not None:
			row.setDataset(instances)
		for name, raw in i.iteritems():
			# names listed in self.numericAttributes are wrapped as Double,
			# everything else as String (presumably java.lang types - confirm
			# against the file's imports)
			wrapped = Double(raw) if name in self.numericAttributes else String(raw)
			row.setValue(self.attName2Obj[name], wrapped)
		return row
def build_instances(state,dataset):
    """Train a J48 classifier on the given rows and return its parsed tree.

    Parameter(s):
        'state' -- value handed back unchanged as the first element of the
                   result
        'dataset' -- sequence of rows; in each row every entry except the
                     last is converted with float() and stored under the
                     attribute at the same position, and the last entry is
                     stored as the class label (presumably rows follow the
                     header layout below - confirm with the caller)

    Return:
        the tuple (state, parse_tree(str(j48)))
    """
    class_attributes = ["Sunny", "Fog", "Rain", "Snow", "Hail", "Thunder", "Tornado"]
    header = ["state","lat", "lon", "day","temp","dewp","weather"]

    # one attribute per header column except the last one, which is replaced
    # by the nominal classification attribute below
    attributes = [Attribute(h) for h in header[:-1]]

    # add the classification attribute
    classification_vector = FastVector(len(class_attributes))
    for c in class_attributes:
        classification_vector.addElement(c)
    attributes.append(Attribute("toClassify", classification_vector))

    # size the vector from the attributes it will hold rather than from the
    # first data row, so building the header does not index into the dataset
    fvWekaAttributes = FastVector(len(attributes))
    for a in attributes:
        fvWekaAttributes.addElement(a)

    training_set = Instances("C4.5Set", fvWekaAttributes, len(dataset))
    training_set.setClassIndex(len(header)-1)

    for d in dataset:
        inst = Instance(len(d))
        for i in range(len(d)-1):
            try:
                numeric_value = float(d[i])
            except (TypeError, ValueError):
                # best effort: a cell that is not a number is skipped and its
                # value is never set (Weka documents values of a freshly
                # constructed Instance(int) as missing)
                continue
            inst.setValue(fvWekaAttributes.elementAt(i), numeric_value)
        inst.setValue(fvWekaAttributes.elementAt(len(d)-1), d[-1])

        training_set.add(inst)

    j48 = J48()
    j48.buildClassifier(training_set)
    return state,parse_tree(str(j48))
示例#3
0
 def _makeInstance(self, i, instances=None):
     """
     Builds an Instance from a mapping of attribute name to raw value.

     Parameter(s):

         'i' -- mapping (supports iteritems()) from attribute name to value
         'instances' -- optional dataset to attach the new Instance to

     Return:

         the populated Instance
     """
     row = Instance(len(i))
     if instances is not None:
         row.setDataset(instances)
     for name, raw in i.iteritems():
         # names listed in self.numericAttributes are wrapped as Double,
         # everything else as String (presumably java.lang types - confirm
         # against the file's imports)
         wrapped = Double(raw) if name in self.numericAttributes else String(raw)
         row.setValue(self.attName2Obj[name], wrapped)
     return row
示例#4
0
class JeroR(Classifier, JythonSerializableObject):
    """
    Jython implementation of the Weka classifier ZeroR.

    The prediction never depends on the instance being classified: it is
    the single class value stored by buildClassifier.

    'author' -- FracPete (fracpete at waikato dot ac dot nz)

    'version' -- $Revision$
    """

    # the documentation can be generated with HappyDoc:
    #    http://happydoc.sourceforge.net/
    # Example command:
    #     happydoc --title Weka -d ./doc ./src

    # the class value returned for every prediction
    __ClassValue = Instance.missingValue()

    # the class attribute of the training data (None until a model is built)
    __Class = None

    # per-label counts for a nominal class (None otherwise)
    __Counts = None

    def listOptions(self):
        """
        Lists the options this classifier understands.

        Return:

            whatever Classifier.listOptions returns for this object
        """

        return Classifier.listOptions(self)

    def setOptions(self, options):
        """
        Applies a list of options by delegating to Classifier.setOptions.

        Parameter(s):

            'options' -- the list of options as an array of strings
        """

        Classifier.setOptions(self, options)

    def getOptions(self):
        """
        Reports the current settings by delegating to Classifier.getOptions.

        Return:

            an array of strings suitable for passing to setOptions
        """

        return Classifier.getOptions(self)

    def getCapabilities(self):
        """
        Describes which kinds of data this classifier accepts.

        Return:

            the inherited capabilities with the attribute and class
            capabilities below enabled and a minimum of 0 instances
        """

        result = Classifier.getCapabilities(self)

        enabled = (
            # attributes
            Capability.NOMINAL_ATTRIBUTES,
            Capability.NUMERIC_ATTRIBUTES,
            Capability.DATE_ATTRIBUTES,
            Capability.STRING_ATTRIBUTES,
            Capability.RELATIONAL_ATTRIBUTES,
            Capability.MISSING_VALUES,
            # class
            Capability.NOMINAL_CLASS,
            Capability.NUMERIC_CLASS,
            Capability.DATE_CLASS,
            Capability.MISSING_CLASS_VALUES,
        )
        for capability in enabled:
            result.enable(capability)

        # instances
        result.setMinimumNumberInstances(0)

        return result

    def buildClassifier(self, instances):
        """
        Builds the model from the given data.

        For a numeric class the stored value is the weighted sum of the
        class values divided by the total weight; otherwise it is the index
        of the largest per-label count, where every count starts at 1.

        Parameter(s):

            'instances' -- the data to build the classifier from
        """

        self.getCapabilities().testWithFail(instances)

        # work on a copy without the instances whose class is missing
        instances = Instances(instances)
        instances.deleteWithMissingClass()

        classAttr = instances.classAttribute()
        self.__Class = classAttr
        self.__ClassValue = 0
        self.__Counts = None
        totalWeight = 0

        if (not classAttr.isNumeric()) and classAttr.isNominal():
            # every label starts with a count of 1, and the total weight
            # starts at the number of labels to match
            numLabels = instances.numClasses()
            self.__Counts = jarray.zeros(numLabels, 'd')
            for idx in range(numLabels):
                self.__Counts[idx] = 1
            totalWeight = numLabels

        rows = instances.enumerateInstances()
        while rows.hasMoreElements():
            row = rows.nextElement()
            if row.classIsMissing():
                continue
            weight = row.weight()
            if classAttr.isNominal():
                self.__Counts[int(row.classValue())] += weight
            else:
                self.__ClassValue += weight * row.classValue()
            totalWeight += weight

        if classAttr.isNumeric():
            # the accumulated sum is only divided when there is weight
            if Utils.gr(totalWeight, 0):
                self.__ClassValue /= totalWeight
        else:
            self.__ClassValue = Utils.maxIndex(self.__Counts)
            Utils.normalize(self.__Counts, totalWeight)

    def classifyInstance(self, instance):
        """
        Predicts the class value for an instance.

        Parameter(s):

            'instance' -- the instance to predict the class value for
                          (not consulted)

        Return:

            the class value stored by buildClassifier
        """

        return self.__ClassValue

    def distributionForInstance(self, instance):
        """
        Gives the class distribution for an instance.

        Parameter(s):

            'instance' -- the instance to calculate the class distribution
                          for (not consulted)

        Return:

            a one-element array holding the stored class value when there
            are no counts, otherwise a copy of the counts array
        """

        if self.__Counts is None:
            single = jarray.zeros(1, 'd')
            single[0] = self.__ClassValue
            return single

        return self.__Counts[:]

    def toString(self):
        """
        Describes the classifier as text.

        Return:

            string representation of the classifier
        """

        if self.__Class is None:
            return "JeroR: No model built yet."

        prefix = "JeroR predicts class value: "
        if self.__Counts is None:
            return prefix + str(self.__ClassValue)

        # with counts present the stored value is an index into the labels
        label = self.__Class.value(int(self.__ClassValue))
        return prefix + str(label)
示例#5
0
	def processThread(self, tempCursor, postChain, data, dataNum, j48, linreg):
		"""
		Picks a parent for every post of a thread and stores each link.

		The first two posts are linked directly. Every later post is paired
		with each earlier post; each pair is scored by linreg and classified
		by j48, and one earlier post is chosen as parent from those results.

		Parameter(s):

			'tempCursor' -- cursor handed to dbHandler.insert
			'postChain' -- sequence of posts; element 0 of a post is used as
			               its id and element 3 is passed to preprocessCorpus
			'data' -- dataset attached to a pair before j48 classifies it;
			          also supplies the attribute count and the value index
			          for attribute 6
			'dataNum' -- dataset attached to a pair before linreg scores it
			'j48' -- classifier whose result 1.0 counts as a positive vote
			'linreg' -- classifier whose result is used as a score
		"""
		if len(postChain) < 2:
			return  # return if there are no replies
		threadSoFar = []
		self.threadWrapper_add(threadSoFar, postChain[0], postChain[1])
		preprocessCorpus([post[3] for post in postChain])
		dbHandler.insert(tempCursor, postChain[0][0], postChain[1][0])
		for replyIdx in xrange(2, len(postChain)):
			childId = postChain[replyIdx][0]
			# two highest linreg scores seen so far, with their post ids
			bestId, bestScore = -1, -1.0
			secondId, secondScore = -1, -1.0
			# id of the most recent j48-positive candidate and the number
			# of positives
			lastPositiveId, positiveCount = -1, 0
			for candIdx in xrange(replyIdx):
				# add the pair only long enough to compute its features
				self.threadWrapper_add(threadSoFar, postChain[candIdx], postChain[replyIdx])
				(fTime, fCos, fQuoted, fNewestParent, fDist, fPos, fLastOwn,
					fDiaLen, fDiaDist, fPrevsParent, fFirstPost) = self.computeFeatures(threadSoFar)
				threadSoFar = threadSoFar[:-1]

				# attribute 6 is stored as the index of its value in data
				lastOwnIdx = data.attribute(6).indexOfValue(fLastOwn)
				featureValues = (fTime, fCos, fQuoted, fNewestParent, fDist, fPos,
					lastOwnIdx, fDiaLen, fDiaDist, fPrevsParent, fFirstPost)
				postPair = Instance(data.numAttributes())
				for attrIdx, featureValue in enumerate(featureValues):
					postPair.setValue(attrIdx, featureValue)

				postPair.setDataset(dataNum)
				score = linreg.classifyInstance(postPair)
				if score > bestScore:
					secondId, secondScore = bestId, bestScore
					bestId, bestScore = postChain[candIdx][0], score
				elif score > secondScore:
					secondId, secondScore = postChain[candIdx][0], score

				postPair.setDataset(data)
				if j48.classifyInstance(postPair) == 1.0:
					lastPositiveId = postChain[candIdx][0]
					positiveCount += 1
			# NOTE(review): with no j48 positive the SECOND-best linreg
			# candidate is chosen, with several positives the best one -
			# confirm this asymmetry is intended
			if positiveCount == 0:
				parentId = secondId
			elif positiveCount > 1:
				parentId = bestId
			else:
				parentId = lastPositiveId
			parentPost = [post for post in postChain if post[0] == parentId][0]
			self.threadWrapper_add(threadSoFar, parentPost, postChain[replyIdx])
			dbHandler.insert(tempCursor, parentId, childId)