示例#1
0
    def _getInstances(self, classAttr):
        """Build an ``Instances`` dataset from this object's attributes and rows.

        Side effects: stores ``classAttr`` on ``self.classAttr`` and the
        name -> ``Attribute`` mapping on ``self.attName2Obj``.

        Args:
            classAttr: Name of the attribute to mark as the class attribute;
                it is looked up in the name -> ``Attribute`` mapping built here.

        Returns:
            The ``Instances`` object holding one entry per element of
            ``self.instances``, with its class attribute set.

        Raises:
            KeyError: If ``classAttr`` is not among the attribute names
                registered from ``self.numericAttributes`` and
                ``self.attName2Domain``.
        """
        self.classAttr = classAttr
        name_to_attr = {}
        all_attrs = FastVector()

        def register(name, attribute):
            # Keep the vector and the by-name lookup in step.
            all_attrs.addElement(attribute)
            name_to_attr[name] = attribute

        # Attributes described by their name only.
        for name in self.numericAttributes:
            register(name, Attribute(name))

        # Attributes with an explicit value domain; every value is
        # stringified before being stored in the domain vector.
        for name, values in self.attName2Domain.iteritems():
            labels = FastVector(len(values))
            for value in values:
                labels.addElement(String(str(value)))
            register(name, Attribute(name, labels))

        # Publish the lookup before converting rows
        # (presumably _makeInstance reads it -- not visible here).
        self.attName2Obj = name_to_attr

        dataset = Instances("instances", all_attrs, len(self.instances))
        for row in self.instances:
            dataset.add(self._makeInstance(row))

        dataset.setClass(name_to_attr[classAttr])
        return dataset
示例#2
0
	def _getInstances(self, classAttr):
		"""Assemble an ``Instances`` object for the stored rows.

		Records ``classAttr`` on ``self.classAttr`` and the mapping from
		attribute name to ``Attribute`` object on ``self.attName2Obj``.

		Args:
			classAttr: Name of the attribute that becomes the class
				attribute of the returned dataset.

		Returns:
			An ``Instances`` object containing ``self._makeInstance(row)``
			for every row in ``self.instances``.

		Raises:
			KeyError: If ``classAttr`` was not registered as an attribute name.
		"""
		self.classAttr = classAttr
		byName = {}
		attributeList = FastVector()

		# Name-only attributes come first.
		for numericName in self.numericAttributes:
			numericAttr = Attribute(numericName)
			byName[numericName] = numericAttr
			attributeList.addElement(numericAttr)

		# Then the attributes that carry a domain of stringified values.
		for (domainName, domainValues) in self.attName2Domain.iteritems():
			valueVector = FastVector(len(domainValues))
			for domainValue in domainValues:
				valueVector.addElement(String(str(domainValue)))
			domainAttr = Attribute(domainName, valueVector)
			byName[domainName] = domainAttr
			attributeList.addElement(domainAttr)

		# Stored on self before any row is converted
		# (presumably needed by _makeInstance -- TODO confirm).
		self.attName2Obj = byName

		result = Instances("instances", attributeList, len(self.instances))
		for row in self.instances:
			result.add(self._makeInstance(row))

		result.setClass(byName[classAttr])
		return result
def build_instances(state, dataset):
    """Train a J48 classifier on ``dataset`` and return its parsed tree.

    For every row, each field except the last is converted with ``float()``
    and stored on the instance; the last field is stored as the value of
    the nominal ``toClassify`` attribute.  Fields that cannot be converted
    to a float (for example a textual state code) are skipped, so no value
    is set for that column on that instance.

    Args:
        state: Value returned unchanged as the first element of the result.
        dataset: Non-empty sequence of rows.  Presumably each row follows
            the column order state, lat, lon, day, temp, dewp, weather
            (TODO confirm against the caller), the last field being one of
            the labels listed in ``class_attributes``.

    Returns:
        A ``(state, tree)`` tuple where ``tree`` is
        ``parse_tree(str(j48))`` for the trained classifier.

    Raises:
        IndexError: If ``dataset`` is empty, because ``dataset[0]`` is read
            to size the attribute vector.
    """
    class_attributes = ["Sunny", "Fog", "Rain", "Snow", "Hail", "Thunder", "Tornado"]
    header = ["state", "lat", "lon", "day", "temp", "dewp", "weather"]

    # One name-only attribute per non-class column of the header.
    attributes = [Attribute(h) for h in header[:-1]]

    # The class attribute is nominal over the known weather labels.
    classification_vector = FastVector(len(class_attributes))
    for c in class_attributes:
        classification_vector.addElement(c)
    attributes.append(Attribute("toClassify", classification_vector))

    fvWekaAttributes = FastVector(len(dataset[0]))
    for a in attributes:
        fvWekaAttributes.addElement(a)

    training_set = Instances("C4.5Set", fvWekaAttributes, len(dataset))
    training_set.setClassIndex(len(header) - 1)

    for d in dataset:
        inst = Instance(len(d))
        for i in range(len(d) - 1):
            # Only the conversion is guarded: a bare ``except`` here used to
            # hide every error, including KeyboardInterrupt and failures
            # raised by setValue itself.
            try:
                value = float(d[i])
            except (TypeError, ValueError, OverflowError):
                # Non-numeric field: leave this column without a value.
                continue
            inst.setValue(fvWekaAttributes.elementAt(i), value)
        inst.setValue(fvWekaAttributes.elementAt(len(d) - 1), d[-1])

        training_set.add(inst)

    j48 = J48()
    j48.buildClassifier(training_set)
    return state, parse_tree(str(j48))