def _getInstances(self, classAttr):
    """Assemble a Weka ``Instances`` object from the data held on ``self``.

    One ``Attribute`` is created per name in ``self.numericAttributes``
    (name-only constructor) and one per ``(name, domain)`` pair in
    ``self.attName2Domain`` (constructor taking the vector of stringified
    domain values). Every entry of ``self.instances`` is converted with
    ``self._makeInstance`` and appended to the result.

    Side effects: stores ``classAttr`` on ``self.classAttr`` and the
    name -> ``Attribute`` mapping on ``self.attName2Obj``.

    :param classAttr: name of the attribute to use as the class; it must be
        one of the attribute names registered above, otherwise the final
        dictionary lookup raises ``KeyError``.
    :return: the populated ``Instances`` object with its class attribute set.
    """
    self.classAttr = classAttr

    name_to_attribute = {}
    all_attributes = FastVector()

    def register(name, attribute):
        # Keep the Weka attribute vector and the lookup table in step.
        all_attributes.addElement(attribute)
        name_to_attribute[name] = attribute

    # Attributes built from a name only.
    for numeric_name in self.numericAttributes:
        register(numeric_name, Attribute(numeric_name))

    # Attributes built from a name plus the list of allowed values,
    # each value converted to a Java String of its str() form.
    for nominal_name, values in self.attName2Domain.iteritems():
        labels = FastVector(len(values))
        for value in values:
            labels.addElement(String(str(value)))
        register(nominal_name, Attribute(nominal_name, labels))

    self.attName2Obj = name_to_attribute

    # Build the dataset, sized up front for the stored instances.
    dataset = Instances("instances", all_attributes, len(self.instances))
    for raw in self.instances:
        dataset.add(self._makeInstance(raw))
    dataset.setClass(name_to_attribute[classAttr])
    return dataset
def build_instances(state, dataset):
    """Train a J48 decision tree on ``dataset`` and return it in parsed form.

    Each row of ``dataset`` is expected to line up with the fixed header
    ``state, lat, lon, day, temp, dewp, weather``: every column but the last
    is loaded as a float, and the last column is the class label, which must
    be one of the values in ``class_attributes`` below.

    Cells that cannot be converted with ``float()`` are skipped on purpose
    (best effort); the corresponding value on the Weka instance is simply
    never set.

    :param state: opaque value handed back unchanged as the first element of
        the result; it is not otherwise used here.
    :param dataset: sequence of rows (indexable, sliceable sequences).
    :return: ``(state, parse_tree(str(j48)))`` where ``j48`` is the trained
        classifier.
    """
    class_attributes = [
        "Sunny", "Fog", "Rain", "Snow", "Hail", "Thunder", "Tornado",
    ]
    header = ["state", "lat", "lon", "day", "temp", "dewp", "weather"]

    # One name-only attribute per non-class column of the header.
    attributes = [Attribute(name) for name in header[:-1]]

    # The class attribute carries the list of allowed labels.
    classification_vector = FastVector(len(class_attributes))
    for label in class_attributes:
        classification_vector.addElement(label)
    attributes.append(Attribute("toClassify", classification_vector))

    # Size the vector from the attributes themselves rather than from
    # dataset[0]: the argument is only a capacity, and indexing the first
    # row raised IndexError on an empty dataset.
    fvWekaAttributes = FastVector(len(attributes))
    for attribute in attributes:
        fvWekaAttributes.addElement(attribute)

    training_set = Instances("C4.5Set", fvWekaAttributes, len(dataset))
    training_set.setClassIndex(len(header) - 1)

    for row in dataset:
        inst = Instance(len(row))
        for i, cell in enumerate(row[:-1]):
            try:
                value = float(cell)
            except (TypeError, ValueError):
                # Non-numeric cell (e.g. a textual state code): leave this
                # value unset instead of aborting the whole row.
                continue
            inst.setValue(fvWekaAttributes.elementAt(i), value)
        # The last cell is the class label.
        inst.setValue(fvWekaAttributes.elementAt(len(row) - 1), row[-1])
        training_set.add(inst)

    j48 = J48()
    j48.buildClassifier(training_set)
    return state, parse_tree(str(j48))