def build_instances(state, dataset):
    """Train a Weka J48 classifier on ``dataset`` and return its parsed tree.

    Rows are read positionally against the column order
    ``state, lat, lon, day, temp, dewp, weather``. Every cell except the
    last is converted with ``float()`` and stored under the matching
    attribute; the last cell is stored as the value of the nominal
    ``toClassify`` attribute, which is declared with the values Sunny,
    Fog, Rain, Snow, Hail, Thunder and Tornado.

    Args:
        state: Returned unchanged as the first element of the result; it
            is not otherwise used here.
        dataset: Sequence of rows. ``dataset[0]`` is read, so it must be
            non-empty. NOTE(review): rows are presumably all the same
            length as the header (7 cells) -- confirm against the caller.

    Returns:
        A ``(state, tree)`` tuple where ``tree`` is the result of calling
        ``parse_tree`` on the string form of the trained J48 model.
    """
    class_attributes = ["Sunny", "Fog", "Rain", "Snow", "Hail", "Thunder", "Tornado"]
    header = ["state", "lat", "lon", "day", "temp", "dewp", "weather"]

    # One attribute per header column except the last, which is replaced
    # below by the nominal class attribute.
    attributes = [Attribute(name) for name in header[:-1]]

    # The class attribute enumerates the allowed labels.
    classification_vector = FastVector(len(class_attributes))
    for label in class_attributes:
        classification_vector.addElement(label)
    attributes.append(Attribute("toClassify", classification_vector))

    fvWekaAttributes = FastVector(len(dataset[0]))
    for attribute in attributes:
        fvWekaAttributes.addElement(attribute)

    training_set = Instances("C4.5Set", fvWekaAttributes, len(dataset))
    training_set.setClassIndex(len(header) - 1)

    for row in dataset:
        class_position = len(row) - 1
        inst = Instance(len(row))
        for i in range(class_position):
            # Only the conversion is guarded: a cell that is not a number
            # (for example a textual state code) is skipped and its value
            # is never set on the instance. Errors raised by Weka itself
            # are deliberately allowed to propagate instead of being
            # swallowed.
            try:
                value = float(row[i])
            except (TypeError, ValueError):
                continue
            inst.setValue(fvWekaAttributes.elementAt(i), value)
        inst.setValue(fvWekaAttributes.elementAt(class_position), row[-1])
        training_set.add(inst)

    j48 = J48()
    j48.buildClassifier(training_set)
    return state, parse_tree(str(j48))