예제 #1
0
    def _getInstances(self, classAttr):
        """Build a WEKA Instances object from the data held on this object.

        One Attribute is created for every name in self.numericAttributes,
        and one for every (name, domain) pair in self.attName2Domain, with
        each domain value converted to a string.  The name -> Attribute
        mapping is stored in self.attName2Obj before any instance is built.

        classAttr: name of the attribute to use as the class; it is also
                   stored in self.classAttr.

        Returns the populated Instances with its class attribute set.
        """
        self.classAttr = classAttr
        attributeByName = {}
        allAttributes = FastVector()

        def register(name, attribute):
            # Keep the vector and the lookup table in sync.
            allAttributes.addElement(attribute)
            attributeByName[name] = attribute

        # Attributes declared by name only.
        for name in self.numericAttributes:
            register(name, Attribute(name))

        # Attributes declared together with their list of allowed values.
        for name, domain in self.attName2Domain.iteritems():
            allowedValues = FastVector(len(domain))
            for value in domain:
                allowedValues.addElement(String(str(value)))
            register(name, Attribute(name, allowedValues))

        self.attName2Obj = attributeByName

        # Convert every stored record and collect them in one dataset.
        dataset = Instances("instances", allAttributes, len(self.instances))
        for record in self.instances:
            dataset.add(self._makeInstance(record))

        dataset.setClass(attributeByName[classAttr])
        return dataset
예제 #2
0
	def _getInstances(self, classAttr):
		"""Assemble a WEKA Instances dataset from this object's stored data.

		Creates one Attribute per entry of self.numericAttributes and one per
		(name, domain) pair of self.attName2Domain (domain values are added
		as strings), records the name -> Attribute mapping in
		self.attName2Obj, and then converts every item of self.instances
		with self._makeInstance.

		classAttr: name of the class attribute; also saved as self.classAttr.

		Returns the Instances object with the class attribute set.
		"""
		self.classAttr = classAttr
		nameToAttribute = {}
		attributeList = FastVector()

		# Attributes that are described by their name alone.
		for numericName in self.numericAttributes:
			numericAttribute = Attribute(numericName)
			attributeList.addElement(numericAttribute)
			nameToAttribute[numericName] = numericAttribute

		# Attributes that come with an explicit domain of values.
		for domainName, domainValues in self.attName2Domain.iteritems():
			valueVector = FastVector(len(domainValues))
			for domainValue in domainValues:
				valueVector.addElement(String(str(domainValue)))
			domainAttribute = Attribute(domainName, valueVector)
			attributeList.addElement(domainAttribute)
			nameToAttribute[domainName] = domainAttribute

		# Published before the instances are converted below.
		self.attName2Obj = nameToAttribute

		result = Instances("instances", attributeList, len(self.instances))
		for item in self.instances:
			result.add(self._makeInstance(item))

		result.setClass(nameToAttribute[classAttr])
		return result
예제 #3
0
def readDataFromResultsTable(attributes, rt):
    """Copy the rows of a results table into a WEKA Instances object.

    attributes: sequence of WEKA Attribute objects; each attribute's name()
                is used as the column name to read from rt.
    rt: table object providing size() and getValue(columnName, rowIndex).

    Returns an Instances named "results" holding one instance per table row.
    """
    data = Instances("results", ArrayList(attributes), rt.size())
    nrOfFeatures = len(attributes)
    for i in range(0, rt.size()):
        inst = DenseInstance(nrOfFeatures)
        for attribute in attributes:
            inst.setValue(attribute, rt.getValue(attribute.name(), i))
        # Add the row exactly once, after all of its values are set.
        # Adding inside the attribute loop appended one entry per attribute.
        data.add(inst)
    return data
예제 #4
0
def createTrainingInstances(matchingExamples, mismatchingExamples):
  """ Build the WEKA training set from two lists of feature vectors.

      matchingExamples: list of feature lists (each feature vector is a list);
                        the first entry determines the number of features.
      mismatchingExamples: list of feature lists of the same length.

      Every matching vector gets the class value 1 appended and every
      mismatching vector gets 0; the class is the last attribute.

      NOTE(review): the class attribute's values are declared in the order
      ["true", "false"], while the numeric values follow "1 is True,
      0 is False" -- confirm that this pairing is intended.

      Returns the populated Instances object named "matches". """
  featureCount = len(matchingExamples[0])

  # One attribute per feature, then the nominal class attribute at the end
  attributes = []
  for index in range(featureCount):
    attributes.append(Attribute(str(index) + " numeric"))
  attributes.append(Attribute("class", ArrayList(["true", "false"])))

  capacity = len(matchingExamples) + len(mismatchingExamples)
  trainingData = Instances("matches", ArrayList(attributes), capacity)
  trainingData.setClassIndex(len(attributes) - 1) # the last index

  # Matching examples first (class value 1), then mismatching ones (class value 0)
  for examples, classValue in ((matchingExamples, 1), (mismatchingExamples, 0)):
    for features in examples:
      trainingData.add(DenseInstance(1.0, features + [classValue]))
  return trainingData
예제 #5
0
def classify(classifier, matches):
  """ Classify the feature vectors in matches.

      classifier: object providing classifyInstance(instance) and
                  distributionForInstance(instance).
      matches: non-empty list of feature lists; the first entry is passed to
               createAttributes to build the attribute list.

      Every vector is classified, but only the outcome of the LAST one is
      returned, as [result, dist[1]]: the value from classifyInstance and
      the entry at index 1 of distributionForInstance.

      NOTE(review): earlier results are overwritten inside the loop; if one
      result per vector is wanted, callers must currently pass one vector
      at a time. """
  attributes = createAttributes(matches[0])
  instances = Instances("tests", attributes, 1)
  instances.setClassIndex(len(attributes) -1)
  for match in matches:
    # Class value 0 is appended to fill the class slot of each test instance
    instances.add(DenseInstance(1.0, match + [0]))
  for i in range(len(matches)):
    instance = instances.instance(i)
    result = classifier.classifyInstance(instance)
    dist = classifier.distributionForInstance(instance)
    results = [result, dist[1]]
  return results


  


  
def build_instances(state,dataset):
    """Train a J48 classifier on dataset and return its parsed tree.

    state: returned unchanged as the first element of the result.
    dataset: non-empty sequence of rows.  Every value except the last in a
             row is converted with float(); the last value is set on the
             attribute at the row's last index.

    Returns the tuple (state, parse_tree(str(j48))).
    """
    class_attributes = ["Sunny", "Fog", "Rain", "Snow", "Hail", "Thunder", "Tornado"]
    header = ["state","lat", "lon", "day","temp","dewp","weather"]

    # One attribute per header column except the last one ...
    attributes = [Attribute(column) for column in header[:-1]]

    # ... which is replaced by the attribute holding the class labels.
    classification_vector = FastVector(len(class_attributes))
    for label in class_attributes:
        classification_vector.addElement(label)
    attributes.append(Attribute("toClassify", classification_vector))

    fvWekaAttributes = FastVector(len(dataset[0]))
    for attribute in attributes:
        fvWekaAttributes.addElement(attribute)

    training_set = Instances("C4.5Set", fvWekaAttributes, len(dataset))
    training_set.setClassIndex(len(header)-1)

    for row in dataset:
        last_index = len(row) - 1
        inst = Instance(len(row))
        for column_index in range(last_index):
            try:
                inst.setValue(fvWekaAttributes.elementAt(column_index),
                              float(row[column_index]))
            except:
                # Best effort: a value that cannot be set is skipped silently.
                pass
        inst.setValue(fvWekaAttributes.elementAt(last_index), row[-1])
        training_set.add(inst)

    j48 = J48()
    j48.buildClassifier(training_set)
    tree = parse_tree(str(j48))
    return state, tree
예제 #7
0
def createTrainingData(img, samples, class_names, n_samples=0, ops=None):
  """ Build a WEKA Instances object with one feature vector per sample.

      img: a 2D RandomAccessibleInterval.
      samples: a sequence of (position, class_index) pairs, where position is
               a long[] (or int numeric sequence or Localizable); can be a generator.
      class_names: a list of class names, as many as different class_index.
      n_samples: optional, the number of samples (needed when samples is
                 e.g. a generator); when 0, len(samples) is used.
      ops: optional, the sequence of ImgMath ops to apply to the img;
           defaults to filterBank(img).

      return an instance of WEKA Instances
  """
  if not ops:
    ops = filterBank(img)
  if n_samples == 0:
    n_samples = len(samples)

  # One WEKA Attribute per op in the filter bank, named attr-1, attr-2, ...
  attributes = ArrayList()
  for index in xrange(len(ops)):
    attributes.add(Attribute("attr-%i" % (index + 1)))
  # The last attribute holds the classification classes
  attributes.add(Attribute("class", class_names))

  # The training data structure, with the class as the last attribute
  training_data = Instances("training", attributes, n_samples)
  training_data.setClassIndex(len(attributes) -1)

  # Compute each op into its own float image with the first two dimensions of img
  opImgs = []
  for op in ops:
    computation = compute(op)
    destination = ArrayImgs.floats([img.dimension(0), img.dimension(1)])
    opImgs.append(computation.into(destination))
  ra = Views.collapse(Views.stack(opImgs)).randomAccess()

  n_features = len(opImgs)
  for position, class_index in samples:
    ra.setPosition(position)
    tc = ra.get()
    # Feature values at this position, followed by the class index
    vector = array((tc.get(i).getRealDouble() for i in xrange(n_features)), 'd')
    vector += array([class_index], 'd')
    training_data.add(DenseInstance(1.0, vector))

  return training_data
target = ArrayImgs.floats([width, height])
interval = FinalInterval([14, 14], [17, 17])
n_samples = Intervals.numElements(interval)
for ci, v in enumerate([fillValue, backgroundValue]):
    for _ in xrange(training_data.size() /
                    4):  # the other 2/4 are the membrane and mit boundary
        other = syntheticEM([], width, height, 0, v, noise=True)
        vectors = [zeros(len(attributes), 'd') for _ in xrange(n_samples)]
        for k, op in enumerate(filterBank(IL.wrap(other),
                                          sumType=DoubleType())):
            imgOp = compute(op).into(target)
            for i, v in enumerate(Views.interval(imgOp, interval)):
                vectors[i][k] = v.getRealDouble()
        for vector in vectors:
            vector[-1] = ci + 2  # class index
            training_data.add(DenseInstance(1.0, vector))

# Create a classifier: support vector machine (SVM, an SMO in WEKA)
classifier = SMO()
classifier.buildClassifier(training_data)
print classifier.toString()

# Save the trained classifier for later
SerializationHelper.write("/tmp/svm-em-mem-mit", classifier)
"""
imp = WindowManager.getImage("180-220-sub512x512-30.tif") # IJ.getImage() # e.g. 8-bit EM of Drosophila neurons 180-220-sub512x512-30.tif
#imp = IJ.openImage("/home/albert/lab/TEM/abd/microvolumes/Seg/180-220-sub/180-220-sub512x512-30.tif")
img = IL.wrap(imp)
"""
"""
from hr.irb.fastRandomForest import FastRandomForest