def train(self, withFeatureImportance=False):
        """Fit ``self.rf`` on the accumulated samples and log the result.

        The feature matrix ``self.mydata`` is cast to float32 and the labels
        in ``self.labels`` are cast to uint32 and reshaped to a single column
        before being handed to the forest.

        :param withFeatureImportance: when true, train through
            ``learnRFWithFeatureSelection`` and additionally log the returned
            feature importance at debug level; otherwise use plain ``learnRF``.
        """
        label_array = np.asarray(self.labels)
        num_positive = np.count_nonzero(label_array)
        num_negative = len(self.labels) - num_positive
        logger.info(
            "Training classifier from {} positive and {} negative labels".format(
                num_positive, num_negative))
        # NOTE: the message says "length" but what is logged is the full shape.
        logger.info("Training classifier from a feature vector of length {}".format(self.mydata.shape))

        train_features = self.mydata.astype("float32")
        train_labels = label_array.astype("uint32").reshape(-1, 1)

        if not withFeatureImportance:
            oob = self.rf.learnRF(train_features, train_labels)
        else:
            oob, featImportance = self.rf.learnRFWithFeatureSelection(
                train_features, train_labels)
            logger.debug("RF feature importance: {}".format(featImportance))
        logger.info("RF trained with OOB Error {}".format(oob))
    def train(self, withFeatureImportance=False):
        """Train the random forest held in ``self.rf`` and log its OOB error.

        Uses ``self.mydata`` (cast to float32) as features and ``self.labels``
        (cast to uint32, reshaped to one column) as targets.

        :param withFeatureImportance: if true, call
            ``learnRFWithFeatureSelection`` and log the feature importance it
            returns (debug level); if false, call ``learnRF``.
        """
        positives = np.count_nonzero(np.asarray(self.labels))
        negatives = len(self.labels) - positives
        summary = "Training classifier from {} positive and {} negative labels".format(
            positives, negatives)
        logger.info(summary)
        # The shape of the whole feature matrix is what ends up in this message.
        logger.info("Training classifier from a feature vector of length {}".format(self.mydata.shape))

        X = self.mydata.astype("float32")
        y = (np.asarray(self.labels)).astype("uint32").reshape(-1, 1)

        if withFeatureImportance:
            result = self.rf.learnRFWithFeatureSelection(X, y)
            oob, featImportance = result
            logger.debug("RF feature importance: {}".format(featImportance))
        else:
            oob = self.rf.learnRF(X, y)
        logger.info("RF trained with OOB Error {}".format(oob))
def TrainRF(filepath, gt_rawimage_filename, initFrame, endFrame, outputFilename):
    """Train a vigra random forest on labelled frame pairs and save it to HDF5.

    :param filepath: location passed to ``read_in_images`` and
        ``read_positiveLabels`` (presumably the folder with the manual
        tracking data -- confirm against those helpers).
    :param gt_rawimage_filename: HDF5 file whose ``volume/data`` dataset holds
        the raw image volume.
    :param initFrame: first frame index forwarded to the helper functions.
    :param endFrame: last frame index forwarded to the helper functions.
    :param outputFilename: file the trained forest is written to.
    """
    raw_volume = vigra.impex.readHDF5(gt_rawimage_filename, 'volume/data')
    frame_images = read_in_images(initFrame, endFrame, filepath)
    frame_features = compute_features(raw_volume, frame_images, initFrame, endFrame)

    # Positive samples come from disk; negative ones are derived from them.
    positive_pairs = read_positiveLabels(initFrame, endFrame, filepath)
    negative_pairs = negativeLabels(frame_features, positive_pairs)
    samples, targets = allFeatures(frame_features, positive_pairs, negative_pairs)

    forest = vigra.learning.RandomForest()
    target_column = np.asarray(targets).astype("uint32").reshape(-1, 1)
    forest.learnRF(samples.astype("float32"), target_column)
    forest.writeHDF5(outputFilename)
示例#4
0
	def computeFeatures(self, rawImage, labelImage, framenumber, rawFilename):
		"""Load the stored optical flow for one frame and return it as an array.

		Only ``framenumber`` is used; ``rawImage``, ``labelImage`` and
		``rawFilename`` are accepted but ignored. The data set number comes
		from the name ``dataset``, which is not defined in this method
		(presumably a module-level variable -- confirm).

		:returns: the JSON content of the flow file converted with ``np.asarray``.
		"""
		# The location is hard-coded: one JSON file per data set and frame.
		flow_path = '/home/jo/Documents/Bachelor/flow_json/Data_%02u/flow_%03d' % (dataset, framenumber)
		with open(flow_path) as flow_file:
			optical_flow = np.asarray(json.load(flow_file))

		return optical_flow
def getFeatures(f1,f2,o1,o2):
    """Build the joint feature vector for object ``o1`` of ``f1`` and ``o2`` of ``f2``.

    For every key of ``f1`` the difference and the product of the two
    objects' feature values are collected; the result is all differences
    followed by all products, flattened into one 1-D array.

    Special cases:
    * ``Global<Maximum >`` / ``Global<Minimum >`` are used as a whole, without
      indexing by object (the original author notes they have one element).
    * ``RegionCenter`` contributes the norm of the difference and the norm of
      the product instead of the raw vectors.
    * ``Histogram`` and ``Polygon`` are skipped.

    NaNs are NOT removed here; that is left to the caller.
    """
    whole_image_keys = ("Global<Maximum >", "Global<Minimum >")
    ignored_keys = ('Histogram', 'Polygon')

    differences = []
    products = []
    for name in f1:
        if name in ignored_keys:
            continue
        if name in whole_image_keys:
            differences.append(f1[name] - f2[name])
            products.append(f1[name] * f2[name])
            continue

        first = f1[name][o1]
        second = f2[name][o2]
        if name == 'RegionCenter':
            differences.append(np.linalg.norm(first - second))
            products.append(np.linalg.norm(first * second))
        else:
            # Lists (not arrays) so that flatten() can unroll them.
            differences.append((first - second).tolist())
            products.append((first * second).tolist())

    flat_differences = np.asarray(flatten(differences))
    flat_products = np.asarray(flatten(products))
    return np.concatenate((flat_differences, flat_products))
    def addSample(self, f1, f2, label):
        """Append one training sample built from two objects' features.

        For every key in ``selectedFeatures`` (defined outside this method)
        the difference and the product of ``f1[key]`` and ``f2[key]`` are
        collected. The sample is all differences followed by all products.

        * ``Global<Maximum >`` / ``Global<Minimum >``, ``Histogram`` and
          ``Polygon`` are skipped.
        * ``RegionCenter`` contributes the norm of the difference and the norm
          of the product.
        * Non-array values are converted with ``float`` before combining.

        The sample is stacked as a new row onto ``self.mydata`` and ``label``
        is appended to ``self.labels``. Both are updated only after the
        feature vector has been built and validated, so a failure leaves the
        two in sync.

        :param f1: mapping from feature name to value for the first object.
        :param f2: mapping from feature name to value for the second object.
        :param label: class label of this sample.
        :raises AssertionError: if any resulting feature is NaN or infinite.
        """
        skipped_keys = ("Global<Maximum >", "Global<Minimum >", 'Histogram', 'Polygon')
        res = []
        res2 = []

        for key in selectedFeatures:
            if key in skipped_keys:
                continue
            first = f1[key]
            second = f2[key]
            if key == 'RegionCenter':
                res.append(np.linalg.norm(first - second))  # difference of features
                res2.append(np.linalg.norm(first * second))  # product of features
            elif isinstance(first, np.ndarray):
                # Lists (not arrays) so that flatten() can unroll them.
                res.append((first - second).tolist())
                res2.append((first * second).tolist())
            else:
                res.append(float(first) - float(second))
                res2.append(float(first) * float(second))

        x = np.asarray(flatten(res))
        x2 = np.asarray(flatten(res2))
        assert not np.any(np.isnan(x)), "NaN in feature differences"
        assert not np.any(np.isnan(x2)), "NaN in feature products"
        assert not np.any(np.isinf(x)), "Inf in feature differences"
        assert not np.any(np.isinf(x2)), "Inf in feature products"
        features = np.concatenate((x, x2))

        if self.mydata is None:
            # Keep the data 2-D (samples x features) even for a single sample.
            stacked = features.reshape(1, -1)
        else:
            stacked = np.vstack((self.mydata, features))

        # Commit both pieces of state together, only after everything succeeded.
        self.mydata = stacked
        self.labels.append(label)
def allFeatures(features, labels, neg_labels):
    """Assemble the training matrix and label vector for consecutive frames.

    For each pair of consecutive entries ``features[i]``, ``features[i+1]``:

    * every pair ``k`` in ``labels[i]`` yields a sample
      ``getFeatures(features[i], features[i+1], k[0], k[1])`` with label 1;
    * every pair ``k`` in ``neg_labels[i]`` that is not also listed in
      ``labels[i]`` yields a sample with label 0.

    Feature columns containing a NaN in any sample are dropped.

    :returns: ``(x, lab)`` where ``x`` is a 2-D array with one row per sample
        and ``lab`` is the array of 0/1 labels in the same order.
    :raises ValueError: if no sample at all could be generated.
    """
    rows = []
    lab = []
    for i in range(len(features) - 1):
        current = features[i]
        following = features[i + 1]

        for k in labels[i]:
            rows.append(getFeatures(current, following, k[0], k[1]))
            lab.append(1)

        # Converted once per frame instead of once per negative candidate.
        positive_pairs = np.asarray(labels[i]).tolist()
        for k in neg_labels[i]:
            if k not in positive_pairs:
                rows.append(getFeatures(current, following, k[0], k[1]))
                lab.append(0)

    if not rows:
        raise ValueError("allFeatures: no positive or negative samples found")

    # A single vstack keeps the result 2-D even for one sample and avoids
    # re-copying the growing matrix for every added row.
    x = np.vstack(rows)
    x = x[:, ~np.isnan(x).any(axis=0)]  # drop feature columns that contain NaNs
    return x, np.asarray(lab)
def filter_labels(a, min_size, max_size=None):
    """
    Zero out labeled connected components whose pixel count is out of range.
    Note: Operates in-place and returns the same array.

    a        -- integer label array; each distinct value is one component.
    min_size -- components with fewer than min_size elements are set to 0.
    max_size -- if given, components with more than max_size elements are
                set to 0 as well.
    """
    # Shortcut for efficiency: nothing can be filtered with these bounds.
    if min_size == 0:
        if max_size is None or max_size > np.prod(a.shape):
            return a

    flat_labels = a.ravel()
    try:
        counts = np.bincount(flat_labels)
    except TypeError:
        # On 32-bit systems, must explicitly convert from uint32 to int
        # (This fix is just for VM testing.)
        counts = np.bincount(np.asarray(flat_labels, dtype=int))

    # Per-label verdict, indexed by label value.
    rejected = counts < min_size
    if max_size is not None:
        rejected = rejected | (counts > max_size)

    # Look the verdict up for every element and clear the rejected ones.
    a[rejected[a]] = 0
    return a
def filter_labels(a, min_size, max_size=None):
    """
    Set every label whose component size lies outside [min_size, max_size] to 0.
    Note: Operates in-place; the input array itself is returned.

    Sizes are the number of elements carrying each label value. Without
    max_size only the lower bound is applied.
    """
    # Shortcut for efficiency: with these bounds no component can be removed.
    if min_size == 0 and (max_size is None or max_size > np.prod(a.shape)):
        return a

    labels_1d = a.ravel()
    try:
        sizes = np.bincount(labels_1d)
    except TypeError:
        # On 32-bit systems, must explicitly convert from uint32 to int
        # (This fix is just for VM testing.)
        sizes = np.bincount(labels_1d.astype(int))

    if max_size is None:
        out_of_range = sizes < min_size
    else:
        out_of_range = np.logical_or(sizes < min_size, sizes > max_size)

    # out_of_range is indexed by label value; map it back onto the array.
    discard_mask = out_of_range[a]
    a[discard_mask] = 0
    return a
示例#10
0
    # NOTE(review): fragment of a larger script (Python 2 print statements);
    # the enclosing header is not part of this excerpt.
    # Frame range handed to all helper functions below.
    initFrame = 0
    endFrame = 20

    # Locations of the manual tracking data and of the raw image volume.
    filepath = '/net/hciserver03/storage/lparcala/mitocheck_006--01--06/manual_tracking2/'
    gt_rawimage_filename = '/net/hciserver03/storage/lparcala/mitocheck_006--01--06/mitocheck_94570_2D+t_00-92.h5'
    gt_rawimage = vigra.impex.readHDF5(gt_rawimage_filename, 'volume/data')

    # Features for the frames from initFrame to endFrame, plus the positive
    # labels read from disk and the negative labels derived from them.
    features = compute_features(gt_rawimage,read_in_images(initFrame,endFrame, filepath),initFrame,endFrame)
    mylabels = read_positiveLabels(initFrame,endFrame,filepath)
    neg_labels = negativeLabels(features,mylabels)
    mydata, endlabels =  allFeatures(features, mylabels, neg_labels)
    # Forest trained on all samples; `rf` is rebound inside the loop below.
    rf = vigra.learning.RandomForest()
    rf.learnRF(mydata.astype("float32"), (np.asarray(endlabels)).astype("uint32").reshape(-1,1))

    # Cross Validation
    # Same call with the same arguments as for mydata/endlabels above.
    X, Y = allFeatures(features, mylabels, neg_labels)
    total_number_of_samples = Y.shape[0]
    # presumably the legacy sklearn KFold(n, n_folds, ...) signature, i.e.
    # 4 shuffled folds -- TODO confirm against the imported KFold
    kf = KFold(total_number_of_samples, 4, shuffle=True)
    print "Starting cross validation"

    # One row per fold, holding the first three values of the score tuple.
    final_measure = []
    for train, test in kf:
        # Fresh forest per fold, trained on the training indices only.
        rf = vigra.learning.RandomForest()
        print "oob=",rf.learnRF(X[train].astype("float32"),Y[train].astype("uint32").reshape(-1,1))
        # Despite its name, test_error holds the result of
        # precision_recall_fscore_support (presumably precision, recall,
        # f-score, support -- confirm), not an error rate.
        test_error = precision_recall_fscore_support(Y[test],rf.predictLabels(X[test].astype("float32")),average='weighted')
        print "test error=",test_error
        final_measure.append(test_error[:3])
    # Print the per-fold measures stacked into one table.
    print np.vstack(tuple(final_measure))
 def train(self):
     """Train ``self.rf`` on the stored samples and print the OOB error.

     ``self.mydata`` is cast to float32 and ``self.labels`` to a uint32
     column vector before being passed to ``learnRF``. The number of
     positive (non-zero) and negative labels is printed first.
     """
     label_array = np.asarray(self.labels)
     num_positive = np.count_nonzero(label_array)
     num_negative = len(self.labels) - num_positive
     print("Training classifier from {} positive and {} negative labels".format(num_positive, num_negative))

     train_features = self.mydata.astype("float32")
     train_labels = label_array.astype("uint32").reshape(-1,1)
     oob = self.rf.learnRF(train_features, train_labels)
     print("RF trained with OOB Error ", oob)