def train(self, withFeatureImportance=False):
    """Fit ``self.rf`` on the samples collected in ``self.mydata`` / ``self.labels``.

    The samples are cast to float32 and the labels to a uint32 column vector
    before being handed to the forest.

    Args:
        withFeatureImportance: when true, train through
            ``learnRFWithFeatureSelection`` and log the feature importance it
            returns at debug level; otherwise train through ``learnRF``.
    """
    label_array = np.asarray(self.labels)
    positive_count = np.count_nonzero(label_array)
    negative_count = len(self.labels) - positive_count

    logger.info(
        "Training classifier from {} positive and {} negative labels".format(
            positive_count, negative_count))
    # NOTE: the value logged as "length" is the full shape tuple of the data.
    logger.info("Training classifier from a feature vector of length {}".format(self.mydata.shape))

    samples = self.mydata.astype("float32")
    targets = label_array.astype("uint32").reshape(-1, 1)

    if withFeatureImportance:
        oob, featImportance = self.rf.learnRFWithFeatureSelection(samples, targets)
        logger.debug("RF feature importance: {}".format(featImportance))
    else:
        oob = self.rf.learnRF(samples, targets)

    logger.info("RF trained with OOB Error {}".format(oob))
def TrainRF(filepath, gt_rawimage_filename, initFrame, endFrame, outputFilename):
    """Train a vigra random forest on labelled frames and write it to HDF5.

    Args:
        filepath: location passed to ``read_in_images`` and
            ``read_positiveLabels``.
        gt_rawimage_filename: HDF5 file whose ``volume/data`` dataset holds the
            raw image.
        initFrame: first frame handed to the reader/feature helpers.
        endFrame: last-frame bound handed to the reader/feature helpers.
        outputFilename: file the trained forest is written to.
    """
    raw_volume = vigra.impex.readHDF5(gt_rawimage_filename, 'volume/data')
    label_images = read_in_images(initFrame, endFrame, filepath)
    frame_features = compute_features(raw_volume, label_images, initFrame, endFrame)

    positive_labels = read_positiveLabels(initFrame, endFrame, filepath)
    negative_labels = negativeLabels(frame_features, positive_labels)
    samples, targets = allFeatures(frame_features, positive_labels, negative_labels)

    # The forest expects float32 samples and a uint32 column vector of labels.
    forest = vigra.learning.RandomForest()
    forest.learnRF(
        samples.astype("float32"),
        (np.asarray(targets)).astype("uint32").reshape(-1, 1),
    )
    forest.writeHDF5(outputFilename)
def computeFeatures(self, rawImage, labelImage, framenumber, rawFilename):
    """Load the stored optical-flow values for one frame from a JSON file.

    The file is selected by the module-level ``dataset`` value and by
    ``framenumber``. ``rawImage``, ``labelImage`` and ``rawFilename`` are
    accepted but not used.

    Returns:
        The parsed JSON content converted with ``np.asarray``.
    """
    flow_file = '/home/jo/Documents/Bachelor/flow_json/Data_%02u/flow_%03d' % (dataset, framenumber)
    with open(flow_file) as data_file:
        optical_flow = np.asarray(json.load(data_file))
    return optical_flow
def getFeatures(f1, f2, o1, o2):
    """Build a joint feature vector for object ``o1`` in ``f1`` and ``o2`` in ``f2``.

    For every feature name in ``f1`` the difference and the element-wise
    product of the two objects' values are collected:

    * ``Global<Maximum >`` / ``Global<Minimum >`` are used as they are, without
      indexing by object.
    * ``RegionCenter`` contributes the Euclidean norm of the difference and of
      the product.
    * ``Histogram`` and ``Polygon`` are left out.
    * Everything else contributes its per-object values.

    NaNs are not removed here.

    Returns:
        1-D array: all flattened differences followed by all flattened products.
    """
    whole_image_keys = ("Global<Maximum >", "Global<Minimum >")
    # Histogram was observed to hold only zeros; Polygon has a different
    # length per object, so neither can be combined across objects.
    ignored_keys = ('Histogram', 'Polygon')

    differences = []
    products = []
    for name in f1:
        if name in ignored_keys:
            continue

        if name in whole_image_keys:
            # These features have a single element, not one per object.
            differences.append(f1[name] - f2[name])
            products.append(f1[name] * f2[name])
            continue

        first = f1[name][o1]
        second = f2[name][o2]
        if name == 'RegionCenter':
            differences.append(np.linalg.norm(first - second))
            products.append(np.linalg.norm(first * second))
        else:
            # Converted to nested lists so that flatten() can unroll them.
            differences.append((first - second).tolist())
            products.append((first * second).tolist())

    flat_differences = np.asarray(flatten(differences))
    flat_products = np.asarray(flatten(products))
    return np.concatenate((flat_differences, flat_products))
def addSample(self, f1, f2, label):
    """Add one labelled training sample built from two per-object feature dicts.

    For every name in the module-level ``selectedFeatures`` the difference and
    the product of the two objects' values are collected:

    * ``Global<Maximum >``, ``Global<Minimum >``, ``Histogram`` and ``Polygon``
      are skipped.
    * ``RegionCenter`` contributes the Euclidean norm of the difference and of
      the element-wise product.
    * Non-array values are combined as floats; arrays are combined
      element-wise.

    The resulting row (all differences followed by all products) is stored in
    ``self.mydata`` and ``label`` is appended to ``self.labels``.

    The label is recorded only after the feature row has been built, validated
    and stored, so a failure part-way through cannot leave ``self.labels``
    longer than ``self.mydata``.

    Args:
        f1: mapping from feature name to the first object's value.
        f2: mapping from feature name to the second object's value.
        label: class label of the sample.

    Raises:
        AssertionError: if any computed feature value is NaN or infinite.
    """
    skipped_keys = ("Global<Maximum >", "Global<Minimum >", 'Histogram', 'Polygon')

    differences = []
    products = []
    for key in selectedFeatures:
        if key in skipped_keys:
            # Global min/max intensity is not interesting, Histogram was
            # observed to hold only zeros, and Polygon has a different length
            # per object.
            continue

        if key == 'RegionCenter':
            differences.append(np.linalg.norm(f1[key] - f2[key]))
            products.append(np.linalg.norm(f1[key] * f2[key]))
        elif not isinstance(f1[key], np.ndarray):
            differences.append(float(f1[key]) - float(f2[key]))
            products.append(float(f1[key]) * float(f2[key]))
        else:
            # Converted to nested lists so that flatten() can unroll them.
            differences.append((f1[key] - f2[key]).tolist())
            products.append((f1[key] * f2[key]).tolist())

    x = np.asarray(flatten(differences))
    x2 = np.asarray(flatten(products))

    # Kept as asserts so callers see the same exception type as before.
    assert not np.any(np.isnan(x))
    assert not np.any(np.isnan(x2))
    assert not np.any(np.isinf(x))
    assert not np.any(np.isinf(x2))

    features = np.concatenate((x, x2))
    if self.mydata is None:
        self.mydata = features
    else:
        self.mydata = np.vstack((self.mydata, features))

    # Record the label last: data and labels stay the same length even when
    # anything above raises.
    self.labels.append(label)
def allFeatures(features, labels, neg_labels):
    """Build the sample matrix and label vector for consecutive-frame pairs.

    For each frame ``i`` (all but the last), every pair in ``labels[i]``
    becomes a sample with label 1, and every pair in ``neg_labels[i]`` that is
    not also listed in ``labels[i]`` becomes a sample with label 0. Each
    sample is ``getFeatures(features[i], features[i + 1], pair[0], pair[1])``.

    Rows are collected in a list and stacked once. Compared with growing the
    matrix by repeated ``np.vstack`` this
    * works when the first sample encountered is a negative one,
    * always yields a 2-D matrix, even for a single sample, and
    * avoids re-copying the whole matrix for every added row.

    Args:
        features: per-frame feature containers, as accepted by ``getFeatures``.
        labels: per-frame positive pairs; ``labels[i]`` must offer ``tolist()``
            whenever ``neg_labels[i]`` is non-empty.
        neg_labels: per-frame candidate negative pairs.

    Returns:
        ``(x, lab)``: the 2-D sample matrix with every column containing a NaN
        removed, and the label array in matching row order.

    Raises:
        ValueError: if no sample could be built at all.
    """
    rows = []
    lab = []
    for i in range(len(features) - 1):
        current, following = features[i], features[i + 1]

        for k in labels[i]:
            rows.append(getFeatures(current, following, k[0], k[1]))
            lab.append(1)

        frame_negatives = list(neg_labels[i])
        if frame_negatives:
            # Converted once per frame instead of once per candidate.
            frame_positives = labels[i].tolist()
            for k in frame_negatives:
                if k not in frame_positives:
                    rows.append(getFeatures(current, following, k[0], k[1]))
                    lab.append(0)

    if not rows:
        raise ValueError("allFeatures: no positive or negative samples were found")

    x = np.vstack(rows)
    x = x[:, ~np.isnan(x).any(axis=0)]  # drop every feature column containing a NaN
    return x, np.asarray(lab)
def filter_labels(a, min_size, max_size=None):
    """
    Zero out every labeled component whose pixel count is below ``min_size``
    or, when ``max_size`` is given, above ``max_size``.

    Note: ``a`` is modified in-place and also returned.
    """
    if min_size == 0:
        if max_size is None or max_size > np.prod(a.shape):
            # Nothing can be filtered out, so skip the counting entirely.
            return a

    flat_labels = a.ravel()
    try:
        counts = np.bincount(flat_labels)
    except TypeError:
        # On 32-bit systems uint32 must be converted to int explicitly.
        # (This fix is just for VM testing.)
        counts = np.bincount(np.asarray(flat_labels, dtype=int))

    if max_size is None:
        rejected = counts < min_size
    else:
        rejected = np.logical_or(counts < min_size, counts > max_size)

    # rejected is indexed by label value, so rejected[a] is a per-pixel mask.
    a[rejected[a]] = 0
    return a
def filter_labels(a, min_size, max_size=None):
    """
    Remove (set to 0) the labels of components that are smaller than
    ``min_size`` or, if ``max_size`` is given, larger than ``max_size``.

    Note: Operates in-place; the same array is returned.
    """
    # Shortcut for efficiency: with these bounds no component can be dropped.
    if min_size == 0 and (max_size is None or max_size > np.prod(a.shape)):
        return a

    try:
        sizes_by_label = np.bincount(a.ravel())
    except TypeError:
        # On 32-bit systems, uint32 has to be converted to int explicitly.
        # (This fix is just for VM testing.)
        sizes_by_label = np.bincount(np.asarray(a.ravel(), dtype=int))

    discard = sizes_by_label < min_size
    if max_size is not None:
        discard |= sizes_by_label > max_size

    # Look up each pixel's label in the per-label table to get a pixel mask.
    pixel_mask = discard[a]
    a[pixel_mask] = 0
    return a
# Script: train a random forest on the labelled frames, then run a 4-fold
# cross validation on the same samples and print the per-fold scores.
# NOTE(review): uses Python 2 print statements.
initFrame = 0
endFrame = 20
# Locations of the tracking data and of the raw image volume.
filepath = '/net/hciserver03/storage/lparcala/mitocheck_006--01--06/manual_tracking2/'
gt_rawimage_filename = '/net/hciserver03/storage/lparcala/mitocheck_006--01--06/mitocheck_94570_2D+t_00-92.h5'
gt_rawimage = vigra.impex.readHDF5(gt_rawimage_filename, 'volume/data')
# Features for the frames selected by initFrame/endFrame.
# NOTE(review): an earlier comment here said "first 5 time frames", but
# endFrame is 20 -- confirm which range is intended.
features = compute_features(gt_rawimage,read_in_images(initFrame,endFrame, filepath),initFrame,endFrame)
mylabels = read_positiveLabels(initFrame,endFrame,filepath)
neg_labels = negativeLabels(features,mylabels)
mydata, endlabels = allFeatures(features, mylabels, neg_labels)
# Train one forest on all samples (float32 data, uint32 column of labels).
rf = vigra.learning.RandomForest()
rf.learnRF(mydata.astype("float32"), (np.asarray(endlabels)).astype("uint32").reshape(-1,1))
# Cross Validation
# NOTE(review): this repeats the allFeatures call made above with the same
# arguments -- presumably X, Y equal mydata, endlabels; confirm before reusing.
X, Y = allFeatures(features, mylabels, neg_labels)
total_number_of_samples = Y.shape[0]
# NOTE(review): KFold is called as (n_samples, n_folds, shuffle=...) and
# iterated directly -- presumably the legacy sklearn.cross_validation API.
kf = KFold(total_number_of_samples, 4, shuffle=True)
print "Starting cross validation"
final_measure = []
for train, test in kf:
    # A fresh forest per fold; this rebinds the module-level name rf.
    rf = vigra.learning.RandomForest()
    print "oob=",rf.learnRF(X[train].astype("float32"),Y[train].astype("uint32").reshape(-1,1))
    # Despite its name, test_error holds the result of
    # precision_recall_fscore_support with weighted averaging.
    test_error = precision_recall_fscore_support(Y[test],rf.predictLabels(X[test].astype("float32")),average='weighted')
    print "test error=",test_error
    # Keep only the first three entries of the result for the summary.
    final_measure.append(test_error[:3])
# One row per fold.
print np.vstack(tuple(final_measure))
def train(self):
    """Fit ``self.rf`` on ``self.mydata`` / ``self.labels`` and print the OOB error.

    The samples are passed as float32 and the labels as a uint32 column
    vector. The number of positive (non-zero) and negative labels is printed
    before training.
    """
    label_array = np.asarray(self.labels)
    positive_count = np.count_nonzero(label_array)
    negative_count = len(self.labels) - positive_count
    print("Training classifier from {} positive and {} negative labels".format(
        positive_count, negative_count))

    samples = self.mydata.astype("float32")
    targets = label_array.astype("uint32").reshape(-1, 1)
    oob = self.rf.learnRF(samples, targets)
    print("RF trained with OOB Error ", oob)