class RandomForest(object):
    """Thin wrapper around a vigra random forest giving a scikit-learn-like
    fit/predict interface plus HDF5 save/load helpers."""

    def __init__(self, ntrees=255, use_feature_importance=False,
                 sample_classes_individually=False):
        self.rf = VigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels, **kwargs):
        """Train the forest on (samples, features) data; returns self.

        Stores the out-of-bag error in ``self.oob`` and, when feature
        importance is enabled, the importances in
        ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return per-class probabilities for each sample."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return the predicted label for each sample."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Coerce ``features`` to the 2D float32 array vigra expects."""
        if features.dtype != float32:
            features = features.astype(float32)
        if features.ndim == 1:
            features = features[newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Coerce ``labels`` to a non-negative uint32 column vector.

        Never mutates the caller's array.
        """
        if labels.dtype != uint32:
            if len(unique(labels[labels < 0])) == 1 and not (labels == 0).any():
                # Exactly one negative sentinel value and 0 is unused:
                # map the sentinel to 0. Copy first so the caller's
                # array is not modified in place (BUG FIX).
                labels = labels.copy()
                labels[labels < 0] = 0
            else:
                # BUG FIX: subtract the minimum so labels start at 0.
                # The original *added* the minimum, which pushes negative
                # labels further below zero and wraps on the uint32 cast.
                labels = labels - labels.min()
            labels = labels.astype(uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf', overwrite=True):
        """Write the forest and its training metadata to an HDF5 file."""
        self.rf.writeHDF5(fn, rfgroupname, overwrite)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance']
        # BUG FIX: close the file handle (the original leaked it); 'a'
        # matches h5py's historical default mode.
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    f[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Read the forest and its training metadata from an HDF5 file."""
        self.rf = VigraRandomForest(fn, rfgroupname)
        # BUG FIX: close the file handle (the original leaked it).
        with h5py.File(fn, 'r') as f:
            groups = []
            f.visit(groups.append)
            # everything outside the forest's group is a saved attribute
            attrs = [g for g in groups if not g.startswith(rfgroupname)]
            for attr in attrs:
                setattr(self, attr, array(f[attr]))
class VigraRandomForest(object):
    """Wrapper around vigra's random forest with a fit/predict interface
    and HDF5 persistence of the forest plus training metadata."""

    def __init__(self, ntrees=255, use_feature_importance=False,
                 sample_classes_individually=False):
        self.rf = BaseVigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels):
        """Train on (samples, features) data; returns self.

        Stores the out-of-bag error in ``self.oob`` and, when enabled,
        the importances in ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return per-class probabilities for each sample."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return the predicted label for each sample."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Coerce ``features`` to the 2D float32 array vigra expects."""
        if features.dtype != np.float32:
            features = features.astype(np.float32)
        if features.ndim == 1:
            features = features[np.newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Coerce ``labels`` to a non-negative uint32 column vector
        without mutating the caller's array."""
        if labels.dtype != np.uint32:
            if len(np.unique(labels[labels < 0])) == 1 \
                    and not (labels == 0).any():
                # One negative sentinel value and no zeros: map it to 0.
                # Copy first so the input is not modified in place (BUG FIX).
                labels = labels.copy()
                labels[labels < 0] = 0
            else:
                # BUG FIX: subtract the minimum so labels start at 0. The
                # original *added* the minimum, pushing negative labels
                # further below zero and wrapping on the uint32 cast.
                labels = labels - labels.min()
            labels = labels.astype(np.uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf'):
        """Write the forest plus metadata attrs to an HDF5 file."""
        self.rf.writeHDF5(fn, rfgroupname)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance',
                     'feature_description']
        # BUG FIX: close the file handle (the original leaked it); 'a'
        # matches h5py's historical default mode.
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    f[rfgroupname].attrs[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Read the forest plus metadata attrs from an HDF5 file."""
        self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
        # BUG FIX: close the file handle; also dropped the leftover debug
        # print of every restored attribute.
        with h5py.File(fn, 'r') as f:
            for attr in f[rfgroupname].attrs:
                setattr(self, attr, f[rfgroupname].attrs[attr])
def _compute_partial_predictions(
        feature_data: "np.ndarray[Any, np.dtype[np.float32]]",
        forest: VigraRandomForest) -> "np.ndarray[Any, np.dtype[np.float32]]":
    """Return this forest's class probabilities scaled by its tree count.

    NOTE(review): the scaling presumably turns the per-forest average
    into a vote total so partial results can be combined — confirm
    against the caller.
    """
    probabilities = forest.predictProbabilities(feature_data)
    tree_count = forest.treeCount()
    return probabilities * tree_count
def do_predict(forest: VigraRandomForest):
    """Predict class probabilities for ``feature_data`` (captured from the
    enclosing scope), scaled by this forest's tree count."""
    raw = feature_data.linear_raw()
    probabilities = forest.predictProbabilities(raw)
    return probabilities * forest.treeCount()
class VigraRandomForest(object):
    """fit/predict-style wrapper over vigra's random forest, with HDF5
    persistence of the classifier and its training metadata."""

    def __init__(self, ntrees=255, use_feature_importance=False,
                 sample_classes_individually=False):
        self.rf = BaseVigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels):
        """Train the forest; returns self.

        Sets ``self.oob`` (out-of-bag error) and, when feature
        importance is enabled, ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return per-class probabilities for each sample."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return the predicted label for each sample."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Coerce ``features`` to a 2D float32 array (vigra's input form)."""
        if features.dtype != np.float32:
            features = features.astype(np.float32)
        if features.ndim == 1:
            features = features[np.newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Coerce ``labels`` to a non-negative uint32 column vector; the
        caller's array is never modified in place."""
        if labels.dtype != np.uint32:
            if len(np.unique(labels[labels < 0])) == 1 \
                    and not (labels == 0).any():
                # Single negative sentinel and 0 unused: remap it to 0 on
                # a copy, so the input array stays untouched (BUG FIX).
                labels = labels.copy()
                labels[labels < 0] = 0
            else:
                # BUG FIX: shift so the minimum label becomes 0. The
                # original added the minimum instead, which drives
                # negative labels lower and wraps on the uint32 cast.
                labels = labels - labels.min()
            labels = labels.astype(np.uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf'):
        """Persist the forest and its metadata attrs to HDF5."""
        self.rf.writeHDF5(fn, rfgroupname)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance',
                     'feature_description']
        # BUG FIX: context manager closes the handle the original leaked;
        # 'a' matches h5py's historical default mode.
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    f[rfgroupname].attrs[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Restore the forest and its metadata attrs from HDF5."""
        self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
        # BUG FIX: close the file handle and drop the leftover debug print.
        with h5py.File(fn, 'r') as f:
            for attr in f[rfgroupname].attrs:
                setattr(self, attr, f[rfgroupname].attrs[attr])
class RandomForest(object):
    """Random-forest classifier wrapper exposing fit/predict plus
    HDF5-based save/load of the forest and training metadata."""

    def __init__(self, ntrees=255, use_feature_importance=False,
                 sample_classes_individually=False):
        self.rf = VigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels, **kwargs):
        """Train the forest; returns self. Sets ``self.oob`` and, when
        enabled, ``self.feature_importance``."""
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return per-class probabilities for each sample."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return the predicted label for each sample."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Coerce ``features`` to a 2D float32 array."""
        if features.dtype != float32:
            features = features.astype(float32)
        if features.ndim == 1:
            features = features[newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Coerce ``labels`` to a non-negative uint32 column vector;
        never mutates the caller's array."""
        if labels.dtype != uint32:
            if len(unique(
                    labels[labels < 0])) == 1 and not (labels == 0).any():
                # One negative sentinel and 0 free: remap it to 0 on a
                # copy so the input is left untouched (BUG FIX).
                labels = labels.copy()
                labels[labels < 0] = 0
            else:
                # BUG FIX: subtract the minimum so labels start at 0;
                # adding it (as the original did) pushes negatives lower
                # and wraps on the uint32 cast.
                labels = labels - labels.min()
            labels = labels.astype(uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf', overwrite=True):
        """Write the forest and its metadata to an HDF5 file."""
        self.rf.writeHDF5(fn, rfgroupname, overwrite)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance']
        # BUG FIX: context manager closes the handle the original leaked;
        # 'a' matches h5py's historical default mode.
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    f[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Read the forest and its metadata from an HDF5 file."""
        self.rf = VigraRandomForest(fn, rfgroupname)
        # BUG FIX: close the file handle (the original leaked it).
        with h5py.File(fn, 'r') as f:
            groups = []
            f.visit(groups.append)
            # datasets outside the forest's group are saved attributes
            attrs = [g for g in groups if not g.startswith(rfgroupname)]
            for attr in attrs:
                setattr(self, attr, array(f[attr]))
img = tiffcvt.h5_file["ordinal_train_volume"][:,:,:] else: labels_name = "%s_test_labels" labels_shape = tiffcvt.test_volume.shape img = tiffcvt.h5_file["ordinal_test_volume"][:,:,:] if len(sys.argv) < 3 or sys.argv[2] != "eigentexture": extract_fn = extract_features labels_name = labels_name % "predicted" else: components = tiffcvt.h5_file["components"][:,:] extract_fn = lambda img, bimg, indices:\ extract_eigenfeatures(img, bimg, components, indices) labels_name = labels_name % "eigenpredicted" predicted = tiffcvt.h5_file.require_dataset(labels_name, labels_shape, np.float32, chunks=(64,64,1)) bimg = blur_image(img) for i in range(0, img.shape[0], 64): for j in range(0, img.shape[1], 64): for k in range(img.shape[2]): coords = np.mgrid[i:(i+64), j:(j+64),k:(k+1)].reshape(3, 64*64).transpose() features = extract_fn(img, bimg, coords) score = clf.predictProbabilities(features)[:,1] score.shape = (64,64) predicted[i:(i+64),j:(j+64),k] = score print "Finished block %d, %d, %d" % (i, j, k) tiffcvt.h5_file.close()