def test_mirror_edges(self):
    """Check the output size and the mirrored content of emlib.mirror_edges.

    Two cases are exercised on 4-D input: a random volume (size and
    interior checks) and a volume with a single non-zero row (checks where
    that row shows up after mirroring).
    """
    X = np.random.rand(10, 2, 3, 3)
    b = 2  # b := border size
    Xm = emlib.mirror_edges(X, b)

    # make sure the result has the proper size: the two leading axes are
    # untouched, the two trailing axes grow by b on each side.
    # (unittest assertions are used so the checks survive `python -O` and
    # report both values on failure)
    self.assertEqual(Xm.shape[0], X.shape[0])
    self.assertEqual(Xm.shape[1], X.shape[1])
    self.assertEqual(Xm.shape[2], X.shape[2] + 2 * b)
    self.assertEqual(Xm.shape[3], X.shape[3] + 2 * b)

    # make sure the data looks reasonable: the columns on either side of
    # column b must match, and the interior must be the original data.
    self.assertTrue(np.all(Xm[:, :, :, b - 1] == Xm[:, :, :, b + 1]))
    self.assertTrue(np.all(Xm[:, :, b:-b, b:-b] == X))

    ## another test case
    # A single row of ones at row 2, mirrored with a border of 3.
    X = np.zeros((1, 1, 10, 10))
    X[0, 0, 2, :] = 1
    Xm = emlib.mirror_edges(X, 3)

    # Expected constant value of output rows 0..6: the original row lands
    # at index 5 (2 + border) and its mirror image at index 1.
    expectedRows = [0, 1, 0, 0, 0, 1, 0]
    for rowIdx, value in enumerate(expectedRows):
        self.assertTrue(np.all(Xm[0, 0, rowIdx, :] == value))
def _load_data(xName, yName, tileRadius, onlySlices, omitLabels=None):
    """Load a data volume (and, optionally, its labels) and preprocess them.

    Parameters:
      xName      : name of the data volume, handed to emlib.load_cube
      yName      : name of the labels volume; when falsy no labels are loaded
      tileRadius : border size handed to emlib.mirror_edges
      onlySlices : optional index selecting a subset along the first axis;
                   when falsy all slices are kept
      omitLabels : forwarded to _omit_labels together with the labels

    Returns:
      X alone when yName is falsy, otherwise the tuple (X, Y).
    """
    X = emlib.load_cube(xName, np.float32)

    # Sanity check on the axis order: the first axis (slices) is expected
    # to be strictly smaller than either of the other two.
    nSlices = X.shape[0]
    assert nSlices < X.shape[1]
    assert nSlices < X.shape[2]

    if onlySlices:
        X = X[onlySlices, :, :]
    print('[emCNN]: data shape: %s' % str(X.shape))

    X = emlib.mirror_edges(X, tileRadius)

    # Bring the data into [0 1].
    # *** ASSUMPTION *** anything with a maximum above 1 is in [0 255]
    if np.max(X) > 1:
        X = X / 255.
    print('[emCNN]: data min/max: %0.2f / %0.2f' % (np.min(X), np.max(X)))

    # No labels file (e.g. deploy mode): the data volume is all we return.
    if not yName:
        return X

    Y = emlib.load_cube(yName, np.float32)
    if onlySlices:
        Y = Y[onlySlices, :, :]
    print('[emCNN]: labels shape: %s' % str(Y.shape))

    # ** ASSUMPTION **: Special case for membrane detection / ISBI volume,
    # recognised by the label set being exactly {0, 255}.
    # (np.unique already returns its values in sorted order.)
    yAll = np.unique(Y)
    if len(yAll) == 2 and yAll[0] == 0 and yAll[1] == 255:
        print('[emCNN]: ISBI-style labels detected. converting 0->1, 255->0')
        # Both masks are taken before writing so the two swaps cannot
        # interfere with each other.
        wasZero = (Y == 0)
        wasMax = (Y == 255)
        Y[wasZero] = 1   # membrane
        Y[wasMax] = 0    # non-membrane

    # Labels must be natural numbers (contiguous integers starting at 0)
    # because they are mapped to indices at the output of the network.
    # The native y values are remapped to those indices here.
    omitLabels, pctOmitted = _omit_labels(Y, omitLabels)
    Y = emlib.fix_class_labels(Y, omitLabels).astype(np.int32)

    print('[emCNN]: yAll is %s' % str(np.unique(Y)))
    print('[emCNN]: will use %0.2f%% of volume' % (100.0 - pctOmitted))

    Y = emlib.mirror_edges(Y, tileRadius)
    return X, Y
def _load_data(xName, yName, args, tileSize):
    """Loads data sets and does basic preprocessing.

    Parameters:
      xName    : name of the data volume, passed to emlib.load_cube
      yName    : name of the labels volume; when falsy no labels are loaded
      args     : object providing the attributes `onlySlices` (optional
                 index selecting slices along the first axis) and
                 `omitLabels` (forwarded to emlib.fix_class_labels)
      tileSize : border size passed to emlib.mirror_edges

    Returns:
      The tuple (X, Y); Y is None when yName is not provided.

    Raises:
      AssertionError if the first axis of the data volume is not strictly
      smaller than each of the other two.
    """
    X = emlib.load_cube(xName, np.float32)

    # usually we expect fewer slices in Z than pixels in X or Y.
    # Make sure the dimensions look ok before proceeding.
    assert (X.shape[0] < X.shape[1])
    assert (X.shape[0] < X.shape[2])

    # NOTE: the shape is reported before any slice selection is applied.
    print('[emCNN]: data shape: %s' % str(X.shape))

    if args.onlySlices:
        X = X[args.onlySlices, :, :]

    X = emlib.mirror_edges(X, tileSize)

    # Scale data to live in [0 1].
    # I'm assuming original data is in [0 255]
    if np.max(X) > 1:
        X = X / 255.

    # Also obtain labels file (if provided - e.g. in deploy mode
    # we may not have labels...)
    if yName:
        Y = emlib.load_cube(yName, np.float32)

        if args.onlySlices:
            Y = Y[args.onlySlices, :, :]

        # Labels must be natural numbers (contiguous integers starting at 0)
        # because they are mapped to indices at the output of the network.
        # This next bit of code remaps the native y values to these indices.
        Y = emlib.fix_class_labels(Y, args.omitLabels)

        print('[emCNN]: yAll is %s' % str(np.unique(Y)))
        # Fraction of voxels with a non-negative label (presumably omitted
        # labels come back negative from fix_class_labels - TODO confirm).
        # Y.size is the ndarray element count.
        print('[emCNN]: will use %0.2f%% of volume' %
              (100. * np.sum(Y >= 0) / Y.size))

        Y = emlib.mirror_edges(Y, tileSize)
    else:
        Y = None

    return X, Y
def _load_data(xName, yName, args, tileSize):
    """Loads data sets and does basic preprocessing.

    The data volume is loaded as float32, optionally reduced to the slices
    in args.onlySlices, edge-mirrored with a border of tileSize and scaled
    by 1/255 when its maximum exceeds 1.  When yName is given, the label
    volume gets the same slice selection, is remapped by
    emlib.fix_class_labels(Y, args.omitLabels) and is edge-mirrored too.

    Parameters:
      xName    : name of the data volume (for emlib.load_cube)
      yName    : name of the labels volume, or a falsy value for "no labels"
      args     : object with attributes `onlySlices` and `omitLabels`
      tileSize : border size for emlib.mirror_edges

    Returns:
      (X, Y), with Y set to None when no labels were requested.

    Raises:
      AssertionError if X.shape[0] is not strictly smaller than both
      X.shape[1] and X.shape[2].
    """
    X = emlib.load_cube(xName, np.float32)

    # usually we expect fewer slices in Z than pixels in X or Y.
    # Make sure the dimensions look ok before proceeding.
    assert(X.shape[0] < X.shape[1])
    assert(X.shape[0] < X.shape[2])

    print('[emCNN]: data shape: %s' % str(X.shape))

    if args.onlySlices:
        X = X[args.onlySlices, :, :]

    X = emlib.mirror_edges(X, tileSize)

    # Scale data to live in [0 1].
    # I'm assuming original data is in [0 255]
    if np.max(X) > 1:
        X = X / 255.

    # Deploy mode may come without a labels file; signal that with None.
    if not yName:
        return X, None

    Y = emlib.load_cube(yName, np.float32)

    if args.onlySlices:
        Y = Y[args.onlySlices, :, :]

    # Labels must be natural numbers (contiguous integers starting at 0)
    # because they are mapped to indices at the output of the network.
    # This next bit of code remaps the native y values to these indices.
    Y = emlib.fix_class_labels(Y, args.omitLabels)

    print('[emCNN]: yAll is %s' % str(np.unique(Y)))
    # Share of voxels whose label is >= 0; the denominator is the total
    # element count, taken from the array itself (ndarray.size).
    # NOTE(review): negative labels look like the "omitted" marker - confirm
    # against emlib.fix_class_labels.
    pctUsed = 100. * np.sum(Y >= 0) / Y.size
    print('[emCNN]: will use %0.2f%% of volume' % pctUsed)

    Y = emlib.mirror_edges(Y, tileSize)

    return X, Y
def test_mirror_edges(self):
    """Check the output size and the mirrored content of emlib.mirror_edges
    on a random 3-D volume."""
    X = np.random.rand(10, 3, 3)
    b = 2  # b := border size
    Xm = emlib.mirror_edges(X, b)

    # make sure the result has the proper size: the first axis is
    # untouched, the other two grow by b on each side.
    # (unittest assertions are used so the checks survive `python -O` and
    # report both values on failure)
    self.assertEqual(Xm.shape[0], X.shape[0])
    self.assertEqual(Xm.shape[1], X.shape[1] + 2 * b)
    self.assertEqual(Xm.shape[2], X.shape[2] + 2 * b)

    # make sure the data looks reasonable: the last border column repeats
    # the first interior column, and the interior is the original data.
    self.assertTrue(np.all(Xm[:, :, b - 1] == Xm[:, :, b]))
    self.assertTrue(np.all(Xm[:, b:-b, b:-b] == X))
# Use the caller-supplied output name if there is one; otherwise write
# 'Yhat_<data file name>' next to the input data file.
if len(args.outFileName):
    outFileName = args.outFileName
else:
    outFileName = os.path.join(os.path.split(args.dataFileName)[0],
                               'Yhat_' + os.path.split(args.dataFileName)[-1])
print('[deploy]: output file will be: %s' % outFileName)

#----------------------------------------
# Load and preprocess data set
#----------------------------------------
X = emlib.load_tiff_data(args.dataFileName, np.float32)

# mirror edges of images so that every pixel in the original data set can act
# as a center pixel of some tile
borderSize = int(batchDim[2]/2)
X = emlib.mirror_edges(X, borderSize)

if len(args.evalSliceExpr):  # optional: pare down to a subset of slices
    # NOTE(security): eval() executes arbitrary code; this is only
    # acceptable while the expression comes from a trusted operator.
    idx = eval(args.evalSliceExpr)
    X = X[idx,:,:]
print('[deploy]: data shape: %s' % str(X.shape))

# Some pixels are trivial to classify based on their intensity.
# We don't need a CNN for these - skip them in training (and in deploy).
# Mask is True for pixels with args.lb <= intensity <= args.ub.
Mask = np.ones(X.shape, dtype=bool)
Mask[X > args.ub] = False
Mask[X < args.lb] = False
if np.any(Mask == 0):
    nz = np.sum(Mask==0)
    # Float arithmetic on purpose: with integer operands the division
    # truncates under Python 2 semantics and the percentage loses its
    # fractional part.
    print('[deploy]: bandpass mask is omitting %0.2f%% of the raw data' % (100.0 * nz / Mask.size))
# NOTE(security): eval() runs arbitrary code; the expressions evaluated in
# this section are assumed to come from a trusted command line.
omitLabels = eval(args.omitLabels)

# Drop the omitted labels, then map each remaining native label to its
# position in yAll.  Everything else keeps the default of -1, which is
# omitted from evaluation.
yAll = [y for y in yAll if y not in omitLabels]
Ytmp = -1 * np.ones(Y.shape, dtype=Y.dtype)
for yIdx, y in enumerate(yAll):
    Ytmp[Y == y] = yIdx
Y = Ytmp

print('[train]: yAll is %s' % str(yAll))
print('[train]: %d pixels will be omitted\n' % np.sum(Y == -1))

# Pad data and labels identically so that every pixel of the original
# volume can sit at the center of some tile.
borderSize = int(batchDim[2] / 2)
X = emlib.mirror_edges(X, borderSize)
Y = emlib.mirror_edges(Y, borderSize)

# Pick the slices used for training and for validation.
# NOTE(review): indexing with a sequence of indices normally yields copies
# rather than views, so this may use extra memory - confirm what the slice
# expressions evaluate to.
trainIdx = eval(args.trainSlicesExpr)
validIdx = eval(args.validSlicesExpr)
if not set(trainIdx).isdisjoint(set(validIdx)):
    raise RuntimeError('Training and validation slices are not disjoint!')

Xtrain, Ytrain = X[trainIdx, :, :], Y[trainIdx, :, :]
Xvalid, Yvalid = X[validIdx, :, :], Y[validIdx, :, :]
print('[train]: training data shape: %s' % str(Xtrain.shape))
outFileNameY = args.outFileNameY else: outFileNameY = os.path.join(os.path.split(args.dataFileName)[0], 'Yhat_' + os.path.split(args.dataFileName)[-1]) outFileNameX = args.outFileNameX print('[deploy]: probability output file: %s' % outFileNameY) print('[deploy]: features output file: %s' % outFileNameX) #---------------------------------------- # Load and preprocess data set #---------------------------------------- X = emlib.load_cube(args.dataFileName, np.float32) # mirror edges of images so that every pixel in the original data set can act # as a center pixel of some tile borderSize = int(batchDim[2]/2) X = emlib.mirror_edges(X, borderSize) if len(args.evalSliceExpr): # optional: pare down to a subset of slices idx = eval(args.evalSliceExpr) X = X[idx,:,:] print('[deploy]: data shape: %s' % str(X.shape)) # There may be reasons for not evaluating certain pixels. # The mask allows the caller to specify which pixels to omit. if len(args.maskFileName): Mask = emlib.load_cube(args.maskFileName, dtype=np.bool) Mask = emlib.mirror_edges(Mask, borderSize) else: Mask = np.ones(X.shape, dtype=np.bool) if np.any(Mask == 0):