def _load_data(xName, yName, tileRadius, onlySlices, omitLabels=None):
    """Loads data sets and does basic preprocessing.
    """
    X = emlib.load_cube(xName, np.float32)

    # usually we expect fewer slices in Z than pixels in X or Y.
    # Make sure the dimensions look ok before proceeding.
    assert X.shape[0] < X.shape[1]
    assert X.shape[0] < X.shape[2]

    if onlySlices:
        X = X[onlySlices, :, :]
    print('[emCNN]: data shape: %s' % str(X.shape))

    X = emlib.mirror_edges(X, tileRadius)

    # Scale data to live in [0 1].
    # *** ASSUMPTION *** original data is in [0 255]
    if np.max(X) > 1:
        X = X / 255.
    print('[emCNN]: data min/max: %0.2f / %0.2f' % (np.min(X), np.max(X)))

    # Also obtain labels file (if provided - e.g. in deploy mode
    # we may not have labels...)
    if yName:
        Y = emlib.load_cube(yName, np.float32)
        if onlySlices:
            Y = Y[onlySlices, :, :]
        print('[emCNN]: labels shape: %s' % str(Y.shape))

        # ** ASSUMPTION **: special-case code for membrane detection / ISBI volume
        yAll = np.unique(Y)
        yAll.sort()
        if (len(yAll) == 2) and (yAll[0] == 0) and (yAll[1] == 255):
            print('[emCNN]: ISBI-style labels detected. converting 0->1, 255->0')
            Y[Y == 0] = 1     # membrane
            Y[Y == 255] = 0   # non-membrane

        # Labels must be natural numbers (contiguous integers starting at 0)
        # because they are mapped to indices at the output of the network.
        # This next bit of code remaps the native y values to these indices.
        omitLabels, pctOmitted = _omit_labels(Y, omitLabels)
        Y = emlib.fix_class_labels(Y, omitLabels).astype(np.int32)

        print('[emCNN]: yAll is %s' % str(np.unique(Y)))
        print('[emCNN]: will use %0.2f%% of volume' % (100.0 - pctOmitted))

        Y = emlib.mirror_edges(Y, tileRadius)
        return X, Y
    else:
        return X
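# Minimal usage sketch for _load_data (the file names and slice subset are
# hypothetical; assumes emlib is importable and the raw volume follows the
# [0 255] convention noted above):
#
#   Xtrain, Ytrain = _load_data('train_em.tif', 'train_labels.tif',
#                               tileRadius=32, onlySlices=range(0, 20))
#   Xdeploy = _load_data('deploy_em.tif', None, tileRadius=32, onlySlices=None)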
def _load_data(xName, yName, args, tileSize):
    """Loads data sets and does basic preprocessing.
    """
    X = emlib.load_cube(xName, np.float32)

    # usually we expect fewer slices in Z than pixels in X or Y.
    # Make sure the dimensions look ok before proceeding.
    assert X.shape[0] < X.shape[1]
    assert X.shape[0] < X.shape[2]
    print('[emCNN]: data shape: %s' % str(X.shape))

    if args.onlySlices:
        X = X[args.onlySlices, :, :]
    X = emlib.mirror_edges(X, tileSize)

    # Scale data to live in [0 1].
    # I'm assuming original data is in [0 255]
    if np.max(X) > 1:
        X = X / 255.

    # Also obtain labels file (if provided - e.g. in deploy mode
    # we may not have labels...)
    if yName:
        Y = emlib.load_cube(yName, np.float32)
        if args.onlySlices:
            Y = Y[args.onlySlices, :, :]

        # Labels must be natural numbers (contiguous integers starting at 0)
        # because they are mapped to indices at the output of the network.
        # This next bit of code remaps the native y values to these indices.
        Y = emlib.fix_class_labels(Y, args.omitLabels)

        print('[emCNN]: yAll is %s' % str(np.unique(Y)))
        print('[emCNN]: will use %0.2f%% of volume' %
              (100. * np.sum(Y >= 0) / numel(Y)))

        Y = emlib.mirror_edges(Y, tileSize)
    else:
        Y = None

    return X, Y
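# numel() above is a project helper (a Matlab-style element count, presumably
# defined alongside these scripts); a stand-in consistent with its usage here:
#
#   def numel(a):
#       """Total number of elements in a numpy array."""
#       return a.size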
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setFormatter(logging.Formatter(
    '[%(asctime)s:%(name)s:%(levelname)s] %(message)s'))
logger.addHandler(ch)

args = _train_mode_args()

# Use command line args to override default args for train_model().
# Note to self: the first co_argcount varnames are the
# function's parameters.
validArgs = train_model.__code__.co_varnames[0:train_model.__code__.co_argcount]
cmdLineArgs = dict_subset(vars(args), validArgs)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# load training and validation volumes
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Xtrain = emlib.load_cube(args.emTrainFile, addChannel=True)
Ytrain = emlib.load_cube(args.labelsTrainFile, addChannel=False)
Xvalid = emlib.load_cube(args.emValidFile, addChannel=True)
Yvalid = emlib.load_cube(args.labelsValidFile, addChannel=False)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# do it
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
model = train_model(Xtrain, Ytrain, Xvalid, Yvalid,
                    log=logger, **cmdLineArgs)

# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
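# The co_varnames/co_argcount trick above filters the argparse namespace down
# to keyword arguments that train_model() actually accepts. dict_subset is a
# project helper; a plausible stand-in (an assumption, not necessarily the
# project's code) plus a tiny self-check:
#
#   def dict_subset(d, keys):
#       """Returns the subset of dict d whose keys appear in keys."""
#       return {k: d[k] for k in keys if k in d}
#
#   def f(x, y=2):
#       return x + y
#
#   kw = dict_subset({'x': 1, 'y': 5, 'junk': 0},
#                    f.__code__.co_varnames[:f.__code__.co_argcount])
#   assert f(**kw) == 6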
    return args


if __name__ == "__main__":
    # setup logging
    logger = logging.getLogger("deploy_model")
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setFormatter(logging.Formatter(
        '[%(asctime)s:%(name)s:%(levelname)s] %(message)s'))
    logger.addHandler(ch)

    args = _deploy_mode_args()

    # Use command line args to override default args for deploy_model().
    # Note to self: the first co_argcount varnames are the
    # function's parameters.
    from train import dict_subset
    validArgs = deploy_model.__code__.co_varnames[0:deploy_model.__code__.co_argcount]
    cmdLineArgs = dict_subset(vars(args), validArgs)

    # load data volume
    X = emlib.load_cube(args.emFile)

    # do it
    Prob = deploy_model(X, args.weightFile, log=logger, **cmdLineArgs)

# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
def main(args):
    tileRadius = int(np.floor(args.tileSize / 2))  # int: used for array indexing below
    nMiniBatch = 1000  # here, a "mini-batch" specifies LMDB transaction size

    # make sure we don't clobber an existing output
    if os.path.exists(args.outDir):
        raise RuntimeError('Output path "%s" already exists; please move out of the way and try again' % args.outDir)

    # load the data volumes (EM image and labels, if any)
    print('[make_lmdb]: loading EM data file: %s' % args.emFileName)
    X = emlib.load_cube(args.emFileName, np.float32)

    if args.labelsFileName:
        print('[make_lmdb]: loading labels file: %s' % args.labelsFileName)
        Y = emlib.load_cube(args.labelsFileName, np.float32)
        Y = emlib.fix_class_labels(Y, eval(args.omitLabels))
        assert Y.shape == X.shape
    else:
        print('[make_lmdb]: no labels file; assuming this is a test volume')
        Y = np.zeros(X.shape)

    # usually we expect fewer slices in Z than pixels in X or Y.
    # Make sure the dimensions look ok before proceeding.
    assert X.shape[0] < X.shape[1]
    assert X.shape[0] < X.shape[2]

    # Identify the subset of the data to use for training.
    # (default is to use it all)
    if len(args.slicesExpr):
        sliceIdx = eval(args.slicesExpr)
        X = X[sliceIdx, :, :]  # python puts the z dimension first...
        Y = Y[sliceIdx, :, :]
    X = X.astype(np.uint8)  # critical!! otherwise, Caffe just flails...

    print('[make_lmdb]: EM volume shape: %s' % str(X.shape))
    print('[make_lmdb]: yAll is %s' % np.unique(Y))
    print('[make_lmdb]: %0.2f%% pixels will be omitted' % (100.0 * np.sum(Y == -1) / numel(Y)))
    print('[make_lmdb]: writing results to: %s' % args.outDir)
    print('')
    sys.stdout.flush()

    # Create the output database.
    # Multiply the actual size by a fudge factor to get a safe upper bound.
    dbSize = (X.nbytes * args.tileSize * args.tileSize + Y.nbytes) * 10
    env = lmdb.open(args.outDir, map_size=dbSize)

    # Extract all possible tiles.
    # This corresponds to extracting one "epoch" worth of tiles.
    tileId = 0
    lastChatter = -1
    tic = time.time()
    yCnt = np.zeros(sum(np.unique(Y) >= 0))

    if np.any(Y > 0):
        # generates a balanced training data set (subsamples and shuffles)
        it = emlib.stratified_interior_pixel_generator(Y, tileRadius, nMiniBatch,
                                                       omitLabels=[-1])
    else:
        # enumerates all possible tiles in order (no shuffling)
        it = emlib.interior_pixel_generator(X, tileRadius, nMiniBatch)

    for Idx, epochPct in it:
        # respect upper bound on number of examples
        if tileId > args.maxNumExamples:
            print('[make_lmdb]: stopping at %d (max number of examples reached)\n' % (tileId - 1))
            break

        # Each mini-batch will be added to the database as a single transaction.
        with env.begin(write=True) as txn:
            # Translate indices Idx -> tiles Xi and labels yi.
            for jj in range(Idx.shape[0]):
                yi = int(Y[Idx[jj, 0], Idx[jj, 1], Idx[jj, 2]])
                a = Idx[jj, 1] - tileRadius
                b = Idx[jj, 1] + tileRadius + 1
                c = Idx[jj, 2] - tileRadius
                d = Idx[jj, 2] + tileRadius + 1
                Xi = X[Idx[jj, 0], a:b, c:d]
                assert Xi.shape == (args.tileSize, args.tileSize)

                datum = caffe.proto.caffe_pb2.Datum()
                datum.channels = 1
                datum.height = Xi.shape[0]
                datum.width = Xi.shape[1]
                datum.data = Xi.tostring()  # use tobytes() for newer numpy
                datum.label = yi
                strId = '{:08}'.format(tileId)

                txn.put(strId.encode('ascii'), datum.SerializeToString())
                tileId += 1
                yCnt[yi] += 1

                # check early termination conditions
                if tileId > args.maxNumExamples:
                    break

        # periodic progress report (roughly once per percent of the epoch)
        if np.floor(epochPct) > lastChatter:
            print('[make_lmdb] %% %0.2f done (%0.2f min; yCnt=%s)' % ((100 * epochPct), (time.time() - tic) / 60, str(yCnt)))
            lastChatter = epochPct
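# A small read-back sketch to sanity-check the database written above (a
# minimal example assuming the standard lmdb and Caffe protobuf APIs; the
# 'train_lmdb' path is hypothetical):
#
#   import lmdb
#   import numpy as np
#   import caffe
#
#   env = lmdb.open('train_lmdb', readonly=True)
#   with env.begin() as txn:
#       raw = txn.get(b'00000000')  # keys are zero-padded tile ids
#       datum = caffe.proto.caffe_pb2.Datum()
#       datum.ParseFromString(raw)
#       tile = np.frombuffer(datum.data, dtype=np.uint8)
#       tile = tile.reshape(datum.channels, datum.height, datum.width)
#       print(tile.shape, datum.label)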
netParam = caffe_pb2.NetParameter()
text_format.Merge(open(netFn).read(), netParam)
batchDim = emlib.infer_data_dimensions(netFn)

if len(args.outFileNameY):
    outFileNameY = args.outFileNameY
else:
    outFileNameY = os.path.join(os.path.split(args.netFile)[0],
                                'Yhat_' + os.path.split(args.dataFileName)[-1])

#----------------------------------------
# Load and preprocess data set
#----------------------------------------
X = emlib.load_cube(args.dataFileName, np.float32)

# mirror edges so that every pixel in the original data set can act
# as the center pixel of some tile
borderSize = int(batchDim[2] / 2)
X = emlib.mirror_edges(X, borderSize)

if len(args.evalSliceExpr):  # optional: pare down to a subset of slices
    idx = eval(args.evalSliceExpr)
    X = X[idx, :, :]

# pixels that are sufficiently bright are trivial to classify
# and can be omitted.
Mask = np.ones(X.shape, dtype=np.bool)
Mask[X > args.maxBrightness] = False
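# For intuition: emlib.mirror_edges presumably reflects each slice at its
# borders so that a (tileSize x tileSize) window centered on any original
# pixel stays in bounds. A rough standalone equivalent (an assumption about
# emlib's behavior, not its actual code):
#
#   import numpy as np
#
#   def mirror_edges_sketch(X, b):
#       """Pads a (z, y, x) volume by b pixels of reflection in y and x."""
#       return np.pad(X, ((0, 0), (b, b), (b, b)), mode='symmetric')
#
#   X = np.arange(16, dtype=np.float32).reshape(1, 4, 4)
#   assert mirror_edges_sketch(X, 2).shape == (1, 8, 8)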
batchDim = emlib.infer_data_dimensions(netFn)
print('[deploy]: batch shape: %s' % str(batchDim))

if len(args.outFileNameY):
    outFileNameY = args.outFileNameY
else:
    outFileNameY = os.path.join(os.path.split(args.dataFileName)[0],
                                'Yhat_' + os.path.split(args.dataFileName)[-1])
outFileNameX = args.outFileNameX
print('[deploy]: probability output file: %s' % outFileNameY)
print('[deploy]: features output file: %s' % outFileNameX)

#----------------------------------------
# Load and preprocess data set
#----------------------------------------
X = emlib.load_cube(args.dataFileName, np.float32)

# mirror edges of images so that every pixel in the original data set can
# act as a center pixel of some tile
borderSize = int(batchDim[2] / 2)
X = emlib.mirror_edges(X, borderSize)

if len(args.evalSliceExpr):  # optional: pare down to a subset of slices
    idx = eval(args.evalSliceExpr)
    X = X[idx, :, :]
print('[deploy]: data shape: %s' % str(X.shape))

# There may be reasons for not evaluating certain pixels.
# The mask allows the caller to specify which pixels to omit.
if len(args.maskFileName):
    Mask = emlib.load_cube(args.maskFileName, dtype=np.bool)
    args.trainSlices = eval(args.trainSlices)
    args.validSlices = eval(args.validSlices)
    args.testSlices = eval(args.testSlices)
    return args


if __name__ == "__main__":
    args = get_args()

    #outDir = os.path.split(args.dataFileName)[0]
    if not os.path.isdir(args.outDir):
        os.mkdir(args.outDir)

    X = emlib.load_cube(args.dataFileName, np.uint8)
    Y = emlib.load_cube(args.labelsFileName, np.uint8)

    # remap Y labels from ISBI convention to membrane-vs-non-membrane
    Y[Y == 0] = 1    # membrane
    Y[Y == 255] = 0  # non-membrane

    # change type of Y so we can use -1 as a value.
    Y = Y.astype(np.int8)

    Xtrain = X[args.trainSlices, :, :]
    Ytrain = Y[args.trainSlices, :, :]
    Xvalid = X[args.validSlices, :, :]
    Yvalid = Y[args.validSlices, :, :]
    Xtest = X[args.testSlices, :, :]
    Ytest = Y[args.testSlices, :, :]

    # brightness thresholding
    thresh = mquantiles(np.concatenate((Xtrain[Ytrain == 1], Xvalid[Yvalid == 1])),
                        args.brightQuant)
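# mquantiles comes from scipy.stats.mstats and returns the requested empirical
# quantile(s); here it turns the membrane-pixel intensity distribution into a
# brightness cutoff. A toy illustration (the values and 0.97 are made up):
#
#   import numpy as np
#   from scipy.stats.mstats import mquantiles
#
#   pix = np.array([10, 20, 30, 40, 200, 220, 240], dtype=np.float32)
#   thresh = mquantiles(pix, 0.97)[0]  # e.g. brightQuant = 0.97
#   bright = pix > thresh              # pixels brighter than the cutoff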
batchDim = emlib.infer_data_dimensions(netFn)
print('[train]: batch shape: %s' % str(batchDim))

if len(args.snapPrefix):
    outDir = args.snapPrefix
else:
    outDir = str(solverParam.snapshot_prefix)  # unicode -> str
if not os.path.isdir(outDir):
    os.mkdir(outDir)

#----------------------------------------
# Load and preprocess data set
#----------------------------------------
print('[train]: loading file: %s' % args.trainFileName)
X = emlib.load_cube(args.trainFileName, np.float32)
print('[train]: loading file: %s' % args.labelsFileName)
Y = emlib.load_cube(args.labelsFileName, np.float32)

# usually we expect fewer slices in Z than pixels in X or Y.
# Make sure the dimensions look ok before proceeding.
assert X.shape[0] < X.shape[1]
assert X.shape[0] < X.shape[2]

# Class labels must be natural numbers (contiguous integers starting at 0)
# because they are mapped to indices at the output of the network.
# This next bit of code remaps the native y values to these indices.
yAll = np.sort(np.unique(Y))
omitLabels = eval(args.omitLabels)
yAll = [y for y in yAll if y not in omitLabels]
Ytmp = -1 * np.ones(Y.shape, dtype=Y.dtype)  # default label is -1, which is omitted from evaluation
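# The remapping continues past this excerpt; end to end it behaves roughly
# like the following self-contained sketch (illustrative, not the project's
# exact code): omitted labels map to -1 and the rest to 0, 1, 2, ...
#
#   import numpy as np
#
#   def remap_labels_sketch(Y, omit=()):
#       yAll = [y for y in np.sort(np.unique(Y)) if y not in omit]
#       Ytmp = -1 * np.ones(Y.shape, dtype=np.int32)
#       for yIdx, y in enumerate(yAll):
#           Ytmp[Y == y] = yIdx
#       return Ytmp
#
#   Y = np.array([[0, 128, 255], [255, 128, 0]])
#   print(remap_labels_sketch(Y, omit=(128,)))
#   # -> [[ 0 -1  1]
#   #     [ 1 -1  0]]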
text_format.Merge(open(netFn).read(), netParam)
batchDim = emlib.infer_data_dimensions(netFn)
print('[train]: batch shape: %s' % str(batchDim))

if len(args.snapPrefix):
    outDir = args.snapPrefix
else:
    outDir = str(solverParam.snapshot_prefix)  # unicode -> str
if not os.path.isdir(outDir):
    os.mkdir(outDir)

#----------------------------------------
# Load and preprocess data set
#----------------------------------------
X = emlib.load_cube(args.trainFileName, np.float32)
Y = emlib.load_cube(args.labelsFileName, np.float32)

# Class labels must be natural numbers (contiguous integers starting at 0)
# because they are mapped to indices at the output of the network.
# This next bit of code remaps the native y values to these indices.
yAll = np.sort(np.unique(Y))
Yhat = np.zeros(Y.shape, dtype=Y.dtype)
for yIdx, y in enumerate(yAll):
    Yhat[Y == y] = yIdx
Y = Yhat

# mirror edges of images so that every pixel in the original data set can
# act as a center pixel of some tile
borderSize = int(batchDim[2] / 2)
X = emlib.mirror_edges(X, borderSize)