def threshold(hfile, resultsfile):
    """Apply every global scikit-image thresholding filter to each image.

    Loads images/labels/keys from the hdf5 dataset HFILE, applies each
    ``skimage.filters.threshold_*`` method (skipping adaptive variants),
    and writes the binarized prediction to RESULTSFILE under
    ``/{key}/{method}``.

    Parameters
    ----------
    hfile : str
        Path to the input hdf5 dataset readable by ``data.load_dataset``.
    resultsfile : str
        Path to the hdf5 results file; created if it does not exist.
    """
    images, labels, keys = data.load_dataset(hfile)

    # Hoist filter discovery out of the per-image loop: the set of
    # thresholding methods exported by skimage.filters never changes.
    methods = [f for f in filters.__all__ if 'threshold' in f]

    for image, label, key in zip(images, labels, keys):
        for method in methods:
            if 'adaptive' in method:
                # skip adaptive thresholds for now: they yield a per-pixel
                # threshold surface rather than a single scalar.
                continue

            # Renamed from `threshold` to avoid shadowing this function's
            # own name with the filter callable.
            threshold_fn = getattr(filters, method)

            # do threshold
            pred = (image > threshold_fn(image)).astype(int)

            # save results. Mode 'a' means read/write if the file exists,
            # create otherwise — equivalent to the old isfile() 'r+'/'w'
            # selection, without the existence race.
            with h5py.File(resultsfile, 'a') as f:
                try:
                    g = f[key]
                except KeyError:
                    g = f.create_group(key)
                g[method] = pred
def crossval(hfile, resultsfile, crop):
    """Run LOOCV with reduced hypercolumn features and SGD with linear SVM loss.

    Read data from hdf5 HFILE with input images and annotations; CROP is
    forwarded as the ``cropbar`` argument of ``data.load_dataset``.  For
    each leave-one-out fold, predictions are written to RESULTSFILE under
    ``/{key}/validation`` (held-out image) and ``/{key}/train{fold}``
    (training images).
    """
    cv = LeaveOneOut()
    images, labels, keys = data.load_dataset(hfile, cropbar=crop)

    for train_idx, val_idx in cv.split(images):
        # LeaveOneOut yields exactly one held-out index per split; use the
        # scalar as the fold identifier.
        fold = val_idx[0]
        print('CV iteration {}'.format(fold))

        hc = hypercolumn.ReducedHyperColumn()
        clf = tensorsgd.TensorSGD()

        X_train = hc.fit(images[train_idx], verbose=True)
        clf.fit(X_train, labels[train_idx])
        train_pred = clf.predict(X_train)

        X_val = hc.predict(images[val_idx], verbose=True)
        val_pred = clf.predict(X_val)

        # Mode 'a': read/write if the file exists, create otherwise —
        # equivalent to the old isfile() 'r+'/'w' selection.
        with h5py.File(resultsfile, 'a') as f:
            # save validation predictions
            for pred, key in zip(val_pred, keys[val_idx]):
                try:
                    g = f[key]
                except KeyError:
                    g = f.create_group(key)
                g['validation'] = pred

            # save training predictions
            for pred, key in zip(train_pred, keys[train_idx]):
                try:
                    g = f[key]
                except KeyError:
                    g = f.create_group(key)
                # BUGFIX: this previously formatted the whole index array
                # (producing dataset names like 'train[3]'); use the scalar
                # fold id for clean 'train3'-style keys, consistent with the
                # fold id printed above.
                g['train{}'.format(fold)] = pred
def train_pixelnet(dataset, batchsize, npix, max_epochs, validation_steps, run_id, bottleneck):
    """Train a VGG16-hypercolumn PixelNet segmentation model for one CV run.

    Two training phases: (1) freeze the VGG backbone and train the PixelNet
    head, then (2) unfreeze everything and fine-tune at a lower learning
    rate.  Checkpoints and CSV logs go to ``models/crossval/{dataset}/runNN``.

    Parameters
    ----------
    dataset : str
        Either 'uhcs' or 'spheroidite'; selects class count and crop bar.
    batchsize : int
        Images per training step (used to derive steps_per_epoch).
    npix : int
        Unused here; kept for interface compatibility with callers.
    max_epochs : int
        Number of epochs for each training phase.
    validation_steps : int
        Validation batches evaluated per epoch.
    run_id : int
        Cross-validation run index; selects the validation set.
    bottleneck : bool
        Unused here; kept for interface compatibility with callers.

    Raises
    ------
    ValueError
        If ``dataset`` is not a recognized dataset name.
    """
    datadir = 'data'
    datafile = os.path.join(datadir, '{}.h5'.format(dataset))

    validation_set_path = os.path.join(datadir, '{}-validation-sets.json'.format(dataset))
    validation_set = data.load_validation_set(validation_set_path, run_id)

    if dataset == 'uhcs':
        nclasses = 4
        cropbar = 38
    elif dataset == 'spheroidite':
        nclasses = 2
        cropbar = None
    else:
        # Previously an unknown dataset fell through and left nclasses /
        # cropbar unbound, raising a confusing NameError later on.
        raise ValueError('unknown dataset: {!r}'.format(dataset))

    model_dir = os.path.join('models', 'crossval', dataset, 'run{:02d}'.format(run_id))
    os.makedirs(model_dir, exist_ok=True)

    images, labels, names = data.load_dataset(datafile, cropbar=cropbar)
    images = data.preprocess_images(images, equalize=True, tf=False)

    # add a channel axis (of size 1 since these are grayscale inputs),
    # then replicate to 3 channels to match VGG16's expected RGB input
    images = images[:, :, :, np.newaxis]
    images = np.repeat(images, 3, axis=-1)
    images = applications.vgg16.preprocess_input(images)

    # train/validation split
    train_idx, val_idx = data.validation_split(validation_set, names)
    ntrain = len(train_idx)
    X_train, y_train, names_train = images[train_idx], labels[train_idx], names[train_idx]
    X_val, y_val, names_val = images[val_idx], labels[val_idx], names[val_idx]

    # inverse-frequency class weights, sqrt-damped and L1-normalized
    inv_freq = y_train.size / np.bincount(y_train.flat)
    class_weights = np.squeeze(normalize(np.sqrt(inv_freq), order=1))
    # don't use alpha-balanced version of focal loss...
    # class_weights = None
    focus_param = 2.0

    # write the validation set to the model directory as well...
    with open(os.path.join(model_dir, 'validation_set.txt'), 'w') as vf:
        for name in names_val:
            print(name, file=vf)

    N, h, w, _ = images.shape
    steps_per_epoch = int(ntrain / batchsize)
    print('steps_per_epoch: {}'.format(steps_per_epoch))

    # BUGFIX: max_epochs and validation_steps were previously clobbered with
    # hard-coded values (25 and 10), silently ignoring the caller's arguments.
    # The function now honors its parameters.

    base_model = vgg.fully_conv_model()
    layernames = [
        'block1_conv2_relu', 'block2_conv2_relu', 'block3_conv3_relu',
        'block4_conv3_relu', 'block5_conv3_relu', 'fc2_relu'
    ]
    hc = hypercolumn.build_model(base_model, layernames, batchnorm=True,
                                 mode='sparse', relu=False)
    model = pixelnet.build_model(hc, nclasses=nclasses, width=1024,
                                 mode='sparse', dropout_rate=0.1, l2_reg=0.0)

    opt = adamw.AdamW(lr=1e-3, weight_decay=5e-4, amsgrad=True)

    # phase 1: freeze the VGG backbone; train only the PixelNet head
    for layer in base_model.layers:
        layer.trainable = False

    # model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])
    model.compile(
        loss=losses.focal_crossentropy_loss(focus_param=focus_param, class_weights=class_weights),
        optimizer=opt,
        metrics=['acc']
    )

    csv_logger = callbacks.CSVLogger(os.path.join(model_dir, 'training-1.log'))
    checkpoint = callbacks.ModelCheckpoint(
        os.path.join(model_dir, 'weights.{epoch:03d}-{val_loss:.4f}.hdf5'),
        save_best_only=False, save_weights_only=True, period=25
    )

    training_data = px_utils.random_pixel_samples(
        X_train, y_train, nclasses=nclasses, replace_samples=False,
        horizontal_flip=True, vertical_flip=True, rotation_range=360,
        zoom_range=0.5, intensity_shift=0.05
    )

    f = model.fit_generator(
        training_data,
        steps_per_epoch,
        epochs=max_epochs,
        callbacks=[csv_logger, checkpoint],
        validation_data=px_utils.random_pixel_samples(X_val, y_val, nclasses=nclasses,
                                                      replace_samples=False),
        validation_steps=validation_steps,
    )

    # phase 2: fine-tune the whole network at a lower learning rate
    for layer in base_model.layers:
        layer.trainable = True

    opt = adamw.AdamW(lr=1e-5, weight_decay=5e-4, amsgrad=True)
    model.compile(
        loss=losses.focal_crossentropy_loss(focus_param=focus_param, class_weights=class_weights),
        optimizer=opt,
        metrics=['acc']
    )

    csv_logger = callbacks.CSVLogger(os.path.join(model_dir, 'finetune-1.log'))
    checkpoint = callbacks.ModelCheckpoint(
        os.path.join(model_dir, 'weights-finetune.{epoch:03d}-{val_loss:.4f}.hdf5'),
        save_best_only=False, save_weights_only=True, period=25
    )

    # reuse the same augmenting generator for the fine-tuning phase
    f = model.fit_generator(
        training_data,
        steps_per_epoch,
        epochs=max_epochs,
        callbacks=[csv_logger, checkpoint],
        validation_data=px_utils.random_pixel_samples(X_val, y_val, nclasses=nclasses,
                                                      replace_samples=False),
        validation_steps=validation_steps,
    )
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping
import sys

# make the local package and the sibling uhcs-segment checkout importable
sys.path.append(os.getcwd())
sys.path.append('../uhcs-segment')
from pixelnet.pixelnet import pixelnet_model
from pixelnet.utils import random_pixel_samples
from uhcsseg import data

# suppress some of the noisier tensorflow log messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

if __name__ == '__main__':
    # driver script: load the UHCS micrograph dataset and set up
    # pixel-sampling hyperparameters for PixelNet training
    datafile = '../uhcs-segment/data/uhcs.h5'
    # cropbar=38 removes the 38-pixel scale bar from the image bottoms
    images, labels, names = data.load_dataset(datafile, cropbar=38)
    print(images.shape)
    images = data.preprocess_images(images)

    # add channel axis
    images = images[:, :, :, np.newaxis]
    N, h, w, _ = images.shape

    batchsize = 4
    ntrain = 20
    npix = 2048
    nclasses = 4
    # one "epoch" covers every pixel of the training images once
    # NOTE(review): true division leaves this a float; downstream fit calls
    # presumably need an int — confirm a cast happens later.
    steps_per_epoch = ntrain * h * w / (batchsize * npix)
    # steps_per_epoch = 100
    # NOTE(review): this print omits the computed value; the script also
    # appears truncated here — verify against the full file.
    print('steps_per_epoch:')
def train_pixelnet(dataset, batchsize, npix, max_epochs, validation_steps, run_id, bottleneck):
    """Train a PixelNet segmentation model for one cross-validation run and
    evaluate the final checkpoint on the train and validation splits.

    Checkpoints and CSV logs are written to
    ``models/crossval/{dataset}/runNN``; per-class IU and accuracy are
    printed for both splits at the end.

    Parameters
    ----------
    dataset : str
        Either 'uhcs' or 'spheroidite'; selects class count and crop bar.
    batchsize : int
        Unused directly here (steps_per_epoch is hard-coded to 5);
        kept for interface compatibility.
    npix : int
        Unused here; kept for interface compatibility with callers.
    max_epochs : int
        Maximum training epochs.
    validation_steps : int
        Validation batches evaluated per epoch.
    run_id : int
        Cross-validation run index; selects the validation set.
    bottleneck : bool
        Forwarded to ``pixelnet_model``.

    Raises
    ------
    ValueError
        If ``dataset`` is not a recognized dataset name.
    """
    datadir = 'data'
    datafile = os.path.join(datadir, '{}.h5'.format(dataset))

    validation_set_path = os.path.join(
        datadir, '{}-validation-sets.json'.format(dataset))
    validation_set = data.load_validation_set(validation_set_path, run_id)

    if dataset == 'uhcs':
        nclasses = 4
        cropbar = 38
    elif dataset == 'spheroidite':
        nclasses = 2
        cropbar = None
    else:
        # Previously an unknown dataset fell through and left nclasses /
        # cropbar unbound, raising a confusing NameError later on.
        raise ValueError('unknown dataset: {!r}'.format(dataset))

    model_dir = os.path.join('models', 'crossval', dataset,
                             'run{:02d}'.format(run_id))
    os.makedirs(model_dir, exist_ok=True)

    images, labels, names = data.load_dataset(datafile, cropbar=cropbar)
    images = data.preprocess_images(images)

    # add a channel axis (of size 1 since these are grayscale inputs)
    images = images[:, :, :, np.newaxis]

    # train/validation split
    train_idx, val_idx = data.validation_split(validation_set, names)
    ntrain = len(train_idx)
    X_train, y_train, names_train = images[train_idx], labels[
        train_idx], names[train_idx]
    X_val, y_val, names_val = images[val_idx], labels[val_idx], names[val_idx]

    # inverse-frequency class weights over the labels seen in training
    # NOTE(review): keras class_weight conventionally takes a dict; confirm
    # the installed version accepts an ndarray here.
    class_weight = np.array([
        y_train.size / np.sum(y_train == label) for label in np.unique(y_train)
    ])
    print(class_weight)

    # write the validation set to the model directory as well...
    with open(os.path.join(model_dir, 'validation_set.txt'), 'w') as vf:
        for name in names_val:
            print(name, file=vf)

    N, h, w, _ = images.shape
    steps_per_epoch = 5
    # steps_per_epoch = 100
    # steps_per_epoch = ntrain * h * w / (batchsize*npix)
    print('steps_per_epoch: {}'.format(steps_per_epoch))

    opt = optimizers.Adam(lr=1e-3)
    model = pixelnet_model(nclasses=nclasses, bottleneck=bottleneck)
    model.compile(loss='categorical_crossentropy', optimizer=opt,
                  metrics=['acc'])

    csv_logger = CSVLogger(os.path.join(model_dir, 'training-1.log'))
    checkpoint = ModelCheckpoint(os.path.join(
        model_dir, 'weights.{epoch:03d}-{val_loss:.4f}.hdf5'),
        save_best_only=False, save_weights_only=True, period=10)
    # NOTE(review): an EarlyStopping(monitor='val_loss', patience=3) callback
    # was previously constructed here but never added to callbacks=[...], so
    # it had no effect; the dead construction was removed.  Re-add it to the
    # callbacks list below if early stopping is actually desired.

    training_data = random_pixel_samples(X_train, y_train, nclasses=nclasses,
                                         replace_samples=False,
                                         rotation_range=360,
                                         zoom_range=0.5,
                                         horizontal_flip=True,
                                         vertical_flip=True,
                                         intensity_shift=0.05)

    # note: keras/engine/training.py:L132 --> is not None
    f = model.fit_generator(training_data,
                            steps_per_epoch,
                            epochs=max_epochs,
                            callbacks=[csv_logger, checkpoint],
                            validation_data=random_pixel_samples(
                                X_val, y_val, nclasses=nclasses,
                                replace_samples=False),
                            validation_steps=validation_steps,
                            class_weight=class_weight)

    # load best model and evaluate
    # sort by epoch -- use with ModelCheckpoint(..., save_best_only=True)
    # file path format should be 'weights.{epoch}...'
    weights_files = glob.glob(os.path.join(model_dir, 'weights*.hdf5'))
    best_weights = sorted(weights_files)[-1]

    # re-instantiate model because of keras requirement that tensors
    # have the same shape at train and test time
    model = pixelnet_model(nclasses=nclasses, inference=True,
                           bottleneck=bottleneck)
    model.load_weights(best_weights)

    for X, y in [(X_train, y_train), (X_val, y_val)]:
        # run with batch_size=1 for inference due to dense feature upsampling
        p_validate = model.predict(X, batch_size=1)
        pred = np.argmax(p_validate, axis=-1)

        # measure accuracy over the whole split
        print('accuracy: {}'.format(perf.accuracy(pred, y)))
        print('IU_avg: {}'.format(perf.IU_avg(pred, y)))
        print('IU')
        for c in range(nclasses):
            iu = perf.IU(pred, y, c)
            print('IU({}): {}'.format(c, iu))