def view_mistakes():
    """Display the 'myval' images on which the binary model disagrees with the labels.

    Runs get_decisions() with the 'binary13b' model, derives a 0/1 label per
    image from its salt pixel count, packs every image whose label differs
    from the stored decision to the front of the image array, prints how many
    there were, and opens a scrollable matplotlib viewer over them.
    """
    foldername = 'myval'
    model_dir = '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary13b'
    id_to_pred = get_decisions(foldername, model_dir)

    pattern = ('/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/%s/images/*.png'
               % foldername)
    fileids = sb.clean_glob(glob.glob(pattern))

    # To inspect the label masks instead of the seismic images, load
    # sd.get_salt_labels(folder='myval') reshaped to (n, 101, 101) here.
    imgs = sd.get_salt_images(folder='myval')
    val_pix_num = sd.salt_pixel_num(folder='myval')
    # Ground truth: 1 when the image has any salt pixel, else 0.
    valY = np.array(val_pix_num > 0).astype(int)

    n_mistakes = 0
    for position, filename in enumerate(fileids):
        key = os.path.splitext(filename)[0]
        if valY[position] != id_to_pred[key]:
            # Compact in place: n_mistakes never exceeds position, so the slot
            # being overwritten has already been examined.
            imgs[n_mistakes, :, :] = imgs[position, :, :]
            n_mistakes += 1
    print('made %i mistakes' % n_mistakes)

    # Two swaps move the leading (sample) axis to position 2, which is the
    # axis order handed to ScrollThruPlot.
    wrong = imgs[:n_mistakes, :, :]
    X = np.swapaxes(wrong, 0, 2)
    X = np.swapaxes(X, 0, 1).astype(float)

    fig, ax = plt.subplots(1, 1)
    tracker = ScrollThruPlot(ax, X, fig)
    fig.canvas.mpl_connect('scroll_event', tracker.onscroll)
    plt.show()
def eval_masks(
        outpath='/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/predictions/myval/'
):
    """Predict a mask for every 'myval' image and save each one as a grayscale PNG.

    Prediction runs in two passes: a "head" pass over the largest prefix that
    is a whole number of batches, then a "tail" pass over the last
    ``batch_size`` images to pick up the remainder. Presumably this is done
    because model_fn expects full batches -- TODO confirm.

    The pass sizes are derived from the number of files and the module-level
    ``batch_size`` instead of being hard-coded for a 404-image set.

    Args:
        outpath: Prefix that each file name is appended to by plain string
            concatenation, so a directory must include its trailing slash.
    """
    valX = get_salt_images(folder='myval')
    fileids = sb.clean_glob(
        glob.glob(
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/myval/images/*.png'
        ))
    model_dir = '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binarypix1'
    model = tf.estimator.Estimator(model_fn, model_dir=model_dir)

    # NOTE(review): assumes fileids and valX list the same images in the same
    # order -- confirm against get_salt_images / sb.clean_glob.
    setsize = len(fileids)
    headsz = (setsize // batch_size) * batch_size

    def save_mask(filename, prediction):
        """Write one predicted mask to outpath + filename."""
        pred = np.array(prediction['mask']).reshape((101, 101))
        plt.imsave(outpath + filename, pred, cmap=cm.gray)

    if headsz:
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[:headsz, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = model.predict(input_fn)
        for file_i, prediction in enumerate(tqdm(gen, total=headsz)):
            save_mask(fileids[file_i], prediction)

    # now get the tail
    if setsize > headsz:
        # Clamp at 0 so a set smaller than one batch cannot yield negative
        # indices that would wrap around fileids.
        tail_start = max(setsize - batch_size, 0)
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[tail_start:, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = model.predict(input_fn)
        for file_i, prediction in enumerate(gen):
            idx = tail_start + file_i
            # The tail batch overlaps the head; only the remainder is new.
            if idx >= headsz:
                save_mask(fileids[idx], prediction)
def kaggle_test(
        outpath='/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/predictions/'
):
    """Write a Kaggle submission CSV (id, rle_mask) for the 'test' set.

    Each predicted mask is reshaped to 101x101, transposed, flattened,
    multiplied by the stored per-image binary decision for that file id and
    run-length encoded with myrlestring().

    Prediction runs in two passes: a "head" pass over the largest prefix that
    is a whole number of batches, then a "tail" pass over the last
    ``batch_size`` images, from which only rows not already written are
    emitted. Pass sizes are derived from the number of files and the
    module-level ``batch_size`` instead of being hard-coded for an
    18000-image set.

    The CSV is opened in write mode: the header is written unconditionally,
    so appending on a rerun would produce a second header and duplicate ids.

    Args:
        outpath: Prefix that 'binarypix2.csv' is appended to by plain string
            concatenation, so a directory must include its trailing slash.
    """
    testX = get_salt_images(folder='test')
    fileids = sb.clean_glob(
        glob.glob(
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/test/images/*.png'
        ))
    model_dir = '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binarypix1'
    model = tf.estimator.Estimator(model_fn, model_dir=model_dir)

    # NOTE(review): assumes fileids and testX list the same images in the same
    # order -- confirm against get_salt_images / sb.clean_glob.
    setsize = len(fileids)
    headsz = (setsize // batch_size) * batch_size

    # The file holds a dict saved with np.save (a pickled 0-d object array);
    # NumPy >= 1.16.3 refuses to load it unless allow_pickle=True. Only load
    # files produced by this project: unpickling can execute arbitrary code.
    id_to_pred = np.load(
        '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary1/test_bin_pred.npy',
        allow_pickle=True).tolist()

    def write_row(fd, filename, prediction):
        """Append one 'id,rle' line for the given file name and prediction."""
        fileid = os.path.splitext(filename)[0]
        # Transpose before flattening so pixels are enumerated column by column.
        flat = np.array(prediction['mask']).reshape(
            (101, 101)).transpose().reshape(101**2)
        pred = flat * id_to_pred[fileid]
        fd.write('%s,%s\n' % (fileid, myrlestring(pred)))

    with open(outpath + 'binarypix2.csv', 'w') as fd:
        fd.write('id,rle_mask\n')

        if headsz:
            input_fn = tf.estimator.inputs.numpy_input_fn(
                x={'images': testX[:headsz, :, :, :]},
                batch_size=batch_size,
                shuffle=False)
            gen = model.predict(input_fn)
            for file_i, prediction in enumerate(tqdm(gen, total=headsz)):
                write_row(fd, fileids[file_i], prediction)

        # now get the tail
        if setsize > headsz:
            # Clamp at 0 so a set smaller than one batch cannot yield negative
            # indices that would wrap around fileids.
            tail_start = max(setsize - batch_size, 0)
            input_fn = tf.estimator.inputs.numpy_input_fn(
                x={'images': testX[tail_start:, :, :, :]},
                batch_size=batch_size,
                shuffle=False)
            gen = model.predict(input_fn)
            for file_i, prediction in enumerate(gen):
                idx = tail_start + file_i
                # Rows below headsz were already written by the head pass.
                if idx >= headsz:
                    write_row(fd, fileids[idx], prediction)
def get_decisions(foldername='test', model_dir=None, outfile=None):
    """Run the binary model over a dataset folder and collect its predictions.

    Prediction runs in two passes: a "head" pass over the largest prefix that
    is a whole number of batches, then a "tail" pass over the last
    ``batch_size`` images to pick up the remainder. Images covered by both
    passes simply have their entry overwritten by the tail result.

    Args:
        foldername: Dataset sub-folder to evaluate (e.g. 'test', 'myval').
        model_dir: Checkpoint directory handed to tf.estimator.Estimator,
            e.g. '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary1'.
        outfile: Optional path; when truthy the result dict is written with
            np.save, e.g.
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary1/test_bin_pred'.
            np.save pickles the dict, so reading it back needs
            np.load(..., allow_pickle=True).

    Returns:
        dict mapping file id (file name without extension) to whatever
        the estimator's predict() yields for that image.
    """
    valX = get_salt_images(folder=foldername)
    fileids = sb.clean_glob(
        glob.glob(
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/%s/images/*.png'
            % foldername))

    # NOTE(review): assumes fileids and valX list the same images in the same
    # order -- confirm against get_salt_images / sb.clean_glob.
    setsize = len(fileids)
    headsz = (setsize // batch_size) * batch_size

    bin_model = tf.estimator.Estimator(model_fn, model_dir=model_dir)
    id_to_pred = {}

    if headsz:
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[:headsz, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = bin_model.predict(input_fn)
        for file_i, prediction in enumerate(tqdm(gen, total=headsz)):
            fileid = os.path.splitext(fileids[file_i])[0]
            id_to_pred[fileid] = prediction

    # now get the tail
    if setsize > headsz:
        # Clamp at 0: with fewer files than one batch, setsize - batch_size is
        # negative and indexing fileids with it would wrap around and attach
        # predictions to the wrong ids.
        tail_start = max(setsize - batch_size, 0)
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[tail_start:, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = bin_model.predict(input_fn)
        for file_i, prediction in enumerate(gen):
            fileid = os.path.splitext(fileids[tail_start + file_i])[0]
            id_to_pred[fileid] = prediction

    if outfile:
        np.save(outfile, id_to_pred)
    return id_to_pred
def kaggle_summary(
        outpath='/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/predictions/myval/'
):
    """Print tp/fp/fn counts and precision per IoU threshold for the 'myval' set.

    Each predicted mask is multiplied by the stored per-image binary decision
    for that file id, compared with the ground-truth mask, and tallied at ten
    IoU thresholds (0.5 .. 0.95). Precision at a threshold is
    tp / (tp + fp + fn); the average over thresholds is printed last.

    Prediction runs in two passes: a "head" pass over the largest prefix that
    is a whole number of batches, then a "tail" pass over the last
    ``batch_size`` images. The tail overlaps the head, so only images not
    already scored are tallied; scoring the whole tail would count the
    overlapping images twice. Pass sizes are derived from the number of files
    and the module-level ``batch_size`` instead of being hard-coded for a
    404-image set.

    Args:
        outpath: Unused by this function; kept for interface compatibility.
    """
    valX = get_salt_images(folder='myval')
    valY = get_salt_labels(folder='myval')
    fileids = sb.clean_glob(
        glob.glob(
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/myval/images/*.png'
        ))
    model_dir = '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binarypix1'
    model = tf.estimator.Estimator(model_fn, model_dir=model_dir)

    # NOTE(review): assumes fileids, valX and valY list the same images in the
    # same order -- confirm against the loaders.
    setsize = len(fileids)
    headsz = (setsize // batch_size) * batch_size

    # The file holds a dict saved with np.save (a pickled 0-d object array);
    # NumPy >= 1.16.3 refuses to load it unless allow_pickle=True. Only load
    # files produced by this project: unpickling can execute arbitrary code.
    id_to_pred = np.load(
        '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary13b/val_bin_pred.npy',
        allow_pickle=True).tolist()

    threshes = np.array(
        [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95])
    tps = np.zeros(threshes.shape[0])
    fps = np.zeros(threshes.shape[0])
    fns = np.zeros(threshes.shape[0])

    def tp_fp_fn_calc(gt, predicted, tps, fps, fns):
        """Update the per-threshold counters in place for one image.

        The counters are taken as arguments because an augmented assignment
        to an enclosing-scope name would make that name local to this function.
        """
        component1 = np.array(predicted).astype(bool)
        component2 = gt.astype(bool)
        overlap = component1 * component2  # Logical AND
        union = component1 + component2  # Logical OR
        # The epsilon keeps the ratio defined when both masks are empty.
        iou = overlap.sum() / float(union.sum() + 1e-5)

        if np.any(gt > 0):
            if np.all(predicted < 1):
                # Mask exists but nothing was predicted: a miss at every threshold.
                fns += 1
            else:
                # NOTE(review): thresholds the IoU fails to exceed are counted
                # as neither fp nor fn -- confirm this is the intended metric.
                tps += (threshes < iou).astype(int)
        elif np.any(predicted > 0):
            # No mask in the ground truth but something was predicted.
            fps += 1

    def score(idx, prediction):
        """Tally the prediction for image number idx."""
        fileid = os.path.splitext(fileids[idx])[0]
        p_label = np.array(prediction['mask']) * id_to_pred[fileid]
        tp_fp_fn_calc(valY[idx, :], p_label, tps, fps, fns)

    if headsz:
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[:headsz, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = model.predict(input_fn)
        for file_i, prediction in enumerate(tqdm(gen, total=headsz)):
            score(file_i, prediction)

    # now get the tail
    if setsize > headsz:
        # Clamp at 0 so a set smaller than one batch cannot yield negative
        # indices that would wrap around fileids / valY.
        tail_start = max(setsize - batch_size, 0)
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[tail_start:, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = model.predict(input_fn)
        for file_i, prediction in enumerate(gen):
            idx = tail_start + file_i
            # Images below headsz were already scored by the head pass.
            if idx >= headsz:
                score(idx, prediction)

    precisions = tps / (tps + fps + fns)
    avg_precision = precisions.sum() / threshes.shape[0]

    print('%d masks in dataset' % np.sum(valY.sum(axis=1) > 0))
    for idx, thresh in enumerate(threshes):
        print('tp at %f: %f' % (thresh, tps[idx]))
        print('fp at %f: %f' % (thresh, fps[idx]))
        print('fn at %f: %f' % (thresh, fns[idx]))
        print('precisions at %f: %f' % (thresh, precisions[idx]))
    print('avg precision: %f' % (avg_precision))