import os
import glob
import time

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from joblib import Parallel, delayed

# Project-local helpers (load_from_cache, save_to_cache, load_from_cache_multi,
# save_to_cache_multi, load_file, DsbConfig, model_rcnn, UnetRCNN, postprocess,
# renumber_mask, modify_w_unet, get_cons_local_valid, get_inputs_rcnn,
# data_generator_multi, generator_1s, prob_to_rles, boundary2mask,
# check_sanity, resave, mask_localpos, get_ax, makefolder, ...) are assumed
# to be imported from the repo's own modules.


def train_valid_split():
    df = load_from_cache('train_df_fixed')
    df_256 = load_from_cache('train_df_256_fixed')
    df_256['id'] = df.loc[df_256['image_id'].values, 'id'].values
    # deterministic even/odd split over the original images
    train_ids = df['id'][::2]
    valid_ids = df['id'][1::2]
    train_df = df_256.loc[df_256['id'].isin(train_ids)]
    valid_df = df_256.loc[df_256['id'].isin(valid_ids)]
    save_to_cache(train_df, 'train_final_df')
    save_to_cache(valid_df, 'valid_df')

def main():
    config = DsbConfig()
    df = load_from_cache('train_df_fixed')
    df_cut = resave(df, config)
    df_cut['shape_id'] = df.loc[df_cut['image_id'].values, 'shape_id'].values
    save_to_cache(df_cut, 'train_df_256_fixed')
    for df_name in ['2009isbi', 'TNBC', 'weebly']:
        df = load_from_cache(df_name)
        df_cut = resave(df, config)
        save_to_cache(df_cut, '{}_256'.format(df_name))

def get_mask_scale(df_name, config, weight_dir, save_name, tag='half',
                   n_jobs=18):
    fl_name = os.path.join(weight_dir, df_name + '_' + tag)
    if os.path.exists('{}.dat'.format(fl_name)):
        preds_df = load_from_cache(fl_name)
    else:
        fl_names = glob.glob(os.path.join(
            weight_dir, '{}_{}_[0-9].dat'.format(df_name, tag))) + \
            glob.glob(os.path.join(
                weight_dir, '{}_{}_[0-9][0-9].dat'.format(df_name, tag)))
        preds_df = load_from_cache_multi(fl_name, nb=len(fl_names))
    print(preds_df.shape)

    save_dir = os.path.join(weight_dir, save_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    df = load_from_cache(df_name)
    # build instance masks from the semantic mask plus the four
    # local-position channels, then relabel instances consecutively
    masks0_scale = Parallel(n_jobs)(delayed(postprocess)(
        np.concatenate(preds_df.loc[ind, ['mask', 'ly', 'lx', 'ldr', 'ldl']],
                       -1), config) for ind in preds_df.index)
    masks0_scale = Parallel(n_jobs)(delayed(renumber_mask)(mask)
                                    for mask in masks0_scale)
    preds_df['pred0_scale'] = [x[0].astype('int16') for x in masks0_scale]

    masks_scale = Parallel(n_jobs)(delayed(modify_w_unet)(
        preds_df.loc[ind, 'pred0_scale'],
        np.concatenate(preds_df.loc[ind, ['mask', 'ly', 'lx', 'ldr', 'ldl']],
                       -1)) for ind in preds_df.index)
    preds_df['pred_scale'] = [x.astype('int16') for x in masks_scale]

    # resize the predictions back to each image's original shape
    masks = Parallel(n_jobs)(delayed(cv2.resize)(
        preds_df.loc[ind, 'pred_scale'],
        dsize=(df.loc[ind, 'shape'][1], df.loc[ind, 'shape'][0]),
        interpolation=cv2.INTER_NEAREST) for ind in df.index)
    preds_df['pred'] = [x.astype('int16') for x in masks]

    if tag != 'two':
        save_to_cache(preds_df,
                      os.path.join(weight_dir, '{}_{}'.format(df_name, tag)))
    else:
        save_to_cache_multi(
            preds_df,
            os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                         '{}_{}'.format(df_name, tag)), 10)

def get_masks():
    ###########################################################################
    boundary_dir = '/home/work/dsb/ext/2009isbi/segmented-lpc-boundary'
    save_dir = '/home/work/dsb/ext/2009isbi/segmented-lpc-mask'
    ###########################################################################
    assert os.path.exists(boundary_dir)
    folders = next(os.walk(boundary_dir))[1]
    masks = []
    for folder in folders:
        fls = next(os.walk(os.path.join(boundary_dir, folder)))[2]
        fls = sorted(fls, key=lambda x: int(x[:-4].split('-')[-1]))
        for fl in fls:
            id = os.path.basename(fl)[:-4]
            filepath = os.path.join(boundary_dir, folder, fl)
            boundary = cv2.imread(filepath, 0)
            mask = boundary2mask(boundary)
            check_sanity(mask, boundary, folder, fl)
            plt.imsave(os.path.join(save_dir, folder,
                                    '{}_mask.png'.format(id)), mask)
            masks.append({'id': '{}_{}'.format(folder, id), 'mask': mask})
    masks = pd.DataFrame(masks)
    df = load_from_cache('2009isbi')
    df = pd.merge(df, masks, how='left', on='id')
    df = df.dropna(axis=0)
    df['nb_instance'] = df['mask'].apply(lambda x: x.max())
    save_to_cache(df, '2009isbi')

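# For reference: boundary2mask is defined elsewhere in the repo. The sketch
# below is one plausible implementation, NOT necessarily the author's
# (assumption): treat each boundary image as closed curves on a zero
# background, label the enclosed regions, and drop the background component
# touching the image border.
from scipy import ndimage


def boundary2mask_sketch(boundary):
    interior = boundary == 0  # pixels not lying on a boundary curve
    labels, _ = ndimage.label(interior)
    # regions touching the image border are background, not nuclei
    border = np.unique(np.concatenate(
        [labels[0], labels[-1], labels[:, 0], labels[:, -1]]))
    mask = np.where(np.isin(labels, border), 0, labels)
    # relabel the surviving regions consecutively as 1..N
    _, mask = np.unique(mask, return_inverse=True)
    return mask.reshape(boundary.shape).astype('int16')
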
def main():
    config = DsbConfig()
    df = load_from_cache('train_df_256_clean')
    labels0 = get_labels(config.IMAGE_SHAPE[0])
    coords = get_coords(config.IMAGE_SHAPE[0])
    # benchmark the new local-position implementation against the original
    # on every 50th mask and check that the outputs agree
    for nb in np.arange(0, 4000, 50):
        mask = df.loc[nb, 'mask']
        start = time.time()
        t0 = mask_localpos(mask, 'all')
        time0 = time.time() - start
        t = mask_localpos_new(mask, 'all', labels0, coords)
        time1 = time.time() - start - time0
        print(nb, np.allclose(t, t0),
              'time_ratio:{}'.format(round(time0 / time1, 4)))

    # visualise one example: image, instance mask, and the four
    # local-position channels
    nb = 800
    mask = df.loc[nb, 'mask']
    mask_local = mask_localpos(mask, tp='all')
    ax = get_ax(2, 3, 8)
    ax[0, 0].imshow(df.loc[nb, 'image'])
    ax[0, 1].imshow(df.loc[nb, 'mask'])
    ax[0, 2].imshow(mask_local[:, :, 0])
    ax[1, 0].imshow(mask_local[:, :, 1])
    ax[1, 1].imshow(mask_local[:, :, 2])
    ax[1, 2].imshow(mask_local[:, :, 3])

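# One plausible reading of the four local-position channels (assumption; the
# repo's mask_localpos is authoritative): per-instance relative pixel
# coordinates along y, x and the two diagonals, centred within each instance.
def mask_localpos_sketch(mask):
    h, w = mask.shape
    yy, xx = np.mgrid[0:h, 0:w].astype('float32')
    chans = np.stack([yy, xx, yy + xx, yy - xx], -1)
    out = np.zeros_like(chans)
    for label in range(1, mask.max() + 1):
        sel = mask == label
        for c in range(4):
            vals = chans[:, :, c][sel]
            out[:, :, c][sel] = vals - vals.mean()  # centre per instance
    return out
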
def get_cons_scale(df_name, weight_dir, tag='half', n_jobs=18):
    fl_name = os.path.join(weight_dir, '{}_{}'.format(df_name, tag))
    if tag != 'two':
        preds_df = load_from_cache(fl_name)
    else:
        fl_names = glob.glob(os.path.join(
            weight_dir, '{}_{}'.format(df_name, tag),
            '{}_{}_[0-9].dat'.format(df_name, tag))) + \
            glob.glob(os.path.join(
                weight_dir, '{}_{}'.format(df_name, tag),
                '{}_{}_[0-9][0-9].dat'.format(df_name, tag)))
        preds_df = load_from_cache_multi(
            os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                         '{}_{}'.format(df_name, tag)), nb=len(fl_names))
    print(preds_df.shape)

    # consistency score between each predicted instance and the U-Net outputs
    cons_total = Parallel(n_jobs)(delayed(get_cons_local_valid)(
        preds_df.loc[ind, 'pred_scale'],
        np.concatenate(preds_df.loc[ind, ['mask', 'ly', 'lx', 'ldr', 'ldl']],
                       -1)) for ind in preds_df.index)
    preds_df['con'] = cons_total

    if tag != 'two':
        save_to_cache(preds_df,
                      os.path.join(weight_dir, '{}_{}'.format(df_name, tag)))
    else:
        save_to_cache_multi(
            preds_df,
            os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                         '{}_{}'.format(df_name, tag)), 10)

def get_mask(df_name, config, weight_dir, save_name, n_jobs=18):
    save_dir = os.path.join(weight_dir, save_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    fl_name = os.path.join(weight_dir, df_name)
    if os.path.exists('{}.dat'.format(fl_name)):
        preds_df = load_from_cache(fl_name)
    else:
        fl_names = glob.glob(
            os.path.join(weight_dir, '{}_[0-9].dat'.format(df_name)))
        preds_df = load_from_cache_multi(fl_name, nb=len(fl_names))
    print(preds_df.shape)

    masks0 = Parallel(n_jobs)(delayed(postprocess)(
        np.concatenate(preds_df.loc[ind, ['mask', 'ly', 'lx', 'ldr', 'ldl']],
                       -1), config) for ind in preds_df.index)
    masks0 = Parallel(n_jobs)(delayed(renumber_mask)(mask) for mask in masks0)
    masks0 = [x[0] for x in masks0]
    preds_df['pred0'] = masks0

    masks = Parallel(n_jobs)(delayed(modify_w_unet)(
        preds_df.loc[ind, 'pred0'],
        np.concatenate(preds_df.loc[ind, ['mask', 'ly', 'lx', 'ldr', 'ldl']],
                       -1)) for ind in preds_df.index)
    preds_df['pred'] = masks
    save_to_cache(preds_df, os.path.join(weight_dir, df_name))

def predict_test_rcnn_half(df_name, weight_dir, weight_fl=None):
    config = DsbConfig()
    config.BATCH_SIZE = 1
    config.GPU_COUNT = 1
    unet_ratio, tp = 1, 'all'
    df = load_from_cache(df_name)
    # predict at half resolution
    df['shape'] = df['image'].apply(
        lambda x: (x.shape[0] // 2, x.shape[1] // 2, x.shape[2]))
    output_names = ['mask', 'ly', 'lx', 'ldr', 'ldl']
    nb_outputs = [0, 9, 10, 11, 12]
    preds_df = pd.DataFrame(index=df.index, columns=output_names)
    vals = np.unique(df['shape'])
    if weight_fl is None:
        # pick the checkpoint whose filename encodes the lowest validation loss
        weight_fls = glob.glob(os.path.join(weight_dir, '*.hdf5'))
        weight_fls = sorted(
            weight_fls,
            key=lambda x: float(os.path.basename(x)[:-5].split('_')[-1]))
        weight_fl = weight_fls[0]
    for nb, shape in enumerate(vals):
        ind_shape = df[df['shape'] == shape].index
        # pad to the next multiple of 64 so the network's downsampling divides
        new_shape = 64 * int(np.ceil(max(shape) / 64))
        print('{}/{}'.format(nb, len(vals)), len(ind_shape), shape, new_shape)
        model = model_rcnn(tp, unet_ratio, 'sgd', config, weight_dir,
                           new_shape)
        #model = model_unet(new_shape, unet_ratio, tp, config=config)
        model.load_weights(weight_fl)
        model.compile(1e-3)
        images = np.stack([
            cv2.resize(image, dsize=(shape[1], shape[0]),
                       interpolation=cv2.INTER_LINEAR)
            for image in df.loc[ind_shape, 'image']
        ], 0)
        if (new_shape, new_shape) != shape[:2]:
            y1 = (new_shape - images.shape[1]) // 2
            x1 = (new_shape - images.shape[2]) // 2
            y2 = new_shape - images.shape[1] - y1
            x2 = new_shape - images.shape[2] - x1
            images = np.pad(images, ((0, 0), (y1, y2), (x1, x2), (0, 0)),
                            mode='constant', constant_values=0)
        else:
            y1, x1, y2, x2 = 0, 0, 0, 0
        inputs = get_inputs_rcnn(images)
        y_preds = model.keras_model.predict(inputs, batch_size=1, verbose=1)
        for nb_output, output_name in zip(nb_outputs, output_names):
            y_pred = y_preds[nb_output][:, :, :, :1]
            if (new_shape, new_shape) != shape[:2]:
                y_pred = y_pred[:, y1:new_shape - y2, x1:new_shape - x2, :1]
            preds_df.loc[ind_shape, output_name] = list(
                y_pred.astype('float16'))
    save_to_cache(preds_df,
                  os.path.join(weight_dir, '{}_half'.format(df_name)))

def main():
    ###########################################################################
    weight_dir = '/media/work/Data/dsb/cache/UnetRCNN_180410-221747'
    ###########################################################################
    df_name = 'stage2_df'
    df = load_from_cache(df_name)
    tags = ['quarter', 'half', None, 'two']
    preds = []
    for tag in tags:
        if tag is None:
            fl = os.path.join(weight_dir, '{}.dat'.format(df_name))
            pred = load_file(fl)
        elif tag == 'two':
            fl_names = glob.glob(os.path.join(
                weight_dir, '{}_{}'.format(df_name, tag),
                '{}_{}_[0-9].dat'.format(df_name, tag))) + \
                glob.glob(os.path.join(
                    weight_dir, '{}_{}'.format(df_name, tag),
                    '{}_{}_[0-9][0-9].dat'.format(df_name, tag)))
            pred = load_from_cache_multi(
                os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                             '{}_{}'.format(df_name, tag)), nb=len(fl_names))
        else:
            fl = os.path.join(weight_dir, '{}_{}.dat'.format(df_name, tag))
            pred = load_file(fl)
        preds.append(pred)

    # greedily merge instances across the four scales, most consistent first
    nb_fls = len(tags)
    results = []
    for ind in df.index:
        masks = [pred.loc[ind, 'pred'] for pred in preds]
        scores = [pred.loc[ind, 'con'] for pred in preds]
        res = {}
        for key, vals in zip(np.arange(nb_fls), scores):
            for nb in range(len(vals)):
                res['{}_{}'.format(key, nb)] = vals[nb]
        res = pd.Series(res).sort_values()
        res = res[res < 0.2]  # keep only sufficiently consistent instances
        mask = np.zeros_like(masks[0], dtype='int16')
        val = 1
        for ind_res in res.index:
            size, label = ind_res.split('_')
            size, label = int(size), int(label)
            index = masks[size] == label + 1
            # accept an instance only if less than half of it is covered by
            # instances already placed
            if (np.sum(mask[index] > 0) / np.sum(index)) < 0.5:
                mask[(index) & (mask == 0)] = val
                val = val + 1
        results.append(mask)

    preds_df = pd.DataFrame(index=df.index)
    preds_df['pred'] = results
    save_to_cache(preds_df, os.path.join(weight_dir, 'preds_df_scale_01'))
    make_submission(preds_df)

def get_cons(df_name, weight_dir, n_jobs=18):
    fl_name = os.path.join(weight_dir, df_name)
    preds_df = load_from_cache(fl_name)
    print(preds_df.shape)
    cons_total = Parallel(n_jobs)(delayed(get_cons_local_valid)(
        preds_df.loc[ind, 'pred'],
        np.concatenate(preds_df.loc[ind, ['mask', 'ly', 'lx', 'ldr', 'ldl']],
                       -1)) for ind in preds_df.index)
    preds_df['con'] = cons_total
    save_to_cache(preds_df, os.path.join(weight_dir, df_name))

def train_generator(config, shuffle=False, augment=False):
    isbi = load_from_cache('2009isbi_256')
    weebly = load_from_cache('weebly_256')
    tnbc = load_from_cache('TNBC_256')
    train = load_from_cache('train_final_df')
    gen_isbi = data_generator_multi(
        np.stack(isbi['image'], 0), np.stack(isbi['mask'], 0), config,
        shuffle=shuffle, augment=augment, batch_size=1, tp_value=7)
    gen_weebly = data_generator_multi(
        np.stack(weebly['image'], 0), np.stack(weebly['mask'], 0), config,
        shuffle=shuffle, augment=augment, batch_size=1)
    gen_tnbc = data_generator_multi(
        np.stack(tnbc['image'], 0), np.stack(tnbc['mask'], 0), config,
        shuffle=shuffle, augment=augment, batch_size=1)
    gen_train = data_generator_multi(
        np.stack(train['image'], 0), np.stack(train['mask'], 0), config,
        shuffle=shuffle, augment=augment, batch_size=1)
    # each batch interleaves one sample from each of the four datasets,
    # so config.BATCH_SIZE is expected to be 4
    images = np.zeros((config.BATCH_SIZE,) + tuple(config.IMAGE_SHAPE),
                      dtype='float32')
    masks = np.zeros((config.BATCH_SIZE,) + tuple(config.IMAGE_SHAPE[:2]),
                     dtype='int32')
    while True:
        for nb, gen in enumerate([gen_isbi, gen_weebly, gen_tnbc, gen_train]):
            image, mask = next(gen)
            images[nb] = image[0]
            masks[nb] = mask[0]
        yield images, masks

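# Minimal usage sketch of train_generator (assuming config.BATCH_SIZE == 4,
# one batch slot per source dataset):
#
#   config = DsbConfig()
#   gen = train_generator(config, shuffle=True, augment=True)
#   images, masks = next(gen)
#   # images: (4,) + IMAGE_SHAPE, float32; masks: (4, H, W), int32 labels
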
def make_submission(preds_df):
    df = load_from_cache('stage2_df')
    result = []
    for ind in preds_df.index:
        mask = preds_df.loc[ind, 'pred']
        # instance labels must be consecutive 0..N for RLE encoding
        assert len(np.unique(mask)) == mask.max() + 1
        result.append(list(prob_to_rles(mask)))
    new_test_ids = []
    rles = []
    for n, id_ in enumerate(df['id']):
        rles.extend(result[n])
        new_test_ids.extend([id_] * len(result[n]))
    sub = pd.DataFrame()
    sub['ImageId'] = new_test_ids
    sub['EncodedPixels'] = pd.Series(rles).apply(
        lambda x: ' '.join(str(y) for y in x))
    sub.to_csv(os.path.join('..//cache', 'sub-scale3.csv'), index=False)

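# prob_to_rles comes from the repo's utilities. For reference, the DSB
# submission format run-length-encodes each instance in column-major order
# with 1-indexed pixel positions. A minimal equivalent encoder, assuming
# `mask` is an integer instance mask labelled 1..N (sketch, not the repo's
# implementation):

def rle_encode_sketch(mask, label):
    pixels = (mask.T.flatten() == label).astype(np.uint8)  # column-major scan
    padded = np.concatenate([[0], pixels, [0]])
    runs = np.where(padded[1:] != padded[:-1])[0] + 1  # 1-indexed run edges
    runs[1::2] -= runs[::2]  # convert (start, end) pairs to (start, length)
    return runs.tolist()


def prob_to_rles_sketch(mask):
    for label in range(1, mask.max() + 1):
        yield rle_encode_sketch(mask, label)
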
def train_1s():
    config = DsbConfig()
    tp, unet_ratio, opt = 'all', 1, 'sgd'
    save_dir = makefolder('..//cache//UnetRCNN', tosecond=True)
    weight_fl = '../cache/mask_rcnn_coco.h5'
    valid_df = load_from_cache('valid_df')
    valid_images = np.stack(valid_df['image'], 0)
    valid_masks = np.stack(valid_df['mask'], 0)

    # warm-start from COCO Mask R-CNN weights
    model = UnetRCNN(tp, unet_ratio, opt, config, save_dir)
    model.load_weights(weight_fl, by_name=True)

    train_gen = train_generator(config, shuffle=True, augment=True)
    tr_ms1 = generator_1s(train_gen, config, tp)
    val_generator = data_generator_multi(valid_images, valid_masks, config,
                                         shuffle=False, augment=False)
    val_ms1 = generator_1s(val_generator, config, tp)
    #model.train_generator(tr_ms1, val_ms1, 1e-2, 1, 'head')
    model.train_generator(tr_ms1, val_ms1, 1e-3, 100, 'all')

def predict_test_rcnn(df_name, weight_dir, weight_fl=None):
    config = DsbConfig()
    config.BATCH_SIZE = 1
    config.GPU_COUNT = 1
    max_shape = 1024
    unet_ratio, tp = 1, 'all'
    df = load_from_cache(df_name)
    output_names = ['mask', 'ly', 'lx', 'ldr', 'ldl']
    nb_outputs = [0, 9, 10, 11, 12]
    preds_df = pd.DataFrame(index=df.index, columns=output_names)
    vals = np.unique(df['shape'])
    if weight_fl is None:
        weight_fls = glob.glob(os.path.join(weight_dir, '*.hdf5'))
        weight_fls = sorted(
            weight_fls,
            key=lambda x: float(os.path.basename(x)[:-5].split('_')[-1]))
        weight_fl = weight_fls[0]
    for nb, shape in enumerate(vals):
        ind_shape = df[df['shape'] == shape].index
        if max(shape) <= max_shape:
            new_shape = 64 * int(np.ceil(max(shape) / 64))
        else:
            new_shape = 512 * int(np.ceil(max(shape) / 512))
        print('{}/{}'.format(nb, len(vals)), len(ind_shape), shape, new_shape)
        model = model_rcnn(tp, unet_ratio, 'sgd', config, weight_dir,
                           min(new_shape, max_shape))
        #model = model_unet(new_shape, unet_ratio, tp, config=config)
        model.load_weights(weight_fl)
        model.compile(1e-3)
        images = np.stack(df.loc[ind_shape, 'image'], 0)
        if (new_shape, new_shape) != shape[:2]:
            y1 = (new_shape - images.shape[1]) // 2
            x1 = (new_shape - images.shape[2]) // 2
            y2 = new_shape - images.shape[1] - y1
            x2 = new_shape - images.shape[2] - x1
            images = np.pad(images, ((0, 0), (y1, y2), (x1, x2), (0, 0)),
                            mode='constant', constant_values=0)
        else:
            y1, x1, y2, x2 = 0, 0, 0, 0
        X = get_inputs_rcnn(images)
        if new_shape > max_shape:
            # images larger than 1024: slide a 1024x1024 window with a
            # 512-pixel stride and keep only the central half of each
            # overlapping prediction
            nb_cut = int(np.ceil(new_shape / 512)) - 1
            y_preds = [np.zeros((images.shape[0], new_shape, new_shape, 1),
                                dtype='float32')] + \
                      [np.zeros((images.shape[0], new_shape, new_shape, 2),
                                dtype='float32') for _ in range(12)]
            for nb_y in range(nb_cut):
                start_y, end_y = 512 * nb_y, 512 * (nb_y + 2)
                shift_start_y = 0 if nb_y == 0 else 256
                shift_end_y = 0 if nb_y == nb_cut - 1 else -256
                for nb_x in range(nb_cut):
                    start_x, end_x = 512 * nb_x, 512 * (nb_x + 2)
                    shift_start_x = 0 if nb_x == 0 else 256
                    shift_end_x = 0 if nb_x == nb_cut - 1 else -256
                    print(start_y, end_y, start_x, end_x)
                    print(shift_start_y, shift_end_y,
                          shift_start_x, shift_end_x)
                    X_nb = [X[0][:, 4 * start_y:4 * end_y,
                                 4 * start_x:4 * end_x],
                            X[1][:, start_y:end_y, start_x:end_x]]
                    preds = model.keras_model.predict(X_nb, batch_size=1,
                                                      verbose=1)
                    for nb_output in range(13):
                        y_preds[nb_output][
                            :, start_y + shift_start_y:end_y + shift_end_y,
                            start_x + shift_start_x:end_x + shift_end_x] = \
                            preds[nb_output][
                                :, shift_start_y:max_shape + shift_end_y,
                                shift_start_x:max_shape + shift_end_x]
        else:
            y_preds = model.keras_model.predict(X, batch_size=1, verbose=1)
        for nb_output, output_name in zip(nb_outputs, output_names):
            y_pred = y_preds[nb_output][:, :, :, :1]
            if (new_shape, new_shape) != shape[:2]:
                y_pred = y_pred[:, y1:new_shape - y2, x1:new_shape - x2, :1]
            preds_df.loc[ind_shape, output_name] = list(
                y_pred.astype('float16'))
    if len(preds_df) > 500:
        save_to_cache_multi(preds_df, os.path.join(weight_dir, df_name),
                            len(preds_df) // 500 + 1)
    else:
        save_to_cache(preds_df, os.path.join(weight_dir, df_name))

def predict_test_rcnn_two(df_name, weight_dir, weight_fl=None, start=0,
                          end=100, start_run=0):
    config = DsbConfig()
    config.BATCH_SIZE = 1
    config.GPU_COUNT = 1
    max_shape = 1024
    unet_ratio, tp = 1, 'all'
    df = load_from_cache(df_name)
    # predict at double resolution
    df['shape'] = df['image'].apply(
        lambda x: (x.shape[0] * 2, x.shape[1] * 2, x.shape[2]))
    output_names = ['mask', 'ly', 'lx', 'ldr', 'ldl']
    nb_outputs = [0, 9, 10, 11, 12]
    vals = np.unique(df['shape'])
    if weight_fl is None:
        weight_fls = glob.glob(os.path.join(weight_dir, '*.hdf5'))
        weight_fls = sorted(
            weight_fls,
            key=lambda x: float(os.path.basename(x)[:-5].split('_')[-1]))
        weight_fl = weight_fls[0]
    for nb, shape in enumerate(vals):
        if (nb < start) | (nb >= end):
            continue
        if max(shape) <= max_shape:
            new_shape = 64 * int(np.ceil(max(shape) / 64))
        else:
            new_shape = 512 * int(np.ceil(max(shape) / 512))
        ind_shape = df[df['shape'] == shape].index
        print('{}/{}'.format(nb, len(vals)), len(ind_shape), shape, new_shape)
        # split large shape groups into several runs to bound memory use
        if len(ind_shape) * (new_shape // 512) ** 2 > 800:
            nb_run = (len(ind_shape) * (new_shape // 512) ** 2) // 800 + 1
            size = int(len(ind_shape) / nb_run) + 1
            ind_shape0 = ind_shape.copy()
        else:
            nb_run = 1
            size = len(ind_shape)
        for run in range(nb_run):
            if run < start_run:
                continue
            if nb_run != 1:
                # run-local bounds; do not overwrite the start/end arguments
                run_start = run * size
                run_end = min((run + 1) * size, len(ind_shape0))
                ind_shape = ind_shape0[run_start:run_end]
            preds_df = pd.DataFrame(index=df.index[ind_shape],
                                    columns=output_names)
            model = model_rcnn(tp, unet_ratio, 'sgd', config, weight_dir,
                               min(new_shape, max_shape))
            #model = model_unet(new_shape, unet_ratio, tp, config=config)
            model.load_weights(weight_fl)
            model.compile(1e-3)
            images = np.stack([
                cv2.resize(image, dsize=(shape[1], shape[0]),
                           interpolation=cv2.INTER_LINEAR)
                for image in df.loc[ind_shape, 'image']
            ], 0)
            print(images.shape)
            if (new_shape, new_shape) != shape[:2]:
                y1 = (new_shape - images.shape[1]) // 2
                x1 = (new_shape - images.shape[2]) // 2
                y2 = new_shape - images.shape[1] - y1
                x2 = new_shape - images.shape[2] - x1
                images = np.pad(images, ((0, 0), (y1, y2), (x1, x2), (0, 0)),
                                mode='constant', constant_values=0)
            else:
                y1, x1, y2, x2 = 0, 0, 0, 0
            X = get_inputs_rcnn(images)
            if new_shape > max_shape:
                # tile with a 1024x1024 window at a 512-pixel stride,
                # keeping only the central half of overlapping predictions
                nb_cut = int(np.ceil(new_shape / 512)) - 1
                y_preds = [np.zeros((images.shape[0], new_shape, new_shape, 1),
                                    dtype='float32')] + \
                          [np.zeros((images.shape[0], new_shape, new_shape, 2),
                                    dtype='float32') for _ in range(4)]
                for nb_y in range(nb_cut):
                    start_y, end_y = 512 * nb_y, 512 * (nb_y + 2)
                    shift_start_y = 0 if nb_y == 0 else 256
                    shift_end_y = 0 if nb_y == nb_cut - 1 else -256
                    for nb_x in range(nb_cut):
                        start_x, end_x = 512 * nb_x, 512 * (nb_x + 2)
                        shift_start_x = 0 if nb_x == 0 else 256
                        shift_end_x = 0 if nb_x == nb_cut - 1 else -256
                        print(start_y, end_y, start_x, end_x)
                        print(shift_start_y, shift_end_y,
                              shift_start_x, shift_end_x)
                        X_nb = [X[0][:, 4 * start_y:4 * end_y,
                                     4 * start_x:4 * end_x],
                                X[1][:, start_y:end_y, start_x:end_x]]
                        preds = model.keras_model.predict(X_nb, batch_size=1,
                                                          verbose=1)
                        for i, nb_output in enumerate(nb_outputs):
                            y_preds[i][
                                :, start_y + shift_start_y:end_y + shift_end_y,
                                start_x + shift_start_x:end_x + shift_end_x] = \
                                preds[nb_output][
                                    :, shift_start_y:max_shape + shift_end_y,
                                    shift_start_x:max_shape + shift_end_x]
                        del preds
            else:
                y_preds = model.keras_model.predict(X, batch_size=1, verbose=1)
                y_preds = [y_preds[i] for i in nb_outputs]
            for i, output_name in enumerate(output_names):
                y_pred = y_preds[i][:, :, :, :1]
                if (new_shape, new_shape) != shape[:2]:
                    y_pred = y_pred[:, y1:new_shape - y2,
                                    x1:new_shape - x2, :1]
                preds_df.loc[ind_shape, output_name] = list(
                    y_pred.astype('float16'))
            if nb_run == 1:
                save_to_cache(
                    preds_df,
                    os.path.join(weight_dir, '{}_two_{}'.format(df_name, nb)))
            else:
                save_to_cache(
                    preds_df,
                    os.path.join(weight_dir,
                                 '{}_two_{}_{}'.format(df_name, nb, run)))
    if len(df) < 200:
        # merge the per-shape files into a single cache file and clean up
        preds_df = load_from_cache_multi(
            os.path.join(weight_dir, '{}_two'.format(df_name)), len(vals))
        save_to_cache(preds_df,
                      os.path.join(weight_dir, '{}_two'.format(df_name)))
        for nb in range(len(vals)):
            os.remove(os.path.join(weight_dir,
                                   '{}_two_{}.dat'.format(df_name, nb)))