def get_df():
    ############################################################################
    image_dir = '/home/work/dsb/ext/weebly/Tissue images'
    anno_dir = '/home/work/dsb/ext/weebly/Annotations'
    # adjust these directory paths to your environment
    ############################################################################
    image_paths = os.listdir(image_dir)
    ids = [x.split('.')[0] for x in image_paths]
    image_paths = [os.path.join(image_dir, path) for path in image_paths]
    mask_paths = [os.path.join(anno_dir, '{}.xml'.format(id)) for id in ids]

    df = []
    for id, image_path, mask_path in zip(ids, image_paths, mask_paths):
        print(id, end=',')
        image = cv2.imread(image_path)
        mask, nb_instance, nb_gt, mask_vals = read_mask(
            mask_path, image.shape[:2])
        df.append({
            'image': image,
            'mask': mask,
            'nb_instance': nb_instance,
            'nb_gt': nb_gt,
            'mask_vals': mask_vals,
            'id': id,
            'image_path': image_path,
            'mask_path': mask_path
        })
    df = pd.DataFrame(df)
    df['shape'] = df['image'].apply(lambda x: x.shape)
    save_to_cache(df, 'weebly')
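
# read_mask() is defined elsewhere in this repo and also returns instance
# counts and label values. As a hedged sketch only (not the author's code):
# assuming the weebly/MoNuSeg-style XML lays out Region -> Vertices -> Vertex
# elements with X/Y attributes, a labeled mask can be rasterized like this.
import xml.etree.ElementTree as ET

import cv2
import numpy as np


def read_mask_sketch(xml_path, shape):
    """Rasterize each annotated region to a distinct integer label."""
    mask = np.zeros(shape, dtype='int32')
    root = ET.parse(xml_path).getroot()
    for label, region in enumerate(root.iter('Region'), start=1):
        pts = np.array([[float(v.get('X')), float(v.get('Y'))]
                        for v in region.iter('Vertex')], dtype='int32')
        cv2.fillPoly(mask, [pts], label)
    return mask
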
def predict_test_rcnn_half(df_name, weight_dir, weight_fl=None):
    config = DsbConfig()
    config.BATCH_SIZE = 1
    config.GPU_COUNT = 1
    unet_ratio, tp = 1, 'all'
    df = load_from_cache(df_name)
    df['shape'] = df['image'].apply(
        lambda x: (x.shape[0] // 2, x.shape[1] // 2, x.shape[2]))

    output_names = ['mask', 'ly', 'lx', 'ldr', 'ldl']
    nb_outputs = [0, 9, 10, 11, 12]
    preds_df = pd.DataFrame(index=df.index, columns=output_names)
    vals = np.unique(df['shape'])

    if weight_fl is None:
        weight_fls = glob.glob(os.path.join(weight_dir, '*.hdf5'))
        weight_fls = sorted(
            weight_fls,
            key=lambda x: float(os.path.basename(x)[:-5].split('_')[-1]))
        weight_fl = weight_fls[0]  # checkpoint whose filename ends in the smallest value

    for nb, shape in enumerate(vals):
        ind_shape = df[df['shape'] == shape].index

        new_shape = 64 * int(np.ceil(max(shape) / 64))
        print('{}/{}'.format(nb, len(vals)), len(ind_shape), shape, new_shape)
        model = model_rcnn(tp, unet_ratio, 'sgd', config, weight_dir,
                           new_shape)
        #model = model_unet(new_shape, unet_ratio, tp, config=config)
        model.load_weights(weight_fl)
        model.compile(1e-3)

        images = np.stack([
            cv2.resize(image,
                       dsize=(shape[1], shape[0]),
                       interpolation=cv2.INTER_LINEAR)
            for image in df.loc[ind_shape, 'image']
        ], 0)
        if (new_shape, new_shape) != shape[:2]:
            y1 = (new_shape - images.shape[1]) // 2
            x1 = (new_shape - images.shape[2]) // 2
            y2 = new_shape - images.shape[1] - y1
            x2 = new_shape - images.shape[2] - x1
            images = np.pad(images, ((0, 0), (y1, y2), (x1, x2), (0, 0)),
                            mode='constant',
                            constant_values=0)
        else:
            y1, x1, y2, x2 = 0, 0, 0, 0
        inputs = get_inputs_rcnn(images)
        y_preds = model.keras_model.predict(inputs, batch_size=1, verbose=1)

        for nb_output, output_name in zip(nb_outputs, output_names):
            y_pred = y_preds[nb_output][:, :, :, :1]
            if (new_shape, new_shape) != shape[:2]:
                y_pred = y_pred[:, y1:new_shape - y2, x1:new_shape - x2, :1]
            preds_df.loc[ind_shape,
                         output_name] = list(y_pred.astype('float16'))

    save_to_cache(preds_df, os.path.join(weight_dir,
                                         '{}_half'.format(df_name)))
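
# The centered padding above (y1/x1/y2/x2) pads each batch to a square whose
# side is the next multiple of 64. A minimal self-contained check of that
# arithmetic, using only numpy:
import numpy as np


def pad_to_square(images, new_shape):
    """Zero-pad a (N, H, W, C) batch to (N, new_shape, new_shape, C), centered."""
    y1 = (new_shape - images.shape[1]) // 2
    x1 = (new_shape - images.shape[2]) // 2
    y2 = new_shape - images.shape[1] - y1
    x2 = new_shape - images.shape[2] - x1
    padded = np.pad(images, ((0, 0), (y1, y2), (x1, x2), (0, 0)),
                    mode='constant', constant_values=0)
    return padded, (y1, x1, y2, x2)


# e.g. a 100x120 batch padded to 64 * ceil(120 / 64) = 128:
batch, offsets = pad_to_square(np.ones((2, 100, 120, 3)), 128)
assert batch.shape == (2, 128, 128, 3) and offsets == (14, 4, 14, 4)
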
def get_masks():
    ###############################################################################
    boundary_dir = '/home/work/dsb/ext/2009isbi/segmented-lpc-boundary'
    save_dir = '/home/work/dsb/ext/2009isbi/segmented-lpc-mask'

    ################################################################################

    assert os.path.exists(boundary_dir)
    folders = next(os.walk(boundary_dir))[1]
    masks = []
    for folder in folders:
        fls = next(os.walk(os.path.join(boundary_dir, folder)))[2]
        fls = sorted(fls, key=lambda x: int(x[:-4].split('-')[-1]))
        for fl in fls:
            id = os.path.basename(fl)[:-4]
            filepath = os.path.join(boundary_dir, folder, fl)
            boundary = cv2.imread(filepath, 0)
            mask = boundary2mask(boundary)
            check_sanity(mask, boundary, folder, fl)
            os.makedirs(os.path.join(save_dir, folder), exist_ok=True)
            plt.imsave(
                os.path.join(save_dir, folder, '{}_mask.png'.format(id)), mask)
            masks.append({'id': '{}_{}'.format(folder, id), 'mask': mask})

    masks = pd.DataFrame(masks)
    df = load_from_cache('2009isbi')
    df = pd.merge(df, masks, how='left', on='id')
    df = df.dropna(axis=0)
    df['nb_instance'] = df['mask'].apply(lambda x: x.max())
    save_to_cache(df, '2009isbi')
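
# boundary2mask() and check_sanity() live elsewhere in this repo. As a hedged
# sketch only of the boundary-to-instances idea: label the non-boundary pixels
# with connected components, then drop the component touching the corner,
# assuming it is the outer background (the real helper is likely more careful).
import cv2
import numpy as np


def boundary2mask_sketch(boundary):
    interior = (boundary == 0).astype(np.uint8)  # non-boundary pixels
    _, labels = cv2.connectedComponents(interior, connectivity=4)
    labels[labels == labels[0, 0]] = 0           # assumed background component
    return labels
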
def get_df():
    ###################################################################
    data_dir = '/home/work/dsb/ext/TNBC_NucleiSegmentation'
    # adjust this directory path to your environment
    ######################################################################
    assert os.path.exists(data_dir)
    image_folders = glob.glob(os.path.join(data_dir, 'Slide*'))
    image_folders = sorted(image_folders)
    df = []
    for image_folder in image_folders:
        image_fls = os.listdir(image_folder)
        image_fls = sorted(image_fls)
        for image_fl in image_fls:
            filepath = os.path.join(image_folder, image_fl)
            image = cv2.imread(filepath)
            mask_path = filepath.replace('Slide', 'GT')
            mask_unet = cv2.imread(mask_path, 0)
            assert len(np.unique(mask_unet)) == 2
            _, mask = cv2.connectedComponents(mask_unet, connectivity=4)
            df.append({
                'image': image,
                'mask': mask,
                'image_path': filepath,
                'mask_path': mask_path,
                'id': image_fl[:-4],
                'nb_instance': mask.max(),
                'shape': image.shape
            })

    df = pd.DataFrame(df)
    save_to_cache(df, 'TNBC')
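
# cv2.connectedComponents is what turns the binary GT image into the
# instance-labeled mask above (0 = background, 1..N = nuclei). A minimal
# self-contained example:
import cv2
import numpy as np

binary = np.zeros((6, 6), dtype=np.uint8)
binary[1:3, 1:3] = 255   # first nucleus
binary[4:6, 4:6] = 255   # second nucleus
n_labels, labeled = cv2.connectedComponents(binary, connectivity=4)
assert n_labels == 3 and labeled.max() == 2   # background + 2 instances
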
def get_cons_scale(df_name, weight_dir, tag='half', n_jobs=18):
    fl_name = os.path.join(weight_dir, '{}_{}'.format(df_name, tag))
    if tag != 'two':
        preds_df = load_from_cache(fl_name)
    else:
        fl_names = glob.glob(os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                                          '{}_{}_[0-9+].dat'.format(df_name, tag)))+\
                   glob.glob(os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                                          '{}_{}_[0-9][0-9].dat'.format(df_name, tag)))
        preds_df = load_from_cache_multi(os.path.join(
            weight_dir, '{}_{}'.format(df_name, tag),
            '{}_{}'.format(df_name, tag)),
                                         nb=len(fl_names))
    print(preds_df.shape)
    cons_total = Parallel(n_jobs)(delayed(get_cons_local_valid)(
        preds_df.loc[ind, 'pred_scale'],
        np.concatenate(preds_df.loc[ind,
                                    ['mask', 'ly', 'lx', 'ldr', 'ldl']], -1))
                                  for ind in preds_df.index)
    preds_df['con'] = cons_total
    if tag != 'two':
        save_to_cache(preds_df,
                      os.path.join(weight_dir, '{}_{}'.format(df_name, tag)))
    else:
        save_to_cache_multi(
            preds_df,
            os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                         '{}_{}'.format(df_name, tag)), 10)
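
# The Parallel(n_jobs)(delayed(f)(...) for ind in ...) pattern used above fans
# one call per DataFrame row out across worker processes. A minimal joblib
# example of the same construct:
from joblib import Parallel, delayed

squares = Parallel(n_jobs=2)(delayed(pow)(i, 2) for i in range(5))
assert squares == [0, 1, 4, 9, 16]
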
def get_mask(df_name, config, weight_dir, save_name, n_jobs=18):
    save_dir = os.path.join(weight_dir, save_name)
    assert os.path.exists(save_dir)
    #df = load_from_cache(df_name)

    fl_name = os.path.join(weight_dir, df_name)
    if os.path.exists('{}.dat'.format(fl_name)):
        preds_df = load_from_cache(fl_name)
    else:
        fl_names = glob.glob(
            os.path.join(weight_dir, '{}_[0-9+].dat'.format(df_name)))
        preds_df = load_from_cache_multi(fl_name, nb=len(fl_names))
    print(preds_df.shape)
    masks0 = Parallel(n_jobs)(delayed(postprocess)(np.concatenate(
        preds_df.loc[ind, ['mask', 'ly', 'lx', 'ldr', 'ldl']], -1), config)
                              for ind in preds_df.index)
    masks0 = Parallel(n_jobs)(delayed(renumber_mask)(mask) for mask in masks0)
    masks0 = [x[0] for x in masks0]
    preds_df['pred0'] = masks0

    masks = Parallel(n_jobs)(delayed(modify_w_unet)(
        preds_df.loc[ind, 'pred0'],
        np.concatenate(preds_df.loc[ind,
                                    ['mask', 'ly', 'lx', 'ldr', 'ldl']], -1))
                             for ind in preds_df.index)
    preds_df['pred'] = masks

    save_to_cache(preds_df, os.path.join(weight_dir, df_name))
def train_valid_split():
    df = load_from_cache('train_df_fixed')
    df_256 = load_from_cache('train_df_256_fixed')
    df_256['id'] = df.loc[df_256['image_id'].values, 'id'].values
    train_ids = df['id'][::2]   # even positions -> train
    valid_ids = df['id'][1::2]  # odd positions -> validation
    train_df = df_256.loc[df_256['id'].isin(train_ids)]
    valid_df = df_256.loc[df_256['id'].isin(valid_ids)]
    save_to_cache(train_df, 'train_final_df')
    save_to_cache(valid_df, 'valid_df')
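
# The split above interleaves images (even positions train, odd validate) and
# then propagates the split to the 256-crop table via isin. A toy illustration
# with hypothetical ids:
import pandas as pd

df = pd.DataFrame({'id': list('abcdef')})
crops = pd.DataFrame({'id': list('aabbccddeeff')})  # two crops per image
train = crops[crops['id'].isin(df['id'][::2])]      # images a, c, e
valid = crops[crops['id'].isin(df['id'][1::2])]     # images b, d, f
assert len(train) == len(valid) == 6
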
def get_test_df(test_dir, save_name='test_df'):
    test_ids = next(os.walk(test_dir))[1]
    test_df = []
    for id_ in test_ids:
        path = os.path.join(test_dir, id_, 'images', id_+'.png')
        image = cv2.imread(path)
        test_df.append({'id': id_, 'image': image, 'path': path,
                        'shape': image.shape})
    test_df = pd.DataFrame(test_df).sort_values('shape').reset_index()
    save_to_cache(test_df, save_name)    
def main():
    ################################################################################
    weight_dir = '/media/work/Data/dsb/cache/UnetRCNN_180410-221747'
    ################################################################################
    df_name = 'stage2_df'
    df = load_from_cache(df_name)
    tags = ['quarter', 'half', None, 'two']
    preds = []
    for tag in tags:
        if tag is None:
            fl = os.path.join(weight_dir, '{}.dat'.format(df_name))
            pred = load_file(fl)
        elif tag == 'two':
            fl_names = glob.glob(os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                                              '{}_{}_[0-9+].dat'.format(df_name, tag)))+\
                       glob.glob(os.path.join(weight_dir, '{}_{}'.format(df_name, tag), 
                                              '{}_{}_[0-9][0-9].dat'.format(df_name, tag)))
            pred = load_from_cache_multi(os.path.join(weight_dir, 
                                                          '{}_{}'.format(df_name, tag),
                                                          '{}_{}'.format(df_name, tag)),             
                       nb=len(fl_names))
        else:
            fl = os.path.join(weight_dir, '{}_{}.dat'.format(df_name,tag))
            pred = load_file(fl)            
        preds.append(pred)
    
    nb_fls = len(tags)
    results = []
    for ind in df.index:
        masks = [pred.loc[ind, 'pred'] for pred in preds]
        scores = [pred.loc[ind, 'con'] for pred in preds]

        # flatten per-scale instance scores into '{scale}_{label}' keys and
        # keep only instances scoring below 0.2, in ascending score order
        res = {}
        for key, vals in zip(np.arange(nb_fls), scores):
            for nb in range(len(vals)):
                res['{}_{}'.format(key, nb)] = vals[nb]
        res = pd.Series(res).sort_values()
        res = res[res < 0.2]

        # greedily add instances, skipping any candidate whose pixels are
        # already at least half covered by previously accepted instances
        mask = np.zeros_like(masks[0], dtype='int16')
        val = 1
        for ind_res in res.index:
            size, label = ind_res.split('_')
            size, label = int(size), int(label)
            index = masks[size] == label + 1
            if (np.sum(mask[index] > 0) / np.sum(index)) < 0.5:
                mask[index & (mask == 0)] = val
                val = val + 1
        results.append(mask)
        
    preds_df = pd.DataFrame(index=df.index)
    preds_df['pred'] = results

    save_to_cache(preds_df, os.path.join(weight_dir, 'preds_df_scale_01'))
    make_submission(preds_df)
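
# The merge loop in main() accepts instances in ascending score order and
# rejects a candidate once at least half of its pixels are already claimed.
# A minimal self-contained version of that acceptance test:
import numpy as np


def try_add_instance(merged, candidate, next_val):
    """Add boolean mask `candidate` to `merged` unless >=50% already covered."""
    if np.sum(merged[candidate] > 0) / np.sum(candidate) < 0.5:
        merged[candidate & (merged == 0)] = next_val
        return next_val + 1
    return next_val


merged = np.zeros((4, 4), dtype='int16')
cand = np.zeros((4, 4), dtype=bool)
cand[:2, :2] = True
assert try_add_instance(merged, cand, 1) == 2 and merged.max() == 1
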
def get_cons(df_name, weight_dir, n_jobs=18):
    fl_name = os.path.join(weight_dir, df_name)
    preds_df = load_from_cache(fl_name)
    print(preds_df.shape)
    cons_total = Parallel(n_jobs)(delayed(get_cons_local_valid)(
        preds_df.loc[ind, 'pred'],
        np.concatenate(preds_df.loc[ind,
                                    ['mask', 'ly', 'lx', 'ldr', 'ldl']], -1))
                                  for ind in preds_df.index)
    preds_df['con'] = cons_total
    save_to_cache(preds_df, os.path.join(weight_dir, '{}'.format(df_name)))
def main():
    config = DsbConfig()

    df = load_from_cache('train_df_fixed')
    df_cut = resave(df, config)
    df_cut['shape_id'] = df.loc[df_cut['image_id'].values, 'shape_id'].values
    save_to_cache(df_cut, 'train_df_256_fixed')

    for df_name in ['2009isbi', 'TNBC', 'weebly']:
        df = load_from_cache(df_name)
        df_cut = resave(df, config)
        save_to_cache(df_cut, '{}_256'.format(df_name))
def get_mask_scale(df_name,
                   config,
                   weight_dir,
                   save_name,
                   tag='half',
                   n_jobs=18):
    fl_name = os.path.join(weight_dir, df_name + '_' + tag)
    if os.path.exists('{}.dat'.format(fl_name)):
        preds_df = load_from_cache(fl_name)
    else:
        fl_names = glob.glob(os.path.join(weight_dir, '{}_{}_[0-9+].dat'.format(df_name, tag)))+\
                   glob.glob(os.path.join(weight_dir, '{}_{}_[0-9][0-9].dat'.format(df_name, tag)))

        preds_df = load_from_cache_multi(fl_name, nb=len(fl_names))

    print(preds_df.shape)

    save_dir = os.path.join(weight_dir, save_name)
    assert os.path.exists(save_dir)
    df = load_from_cache(df_name)

    masks0_scale = Parallel(n_jobs)(delayed(postprocess)(np.concatenate(
        preds_df.loc[ind, ['mask', 'ly', 'lx', 'ldr', 'ldl']], -1), config)
                                    for ind in preds_df.index)
    masks0_scale = Parallel(n_jobs)(delayed(renumber_mask)(mask)
                                    for mask in masks0_scale)
    preds_df['pred0_scale'] = [x[0].astype('int16') for x in masks0_scale]

    masks_scale = Parallel(n_jobs)(delayed(modify_w_unet)(
        preds_df.loc[ind, 'pred0_scale'],
        np.concatenate(preds_df.loc[ind,
                                    ['mask', 'ly', 'lx', 'ldr', 'ldl']], -1))
                                   for ind in preds_df.index)
    preds_df['pred_scale'] = [x.astype('int16') for x in masks_scale]

    masks = Parallel(n_jobs)(
        delayed(cv2.resize)(preds_df.loc[ind, 'pred_scale'],
                            dsize=(df.loc[ind, 'shape'][1],
                                   df.loc[ind, 'shape'][0]),
                            interpolation=cv2.INTER_NEAREST)
        for ind in df.index)
    preds_df['pred'] = [x.astype('int16') for x in masks]

    if tag != 'two':
        save_to_cache(preds_df,
                      os.path.join(weight_dir, '{}_{}'.format(df_name, tag)))
    else:
        save_to_cache_multi(
            preds_df,
            os.path.join(weight_dir, '{}_{}'.format(df_name, tag),
                         '{}_{}'.format(df_name, tag)), 10)
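
# The final resize back to the original image size must use INTER_NEAREST, as
# above, so interpolation cannot invent new label values. A quick check:
import cv2
import numpy as np

labels = np.array([[0, 1], [2, 3]], dtype='int16')
up = cv2.resize(labels, dsize=(4, 4), interpolation=cv2.INTER_NEAREST)
assert set(np.unique(up)) == {0, 1, 2, 3}   # only the original labels survive
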
def get_train_df(train_dir, save_name):
    train_ids = next(os.walk(train_dir))[1]
    train_df = []
    for id_ in train_ids:
        image = cv2.imread(os.path.join(train_dir, id_, 'images', id_+'.png'))
        gray = is_gray(image)
        shape = image.shape[:2] if gray else image.shape
        masks = []
        for mask_fl in glob.glob(os.path.join(train_dir, id_, 'masks', '*.*')):
            mask_ = cv2.imread(mask_fl, 0)
            masks.append((mask_>0).astype(np.uint8))
        masks = np.stack(masks, 2)
        mask = mask_m21(masks)
        train_df.append({'id': id_, 'image': image,
                         'is_gray': gray, 'shape': shape, 'mask': mask,
                         'nb_instance': mask.max()})
    train_df = pd.DataFrame(train_df).sort_values('shape').reset_index()
    train_df['shape_id'] = LabelEncoder().fit_transform(train_df['shape'])
    save_to_cache(train_df, save_name)    
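
# mask_m21() (defined elsewhere) collapses the (H, W, N) stack of binary masks
# into a single labeled mask. A hedged sketch of that many-to-one step,
# assuming the instances do not overlap:
import numpy as np


def mask_m21_sketch(masks):
    """(H, W, N) binary stack -> (H, W) int mask with labels 1..N."""
    labels = np.arange(1, masks.shape[2] + 1, dtype='int32')
    return (masks.astype('int32') * labels).max(axis=2)


stack = np.zeros((2, 2, 2), dtype=np.uint8)
stack[0, 0, 0] = 1
stack[1, 1, 1] = 1
assert mask_m21_sketch(stack).tolist() == [[1, 0], [0, 2]]
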
def get_images():
    ######################################################################
    data_dir = '/media/work/Ubuntu 16.0/ext/2009_ISBI/data/images/dna-images'
    # where the images are stored; adjust to your environment
    ########################################################################
    assert os.path.exists(data_dir)
    df = []
    folders = next(os.walk(data_dir))[1]
    for folder in folders:
        fls = next(os.walk(os.path.join(data_dir, folder)))[2]
        fls = sorted(fls, key=lambda x: int(x[:-4].split('-')[-1]))
        for fl in fls:
            filepath = os.path.join(data_dir, folder, fl)
            image = cv2.imread(filepath)
            df.append({
                'image': image,
                'shape': image.shape,
                'md5': md5sum(filepath),
                'path': filepath,
                'id': '{}_{}'.format(folder, fl[:-4])
            })
    df = pd.DataFrame(df)
    save_to_cache(df, '2009isbi')
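
# md5sum() is a small helper from this repo; a minimal sketch with hashlib,
# reading in chunks so large images are not loaded into memory at once:
import hashlib


def md5sum_sketch(filepath, chunk_size=1 << 20):
    h = hashlib.md5()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()
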
def main():
    config = DsbConfig()
    df_name = 'stage2_df'
    save_name = 'stage2'
    ###################################################################################
    weight_dir = '/home/work/data/dsb/cache/UnetRCNN_180410-221747'
    # set to the directory where the trained model weights are saved
    ##################################################################################
    predict_test_rcnn(df_name, weight_dir)
    predict_test_rcnn_half(df_name, weight_dir)
    predict_test_rcnn_quarter(df_name, weight_dir)
    predict_test_rcnn_two(df_name, weight_dir)

    # combine the per-run zoom-2 predictions into one file per shape group
    for nb in range(25):
        fl = os.path.join(weight_dir, 'stage2_df_two_{}.dat'.format(nb))
        if os.path.exists(fl):
            continue
        fls = glob.glob(
            os.path.join(weight_dir, 'stage2_df_two_{}_*.dat'.format(nb)))
        preds_df = load_from_cache_multi(fl[:-4], len(fls))
        save_to_cache(preds_df, fl[:-4])
        for part_fl in fls:
            os.remove(part_fl)

    get_mask(df_name, config, weight_dir, save_name)
    get_mask_scale(df_name, config, weight_dir, save_name, tag='half')
    get_mask_scale(df_name, config, weight_dir, save_name, tag='quarter')

    get_mask_scale(df_name, config, weight_dir, save_name, tag='two')

    get_cons(df_name, weight_dir)
    get_cons_scale(df_name, weight_dir, tag='half')
    get_cons_scale(df_name, weight_dir, tag='quarter')
    get_cons_scale(df_name, weight_dir, tag='two')
def predict_test_rcnn_two(df_name,
                          weight_dir,
                          weight_fl=None,
                          start=0,
                          end=100,
                          start_run=0):
    config = DsbConfig()
    config.BATCH_SIZE = 1
    config.GPU_COUNT = 1
    max_shape = 1024
    unet_ratio, tp = 1, 'all'
    df = load_from_cache(df_name)
    df['shape'] = df['image'].apply(
        lambda x: (x.shape[0] * 2, x.shape[1] * 2, x.shape[2]))

    output_names = ['mask', 'ly', 'lx', 'ldr', 'ldl']
    nb_outputs = [0, 9, 10, 11, 12]
    vals = np.unique(df['shape'])

    if weight_fl is None:
        weight_fls = glob.glob(os.path.join(weight_dir, '*.hdf5'))
        weight_fls = sorted(
            weight_fls,
            key=lambda x: float(os.path.basename(x)[:-5].split('_')[-1]))
        weight_fl = weight_fls[0]

    for nb, shape in enumerate(vals):
        if nb < start or nb >= end:
            continue
        if max(shape) <= max_shape:
            new_shape = 64 * int(np.ceil(max(shape) / 64))
        else:
            new_shape = 512 * int(np.ceil(max(shape) / 512))
        ind_shape = df[df['shape'] == shape].index
        print('{}/{}'.format(nb, len(vals)), len(ind_shape), shape, new_shape)

        if len(ind_shape) * (new_shape // 512)**2 > 800:
            nb_run = (len(ind_shape) * (new_shape // 512)**2) // 800 + 1
            size = int(len(ind_shape) / nb_run) + 1
            ind_shape0 = ind_shape.copy()
        else:
            nb_run = 1
            size = len(ind_shape)

        for run in range(nb_run):
            if run < start_run:
                continue
            if nb_run != 1:
                # local names keep the function-level start/end (which gate the
                # shape loop) from being clobbered between runs
                run_start = run * size
                run_end = min((run + 1) * size, len(ind_shape0))
                ind_shape = ind_shape0[run_start:run_end]

            preds_df = pd.DataFrame(index=df.index[ind_shape],
                                    columns=output_names)

            model = model_rcnn(tp, unet_ratio, 'sgd', config, weight_dir,
                               min(new_shape, max_shape))
            #model = model_unet(new_shape, unet_ratio, tp, config=config)
            model.load_weights(weight_fl)
            model.compile(1e-3)

            images = np.stack([
                cv2.resize(image,
                           dsize=(shape[1], shape[0]),
                           interpolation=cv2.INTER_LINEAR)
                for image in df.loc[ind_shape, 'image']
            ], 0)
            print(images.shape)
            if (new_shape, new_shape) != shape[:2]:
                y1 = (new_shape - images.shape[1]) // 2
                x1 = (new_shape - images.shape[2]) // 2
                y2 = new_shape - images.shape[1] - y1
                x2 = new_shape - images.shape[2] - x1
                images = np.pad(images, ((0, 0), (y1, y2), (x1, x2), (0, 0)),
                                mode='constant',
                                constant_values=0)
            else:
                y1, x1, y2, x2 = 0, 0, 0, 0
            X = get_inputs_rcnn(images)

            if new_shape > max_shape:
                nb_cut = int(np.ceil(new_shape / 512)) - 1
                y_preds = [np.zeros((images.shape[0], new_shape, new_shape, 1), dtype='float32')]+\
                          [np.zeros((images.shape[0], new_shape, new_shape, 2), dtype='float32') for _ in range(4)]
                for nb_y in range(nb_cut):
                    start_y, end_y = 512 * nb_y, 512 * (nb_y + 2)
                    shift_start_y = 0 if nb_y == 0 else 256
                    shift_end_y = 0 if nb_y == nb_cut - 1 else -256
                    for nb_x in range(nb_cut):
                        start_x, end_x = 512 * nb_x, 512 * (nb_x + 2)
                        shift_start_x = 0 if nb_x == 0 else 256
                        shift_end_x = 0 if nb_x == nb_cut - 1 else -256

                        print(start_y, end_y, start_x, end_x)
                        print(shift_start_y, shift_end_y, shift_start_x,
                              shift_end_x)
                        X_nb = [
                            X[0][:, 4 * start_y:4 * end_y,
                                 4 * start_x:4 * end_x],
                            X[1][:, start_y:end_y, start_x:end_x]
                        ]
                        preds = model.keras_model.predict(X_nb,
                                                          batch_size=1,
                                                          verbose=1)
                        for i, nb_output in enumerate(nb_outputs):
                            y_preds[i][:, start_y+shift_start_y:end_y+shift_end_y,
                                   start_x+shift_start_x:end_x+shift_end_x]=\
                                   preds[nb_output][:,shift_start_y:max_shape+shift_end_y,
                                        shift_start_x:max_shape+shift_end_x]
                        del preds
            else:
                y_preds = model.keras_model.predict(X, batch_size=1, verbose=1)
                y_preds = [y_preds[i] for i in nb_outputs]

            for i, output_name in enumerate(output_names):
                y_pred = y_preds[i][:, :, :, :1]
                if (new_shape, new_shape) != shape[:2]:
                    y_pred = y_pred[:, y1:new_shape - y2,
                                    x1:new_shape - x2, :1]
                preds_df.loc[ind_shape,
                             output_name] = list(y_pred.astype('float16'))

            if nb_run == 1:
                save_to_cache(
                    preds_df,
                    os.path.join(weight_dir, '{}_two_{}'.format(df_name, nb)))
            else:
                save_to_cache(
                    preds_df,
                    os.path.join(weight_dir,
                                 '{}_two_{}_{}'.format(df_name, nb, run)))

    if len(df) < 200:
        preds_df = load_from_cache_multi(
            os.path.join(weight_dir, '{}_two'.format(df_name)), len(vals))
        save_to_cache(preds_df,
                      os.path.join(weight_dir, '{}_two'.format(df_name)))
        for nb in range(len(vals)):
            os.remove(
                os.path.join(weight_dir, '{}_two_{}.dat'.format(df_name, nb)))
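
# The tiling loop above slides a 1024-px window in 512-px steps and, away from
# the image border, keeps only the central part of each tile (the 256-px shift
# variables), so seams land in well-contexted regions. A 1-D illustration of
# the same bookkeeping:
import numpy as np


def stitch_1d(full_len, win=1024, step=512, margin=256):
    out = np.empty(full_len, dtype='int32')
    nb_cut = full_len // step - 1
    for nb in range(nb_cut):
        start, end = step * nb, step * (nb + 2)
        lo = 0 if nb == 0 else margin
        hi = 0 if nb == nb_cut - 1 else -margin
        tile = np.arange(start, end)   # stands in for a tile's prediction
        out[start + lo:end + hi] = tile[lo:win + hi]
    return out


assert (stitch_1d(2048) == np.arange(2048)).all()
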
def predict_test_rcnn(df_name, weight_dir, weight_fl=None):
    config = DsbConfig()
    config.BATCH_SIZE = 1
    config.GPU_COUNT = 1

    max_shape = 1024
    unet_ratio, tp = 1, 'all'
    df = load_from_cache(df_name)

    output_names = ['mask', 'ly', 'lx', 'ldr', 'ldl']
    nb_outputs = [0, 9, 10, 11, 12]
    preds_df = pd.DataFrame(index=df.index, columns=output_names)
    vals = np.unique(df['shape'])

    if weight_fl is None:
        weight_fls = glob.glob(os.path.join(weight_dir, '*.hdf5'))
        weight_fls = sorted(
            weight_fls,
            key=lambda x: float(os.path.basename(x)[:-5].split('_')[-1]))
        weight_fl = weight_fls[0]

    for nb, shape in enumerate(vals):
        ind_shape = df[df['shape'] == shape].index
        if max(shape) <= max_shape:
            new_shape = 64 * int(np.ceil(max(shape) / 64))
        else:
            new_shape = 512 * int(np.ceil(max(shape) / 512))
        print('{}/{}'.format(nb, len(vals)), len(ind_shape), shape, new_shape)
        model = model_rcnn(tp, unet_ratio, 'sgd', config, weight_dir,
                           min(new_shape, max_shape))
        #model = model_unet(new_shape, unet_ratio, tp, config=config)
        model.load_weights(weight_fl)
        model.compile(1e-3)
        images = np.stack(df.loc[ind_shape, 'image'], 0)

        if (new_shape, new_shape) != shape[:2]:
            y1 = (new_shape - images.shape[1]) // 2
            x1 = (new_shape - images.shape[2]) // 2
            y2 = new_shape - images.shape[1] - y1
            x2 = new_shape - images.shape[2] - x1
            images = np.pad(images, ((0, 0), (y1, y2), (x1, x2), (0, 0)),
                            mode='constant',
                            constant_values=0)
        else:
            y1, x1, y2, x2 = 0, 0, 0, 0
        X = get_inputs_rcnn(images)

        if new_shape > max_shape:
            nb_cut = int(np.ceil(new_shape / 512)) - 1
            y_preds = [np.zeros((images.shape[0], new_shape, new_shape, 1), dtype='float32')]+\
                      [np.zeros((images.shape[0], new_shape, new_shape, 2), dtype='float32') for _ in range(12)]
            for nb_y in range(nb_cut):
                start_y, end_y = 512 * nb_y, 512 * (nb_y + 2)
                shift_start_y = 0 if nb_y == 0 else 256
                shift_end_y = 0 if nb_y == nb_cut - 1 else -256
                for nb_x in range(nb_cut):
                    start_x, end_x = 512 * nb_x, 512 * (nb_x + 2)
                    shift_start_x = 0 if nb_x == 0 else 256
                    shift_end_x = 0 if nb_x == nb_cut - 1 else -256

                    print(start_y, end_y, start_x, end_x)
                    print(shift_start_y, shift_end_y, shift_start_x,
                          shift_end_x)
                    X_nb = [
                        X[0][:, 4 * start_y:4 * end_y, 4 * start_x:4 * end_x],
                        X[1][:, start_y:end_y, start_x:end_x]
                    ]
                    preds = model.keras_model.predict(X_nb,
                                                      batch_size=1,
                                                      verbose=1)
                    for nb_output in range(13):
                        y_preds[nb_output][:, start_y+shift_start_y:end_y+shift_end_y,
                               start_x+shift_start_x:end_x+shift_end_x]=\
                               preds[nb_output][:,shift_start_y:max_shape+shift_end_y,
                                    shift_start_x:max_shape+shift_end_x]
        else:
            y_preds = model.keras_model.predict(X, batch_size=1, verbose=1)

        for nb_output, output_name in zip(nb_outputs, output_names):
            y_pred = y_preds[nb_output][:, :, :, :1]
            if (new_shape, new_shape) != shape[:2]:
                y_pred = y_pred[:, y1:new_shape - y2, x1:new_shape - x2, :1]
            preds_df.loc[ind_shape,
                         output_name] = list(y_pred.astype('float16'))

    if len(preds_df) > 500:
        save_to_cache_multi(preds_df, os.path.join(weight_dir, df_name),
                            len(preds_df) // 500 + 1)
    else:
        save_to_cache(preds_df, os.path.join(weight_dir, df_name))