示例#1
0
def _save_preds(
        model_path='/data/pneumo_log/val_1/2019_0805_0344/best_weights.hdf5',
        data_path='/data/pneumo/fold/1/'):
    '''
    Predict every .npy sample found under data_path and save the results.

    Each input file is loaded as a dictionary that must contain an 'img'
    entry. The model output is stored in the same dictionary under 'pred'
    and the dictionary is written, with the original file name, to
    <two levels above the model file>/val_predictions/<model folder name>/.

    Args:
        model_path: path of the model file, handed to _load_model.
        data_path: prefix that '*.npy' is appended to for globbing, so a
            directory must be given with its trailing '/'.
    '''
    data_path_list = glob(data_path + '*.npy')

    path_parts = model_path.split('/')
    # e.g. '/data/pneumo_log/val_1'
    base_save_path = '/'.join(path_parts[:-2])
    # e.g. '2019_0805_0344'
    folder_name = path_parts[-2]
    # e.g. '/data/pneumo_log/val_1/val_predictions/2019_0805_0344/'
    save_path = base_save_path + '/val_predictions/' + folder_name + '/'

    # loading model
    model = _load_model(model_path=model_path)

    print('saving dirs: {}'.format(save_path))
    data_prep._make_dir(save_path)

    # A dedicated loop name keeps the data_path argument intact.
    for sample_path in tqdm(data_path_list):
        # this includes .npy
        file_name = sample_path.split('/')[-1]
        # The files hold a pickled dict inside a 0-d object array; recent
        # NumPy versions refuse to unpickle unless allow_pickle=True.
        data = np.load(sample_path, allow_pickle=True)[()]
        img = data['img']
        data['pred'] = _pred_img(img, model)

        np.save(save_path + file_name, data)
示例#2
0
def make_submission(model_path,
                    thresh,
                    small_thresh=2048,
                    test_base_path='/data/pneumo/dicom-images-test/',
                    save=False):
    '''
    Predict all test DICOM files with one model and write a submission csv.

    Everything is written next to the model file, under
    <model dir>/submission/<model file name without extension>/
    (e.g. /data/pneumo_log/val_1/2019_0815_1742/submission/best_weights/).
    A per-model sub folder is used because a snapshot run can leave several
    model files in the same directory.

    Args:
        model_path: path of the model file.
        thresh: predictions strictly above this value become 1, others 0.
        small_thresh: a binary mask whose pixel sum is below this value is
            cleared completely.
        test_base_path: root that is globbed with '/*/*/*.dcm'.
        save: when True, also store one .npy per image holding
            {'pred': binary mask, 'pred_row': raw prediction}.

    Returns:
        The output directory when save is True, otherwise None.
    '''
    path_parts = model_path.split('/')
    # e.g. best_weights
    weights_name = path_parts[-1].split('.')[0]
    # e.g. 2019_0815_1742
    run_name = path_parts[-2]
    # e.g. /data/pneumo_log/val_1/2019_0815_1742/submission/best_weights
    out_dir = '/'.join(path_parts[:-1]) + '/submission/' + weights_name
    data_prep._make_dir(out_dir)
    csv_name = 'submission_' + run_name + '_' + weights_name + '.csv'

    dcm_paths = glob(test_base_path + '/*/*/*.dcm')
    model = pred_util._load_model(model_path=model_path)

    image_ids = []
    encoded_masks = []
    for dcm_path in tqdm(dcm_paths):
        image_id = dcm_path.split('/')[-1].split('.dcm')[0]
        image_ids.append(image_id)

        # the raw pixel array is fed to the model without preprocessing
        pixels = pydicom.dcmread(dcm_path).pixel_array
        # original author's note: the prediction is already 1024 * 1024,
        # not binary, with values in 0-1
        prob_map = pred_util._pred_img(pixels, model)
        mask = np.where(prob_map > thresh, 1, 0)

        # drop masks that are too small as a whole
        if mask.sum() < small_thresh:
            mask[:] = 0

        if save:
            # Stored as a dictionary so that ensemble_util._ensemble_preds
            # can be applied to these files later.
            np.save(out_dir + '/' + image_id, {
                'pred': mask,
                'pred_row': prob_map
            })

        # 0/1 -> 0/255 and transpose for the submission format
        rle_input = (mask.T * 255).astype(np.uint8)
        encoded_masks.append(mask2rle(rle_input, 1024, 1024))

    sub_df = pd.DataFrame({'ImageId': image_ids, 'EncodedPixels': encoded_masks})
    # an empty encoding is written as '-1'
    sub_df.loc[sub_df.EncodedPixels == '', 'EncodedPixels'] = '-1'
    sub_df.to_csv(out_dir + '/' + csv_name, index=False)

    # the directory is only handed back when the masks were saved
    # (used for the later ensemble)
    return out_dir if save else None
示例#3
0
def make_ensemble_submission(model_path_list,
                             thresh_list=None,
                             small_thresh=2048,
                             test_base_path='/data/pneumo/dicom-images-test',
                             column_name='score',
                             save=False,
                             save_path=None):
    '''
    Build an ensemble submission from several models.

    Steps:
    1. run make_submission() for every model with save=True, which writes
       each model's own csv and its binary predictions as numpy
    2. ensemble the saved predictions with ensemble_util.ensemble_dirs
    3. build the submission table with _make_submission_from_predictions
       (small_thresh is forwarded)
    4. save it as ensemble_submission.csv under save_path

    Args:
        model_path_list: model file paths to ensemble.
        thresh_list: one threshold per model, in the same order. When None,
            _get_best_threshold is called for every model.
        small_thresh: forwarded to make_submission and to
            _make_submission_from_predictions.
        test_base_path: forwarded to make_submission.
        column_name: forwarded to _get_best_threshold.
        save: not used inside this function.
        save_path: output directory. When None it becomes
            <directory of the first model>/ensemble_submission/.
    '''
    if save_path is None:
        # derive the output directory from the first model path
        first_model_dir = '/'.join(model_path_list[0].split('/')[:-1])
        save_path = first_model_dir + '/ensemble_submission/'
    print('start making ensemble submission under {}'.format(save_path))
    data_prep._make_dir(save_path)

    if thresh_list is None:
        # look up the best threshold of every model
        thresh_list = [
            _get_best_threshold(path, column_name=column_name)
            for path in model_path_list
        ]

    binary_mask_path_list = []
    for idx, path in enumerate(model_path_list):
        print('start pred test data by {}'.format(path))
        # the returned directory should look like
        # /data/pneumo_log/val_1/2019_0815_1742/submission/best_weights/
        mask_dir = make_submission(path,
                                   thresh_list[idx],
                                   small_thresh=small_thresh,
                                   test_base_path=test_base_path,
                                   save=True)
        binary_mask_path_list.append(mask_dir)

    # write the ensembled predictions under save_path
    ensemble_util.ensemble_dirs(binary_mask_path_list=binary_mask_path_list,
                                cpu_num=16,
                                save_path=save_path,
                                data_key='pred')

    # the saved predictions are assumed to be binary already, so no
    # threshold is passed here
    sub_df = _make_submission_from_predictions(save_path,
                                               small_thresh=small_thresh)
    csv_path = save_path + '/' + 'ensemble_submission.csv'
    sub_df.to_csv(csv_path, index=False)
    print('saved submission file at {}'.format(csv_path))
    print(
        '$kaggle competitions submit siim-acr-pneumothorax-segmentation -f {} -m "snapshot ensembles"'
        .format(csv_path))
示例#4
0
def ensemble_dirs(binary_mask_path_list=[], cpu_num=16, save_path='', data_key='pred'):
    '''
    Ensemble the .npy files of several prediction folders into save_path.

    The image ids are taken from the .npy files of the FIRST folder in
    binary_mask_path_list; one job per image id is dispatched to a process
    pool running _wrap_ensemble_preds.
    If TTA, change data_key to 'mean_pred'.

    Args:
        binary_mask_path_list: folders holding the per-model .npy files.
            Must contain at least one folder.
        cpu_num: number of worker processes.
        save_path: output directory, created through data_prep._make_dir.
        data_key: dictionary key handed to every job.
    '''
    image_id_list = [
        path.split('/')[-1].split('.npy')[0]
        for path in glob(binary_mask_path_list[0] + '/*.npy')
    ]
    data_prep._make_dir(save_path)

    job_args = [(image_id, save_path, binary_mask_path_list, data_key)
                for image_id in image_id_list]

    p = Pool(processes=cpu_num)
    try:
        # list() drains the lazy imap iterator so every job actually runs
        list(tqdm(p.imap(_wrap_ensemble_preds, job_args), total=len(job_args)))
    finally:
        # Release the worker processes instead of leaking them. On success
        # all results have been collected already; on failure the remaining
        # jobs are no longer needed.
        p.terminate()
        p.join()
示例#5
0
def _load_eval_w_thresh_list(data_path, thresh_list=[0.5], save_binary=False):
    '''
    Evaluate one saved prediction file at every threshold in thresh_list.

    The file must hold a dictionary with 'mask' and 'pred'. When it also
    holds 'aug_pred' (TTA), 'aug_pred' and 'mean_pred' are evaluated too.

    Args:
        data_path: path of the .npy prediction file.
        thresh_list: thresholds handed to evaluation() one by one.
        save_binary: when True, the binarized predictions (plus the mask, so
            this function can be re-applied to the saved file) are written
            to <directory of data_path>/binary_thresh_<thresh>/<image id>.

    Returns:
        One dictionary per threshold with 'score', 'image_id', 'thresh' and,
        for TTA files, 'aug_score' and 'mean_score'.
    '''
    # The file holds a pickled dict inside a 0-d object array; recent NumPy
    # versions refuse to unpickle unless allow_pickle=True.
    loaded = np.load(data_path, allow_pickle=True)[()]
    mask = loaded['mask']
    pred = loaded['pred']
    image_id = data_path.split('/')[-1].split('.npy')[0]

    # Decide once, on the LOADED dictionary, whether TTA outputs exist. The
    # per-threshold dictionary built below starts without 'aug_pred', so it
    # must not be used for this check.
    has_tta = 'aug_pred' in loaded
    if has_tta:
        aug_pred = loaded['aug_pred']
        mean_pred = loaded['mean_pred']

    result_list = []
    for thresh in thresh_list:
        dice, binary_pred = evaluation(mask, pred, thresh)
        result = {'score': dice}
        # for save. the mask is kept so that this function can be applied to
        # the saved data again
        binary_data = {'pred': binary_pred, 'mask': mask}

        if has_tta:
            aug_dice, aug_binary_pred = evaluation(mask, aug_pred, thresh)
            mean_dice, mean_binary_pred = evaluation(mask, mean_pred, thresh)
            result['aug_score'] = aug_dice
            result['mean_score'] = mean_dice
            binary_data['aug_pred'] = aug_binary_pred
            binary_data['mean_pred'] = mean_binary_pred

        result['image_id'] = image_id
        result['thresh'] = thresh

        if save_binary:
            save_dir = '/'.join(data_path.split('/')
                                [:-1]) + '/binary_thresh_' + str(thresh) + '/'
            data_prep._make_dir(save_dir)
            file_name = save_dir + '/' + str(image_id)
            np.save(file_name, binary_data)

        result_list.append(result)

    return result_list
示例#6
0
def _save_preds(
        model_path='/data/pneumo_log/val_1/2019_0805_0344/best_weights.hdf5',
        data_path='/data/pneumo/fold/1/',
        tta=True):
    '''
    Predict every .npy sample found under data_path and save the results.

    Each input file is loaded as a dictionary that must contain an 'img'
    entry. The model output is added under 'pred'. With tta=True the image
    is also predicted after a flip along its last axis; that prediction is
    flipped back and stored under 'aug_pred', and the average of 'pred' and
    'aug_pred' is stored under 'mean_pred'.

    Files are written, with their original names, to
    <two levels above the model file>/val_predictions/<model folder name>/<model file name without extension>/.

    Args:
        model_path: path of the model file, handed to _load_model.
        data_path: prefix that '*.npy' is appended to for globbing, so a
            directory must be given with its trailing '/'.
        tta: whether to add the flip test-time augmentation outputs.
    '''
    print('start pred with {}'.format(model_path))
    data_path_list = glob(data_path + '*.npy')

    path_parts = model_path.split('/')
    # e.g. '/data/pneumo_log/val_1'
    base_save_path = '/'.join(path_parts[:-2])
    # e.g. '2019_0805_0344'
    folder_name = path_parts[-2]
    # e.g. 'best_weights'; one sub folder per model file (extended for snapshots)
    weights_name = path_parts[-1].split('.')[0]
    # e.g. '/data/pneumo_log/val_1/val_predictions/2019_0805_0344/best_weights/'
    save_path = (base_save_path + '/val_predictions/' + folder_name + '/' +
                 weights_name + '/')

    # loading model
    model = _load_model(model_path=model_path)

    print('saving dirs: {}'.format(save_path))
    data_prep._make_dir(save_path)

    # A dedicated loop name keeps the data_path argument intact.
    for sample_path in tqdm(data_path_list):
        # this includes .npy
        file_name = sample_path.split('/')[-1]
        # The files hold a pickled dict inside a 0-d object array; recent
        # NumPy versions refuse to unpickle unless allow_pickle=True.
        data = np.load(sample_path, allow_pickle=True)[()]
        img = data['img']
        data['pred'] = _pred_img(img, model)
        if tta:
            flip_pred = _pred_img(np.flip(img, axis=-1), model)
            # Undo the flip so the augmented prediction lines up with the
            # original image before it is saved and averaged.
            # NOTE(review): assumes _pred_img returns a mask with the same
            # axis layout as img - confirm.
            data['aug_pred'] = np.flip(flip_pred, axis=-1)
            data['mean_pred'] = 0.5 * data['pred'] + 0.5 * data['aug_pred']

        np.save(save_path + file_name, data)
示例#7
0
def save_make_pseudo_data(
        pred_data_dir='/data/pneumo_log/val_1/2019_0815_1742/submission/snapshot_model_2/',
        zero_max=0.005,
        one_min=0.8,
        cpu_num=16,
        test_base_path='/data/pneumo/dicom-images-test/',
        test_data=True):
    '''
    Create pseudo labels from every .npy prediction under pred_data_dir.

    One job per prediction file is dispatched to a process pool running
    _wrap_save_pseudo_label. Per the original author, each pseudo label is
    saved as a dictionary {'img':, 'mask'}.
    This can be applied to train data (fold) too: set test_data=False.

    Args:
        pred_data_dir: folder holding the prediction .npy files.
        zero_max: forwarded unchanged to every job.
        one_min: forwarded unchanged to every job.
        cpu_num: number of worker processes.
        test_base_path: forwarded unchanged to every job.
        test_data: selects the output folder, pred_data_dir + '/pseudo/'
            when True and pred_data_dir + '/pseudo_train_fold/' otherwise;
            it is also forwarded to every job.
    '''
    if test_data:
        save_path = pred_data_dir + '/pseudo/'
    else:
        save_path = pred_data_dir + '/pseudo_train_fold/'
    data_prep._make_dir(save_path)
    print('start to make pseudo label under {}'.format(save_path))
    pred_data_path_list = glob(pred_data_dir + '/*.npy')

    job_args = [(pred_data_path, save_path, zero_max, one_min, test_base_path,
                 test_data) for pred_data_path in pred_data_path_list]

    p = Pool(processes=cpu_num)
    try:
        # list() drains the lazy imap iterator so every job actually runs
        list(tqdm(p.imap(_wrap_save_pseudo_label, job_args), total=len(job_args)))
    finally:
        # Release the worker processes instead of leaking them. On success
        # all results have been collected already; on failure the remaining
        # jobs are no longer needed.
        p.terminate()
        p.join()