def evaluate_cv():
    """Evaluate a previously trained model on every CV fold and log scores.

    Loads the metadata CSV (optionally subsampled in DEV_MODE), splits the
    training rows by sorted depth into K folds, scores each fold's validation
    part via ``fold_evaluate_loop``, reports per-fold IOU/IOUT, and finally
    logs the mean/std over folds through ``log_scores``.
    """
    metadata = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        # Small random subset keeps dev iterations fast.
        metadata = metadata.sample(PARAMS.dev_mode_size, random_state=SEED)

    train_rows = metadata[metadata['is_train'] == 1]

    splitter = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                        shuffle=PARAMS.shuffle,
                                        random_state=SEED)
    depths = train_rows[DEPTH_COLUMN].values.reshape(-1)

    iou_per_fold, iout_per_fold = [], []
    for fold_id, (_, valid_idx) in enumerate(splitter.split(depths)):
        valid_split = train_rows.iloc[valid_idx]

        LOGGER.info('Started fold {}'.format(fold_id))
        iou, iout, _ = fold_evaluate_loop(valid_split, fold_id)
        LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
        CTX.channel_send('Fold {} IOU'.format(fold_id), 0, iou)
        LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
        CTX.channel_send('Fold {} IOUT'.format(fold_id), 0, iout)

        iou_per_fold.append(iou)
        iout_per_fold.append(iout)

    log_scores(np.mean(iou_per_fold), np.std(iou_per_fold),
               np.mean(iout_per_fold), np.std(iout_per_fold))
def train_evaluate_cv():
    """Train and evaluate a model on every CV fold, then log aggregate scores.

    Reads the metadata CSV (optionally subsampled in DEV_MODE), keeps only
    training rows, and for each fold of a depth-sorted K-fold split fits and
    scores a model via ``fold_fit_evaluate_loop``. Per-fold IOU/IOUT are
    logged and sent to the experiment channel; mean/std over folds go through
    ``log_scores``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        # Small random subset keeps dev iterations fast.
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)

    meta_train = meta[meta['is_train'] == 1]

    # Folds are built over depth values rather than plain row order.
    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits, shuffle=PARAMS.shuffle, random_state=SEED)

    fold_iou, fold_iout = [], []
    for fold_id, (train_idx, valid_idx) in enumerate(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
        train_data_split, valid_data_split = meta_train.iloc[train_idx], meta_train.iloc[valid_idx]

        if USE_AUXILIARY_DATA:
            auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
            # NOTE(review): auxiliary rows are selected by IDs present in the
            # *validation* split and appended to the training data — confirm
            # this is intentional (as written it looks like potential
            # train/validation leakage).
            train_auxiliary = auxiliary[auxiliary[ID_COLUMN].isin(valid_data_split[ID_COLUMN].tolist())]
            train_data_split = pd.concat([train_data_split, train_auxiliary], axis=0)

        LOGGER.info('Started fold {}'.format(fold_id))
        # Third returned value (predictions, presumably) is unused here.
        iou, iout, _ = fold_fit_evaluate_loop(train_data_split, valid_data_split, fold_id)
        LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
        CTX.channel_send('Fold {} IOU'.format(fold_id), 0, iou)
        LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
        CTX.channel_send('Fold {} IOUT'.format(fold_id), 0, iout)

        fold_iou.append(iou)
        fold_iout.append(iout)

    iou_mean, iou_std = np.mean(fold_iou), np.std(fold_iou)
    iout_mean, iout_std = np.mean(fold_iout), np.std(fold_iout)

    log_scores(iou_mean, iou_std, iout_mean, iout_std)
def train():
    """Fit the unet pipeline on the train/valid parts of the first CV fold."""
    metadata = pd.read_csv(PARAMS.metadata_filepath)
    train_rows = metadata[metadata['is_train'] == 1]

    splitter = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                        shuffle=PARAMS.shuffle,
                                        random_state=SEED)
    # Only the first generated split is used for training.
    for train_idx, valid_idx in splitter.split(
            train_rows[DEPTH_COLUMN].values.reshape(-1)):
        break

    meta_train_split = train_rows.iloc[train_idx]
    meta_valid_split = train_rows.iloc[valid_idx]

    if DEV_MODE:
        # Shrink both splits; validation gets half the dev-mode size.
        meta_train_split = meta_train_split.sample(PARAMS.dev_mode_size,
                                                   random_state=SEED)
        meta_valid_split = meta_valid_split.sample(int(PARAMS.dev_mode_size / 2),
                                                   random_state=SEED)

    data = {'input': {'meta': meta_train_split},
            'callback_input': {'meta_valid': meta_valid_split}}

    pipeline = unet(config=CONFIG, train_mode=True)
    pipeline.clean_cache()
    pipeline.fit_transform(data)
    pipeline.clean_cache()
def evaluate_cv():
    """Evaluate a trained model fold-by-fold inside a Neptune experiment.

    For each depth-sorted K-fold validation split, runs
    ``fold_evaluate_loop`` and sends per-fold IOU/IOUT metrics to Neptune;
    finally logs mean/std over all folds via ``log_scores``.
    """
    metadata = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        metadata = metadata.sample(PARAMS.dev_mode_size, random_state=SEED)

    train_rows = metadata[metadata['is_train'] == 1]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['evaluate', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):

        splitter = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                            shuffle=PARAMS.shuffle,
                                            random_state=SEED)
        depths = train_rows[DEPTH_COLUMN].values.reshape(-1)

        iou_per_fold, iout_per_fold = [], []
        for fold_id, (_, valid_idx) in enumerate(splitter.split(depths)):
            valid_split = train_rows.iloc[valid_idx]

            LOGGER.info('Started fold {}'.format(fold_id))
            iou, iout, _ = fold_evaluate_loop(valid_split, fold_id)
            LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
            neptune.send_metric('Fold {} IOU'.format(fold_id), iou)
            LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
            neptune.send_metric('Fold {} IOUT'.format(fold_id), iout)

            iou_per_fold.append(iou)
            iout_per_fold.append(iout)

        log_scores(np.mean(iou_per_fold), np.std(iou_per_fold),
                   np.mean(iout_per_fold), np.std(iout_per_fold))
def train():
    """Fit the network pipeline on the first CV fold, optionally extended
    with auxiliary metadata rows."""
    metadata = pd.read_csv(PARAMS.metadata_filepath)
    train_rows = metadata[metadata['is_train'] == 1]

    splitter = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                        shuffle=PARAMS.shuffle,
                                        random_state=SEED)
    # Only the first generated split is used.
    for train_idx, valid_idx in splitter.split(
            train_rows[DEPTH_COLUMN].values.reshape(-1)):
        break

    meta_train_split = train_rows.iloc[train_idx]
    meta_valid_split = train_rows.iloc[valid_idx]

    if USE_AUXILIARY_DATA:
        # NOTE(review): auxiliary rows are picked by IDs from the *validation*
        # split and appended to training data — confirm this is intentional.
        auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
        valid_ids = meta_valid_split[ID_COLUMN].tolist()
        extra_rows = auxiliary[auxiliary[ID_COLUMN].isin(valid_ids)]
        meta_train_split = pd.concat([meta_train_split, extra_rows], axis=0)

    if DEV_MODE:
        meta_train_split = meta_train_split.sample(PARAMS.dev_mode_size,
                                                   random_state=SEED)
        meta_valid_split = meta_valid_split.sample(int(PARAMS.dev_mode_size / 2),
                                                   random_state=SEED)

    data = {'input': {'meta': meta_train_split},
            'callback_input': {'meta_valid': meta_valid_split}}

    pipeline = network(config=CONFIG, train_mode=True)
    pipeline.clean_cache()
    pipeline.fit_transform(data)
    pipeline.clean_cache()
def train():
    """Fit the network pipeline on the first CV fold inside a Neptune
    experiment, optionally extended with auxiliary metadata rows."""
    metadata = pd.read_csv(PARAMS.metadata_filepath)
    train_rows = metadata[metadata['is_train'] == 1]

    splitter = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                        shuffle=PARAMS.shuffle,
                                        random_state=SEED)
    # Only the first generated split is used.
    for train_idx, valid_idx in splitter.split(
            train_rows[DEPTH_COLUMN].values.reshape(-1)):
        break

    meta_train_split = train_rows.iloc[train_idx]
    meta_valid_split = train_rows.iloc[valid_idx]

    if USE_AUXILIARY_DATA:
        # NOTE(review): auxiliary rows are picked by IDs from the *validation*
        # split and appended to training data — confirm this is intentional.
        auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
        valid_ids = meta_valid_split[ID_COLUMN].tolist()
        extra_rows = auxiliary[auxiliary[ID_COLUMN].isin(valid_ids)]
        meta_train_split = pd.concat([meta_train_split, extra_rows], axis=0)

    if DEV_MODE:
        meta_train_split = meta_train_split.sample(PARAMS.dev_mode_size,
                                                   random_state=SEED)
        meta_valid_split = meta_valid_split.sample(int(PARAMS.dev_mode_size / 2),
                                                   random_state=SEED)

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['train'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):

        data = {'input': {'meta': meta_train_split},
                'callback_input': {'meta_valid': meta_valid_split}}

        pipeline = network(config=CONFIG, train_mode=True)
        pipeline.clean_cache()
        pipeline.fit_transform(data)
        pipeline.clean_cache()
def train_evaluate_cv():
    """Train and evaluate a model on every CV fold inside a Neptune experiment.

    Reads the metadata CSV (optionally subsampled in DEV_MODE), keeps the
    training rows, and for each depth-sorted K-fold split fits and scores a
    model via ``fold_fit_evaluate_loop``. Per-fold AUC is logged and sent to
    Neptune; the mean/std over folds goes through ``log_scores``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        # Small random subset keeps dev iterations fast.
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)

    meta_train = meta[meta['is_train'] == 1]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['train', 'evaluate', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):

        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits, shuffle=PARAMS.shuffle, random_state=SEED)

        fold_auc = []
        for fold_id, (train_idx, valid_idx) in enumerate(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            train_data_split, valid_data_split = meta_train.iloc[train_idx], meta_train.iloc[valid_idx]

            if USE_AUXILIARY_DATA:
                auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
                # NOTE(review): auxiliary rows are selected by IDs present in
                # the *validation* split and appended to the training data —
                # confirm this is intentional (it looks like potential leakage).
                train_auxiliary = auxiliary[auxiliary[ID_COLUMN].isin(valid_data_split[ID_COLUMN].tolist())]
                train_data_split = pd.concat([train_data_split, train_auxiliary], axis=0)

            LOGGER.info('Started fold {}'.format(fold_id))
            # Second returned value (predictions, presumably) is unused here.
            auc, _ = fold_fit_evaluate_loop(train_data_split, valid_data_split, fold_id)
            LOGGER.info('Fold {} AUC {}'.format(fold_id, auc))
            neptune.send_metric('Fold {} AUC'.format(fold_id), auc)

            fold_auc.append(auc)

        auc_mean, auc_std = np.mean(fold_auc), np.std(fold_auc)
        log_scores(auc_mean, auc_std)
def train_evaluate_predict_cv():
    """Per CV fold: train, evaluate (IOU/IOUT) and predict; save predictions.

    Runs inside a Neptune experiment. For every depth-sorted K-fold split the
    model is fit on the train part (optionally extended with auxiliary rows),
    scored on the validation part, and used to predict on the test rows.
    Out-of-fold train predictions are flattened into parallel id/prediction
    lists and written out via ``save_predictions`` together with the per-fold
    test predictions.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        # Small random subset keeps dev iterations fast.
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)

    meta_train = meta[meta['is_train'] == 1]
    meta_test = meta[meta['is_train'] == 0]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['train', 'evaluate', 'predict', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):

        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits, shuffle=PARAMS.shuffle, random_state=SEED)

        fold_iou, fold_iout, out_of_fold_train_predictions, out_of_fold_test_predictions = [], [], [], []
        for fold_id, (train_idx, valid_idx) in enumerate(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            train_data_split, valid_data_split = meta_train.iloc[train_idx], meta_train.iloc[valid_idx]

            if USE_AUXILIARY_DATA:
                auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
                # NOTE(review): auxiliary rows are selected by IDs present in
                # the *validation* split and appended to the training data —
                # confirm this is intentional (it looks like potential leakage).
                train_auxiliary = auxiliary[auxiliary[ID_COLUMN].isin(valid_data_split[ID_COLUMN].tolist())]
                train_data_split = pd.concat([train_data_split, train_auxiliary], axis=0)

            LOGGER.info('Started fold {}'.format(fold_id))
            iou, iout, out_of_fold_prediction, test_prediction = fold_fit_evaluate_predict_loop(train_data_split,
                                                                                                valid_data_split,
                                                                                                meta_test,
                                                                                                fold_id)

            LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
            neptune.send_metric('Fold {} IOU'.format(fold_id), iou)
            LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
            neptune.send_metric('Fold {} IOUT'.format(fold_id), iout)

            fold_iou.append(iou)
            fold_iout.append(iout)
            out_of_fold_train_predictions.append(out_of_fold_prediction)
            out_of_fold_test_predictions.append(test_prediction)

        train_ids, train_predictions = [], []
        # Assumes each out_of_fold_prediction is an (ids, predictions) pair —
        # TODO confirm against fold_fit_evaluate_predict_loop's return value.
        for idx_fold, train_pred_fold in out_of_fold_train_predictions:
            train_ids.extend(idx_fold)
            train_predictions.extend(train_pred_fold)

        iou_mean, iou_std = np.mean(fold_iou), np.std(fold_iou)
        iout_mean, iout_std = np.mean(fold_iout), np.std(fold_iout)

        log_scores(iou_mean, iou_std, iout_mean, iout_std)

        save_predictions(train_ids, train_predictions, meta_test, out_of_fold_test_predictions)
def evaluate():
    """Evaluate the trained network on the first CV fold's validation split.

    Loads metadata, takes the validation rows of the first depth-sorted
    K-fold split, runs the inference and mask post-processing pipelines,
    computes IOU/IOUT against the ground-truth masks, reports the scores,
    and dumps ``(meta_valid_split, y_true_valid, y_pred_valid)`` to
    ``validation_results.pkl`` in EXPERIMENT_DIR.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    meta_train = meta[meta['is_train'] == 1]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)
    # Only the first fold's split is needed here.
    for train_idx, valid_idx in cv.split(
            meta_train[DEPTH_COLUMN].values.reshape(-1)):
        break

    meta_valid_split = meta_train.iloc[valid_idx]

    if DEV_MODE:
        meta_valid_split = meta_valid_split.sample(PARAMS.dev_mode_size,
                                                   random_state=SEED)

    # BUG FIX: read the ground-truth masks AFTER the optional DEV_MODE
    # subsampling so y_true_valid lines up row-for-row with the frames that
    # are actually predicted on. Previously the masks were read from the full
    # validation split while predictions used only the sampled subset.
    y_true_valid = utils.read_masks(meta_valid_split[Y_COLUMN].values)

    data = {
        'input': {
            'meta': meta_valid_split,
        },
        'callback_input': {
            'meta_valid': None
        }
    }
    pipeline_network = unet(config=CONFIG, train_mode=False)
    pipeline_postprocessing = pipelines.mask_postprocessing(config=CONFIG)
    pipeline_network.clean_cache()
    output = pipeline_network.transform(data)
    valid_masks = {'input_masks': output}
    output = pipeline_postprocessing.transform(valid_masks)
    pipeline_network.clean_cache()
    pipeline_postprocessing.clean_cache()
    y_pred_valid = output['binarized_images']

    LOGGER.info('Calculating IOU and IOUT Scores')
    iou_score, iout_score = calculate_scores(y_true_valid, y_pred_valid)
    LOGGER.info('IOU score on validation is {}'.format(iou_score))
    CTX.channel_send('IOU', 0, iou_score)
    LOGGER.info('IOUT score on validation is {}'.format(iout_score))
    CTX.channel_send('IOUT', 0, iout_score)

    results_filepath = os.path.join(EXPERIMENT_DIR, 'validation_results.pkl')
    LOGGER.info('Saving validation results to {}'.format(results_filepath))
    joblib.dump((meta_valid_split, y_true_valid, y_pred_valid),
                results_filepath)
# Exemplo n.º 10
# 0
def train_evaluate_predict_cv():
    """Per CV fold: train, evaluate (AUC) and predict; then save predictions.

    Fits a model on each depth-sorted K-fold train split (optionally extended
    with auxiliary rows), scores it on the validation split, predicts on the
    test rows, flattens the out-of-fold train predictions into id/prediction
    lists, logs aggregate AUC, and writes everything via ``save_predictions``.
    """
    metadata = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        metadata = metadata.sample(PARAMS.dev_mode_size, random_state=SEED)

    train_rows = metadata[metadata['is_train'] == 1]
    test_rows = metadata[metadata['is_train'] == 0]

    splitter = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                        shuffle=PARAMS.shuffle,
                                        random_state=SEED)
    depths = train_rows[DEPTH_COLUMN].values.reshape(-1)

    auc_per_fold = []
    oof_train_predictions = []
    oof_test_predictions = []
    for fold_id, (train_idx, valid_idx) in enumerate(splitter.split(depths)):
        train_split = train_rows.iloc[train_idx]
        valid_split = train_rows.iloc[valid_idx]

        if USE_AUXILIARY_DATA:
            # NOTE(review): auxiliary rows are picked by IDs from the
            # validation split and appended to training data — confirm intent.
            auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
            valid_ids = valid_split[ID_COLUMN].tolist()
            extra_rows = auxiliary[auxiliary[ID_COLUMN].isin(valid_ids)]
            train_split = pd.concat([train_split, extra_rows], axis=0)

        LOGGER.info('Started fold {}'.format(fold_id))
        auc, oof_prediction, test_prediction = fold_fit_evaluate_predict_loop(
            train_split, valid_split, test_rows, fold_id)

        LOGGER.info('Fold {} AUC {}'.format(fold_id, auc))
        CTX.channel_send('Fold {} AUC'.format(fold_id), 0, auc)

        auc_per_fold.append(auc)
        oof_train_predictions.append(oof_prediction)
        oof_test_predictions.append(test_prediction)

    train_ids, train_predictions = [], []
    for fold_ids, fold_preds in oof_train_predictions:
        train_ids.extend(fold_ids)
        train_predictions.extend(fold_preds)

    log_scores(np.mean(auc_per_fold), np.std(auc_per_fold))
    save_predictions(train_ids, train_predictions, test_rows,
                     oof_test_predictions)
def train_evaluate_predict_cv():
    """Per CV fold: train, evaluate (IOU/IOUT) and predict; save predictions.

    Fits a model on each depth-sorted K-fold train split, scores it on the
    validation split, predicts on the test rows, flattens the out-of-fold
    train predictions into id/prediction lists, logs aggregate scores, and
    writes everything via ``save_predictions``.
    """
    metadata = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        metadata = metadata.sample(PARAMS.dev_mode_size, random_state=SEED)

    train_rows = metadata[metadata['is_train'] == 1]
    test_rows = metadata[metadata['is_train'] == 0]

    splitter = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                        shuffle=PARAMS.shuffle,
                                        random_state=SEED)
    depths = train_rows[DEPTH_COLUMN].values.reshape(-1)

    iou_per_fold, iout_per_fold = [], []
    oof_train_predictions, oof_test_predictions = [], []
    for fold_id, (train_idx, valid_idx) in enumerate(splitter.split(depths)):
        train_split = train_rows.iloc[train_idx]
        valid_split = train_rows.iloc[valid_idx]

        LOGGER.info('Started fold {}'.format(fold_id))
        iou, iout, oof_prediction, test_prediction = fold_fit_evaluate_predict_loop(
            train_split, valid_split, test_rows, fold_id)

        LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
        CTX.channel_send('Fold {} IOU'.format(fold_id), 0, iou)
        LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
        CTX.channel_send('Fold {} IOUT'.format(fold_id), 0, iout)

        iou_per_fold.append(iou)
        iout_per_fold.append(iout)
        oof_train_predictions.append(oof_prediction)
        oof_test_predictions.append(test_prediction)

    train_ids, train_predictions = [], []
    for fold_ids, fold_preds in oof_train_predictions:
        train_ids.extend(fold_ids)
        train_predictions.extend(fold_preds)

    log_scores(np.mean(iou_per_fold), np.std(iou_per_fold),
               np.mean(iout_per_fold), np.std(iout_per_fold))

    save_predictions(train_ids, train_predictions, test_rows,
                     oof_test_predictions)
def evaluate_predict_cv():
    """Evaluate a trained model per fold and predict on test, inside Neptune.

    For each depth-sorted K-fold validation split, runs
    ``fold_evaluate_predict_loop`` to get the fold AUC plus out-of-fold and
    test predictions, sends per-fold AUC to Neptune, logs aggregate scores,
    and persists all predictions via ``save_predictions``.
    """
    metadata = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        metadata = metadata.sample(PARAMS.dev_mode_size, random_state=SEED)

    train_rows = metadata[metadata['is_train'] == 1]
    test_rows = metadata[metadata['is_train'] == 0]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['evaluate', 'predict', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):

        splitter = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                            shuffle=PARAMS.shuffle,
                                            random_state=SEED)
        depths = train_rows[DEPTH_COLUMN].values.reshape(-1)

        auc_per_fold = []
        oof_train_predictions = []
        oof_test_predictions = []
        for fold_id, (_, valid_idx) in enumerate(splitter.split(depths)):
            valid_split = train_rows.iloc[valid_idx]

            LOGGER.info('Started fold {}'.format(fold_id))
            auc, oof_prediction, test_prediction = fold_evaluate_predict_loop(
                valid_split, test_rows, fold_id)

            LOGGER.info('Fold {} AUC {}'.format(fold_id, auc))
            neptune.send_metric('Fold {} AUC'.format(fold_id), auc)

            auc_per_fold.append(auc)
            oof_train_predictions.append(oof_prediction)
            oof_test_predictions.append(test_prediction)

        train_ids, train_predictions = [], []
        for fold_ids, fold_preds in oof_train_predictions:
            train_ids.extend(fold_ids)
            train_predictions.extend(fold_preds)

        log_scores(np.mean(auc_per_fold), np.std(auc_per_fold))
        save_predictions(train_ids, train_predictions, test_rows,
                         oof_test_predictions)