def evaluate_cv():
    """Evaluate a previously trained pipeline on every CV fold.

    Logs per-fold IOU/IOUT to the logger and to CTX channels, then reports
    the mean/std over folds via ``log_scores``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)

    iou_history, iout_history = [], []
    depth_values = meta_train[DEPTH_COLUMN].values.reshape(-1)
    for fold_id, (_, valid_idx) in enumerate(cv.split(depth_values)):
        valid_data_split = meta_train.iloc[valid_idx]

        LOGGER.info('Started fold {}'.format(fold_id))
        iou, iout, _ = fold_evaluate_loop(valid_data_split, fold_id)
        LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
        CTX.channel_send('Fold {} IOU'.format(fold_id), 0, iou)
        LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
        CTX.channel_send('Fold {} IOUT'.format(fold_id), 0, iout)

        iou_history.append(iou)
        iout_history.append(iout)

    log_scores(np.mean(iou_history), np.std(iou_history),
               np.mean(iout_history), np.std(iout_history))
def train_evaluate_cv():
    """Fit and evaluate the pipeline on each CV fold.

    Optionally augments every fold's training split with auxiliary metadata,
    logs per-fold IOU/IOUT, and reports aggregate scores via ``log_scores``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)

    iou_scores, iout_scores = [], []
    fold_source = cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))
    for fold_id, (train_idx, valid_idx) in enumerate(fold_source):
        train_data_split = meta_train.iloc[train_idx]
        valid_data_split = meta_train.iloc[valid_idx]

        if USE_AUXILIARY_DATA:
            # NOTE(review): auxiliary rows are selected by membership in the
            # *validation* id list — presumably synthetic variants of those
            # images; confirm this is intentional and leak-free.
            auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
            aux_rows = auxiliary[auxiliary[ID_COLUMN].isin(
                valid_data_split[ID_COLUMN].tolist())]
            train_data_split = pd.concat([train_data_split, aux_rows], axis=0)

        LOGGER.info('Started fold {}'.format(fold_id))
        iou, iout, _ = fold_fit_evaluate_loop(train_data_split,
                                              valid_data_split, fold_id)
        LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
        CTX.channel_send('Fold {} IOU'.format(fold_id), 0, iou)
        LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
        CTX.channel_send('Fold {} IOUT'.format(fold_id), 0, iout)

        iou_scores.append(iou)
        iout_scores.append(iout)

    log_scores(np.mean(iou_scores), np.std(iou_scores),
               np.mean(iout_scores), np.std(iout_scores))
def train():
    """Train the unet pipeline on the first CV split of the training metadata."""
    meta = pd.read_csv(PARAMS.metadata_filepath)
    meta_train = meta[meta['is_train'] == 1]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)
    # Only the first fold is used for plain training.
    train_idx, valid_idx = next(
        iter(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))))

    meta_train_split = meta_train.iloc[train_idx]
    meta_valid_split = meta_train.iloc[valid_idx]

    if DEV_MODE:
        meta_train_split = meta_train_split.sample(PARAMS.dev_mode_size,
                                                   random_state=SEED)
        meta_valid_split = meta_valid_split.sample(
            int(PARAMS.dev_mode_size / 2), random_state=SEED)

    data = {'input': {'meta': meta_train_split},
            'callback_input': {'meta_valid': meta_valid_split}}

    pipeline_network = unet(config=CONFIG, train_mode=True)
    pipeline_network.clean_cache()
    pipeline_network.fit_transform(data)
    pipeline_network.clean_cache()
def evaluate_cv():
    """Evaluate the trained pipeline on every CV fold inside a Neptune experiment.

    Sends per-fold IOU/IOUT as Neptune metrics and reports aggregate
    mean/std via ``log_scores``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['evaluate', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                      shuffle=PARAMS.shuffle,
                                      random_state=SEED)

        iou_per_fold, iout_per_fold = [], []
        for fold_id, (_, valid_idx) in enumerate(
                cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            valid_data_split = meta_train.iloc[valid_idx]

            LOGGER.info('Started fold {}'.format(fold_id))
            iou, iout, _ = fold_evaluate_loop(valid_data_split, fold_id)
            LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
            neptune.send_metric('Fold {} IOU'.format(fold_id), iou)
            LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
            neptune.send_metric('Fold {} IOUT'.format(fold_id), iout)

            iou_per_fold.append(iou)
            iout_per_fold.append(iout)

        log_scores(np.mean(iou_per_fold), np.std(iou_per_fold),
                   np.mean(iout_per_fold), np.std(iout_per_fold))
def train():
    """Train the network pipeline on the first CV split, optionally with auxiliary data."""
    meta = pd.read_csv(PARAMS.metadata_filepath)
    meta_train = meta[meta['is_train'] == 1]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)
    # Only the first fold is used for plain training.
    train_idx, valid_idx = next(
        iter(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))))

    meta_train_split = meta_train.iloc[train_idx]
    meta_valid_split = meta_train.iloc[valid_idx]

    if USE_AUXILIARY_DATA:
        # NOTE(review): auxiliary rows are selected by membership in the
        # *validation* id list — presumably synthetic variants of those
        # images; confirm this is intentional and leak-free.
        auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
        aux_rows = auxiliary[auxiliary[ID_COLUMN].isin(
            meta_valid_split[ID_COLUMN].tolist())]
        meta_train_split = pd.concat([meta_train_split, aux_rows], axis=0)

    if DEV_MODE:
        meta_train_split = meta_train_split.sample(PARAMS.dev_mode_size,
                                                   random_state=SEED)
        meta_valid_split = meta_valid_split.sample(
            int(PARAMS.dev_mode_size / 2), random_state=SEED)

    data = {'input': {'meta': meta_train_split},
            'callback_input': {'meta_valid': meta_valid_split}}

    pipeline_network = network(config=CONFIG, train_mode=True)
    pipeline_network.clean_cache()
    pipeline_network.fit_transform(data)
    pipeline_network.clean_cache()
def train():
    """Train the network pipeline on the first CV split inside a Neptune experiment."""
    meta = pd.read_csv(PARAMS.metadata_filepath)
    meta_train = meta[meta['is_train'] == 1]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)
    # Only the first fold is used for plain training.
    train_idx, valid_idx = next(
        iter(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))))

    meta_train_split = meta_train.iloc[train_idx]
    meta_valid_split = meta_train.iloc[valid_idx]

    if USE_AUXILIARY_DATA:
        # NOTE(review): auxiliary rows are selected by membership in the
        # *validation* id list — presumably synthetic variants of those
        # images; confirm this is intentional and leak-free.
        auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
        aux_rows = auxiliary[auxiliary[ID_COLUMN].isin(
            meta_valid_split[ID_COLUMN].tolist())]
        meta_train_split = pd.concat([meta_train_split, aux_rows], axis=0)

    if DEV_MODE:
        meta_train_split = meta_train_split.sample(PARAMS.dev_mode_size,
                                                   random_state=SEED)
        meta_valid_split = meta_valid_split.sample(
            int(PARAMS.dev_mode_size / 2), random_state=SEED)

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['train'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        data = {'input': {'meta': meta_train_split},
                'callback_input': {'meta_valid': meta_valid_split}}

        pipeline_network = network(config=CONFIG, train_mode=True)
        pipeline_network.clean_cache()
        pipeline_network.fit_transform(data)
        pipeline_network.clean_cache()
def train_evaluate_cv():
    """Fit and evaluate on each CV fold inside a Neptune experiment.

    Sends per-fold AUC as a Neptune metric and reports the aggregate
    mean/std via ``log_scores``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['train', 'evaluate', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                      shuffle=PARAMS.shuffle,
                                      random_state=SEED)

        auc_per_fold = []
        for fold_id, (train_idx, valid_idx) in enumerate(
                cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            train_data_split = meta_train.iloc[train_idx]
            valid_data_split = meta_train.iloc[valid_idx]

            if USE_AUXILIARY_DATA:
                # NOTE(review): auxiliary rows are matched against the
                # *validation* ids — confirm this is intentional and leak-free.
                auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
                aux_rows = auxiliary[auxiliary[ID_COLUMN].isin(
                    valid_data_split[ID_COLUMN].tolist())]
                train_data_split = pd.concat([train_data_split, aux_rows], axis=0)

            LOGGER.info('Started fold {}'.format(fold_id))
            auc, _ = fold_fit_evaluate_loop(train_data_split,
                                            valid_data_split, fold_id)
            LOGGER.info('Fold {} AUC {}'.format(fold_id, auc))
            neptune.send_metric('Fold {} AUC'.format(fold_id), auc)
            auc_per_fold.append(auc)

        log_scores(np.mean(auc_per_fold), np.std(auc_per_fold))
def train_evaluate_predict_cv():
    """Fit, evaluate and predict on each CV fold inside a Neptune experiment.

    Collects out-of-fold train predictions and per-fold test predictions,
    sends per-fold IOU/IOUT metrics, reports aggregate scores and persists
    the predictions via ``save_predictions``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]
    meta_test = meta[meta['is_train'] == 0]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['train', 'evaluate', 'predict', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                      shuffle=PARAMS.shuffle,
                                      random_state=SEED)

        iou_per_fold, iout_per_fold = [], []
        oof_train_predictions, oof_test_predictions = [], []
        for fold_id, (train_idx, valid_idx) in enumerate(
                cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            train_data_split = meta_train.iloc[train_idx]
            valid_data_split = meta_train.iloc[valid_idx]

            if USE_AUXILIARY_DATA:
                # NOTE(review): auxiliary rows are matched against the
                # *validation* ids — confirm this is intentional and leak-free.
                auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
                aux_rows = auxiliary[auxiliary[ID_COLUMN].isin(
                    valid_data_split[ID_COLUMN].tolist())]
                train_data_split = pd.concat([train_data_split, aux_rows], axis=0)

            LOGGER.info('Started fold {}'.format(fold_id))
            iou, iout, oof_prediction, test_prediction = \
                fold_fit_evaluate_predict_loop(train_data_split,
                                               valid_data_split,
                                               meta_test, fold_id)

            LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
            neptune.send_metric('Fold {} IOU'.format(fold_id), iou)
            LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
            neptune.send_metric('Fold {} IOUT'.format(fold_id), iout)

            iou_per_fold.append(iou)
            iout_per_fold.append(iout)
            oof_train_predictions.append(oof_prediction)
            oof_test_predictions.append(test_prediction)

        # Flatten (ids, predictions) pairs accumulated fold by fold.
        train_ids, train_predictions = [], []
        for fold_ids, fold_predictions in oof_train_predictions:
            train_ids.extend(fold_ids)
            train_predictions.extend(fold_predictions)

        log_scores(np.mean(iou_per_fold), np.std(iou_per_fold),
                   np.mean(iout_per_fold), np.std(iout_per_fold))
        save_predictions(train_ids, train_predictions, meta_test,
                         oof_test_predictions)
def evaluate():
    """Evaluate the trained unet pipeline on the first validation fold.

    Runs the inference and postprocessing pipelines on the validation split,
    computes IOU/IOUT against the ground-truth masks, sends both scores to the
    CTX channels, and dumps (meta, y_true, y_pred) to
    ``EXPERIMENT_DIR/validation_results.pkl``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    meta_train = meta[meta['is_train'] == 1]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)
    # Only the first fold's validation indices are used.
    for train_idx, valid_idx in cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1)):
        break

    meta_valid_split = meta_train.iloc[valid_idx]
    if DEV_MODE:
        meta_valid_split = meta_valid_split.sample(PARAMS.dev_mode_size,
                                                   random_state=SEED)
    # BUGFIX: read ground-truth masks AFTER the dev-mode subsampling.
    # Previously y_true_valid was built from the full validation split while
    # predictions were computed on the subsample, so in DEV_MODE
    # calculate_scores compared mismatched sets of masks.
    y_true_valid = utils.read_masks(meta_valid_split[Y_COLUMN].values)

    data = {'input': {'meta': meta_valid_split},
            'callback_input': {'meta_valid': None}}

    pipeline_network = unet(config=CONFIG, train_mode=False)
    pipeline_postprocessing = pipelines.mask_postprocessing(config=CONFIG)
    pipeline_network.clean_cache()
    output = pipeline_network.transform(data)
    valid_masks = {'input_masks': output}
    output = pipeline_postprocessing.transform(valid_masks)
    pipeline_network.clean_cache()
    pipeline_postprocessing.clean_cache()
    y_pred_valid = output['binarized_images']

    LOGGER.info('Calculating IOU and IOUT Scores')
    iou_score, iout_score = calculate_scores(y_true_valid, y_pred_valid)
    LOGGER.info('IOU score on validation is {}'.format(iou_score))
    CTX.channel_send('IOU', 0, iou_score)
    LOGGER.info('IOUT score on validation is {}'.format(iout_score))
    CTX.channel_send('IOUT', 0, iout_score)

    results_filepath = os.path.join(EXPERIMENT_DIR, 'validation_results.pkl')
    LOGGER.info('Saving validation results to {}'.format(results_filepath))
    joblib.dump((meta_valid_split, y_true_valid, y_pred_valid), results_filepath)
def train_evaluate_predict_cv():
    """Fit, evaluate and predict on each CV fold (CTX-channel logging, AUC metric).

    Collects out-of-fold train predictions and per-fold test predictions,
    reports aggregate AUC and persists predictions via ``save_predictions``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]
    meta_test = meta[meta['is_train'] == 0]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)

    auc_per_fold = []
    oof_train_predictions, oof_test_predictions = [], []
    for fold_id, (train_idx, valid_idx) in enumerate(
            cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
        train_data_split = meta_train.iloc[train_idx]
        valid_data_split = meta_train.iloc[valid_idx]

        if USE_AUXILIARY_DATA:
            # NOTE(review): auxiliary rows are matched against the
            # *validation* ids — confirm this is intentional and leak-free.
            auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
            aux_rows = auxiliary[auxiliary[ID_COLUMN].isin(
                valid_data_split[ID_COLUMN].tolist())]
            train_data_split = pd.concat([train_data_split, aux_rows], axis=0)

        LOGGER.info('Started fold {}'.format(fold_id))
        auc, oof_prediction, test_prediction = fold_fit_evaluate_predict_loop(
            train_data_split, valid_data_split, meta_test, fold_id)

        LOGGER.info('Fold {} AUC {}'.format(fold_id, auc))
        CTX.channel_send('Fold {} AUC'.format(fold_id), 0, auc)

        auc_per_fold.append(auc)
        oof_train_predictions.append(oof_prediction)
        oof_test_predictions.append(test_prediction)

    # Flatten (ids, predictions) pairs accumulated fold by fold.
    train_ids, train_predictions = [], []
    for fold_ids, fold_predictions in oof_train_predictions:
        train_ids.extend(fold_ids)
        train_predictions.extend(fold_predictions)

    log_scores(np.mean(auc_per_fold), np.std(auc_per_fold))
    save_predictions(train_ids, train_predictions, meta_test,
                     oof_test_predictions)
def train_evaluate_predict_cv():
    """Fit, evaluate and predict on each CV fold (CTX-channel logging, IOU/IOUT metrics).

    Collects out-of-fold train predictions and per-fold test predictions,
    reports aggregate IOU/IOUT and persists predictions via ``save_predictions``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]
    meta_test = meta[meta['is_train'] == 0]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                  shuffle=PARAMS.shuffle,
                                  random_state=SEED)

    iou_per_fold, iout_per_fold = [], []
    oof_train_predictions, oof_test_predictions = [], []
    for fold_id, (train_idx, valid_idx) in enumerate(
            cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
        train_data_split = meta_train.iloc[train_idx]
        valid_data_split = meta_train.iloc[valid_idx]

        LOGGER.info('Started fold {}'.format(fold_id))
        iou, iout, oof_prediction, test_prediction = \
            fold_fit_evaluate_predict_loop(train_data_split, valid_data_split,
                                           meta_test, fold_id)

        LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
        CTX.channel_send('Fold {} IOU'.format(fold_id), 0, iou)
        LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
        CTX.channel_send('Fold {} IOUT'.format(fold_id), 0, iout)

        iou_per_fold.append(iou)
        iout_per_fold.append(iout)
        oof_train_predictions.append(oof_prediction)
        oof_test_predictions.append(test_prediction)

    # Flatten (ids, predictions) pairs accumulated fold by fold.
    train_ids, train_predictions = [], []
    for fold_ids, fold_predictions in oof_train_predictions:
        train_ids.extend(fold_ids)
        train_predictions.extend(fold_predictions)

    log_scores(np.mean(iou_per_fold), np.std(iou_per_fold),
               np.mean(iout_per_fold), np.std(iout_per_fold))
    save_predictions(train_ids, train_predictions, meta_test,
                     oof_test_predictions)
def evaluate_predict_cv():
    """Evaluate and predict with a trained pipeline on each CV fold (Neptune, AUC).

    Collects out-of-fold train predictions and per-fold test predictions,
    sends per-fold AUC as a Neptune metric, reports the aggregate score and
    persists predictions via ``save_predictions``.
    """
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]
    meta_test = meta[meta['is_train'] == 0]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['evaluate', 'predict', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits,
                                      shuffle=PARAMS.shuffle,
                                      random_state=SEED)

        auc_per_fold = []
        oof_train_predictions, oof_test_predictions = [], []
        for fold_id, (_, valid_idx) in enumerate(
                cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            valid_data_split = meta_train.iloc[valid_idx]

            LOGGER.info('Started fold {}'.format(fold_id))
            auc, oof_prediction, test_prediction = fold_evaluate_predict_loop(
                valid_data_split, meta_test, fold_id)

            LOGGER.info('Fold {} AUC {}'.format(fold_id, auc))
            neptune.send_metric('Fold {} AUC'.format(fold_id), auc)

            auc_per_fold.append(auc)
            oof_train_predictions.append(oof_prediction)
            oof_test_predictions.append(test_prediction)

        # Flatten (ids, predictions) pairs accumulated fold by fold.
        train_ids, train_predictions = [], []
        for fold_ids, fold_predictions in oof_train_predictions:
            train_ids.extend(fold_ids)
            train_predictions.extend(fold_predictions)

        log_scores(np.mean(auc_per_fold), np.std(auc_per_fold))
        save_predictions(train_ids, train_predictions, meta_test,
                         oof_test_predictions)