def search_averaging_weight_for_each_col(predictions: list,
                                          target: np.ndarray,
                                          trials=10000):
    best_score = -np.inf
    best_weights = np.zeros((len(predictions), 24))
    utils.set_seed(1213)
    for i in tqdm(range(trials)):
        dice = np.random.rand(len(predictions), 24)
        weights = dice / dice.sum(axis=0)
        blended = np.zeros_like(predictions[0], dtype=np.float32)
        for weight, pred in zip(weights, predictions):
            blended += weight * pred
        score_class, class_weight = lwlrap(truth=target, scores=blended)
        score = (score_class * class_weight).sum()
        if score > best_score:
            best_score = score
            best_weights = weights
    return {"best_score": best_score, "best_weights": best_weights}
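# Usage sketch (not part of the original script): shows how the per-class
# weights returned by search_averaging_weight_for_each_col could be applied to
# blend a list of (n_samples, 24) OOF prediction arrays. The helper name is
# hypothetical; it only re-applies the broadcasting used in the search above.
def apply_searched_weights(predictions: list, best_weights: np.ndarray) -> np.ndarray:
    """Blend prediction arrays with per-class weights of shape (n_models, 24)."""
    blended = np.zeros_like(predictions[0], dtype=np.float32)
    for weight, pred in zip(best_weights, predictions):
        # weight has shape (24,); broadcasting applies a different weight per class column
        blended += weight * pred
    return blended
# e.g. result = search_averaging_weight_for_each_col(oofs, target)
#      blended = apply_searched_weights(oofs, result["best_weights"])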
])[[f"s{i}" for i in range(24) ]].mean().reset_index(drop=False) oof_predictions.append(oof_tta_df) tta_pred_df = pd.concat(tta_predictions, axis=0).reset_index(drop=True) tta_pred_df = tta_pred_df.groupby(["recording_id"])[[ f"s{i}" for i in range(24) ]].mean().reset_index(drop=False) fold_predictions.append(tta_pred_df) oof_df = pd.concat(oof_predictions, axis=0).reset_index(drop=True) oof_indices = oof_df[["index"]] ground_truth = oof_indices.merge(ground_truth_df, on="index", how="left") columns = [f"s{i}" for i in range(24)] score_class, weight = lwlrap(ground_truth[columns].values, oof_df[columns].values) score = (score_class * weight).sum() logger.info(f"TTA all LWLRAP: {score:.5f}") class_level_score = {config_name: score_class} for key in oof_tta_dict: tta_df = pd.concat(oof_tta_dict[key], axis=0).reset_index(drop=True) oof_indices = tta_df[["index"]] ground_truth = oof_indices.merge(ground_truth_df, on="index", how="left") score_class, weight = lwlrap(ground_truth[columns].values, tta_df[columns].values) score = (score_class * weight).sum() logger.info(f"TTA {key} LWLRAP: {score:.5f}")
f"Best score {result_dict['best_score']}, Best Weights{result_dict['best_weights']}" ) for i, class_ in enumerate(classes): weights_dict[class_] = result_dict["best_weights"][:, i] else: for class_ in classes: weights_dict[class_] = config["strategy"]["weights"] blended = np.zeros((len(oofs[0]), 24)) class_level_score = {} for class_ in weights_dict: index = classes.index(class_) weights = weights_dict[class_] for weight, oof in zip(weights, oofs): blended[:, index] += weight * oof[class_].values score_class, weight = lwlrap(ground_truth_df[classes].values, blended) score = (score_class * weight).sum() logger.info(f"Blended LWLRAP: {score:5f}") class_level_score["blend_score"] = score class_level_score["blend_weight"] = weights_dict class_level_score["blend"] = score_class for oof, name in zip(oofs, names): score_class, weight = lwlrap(ground_truth_df[classes].values, oof[classes].values) score = (score_class * weight).sum() logger.info(f"Name: {name} LWLRAP: {score:5f}") class_level_score[f"{name}_score"] = score class_level_score[name] = score_class blended_sub = np.zeros((len(submissions[0]), 24))
def eval_one_epoch(model, loader, criterion,
                   device: torch.device,
                   input_key: str, input_target_key: str,
                   epoch: int,
                   writer: SummaryWriter,
                   strong=False):
    loss_meter = utils.AverageMeter()
    lwlrap_meter = utils.AverageMeter()

    model.eval()

    preds = []
    targs = []
    recording_ids = []
    progress_bar = tqdm(loader, desc="valid")
    for step, batch in enumerate(progress_bar):
        with torch.no_grad():
            recording_ids.extend(batch["recording_id"])
            x = batch[input_key].to(device)
            y = batch[input_target_key]
            for key in y:
                y[key] = y[key].to(device)
            output = model(x)
            loss = criterion(output, y).detach()

        loss_meter.update(loss.item(), n=len(loader))

        if strong:
            # SED head: collapse the framewise output over time to a clip-level score
            clipwise_output = output["framewise_output"].detach().cpu().numpy().max(axis=1)
        else:
            clipwise_output = output["clipwise_output"].detach().cpu().numpy()
        target = y["weak"].detach().cpu().numpy()

        preds.append(clipwise_output)
        targs.append(target)

        score_class, weight = clb.lwlrap(target, clipwise_output)
        score = (score_class * weight).sum()
        lwlrap_meter.update(score, n=1)

        progress_bar.set_description(
            f"Epoch: {epoch + 1} "
            f"Step: [{step + 1}/{len(loader)}] "
            f"loss: {loss_meter.val:.4f} loss(avg) {loss_meter.avg:.4f} "
            f"lwlrap: {lwlrap_meter.val:.4f} lwlrap(avg) {lwlrap_meter.avg:.4f}")

        global_step = epoch * len(loader) + step + 1
        writer.add_scalar(tag="loss/batch",
                          scalar_value=loss_meter.val,
                          global_step=global_step)
        writer.add_scalar(tag="lwlrap/batch",
                          scalar_value=lwlrap_meter.val,
                          global_step=global_step)

    y_pred = np.concatenate(preds, axis=0)
    y_true = np.concatenate(targs, axis=0)

    oof_pred_df = pd.DataFrame(y_pred, columns=[f"s{i}" for i in range(y_pred.shape[1])])
    oof_pred_df = pd.concat([
        pd.DataFrame({"recording_id": recording_ids}),
        oof_pred_df
    ], axis=1)

    oof_targ_df = pd.DataFrame(y_true, columns=[f"s{i}" for i in range(y_pred.shape[1])])
    oof_targ_df = pd.concat([
        pd.DataFrame({"recording_id": recording_ids}),
        oof_targ_df
    ], axis=1)

    # aggregate crop-level predictions/targets back to one row per recording
    oof_pred_df = oof_pred_df.groupby("recording_id").max().reset_index(drop=True)
    oof_targ_df = oof_targ_df.groupby("recording_id").max().reset_index(drop=True)

    score_class, weight = clb.lwlrap(oof_targ_df.values, oof_pred_df.values)
    score = (score_class * weight).sum()

    writer.add_scalar(tag="loss/epoch", scalar_value=loss_meter.avg, global_step=epoch + 1)
    writer.add_scalar(tag="lwlrap/epoch", scalar_value=score, global_step=epoch + 1)
    return loss_meter.avg, score
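# Hedged reference sketch (assumption, not the project's clb.lwlrap): the callers
# above treat lwlrap as returning (per_class_score, class_weight) whose dot
# product is the overall label-weighted label-ranking average precision. The
# function below reproduces that contract directly from the metric's definition.
def lwlrap_sketch(truth: np.ndarray, scores: np.ndarray):
    num_samples, num_classes = scores.shape
    precisions = np.zeros((num_samples, num_classes))
    for i in range(num_samples):
        pos = np.flatnonzero(truth[i] > 0)
        if len(pos) == 0:
            continue
        order = np.argsort(-scores[i])
        ranks = np.empty(num_classes, dtype=int)
        ranks[order] = np.arange(1, num_classes + 1)  # rank 1 = highest score
        for c in pos:
            # precision at the rank of class c: positives ranked at or above it / its rank
            precisions[i, c] = np.sum(ranks[pos] <= ranks[c]) / ranks[c]
    labels_per_class = truth.sum(axis=0)
    weight_per_class = labels_per_class / max(labels_per_class.sum(), 1)
    per_class = np.zeros(num_classes)
    nonzero = labels_per_class > 0
    per_class[nonzero] = precisions.sum(axis=0)[nonzero] / labels_per_class[nonzero]
    return per_class, weight_per_class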
def train_one_epoch(model, loader, optimizer, scheduler, criterion,
                    device: torch.device,
                    input_key: str, input_target_key: str,
                    epoch: int,
                    writer: SummaryWriter):
    loss_meter = utils.AverageMeter()
    lwlrap_meter = utils.AverageMeter()

    model.train()

    preds = []
    targs = []
    progress_bar = tqdm(loader, desc="train")
    for step, batch in enumerate(progress_bar):
        x = batch[input_key].to(device)
        y = batch[input_target_key]
        for key in y:
            y[key] = y[key].to(device)

        output = model(x)
        loss = criterion(output, y)

        if hasattr(optimizer, "first_step"):
            # two-step optimizers (e.g. SAM) need a second forward/backward pass
            loss.backward()
            optimizer.first_step(zero_grad=True)
            criterion(model(x), y).backward()
            optimizer.second_step(zero_grad=True)
        else:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        loss_meter.update(loss.item(), n=len(loader))

        clipwise_output = output["clipwise_output"].detach().cpu().numpy()
        target = y["weak"].detach().cpu().numpy()
        preds.append(clipwise_output)
        targs.append(target)

        score_class, weight = clb.lwlrap(target, clipwise_output)
        score = (score_class * weight).sum()
        lwlrap_meter.update(score, n=1)

        progress_bar.set_description(
            f"Epoch: {epoch + 1} "
            f"Step: [{step + 1}/{len(loader)}] "
            f"loss: {loss_meter.val:.4f} loss(avg) {loss_meter.avg:.4f} "
            f"lwlrap: {lwlrap_meter.val:.4f} lwlrap(avg) {lwlrap_meter.avg:.4f}")

        global_step = epoch * len(loader) + step + 1
        writer.add_scalar(tag="loss/batch",
                          scalar_value=loss_meter.val,
                          global_step=global_step)
        writer.add_scalar(tag="lwlrap/batch",
                          scalar_value=lwlrap_meter.val,
                          global_step=global_step)

    scheduler.step()

    y_pred = np.concatenate(preds, axis=0)
    y_true = np.concatenate(targs, axis=0)

    score_class, weight = clb.lwlrap(y_true, y_pred)
    score = (score_class * weight).sum()

    writer.add_scalar(tag="loss/epoch", scalar_value=loss_meter.avg, global_step=epoch + 1)
    writer.add_scalar(tag="lwlrap/epoch", scalar_value=score, global_step=epoch + 1)
    return loss_meter.avg, score
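# Hedged driver sketch (the repository's actual training entry point is not
# shown here). Illustrates how train_one_epoch / eval_one_epoch are typically
# wired together: one writer for logging and a checkpoint kept on the best
# validation lwlrap. Names such as num_epochs, checkpoint_path and the
# input_key / input_target_key values are hypothetical.
def run_training(model, train_loader, valid_loader, optimizer, scheduler,
                 criterion, device, writer, num_epochs=50,
                 checkpoint_path="best.pth"):
    best_lwlrap = float("-inf")
    for epoch in range(num_epochs):
        train_loss, train_lwlrap = train_one_epoch(
            model, train_loader, optimizer, scheduler, criterion, device,
            input_key="waveform", input_target_key="targets",
            epoch=epoch, writer=writer)
        valid_loss, valid_lwlrap = eval_one_epoch(
            model, valid_loader, criterion, device,
            input_key="waveform", input_target_key="targets",
            epoch=epoch, writer=writer)
        if valid_lwlrap > best_lwlrap:
            best_lwlrap = valid_lwlrap
            torch.save(model.state_dict(), checkpoint_path)
    return best_lwlrap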
    fold_prediction_df = pd.concat([
        pd.DataFrame({"recording_id": recording_ids}),
        fold_prediction_df
    ], axis=1)
    fold_prediction_df = fold_prediction_df.groupby(
        "recording_id").max().reset_index(drop=False)
    fold_predictions.append(fold_prediction_df)

submission_name = "strong.csv"

oof_df = pd.concat(oof_predictions, axis=0).reset_index(drop=True)
oof_target_df = pd.concat(oof_targets_list, axis=0).reset_index(drop=True)

columns = [f"s{i}" for i in range(24)]
score_class, weight = lwlrap(oof_target_df[columns].values, oof_df[columns].values)
score = (score_class * weight).sum()
logger.info(f"Valid LWLRAP: {score:.5f}")

class_level_score = {config_name: score_class}
utils.save_json(class_level_score, submission_file_dir / "class_level_results.json")
oof_df.to_csv(submission_file_dir / oof_name, index=False)

folds_prediction_df = pd.concat(fold_predictions, axis=0).reset_index(drop=True)
folds_prediction_df = folds_prediction_df.groupby(
    "recording_id").mean().reset_index(drop=False)

assert len(folds_prediction_df) == len(submission), \
    "prediction length does not match sample submission length"
assert len(folds_prediction_df) == len(submission), \
    "prediction length does not match sample submission length"
assert folds_prediction_df.shape[1] == submission.shape[1], \
    "number of classes in prediction does not match that of sample submission"
assert len(set(folds_prediction_df["recording_id"]) - set(submission["recording_id"])) == 0, \
    "recording_id in prediction has unknown value"
assert len(set(submission["recording_id"]) - set(folds_prediction_df["recording_id"])) == 0, \
    "prediction doesn't have enough recording_id"

folds_prediction_df.to_csv(submission_file_dir / "submission.csv", index=False)

summary = {}
columns = [f"s{i}" for i in range(24)]

score_class, weight = clb.lwlrap(oof_target_high[columns].values, oof_df_high[columns].values)
score_high = (score_class * weight).sum()
logger.info(f"Valid LWLRAP(high): {score_high:.5f}")
summary["high"] = {
    "score": score_high,
    "score_class": score_class,
    "weight": weight
}

score_class, weight = clb.lwlrap(oof_target_low[columns].values, oof_df_low[columns].values)
score_low = (score_class * weight).sum()
logger.info(f"Valid LWLRAP(low): {score_low:.5f}")
summary["low"] = {