示例#1
0
    # Experiment name = last "/"-separated component of the config path with
    # every occurrence of ".yml" removed (str.replace is not suffix-only).
    config_name = args.config.split("/")[-1].replace(".yml", "")
    expdir = Path(f"out/{config_name}")
    # The experiment directory must already exist; otherwise report it on
    # stdout and exit with status 1.
    if not expdir.exists():
        print(f"You need to train {config_name} first!")
        sys.exit(1)
    # Make sure <expdir>/submission exists (no error if it is already there).
    submission_file_dir = expdir / "submission"
    submission_file_dir.mkdir(parents=True, exist_ok=True)

    logger = utils.get_logger(expdir / "tta.log")

    # environment: seed and device both come from the "seed" / "device"
    # entries of global_params.
    utils.set_seed(global_params["seed"])
    device = training.get_device(global_params["device"])

    # data: get_metadata returns a 6-tuple; of these, only `tp` is consumed
    # in the label loop below (columns t_min, t_max, recording_id).
    tp, fp, train_all, test_all, train_audio, test_audio = datasets.get_metadata(
        config)
    submission = pd.read_csv(config["data"]["sample_submission_path"])

    # labels
    labels = []
    # Length of the window placed around each annotated call, taken from the
    # validation dataset params.
    duration = config["dataset"]["valid"]["params"]["duration"]
    for _, sample in tp.iterrows():
        t_min = sample["t_min"]
        t_max = sample["t_max"]
        flac_id = sample["recording_id"]
        call_duration = t_max - t_min
        # Half of the slack between the window and the call; starting the
        # window this far before t_min centres it on the call. The value is
        # negative when the call is longer than the window.
        relative_offset = (duration - call_duration) / 2

        # Clamp the window start into [0, 60 - duration].
        # NOTE(review): 60 is presumably the recording length in the same
        # unit as t_min/t_max -- confirm against the dataset.
        offset = min(max(0, t_min - relative_offset), 60 - duration)
        # End of the window.
        tail = offset + duration
示例#2
0
    # Run-wide settings; the keys read below are "seed", "device" and "folds".
    global_params = config["globals"]

    # logging: outputs go to out/<config_name>, where config_name is the last
    # "/"-separated component of the config path with every ".yml" removed.
    config_name = args.config.split("/")[-1].replace(".yml", "")
    logdir = Path(f"out/{config_name}")
    # Create the directory (and any missing parents); no error if it exists.
    logdir.mkdir(exist_ok=True, parents=True)

    logger = utils.get_logger(logdir / "output.log")

    # environment
    utils.set_seed(global_params["seed"])
    device = training.get_device(global_params["device"])

    # data: the 4th and 6th elements returned by get_metadata are discarded.
    tp, fp, train_all, _, train_audio, _ = datasets.get_metadata(config)
    # validation: the object returned here must expose .split(df), yielding
    # (train_indices, valid_indices) pairs -- that is how it is used below.
    splitter = training.get_split(config)

    ##################################################
    # Main Loop #
    ##################################################
    for i, (trn_idx, val_idx) in enumerate(splitter.split(train_all)):
        # Only run the folds listed in global_params["folds"].
        if i not in global_params["folds"]:
            continue
        logger.info("=" * 20)
        logger.info(f"Fold {i}")
        logger.info("=" * 20)

        # Select the fold's rows and renumber them from 0.
        # NOTE(review): .loc is label-based. If the splitter yields positional
        # indices, this is only correct while train_all has a default
        # 0..n-1 index -- confirm, or consider .iloc.
        trn_df = train_all.loc[trn_idx, :].reset_index(drop=True)
        val_df = train_all.loc[val_idx, :].reset_index(drop=True)
示例#3
0
    # Experiment name = last "/"-separated component of the config path with
    # every occurrence of ".yml" removed.
    config_name = args.config.split("/")[-1].replace(".yml", "")
    expdir = Path(f"out/{config_name}")
    # Create out/<config_name> (and any missing parents) if needed.
    expdir.mkdir(exist_ok=True, parents=True)

    logger = utils.get_logger(expdir / "ensemble.log")

    # Load one out-of-fold CSV and one submission CSV per entry of
    # config["results"]; the three lists stay index-aligned with each other.
    oofs = []
    submissions = []
    names = []
    for result_dict in config["results"]:
        oofs.append(pd.read_csv(result_dict["oof"]))
        submissions.append(pd.read_csv(result_dict["submission"]))
        names.append(result_dict["name"])

    # Only the first element returned by get_metadata is needed here.
    tp, _, _, _, _, _ = datasets.get_metadata(config)
    # Single-column frame holding tp's "index" column, used as the join key.
    indices = tp[["index"]]

    # Left-join every OOF frame onto tp's "index" values: the result follows
    # the key order of tp, and keys missing from an OOF frame yield NaN rows.
    for i in range(len(oofs)):
        oofs[i] = indices.merge(oofs[i], on="index", how="left")

    labels = []
    for _, sample in tp.iterrows():
        t_min = sample["t_min"]
        t_max = sample["t_max"]
        flac_id = sample["recording_id"]
        call_duration = t_max - t_min
        # Half of the slack between a window of length 10 and the call;
        # starting the window this far before t_min centres it on the call.
        relative_offset = (10 - call_duration) / 2

        # Clamp the window start into [0, 50].
        # NOTE(review): 10 (window length) and 60 (presumably the recording
        # length) are hard-coded here -- confirm they match the duration
        # used when the OOF predictions were produced.
        offset = min(max(0, t_min - relative_offset), 60 - 10)
        # End of the window.
        tail = offset + 10