Exemplo n.º 1
0
        # Run feature generation inside a timer labelled "generate features".
        # The set of features to build comes from config["features"];
        # args.force is forwarded as the overwrite flag.
        # NOTE(review): globals() is handed over as `namespace` — presumably
        # so generate_features can look up feature definitions declared in
        # this module; confirm against its implementation.
        with timer(name="generate features"):
            generate_features(
                train_df=train,
                test_df=test,
                namespace=globals(),
                required=config["features"],
                use_cudf=True,
                overwrite=args.force,
                log=True,
            )

        # The raw frames are not used again in this excerpt: drop the names
        # and trigger a collection pass before features are loaded below.
        del train, test
        gc.collect()

    # Dry run: send the "feature generation finished" notification and stop
    # with exit status 0 before any features are loaded.
    if args.dryrun:
        slack_notify("特徴量作り終わったぞ")
        # Raise SystemExit directly instead of calling exit(): the `exit`
        # helper is injected by the `site` module for interactive sessions
        # and is not guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(0)

    with timer("feature loading"):
        x_train, x_test = load_features(config)
        # Rewrite the column labels of both frames (train first, then test):
        # each label is stringified and every non-alphanumeric character is
        # replaced by an underscore.
        for feature_frame in (x_train, x_test):
            feature_frame.columns = [
                "".join(ch if ch.isalnum() else "_" for ch in str(label))
                for label in feature_frame.columns
            ]
        # Do not keep an extra reference to the last frame alive.
        del feature_frame

    with timer("delete duplicated columns"):
        # delete_duplicated_columns is defined outside this excerpt; its
        # return value replaces x_train.
        # NOTE(review): presumably it drops repeated columns — confirm.
        x_train = delete_duplicated_columns(x_train)
Exemplo n.º 2
0
                "mst_video_template_id",
            ],
            keep="last",
        )

    # https://www.guruguru.science/competitions/12/discussions/b6b3dd96-1dc9-4e03-be99-6e4dcde75e61/
    # https://www.guruguru.science/competitions/12/discussions/12aa6010-778c-4d79-a260-2296817776f1/
    with timer("merging"):
        # The same four tables are passed to merge_all, in the same order,
        # for train and for test; each result is then sorted by "imp_at".
        lookup_tables = (
            campaign,
            map_game_feed_native_video_assets,
            advertiser_video,
            advertiser_converted_video,
        )
        train = merge_all(train, *lookup_tables).sort_values("imp_at")
        test = merge_all(test, *lookup_tables).sort_values("imp_at")

    with timer("saving"):
        # Write each merged frame to its own Feather file under output_dir,
        # train first and then test, each inside its own nested timer.
        for timer_label, merged_frame, file_name in (
            ("save train", train, "train_merged.ftr"),
            ("save test", test, "test_merged.ftr"),
        ):
            with timer(timer_label):
                merged_frame.to_feather(output_dir + file_name)

    # Send the completion notification once both files are written.
    slack_notify("create_train_test 終わったぞ")
Exemplo n.º 3
0
    im_dir = 'data/out/' + run_id + '/'
    notes_dir = 'notes/' + run_id + '/'
    # Create every output directory independently. With a single try block
    # around all four calls, the first directory that already exists raises
    # OSError and the remaining ones are never created — including
    # notes_dir, which is opened for writing right after this.
    for out_dir in (log_dir, snapshot_dir, im_dir, notes_dir):
        try:
            os.makedirs(out_dir)
        except OSError:
            # An already-existing directory is fine; any other failure
            # (permissions, a regular file in the way, ...) leaves the
            # directory missing and is re-raised instead of being hidden.
            if not os.path.isdir(out_dir):
                raise

    # Record the run id, hyperparameters and optional free-form notes in
    # info.txt inside notes_dir (the file is overwritten if present).
    with open(notes_dir + 'info.txt', 'w') as notes_file:
        notes_file.write(run_id + '\n')
        notes_file.write('Hyperparameters\n')
        # These three values are converted with str() before being written.
        for field_label, field_value in (
            ('# Iterations: ', iterations),
            ('Learning Rate: ', base_lr),
            ('Initial Temperature: ', t),
        ):
            notes_file.write(field_label + str(field_value) + '\n')
        notes_file.write('Template Folder: ' + template_folder + '\n')
        notes_file.write('Notes: \n')
        # The notes body is only written when notes were supplied.
        if notes is not None:
            notes_file.write(notes + '\n')

    # Start Training
    # NOTE(review): this prints the value bound to `train` before calling
    # m.train() — presumably a mode flag; confirm it is not leftover debug
    # output.
    print(train)
    m.train()
else:
    # Other branch of a condition that lies outside this excerpt: call
    # m.predict with model_path instead of training.
    m.predict(model_path)

# Runs after either branch: build a completion message containing the GPU
# identifier and pass it to slack_notify together with a fixed first
# argument.
slack_msg = 'Experiment done on gpu #' + str(gpu) + ' on Delta'
slack_notify('nariman_saftarli', slack_msg)
Exemplo n.º 4
0
            # test = cudf.read_feather(input_dir / "test.ftr")
        # Run feature generation inside a timer labelled "generate features".
        # The set of features to build comes from config["features"];
        # args.force is forwarded as the overwrite flag.
        # NOTE(review): globals() is handed over as `namespace` — presumably
        # so generate_features can look up feature definitions declared in
        # this module; confirm against its implementation.
        with timer(name="generate features"):
            generate_features(
                train_df=train,
                test_df=test,
                namespace=globals(),
                required=config["features"],
                overwrite=args.force,
                log=True,
            )

        # The raw frames are not used again in this excerpt: drop the names
        # and trigger a collection pass before features are loaded below.
        del train, test
        gc.collect()

    # Dry run: send the "feature generation finished" notification and stop
    # with exit status 0 before any features are loaded.
    if args.dryrun:
        slack_notify("特徴量作り終わったぞ")
        # Raise SystemExit directly instead of calling exit(): the `exit`
        # helper is injected by the `site` module for interactive sessions
        # and is not guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(0)

    with timer("feature loading"):
        x_train, x_test = load_features(config)
        # Rewrite the column labels of both frames (train first, then test):
        # each label is stringified and every non-alphanumeric character is
        # replaced by an underscore.
        for feature_frame in (x_train, x_test):
            feature_frame.columns = [
                "".join(ch if ch.isalnum() else "_" for ch in str(label))
                for label in feature_frame.columns
            ]
        # Do not keep an extra reference to the pre-conversion test frame.
        del feature_frame
        # Category-typed column labels are read from x_train before both
        # frames are replaced by the result of their to_pandas() call.
        categorical_cols = x_train.select_dtypes("category").columns
        x_train = x_train.to_pandas()
        x_test = x_test.to_pandas()