Пример #1
0
def stack_method():
    """Blend first-level model predictions and write a submission CSV.

    The blending strategy is chosen from the module-level ``args.model``:

    * name contains ``"nn"``  -- load ``<model>_model.dump`` via ``read_dump``
      and take column 1 of its ``predict_proba`` output;
    * name contains ``"xgb"`` -- load ``<model>_model.dump`` into an
      ``xgb.Booster`` and call ``predict``;
    * name equals ``"mean"``  -- arithmetic mean along axis 1 of the raw
      prediction rows.

    For the nn/xgb strategies every input value is passed through
    ``relogit`` first.  The result is written to ``result_<model>.csv``
    (header ``ID,IsClick``), pairing IDs from
    ``read_sample("data/sampleSubmission.csv")`` with the blended values.
    Progress is printed at rows 1, 2, 4, 8, ... and the number of written
    rows plus their mean prediction is printed at the end.

    Raises:
        ValueError: if ``args.model`` selects none of the strategies above.
            Without this check the last row read from the inputs would be
            silently written out as the "prediction".
    """
    model_name = args.model
    use_nn = "nn" in model_name
    use_xgb = "xgb" in model_name

    # Fail before doing any expensive reading when no branch below would
    # produce predictions (e.g. "mean_v2" matches the file-list test but
    # not the exact "mean" strategy test).
    if not (use_nn or use_xgb or model_name == "mean"):
        raise ValueError(
            "unsupported args.model %r: expected a name containing 'nn' or "
            "'xgb', or exactly 'mean'" % (model_name,)
        )

    all_preds = [
        "data/te.ins5.ffm.out",
        "data/xgb5_test.out",
        "data/te.ins_bag.ffm.out",
        "data/te.ins5.fm.out",
        "data/te.ins3.ffm.out",
        "data/te.ins20.ffm.out",
        "data/xgb3_test.out",
        "data/te.ins.ffm.out",
    ]
    # Every model whose name does not contain "mean" gets one extra input.
    if "mean" not in model_name:
        all_preds.append("data/te.ins2.ffm.out")

    next_report = 1
    started = datetime.now()
    X = []
    for t, row in enumerate(read_preds(all_preds)):
        if use_nn or use_xgb:
            row = [relogit(value) for value in row]
        X.append(row)
        if t == next_report:
            # Single-argument parenthesised print gives the same output as
            # the Python 2 print statement and is also valid Python 3.
            print("%s: %s" % (t, datetime.now() - started))
            next_report *= 2

    if use_nn:
        model = read_dump("%s_model.dump" % model_name)
        preds = model.predict_proba(np.array(X).astype(np.float32))[:, 1]
    elif use_xgb:
        # The label vector is all zeros, one entry per row.
        dtest = xgb.DMatrix(np.array(X), label=[0] * len(X))
        bst = xgb.Booster({"nthread": 9})
        bst.load_model("%s_model.dump" % model_name)
        preds = bst.predict(dtest)
    else:
        # model_name == "mean" (guaranteed by the validation above).
        preds = np.mean(np.array(X).astype(np.float32), 1)

    with open("result_%s.csv" % model_name, "w") as outfile:
        outfile.write("ID,IsClick\n")
        avg_p = 0
        cnt = 0
        for ID, p in izip(read_sample("data/sampleSubmission.csv"), preds):
            avg_p += p
            cnt += 1
            outfile.write("%s,%s\n" % (ID, str(p)))
        # Guard the average so an empty sample/prediction set does not
        # raise ZeroDivisionError after the file has been written.
        mean_p = avg_p / cnt if cnt else float("nan")
        print("%s %s" % (cnt, mean_p))
Пример #2
0
def stack_method():
    """Blend first-level model predictions and write a submission CSV.

    The blending strategy is chosen from the module-level ``args.model``:

    * name contains ``"nn"``  -- load ``<model>_model.dump`` via ``read_dump``
      and take column 1 of its ``predict_proba`` output;
    * name contains ``"xgb"`` -- load ``<model>_model.dump`` into an
      ``xgb.Booster`` and call ``predict``;
    * name equals ``"mean"``  -- arithmetic mean along axis 1 of the raw
      prediction rows.

    For the nn/xgb strategies every input value is passed through
    ``relogit`` first.  The result is written to ``result_<model>.csv``
    (header ``ID,IsClick``), pairing IDs from
    ``read_sample("data/sampleSubmission.csv")`` with the blended values.
    Progress is printed at rows 1, 2, 4, 8, ... and the number of written
    rows plus their mean prediction is printed at the end.

    Raises:
        ValueError: if ``args.model`` selects none of the strategies above.
            Without this check the last row read from the inputs would be
            silently written out as the "prediction".
    """
    model_name = args.model
    use_nn = "nn" in model_name
    use_xgb = "xgb" in model_name

    # Fail before doing any expensive reading when no branch below would
    # produce predictions (e.g. "mean_v2" matches the file-list test but
    # not the exact "mean" strategy test).
    if not (use_nn or use_xgb or model_name == "mean"):
        raise ValueError(
            "unsupported args.model %r: expected a name containing 'nn' or "
            "'xgb', or exactly 'mean'" % (model_name,)
        )

    all_preds = [
        "data/te.ins5.ffm.out",
        "data/xgb5_test.out",
        "data/te.ins_bag.ffm.out",
        "data/te.ins5.fm.out",
        "data/te.ins3.ffm.out",
        "data/te.ins20.ffm.out",
        "data/xgb3_test.out",
        "data/te.ins.ffm.out",
    ]
    # Every model whose name does not contain "mean" gets one extra input.
    if "mean" not in model_name:
        all_preds.append("data/te.ins2.ffm.out")

    next_report = 1
    started = datetime.now()
    X = []
    for t, row in enumerate(read_preds(all_preds)):
        if use_nn or use_xgb:
            row = [relogit(value) for value in row]
        X.append(row)
        if t == next_report:
            # Single-argument parenthesised print gives the same output as
            # the Python 2 print statement and is also valid Python 3.
            print("%s: %s" % (t, datetime.now() - started))
            next_report *= 2

    if use_nn:
        model = read_dump("%s_model.dump" % model_name)
        preds = model.predict_proba(np.array(X).astype(np.float32))[:, 1]
    elif use_xgb:
        # The label vector is all zeros, one entry per row.
        dtest = xgb.DMatrix(np.array(X), label=[0] * len(X))
        bst = xgb.Booster({'nthread': 9})
        bst.load_model("%s_model.dump" % model_name)
        preds = bst.predict(dtest)
    else:
        # model_name == "mean" (guaranteed by the validation above).
        preds = np.mean(np.array(X).astype(np.float32), 1)

    with open("result_%s.csv" % model_name, 'w') as outfile:
        outfile.write('ID,IsClick\n')
        avg_p = 0
        cnt = 0
        for ID, p in izip(read_sample("data/sampleSubmission.csv"), preds):
            avg_p += p
            cnt += 1
            outfile.write('%s,%s\n' % (ID, str(p)))
        # Guard the average so an empty sample/prediction set does not
        # raise ZeroDivisionError after the file has been written.
        mean_p = avg_p / cnt if cnt else float("nan")
        print("%s %s" % (cnt, mean_p))
Пример #3
0
    # Remaining integer command-line options; `parser` is created outside
    # this excerpt.
    parser.add_argument('--seed', type=int, default=9)
    parser.add_argument('--date', type=int, default=0)
    parser.add_argument('--log', type=int, default=1)
    args = parser.parse_args()

    # Optional MongoDB-backed ad lookup.  The imports are local to this
    # branch, so pymongo and functools32 are only required when args.mongo
    # is set (the option itself is presumably declared above this excerpt).
    if args.mongo:
        from pymongo import MongoClient
        import functools32 as functools
        client = MongoClient('localhost', 27017)
        db = client.test
        @functools.lru_cache(maxsize=1000000)
        def get_ad_info(aid):
            """Return trans_ad_info() applied to the ad_info document for `aid`.

            Looks up the document whose "AdID" field equals `aid` in the
            `test` database; results are memoized by lru_cache (up to
            1,000,000 entries).
            """
            # NOTE(review): pymongo's find_one returns None when nothing
            # matches -- confirm trans_ad_info copes with that.
            ad_info = db.ad_info.find_one({"AdID": aid})
            return trans_ad_info(ad_info)

    # Objects loaded from dump files under data/; presumably per-key
    # frequency counts, judging by the names -- TODO confirm.
    uid_cnt_dict = read_dump("data/uid_cnt.dump")
    adid_cnt_dict = read_dump("data/adid_cnt.dump")
    ipid_cnt_dict = read_dump("data/ipid_cnt.dump")
    query_cnt_dict = read_dump("data/query_cnt.dump")
    title_cnt_dict = read_dump("data/title_cnt.dump")
    query_param_cnt_dict = read_dump("data/query_param_cnt.dump")
    ad_param_cnt_dict = read_dump("data/ad_param_cnt.dump")

    # Lookup data and CSV readers set up before main() runs; main() takes no
    # arguments, so it presumably reads these names as globals.
    user_info_map = get_user_info()
    category_map = get_category()
    user_cnt_iter = read_tsv("data/user_cnt.csv", delimiter=",")
    user_aid_cnt_iter = next_row(read_tsv("data/user_aid_cnt.csv", delimiter=","))
    main()


Пример #4
0
    # Remaining integer command-line options; `parser` is created outside
    # this excerpt.
    parser.add_argument('--seed', type=int, default=9)
    parser.add_argument('--date', type=int, default=0)
    parser.add_argument('--log', type=int, default=1)
    args = parser.parse_args()

    # Optional MongoDB-backed ad lookup.  The imports are local to this
    # branch, so pymongo and functools32 are only required when args.mongo
    # is set (the option itself is presumably declared above this excerpt).
    if args.mongo:
        from pymongo import MongoClient
        import functools32 as functools
        client = MongoClient('localhost', 27017)
        db = client.test

        @functools.lru_cache(maxsize=1000000)
        def get_ad_info(aid):
            """Return trans_ad_info() applied to the ad_info document for `aid`.

            Looks up the document whose "AdID" field equals `aid` in the
            `test` database; results are memoized by lru_cache (up to
            1,000,000 entries).
            """
            # NOTE(review): pymongo's find_one returns None when nothing
            # matches -- confirm trans_ad_info copes with that.
            ad_info = db.ad_info.find_one({"AdID": aid})
            return trans_ad_info(ad_info)

    # Objects loaded from dump files under data/; presumably per-key
    # frequency counts, judging by the names -- TODO confirm.
    uid_cnt_dict = read_dump("data/uid_cnt.dump")
    adid_cnt_dict = read_dump("data/adid_cnt.dump")
    ipid_cnt_dict = read_dump("data/ipid_cnt.dump")
    query_cnt_dict = read_dump("data/query_cnt.dump")
    title_cnt_dict = read_dump("data/title_cnt.dump")
    query_param_cnt_dict = read_dump("data/query_param_cnt.dump")
    ad_param_cnt_dict = read_dump("data/ad_param_cnt.dump")

    # Lookup data and CSV readers set up before main() runs; main() takes no
    # arguments, so it presumably reads these names as globals.
    user_info_map = get_user_info()
    category_map = get_category()
    user_cnt_iter = read_tsv("data/user_cnt.csv", delimiter=",")
    user_aid_cnt_iter = next_row(
        read_tsv("data/user_aid_cnt.csv", delimiter=","))
    main()
Пример #5
0
    # Remaining integer command-line options; `parser` is created outside
    # this excerpt.
    parser.add_argument('--seed', type=int, default=9)
    parser.add_argument('--date', type=int, default=0)
    parser.add_argument('--log', type=int, default=1)
    args = parser.parse_args()

    # Optional MongoDB-backed ad lookup.  The imports are local to this
    # branch, so pymongo and functools32 are only required when args.mongo
    # is set (the option itself is presumably declared above this excerpt).
    if args.mongo:
        from pymongo import MongoClient
        import functools32 as functools
        client = MongoClient('localhost', 27017)
        db = client.test
        @functools.lru_cache(maxsize=1000000)
        def get_ad_info(aid):
            """Return trans_ad_info() applied to the ad_info document for `aid`.

            Looks up the document whose "AdID" field equals `aid` in the
            `test` database; results are memoized by lru_cache (up to
            1,000,000 entries).
            """
            # NOTE(review): pymongo's find_one returns None when nothing
            # matches -- confirm trans_ad_info copes with that.
            ad_info = db.ad_info.find_one({"AdID": aid})
            return trans_ad_info(ad_info)

    # Objects loaded from dump files; presumably per-key frequency counts,
    # judging by the names -- TODO confirm.
    # NOTE(review): these dumps are read from the current working directory,
    # while the CSV inputs below are read from data/ -- confirm the paths.
    uid_cnt_dict = read_dump("uid_cnt.dump")
    adid_cnt_dict = read_dump("adid_cnt.dump")
    ipid_cnt_dict = read_dump("ipid_cnt.dump")
    query_cnt_dict = read_dump("query_cnt.dump")
    title_cnt_dict = read_dump("title_cnt.dump")
    query_param_cnt_dict = read_dump("query_param_cnt.dump")
    ad_param_cnt_dict = read_dump("ad_param_cnt.dump")

    # Lookup data and CSV readers set up before main() runs; main() takes no
    # arguments, so it presumably reads these names as globals.
    user_info_map = get_user_info()
    category_map = get_category()
    user_cnt_iter = read_tsv("data/user_cnt.csv", delimiter=",")
    user_aid_cnt_iter = next_row(read_tsv("data/user_aid_cnt.csv", delimiter=","))
    main()