Example #1
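Note: all examples on this page are Python 2. The backtick syntax `expr` used throughout is Python 2's shorthand for repr(expr) (removed in Python 3), which is why integers and floats can be concatenated into strings without str(). A minimal illustration:

# Python 2 only: backticks are shorthand for repr()
campaign_id = 1458
assert `campaign_id` == repr(campaign_id) == "1458"
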
def main():
    if len(sys.argv) < 3:
        print "Usage python test_lr.py campaign_id learn_rate (budget_prop)"
        exit(-1)
    data_folder = "../../make-ipinyou-data/"
    config.campaign_id = int(sys.argv[1])
    # print config.campaign
    # print config.campaign_id
    # exit(-1)
    config.lr_alpha = float(sys.argv[2])
    if len(sys.argv) == 4:
        config.budget_prop = int(sys.argv[3])
    train_path = data_folder + ` config.campaign_id ` + "/train.yzx.txt"
    test_path = data_folder + ` config.campaign_id ` + "/test.yzx.txt"

    train_data = Dataset(train_path, config.campaign_id)
    train_data.shuffle()  # make train data shuffled
    test_data = Dataset(test_path, config.campaign_id)
    print "Load done."

    lr_model = LrModel(train_data, test_data)
    print "campaign v = " + ` lr_model.camp_v `
    print "learn_rate = " + ` config.lr_alpha `
    print "budget = " + ` lr_model.budget `

    if config.ds_ratio > 0:
        print "Need calibration."
    else:
        print "No calibration."

    print "Begin training ..."
    for i in range(0, config.lr_train_round):
        lr_model.train()
        lr_model.test()
        print "Round " + ` i + 1 ` + "\t" + ` tool.get_last_log(
            lr_model.test_log)['performance'] `
        if tool.judge_stop(lr_model.test_log):
            break
    print "Train done."

    log_file = ` config.campaign_id ` + "_lrlin_" + ` config.lr_alpha ` + "_" + ` config.budget_prop ` + ".csv"
    fo = open("../output/" + log_file, 'w')

    print "Begin log ..."
    header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tlearn_rate\tnds_ratio\tbudget_prop"
    best_test_log = lr_model.get_best_test_log()
    best_test_line = `config.campaign_id` + "\t" + "LR\ttest\t" \
         + tool.gen_performance_line(best_test_log) + "\t" \
         + 'None' + "\t" + "None" + "\t" + `config.lr_alpha` + "\t" \
         + "None" + "\t" + `config.budget_prop`
    fo.write(header + "\n")
    fo.write(best_test_line + "\n")

    # search for best linear parameter
    opt_param = lr_model.lin_bid(best_test_log['weight'])
    fo.write(
        "prop\trevenue\troi\tctr\tcpc\tauc\trmse\tcpm\timps\tclks\tlin_param\n"
    )
    for prop in config.budget_props:
        performance = lr_model.replay(best_test_log['weight'],
                                      lr_model.test_data, prop)
        fo.write("\t".join([
            `prop`,
            `performance['revenue']`,
            `performance['roi']`,
            `performance['ctr']`,
            `performance['cpc']`,
            `performance['auc']`,
            `performance['rmse']`,
            `performance['cpm']`,
            `performance['imps']`,
            `performance['clks']`,
            `opt_param`,
        ]) + "\n")

    fo.write("\n")

    fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n")
    for i in range(0, len(lr_model.test_log)):
        test_log = lr_model.test_log[i]
        line = `i+1` + "\t" + `test_log['performance']['revenue']` \
          + "\t" + `test_log['performance']['ctr']` \
          + "\t" + `test_log['performance']['cpc']` \
          + "\t" + `test_log['performance']['auc']` \
          + "\t" + `test_log['performance']['rmse']` \
          + "\t" + `test_log['performance']['cpm']` \
          + "\t" + `test_log['performance']['clks']` \
          + "\t" + `test_log['performance']['imps']` \
          + "\t" + `test_log['performance']['bids']`
        fo.write(line + "\n")
    fo.close()
    print "Log done."

    weight_path = `config.campaign_id` + "_" + "lrlin_best_weight" \
       + "_" + `config.lr_alpha` + "_" + `config.budget_prop` \
       + ".weight"
    lr_model.output_weight(best_test_log['weight'], "../output/" + weight_path)
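
The "lrlin" naming and the lin_bid search above follow the standard linear bidding strategy, which bids in proportion to the predicted CTR: bid(pCTR) = base_bid * pCTR / avgCTR. A sketch of that bid function (LrModel.lin_bid itself searches for the best scaling parameter and is not shown on this page):

def lin_bid(pctr, base_bid, avg_ctr):
    # linear bidding (sketch): scale a base bid by the relative predicted CTR
    return int(base_bid * pctr / avg_ctr)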
Example #2
def main():
    if len(sys.argv) < 7:
        print "Usage: python test_prectr_joint.py campaign_id laplace eu_scale ds_ratio mkt_alpha mkt_lambda"
        exit(-1)

    config.campaign_id = int(sys.argv[1]) if int(
        sys.argv[1]) in config.campaign_list else config.campaign_id
    config.laplace = int(
        sys.argv[2]) if int(sys.argv[2]) > 0 else config.laplace
    config.eu_scale = float(
        sys.argv[3]) if float(sys.argv[3]) > 0 else config.eu_scale
    config.ds_ratio = float(sys.argv[4]) if float(sys.argv[4]) > 0 else 0
    config.market_alpha = float(sys.argv[5])
    config.market_lambda = float(sys.argv[6])
    print "camp_id\tlaplace\tscale\tds_ratio\tmkt_alpha\tmkt_lambda"
    print `config.campaign_id` + "\t" \
            + `config.laplace` + "\t" \
            + `config.eu_scale` + "\t" \
            + `config.ds_ratio` + "\t" \
            + `config.market_alpha` + "\t" \
            + `config.market_lambda`

    # skip this run if the log file (and weight) already exists
    log_file = `config.campaign_id` + "_prectr_joint" \
                + "_" + `config.laplace` \
                + "_" + `config.eu_scale` \
                + "_" + `config.ds_ratio` \
                + "_" + `config.market_alpha` \
        + "_" + `config.market_lambda`\
                + ".csv"
    if tool.judge_file_exists("../output/", log_file):
        print "Existed " + log_file
        exit(0)

    train_path = config.data_folder + ` config.campaign_id ` + "/train.yzx.txt"
    test_path = config.data_folder + ` config.campaign_id ` + "/test.yzx.txt"
    train_data = Dataset(train_path, config.campaign_id)
    train_data.shuffle()  # make train data shuffled
    test_data = Dataset(test_path, config.campaign_id)
    if config.INTVL:
        IntervalLandscape(train_data, train_data.get_camp_id(), config.laplace,
                          3)
        IntervalLandscape(test_data, test_data.get_camp_id(), config.laplace,
                          3)
    else:
        BidLandscape(train_data, train_data.get_camp_id(), config.laplace)
        BidLandscape(test_data, test_data.get_camp_id(), config.laplace)
    print "Load done."

    # downsampling
    if config.ds_ratio > 0:
        train_data_ds = train_data.down_sampling(config.ds_ratio)
    else:
        train_data_ds = train_data
    print "Down sampled."
    print train_data_ds.get_statistics()

    eu_model = EuModel(train_data_ds, test_data)
    print "campaign v = " + ` eu_model.camp_v `

    # pre-train
    print "Begin ctr pre-training ..."
    pre_train_log = []
    for i in range(0, config.eu_train_round):
        eu_model.train()
        eu_model.test()
        pre_train_log.append(copy.deepcopy(eu_model.test_log[-1]))
        print "Round " + ` i + 1 ` + "\t" + ` tool.get_last_log(
            eu_model.test_log)['performance'] `
        if tool.judge_stop(eu_model.test_log):
            break
    print "Ctr pre-training done."
    pre_train_round = len(eu_model.test_log)
    tri_model = TriModel(train_data_ds, test_data, eu_model, None, 'eu')
    eu_model.weight = copy.deepcopy(
        tri_model.get_best_log(pre_train_log)['weight'])

    mkt_model = LinMarket(train_data, test_data)
    mkt_model.set_camp_v(eu_model.camp_v)  # NOTE: nds (down-sampled) camp value
    mkt_model.set_ctr_model(eu_model)
    mkt_model.set_bid_strategy(eu_model.get_bid_strategy())
    print "campaign v = " + ` eu_model.camp_v `

    train_data_ds.set_landscape(mkt_model)
    train_data.set_landscape(mkt_model)

    # train
    print "Begin joint training ..."
    config.PARAM_MARKET = True
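    # keep a sliding window of the last three CTR weight snapshots so that,
    # after early stopping, training can roll back to the weight from two
    # rounds before the final (possibly degraded) round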
    recent_eu_weight = []
    all_anlp = []
    for i in range(0, config.eu_train_round):
        mkt_model.joint_train()
        test_anlp = mkt_model.calc_total_anlp(test_data)
        print "Round " + ` i + 1 ` + " test_anlp: " + ` test_anlp `
        all_anlp.append(test_anlp)
        eu_model.train()
        eu_model.test()
        print "%d %d" % (len(all_anlp), len(eu_model.test_log))
        if i + 1 > 3:
            del recent_eu_weight[0]
        recent_eu_weight.append(copy.deepcopy(eu_model.weight))
        print "Round " + ` i + 1 ` + "\t" + ` tool.get_last_log(
            eu_model.test_log)['performance'] `
        if tool.judge_stop(eu_model.test_log):
            break
    if len(recent_eu_weight) == 3:
        eu_model.weight = recent_eu_weight[0]
    print "Joint train done."

    fo = open("../output/" + log_file, 'w')

    print "Begin log ..."
    header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\teu_scale\tnds_ratio"
    best_test_log = eu_model.get_best_test_log()
    best_test_line = `config.campaign_id` + "\t" + "PRECTR_JOINT\ttest\t" \
                        + tool.gen_performance_line(best_test_log) + "\t" \
                        + `config.laplace` + "\t" + "None" + "\t" + `config.eu_scale` + "\t" + `config.ds_ratio` + "\t" \
                        + `config.market_alpha` + "\t" + `config.market_lambda`
    fo.write(header + "\n")
    fo.write(best_test_line + "\n")

    fo.write("\n")

    fo.write("Ctr Pre-train Log:")
    fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n")
    for i in range(0, pre_train_round):
        test_log = eu_model.test_log[i]
        line = `i+1` + "\t" + `test_log['performance']['revenue']` \
                + "\t" + `test_log['performance']['ctr']` \
                + "\t" + `test_log['performance']['cpc']` \
                + "\t" + `test_log['performance']['auc']` \
                + "\t" + `test_log['performance']['rmse']` \
                + "\t" + `test_log['performance']['cpm']` \
                + "\t" + `test_log['performance']['clks']` \
                + "\t" + `test_log['performance']['imps']` \
                + "\t" + `test_log['performance']['bids']`
        fo.write(line + "\n")
    print "Pre-train Log done."

    fo.write("Train Log:")
    fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\tanlp\n")
    for i in range(0, len(all_anlp)):
        test_log = eu_model.test_log[pre_train_round + i]
        line = `i+1` + "\t" + `test_log['performance']['revenue']` \
                + "\t" + `test_log['performance']['ctr']` \
                + "\t" + `test_log['performance']['cpc']` \
                + "\t" + `test_log['performance']['auc']` \
                + "\t" + `test_log['performance']['rmse']` \
                + "\t" + `test_log['performance']['cpm']` \
                + "\t" + `test_log['performance']['clks']` \
                + "\t" + `test_log['performance']['imps']` \
                + "\t" + `test_log['performance']['bids']` \
                + "\t" + `all_anlp[i]`
        fo.write(line + "\n")
    fo.close()
    print "Train Log done."


    weight_path = `config.campaign_id` + "_" + "prectr_joint_best_weight" \
                + "_" + `config.laplace` \
                + "_" + `config.eu_scale` \
                + "_" + `config.ds_ratio` \
        + "_" + `config.market_alpha` \
        + "_" + `config.market_lambda` \
                 + ".weight"
    eu_model.output_weight(best_test_log['weight'], "../output/" + weight_path)
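
tool.judge_stop is not shown on this page, but every training loop relies on it for early stopping. A minimal sketch of one plausible criterion; the real tool.py may differ, and the only grounded assumption is that each log entry carries a 'performance' dict with a 'revenue' field, as the logging code above reads:

def judge_stop(test_log):
    # hypothetical criterion: stop once the last two rounds have both
    # failed to improve on the round before them
    if len(test_log) < 3:
        return False
    revenues = [log['performance']['revenue'] for log in test_log]
    return revenues[-1] <= revenues[-3] and revenues[-2] <= revenues[-3]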
Example #3
def main():
    if len(sys.argv) < 3:
        print "Usage python test_sqlr.py campaign_id learn_rate (budget_prop)"
        exit(-1)
    data_folder = "../../make-ipinyou-data/"
    config.campaign_id = int(sys.argv[1])
    config.lr_alpha = float(sys.argv[2])
    if len(sys.argv) == 4:
        config.budget_prop = int(sys.argv[3])
    train_path = data_folder + ` config.campaign_id ` + "/train.yzx.txt"
    test_path = data_folder + ` config.campaign_id ` + "/test.yzx.txt"
    print "Camp_id\tlearn_alpha"
    print ` config.campaign_id ` + "\t" + ` config.lr_alpha `

    train_data = Dataset(train_path, config.campaign_id)
    train_data.shuffle()
    test_data = Dataset(test_path, config.campaign_id)
    print "Load done."

    lr_model = SqlrModel(train_data, test_data)
    print "campaign v = " + ` lr_model.camp_v `
    print "budget = " + ` lr_model.budget `

    log_file = ` config.campaign_id ` + "_sqlr_" + ` config.lr_alpha ` + "_" + ` config.budget_prop ` + ".csv"
    fo = open("../output/" + log_file, 'w')

    print "Begin training ..."
    for i in range(0, config.lr_train_round):
        lr_model.train()
        lr_model.test()
        print "Round " + ` i + 1 ` + "\t" + ` tool.get_last_log(
            lr_model.test_log)['performance'] `
        if tool.judge_stop(lr_model.test_log):
            break
    print "Train done."

    print "Begin log ..."
    header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tlearn_rate\tnds_ratio"
    best_test_log = lr_model.get_best_test_log()
    best_test_line = `config.campaign_id` + "\t" + "SQ\ttest\t" \
         + tool.gen_performance_line(best_test_log) + "\t" \
         + `config.laplace` + "\t" + "None" + "\t" \
         + `config.lr_alpha` + "\t" + "None"
    fo.write(header + "\n")
    fo.write(best_test_line + "\n")

    fo.write("\n")

    fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n")
    for i in range(0, len(lr_model.test_log)):
        test_log = lr_model.test_log[i]
        line = `i+1` + "\t" + `test_log['performance']['revenue']` \
          + "\t" + `test_log['performance']['ctr']` \
          + "\t" + `test_log['performance']['cpc']` \
          + "\t" + `test_log['performance']['auc']` \
          + "\t" + `test_log['performance']['rmse']` \
          + "\t" + `test_log['performance']['cpm']` \
          + "\t" + `test_log['performance']['clks']` \
          + "\t" + `test_log['performance']['imps']` \
          + "\t" + `test_log['performance']['bids']`
        fo.write(line + "\n")
    fo.close()
    print "Log done."

    # weight_path = `config.campaign_id` + "_sqlr_best_weight_" + `config.lr_alpha` + "_" + `config.budget_prop` + ".txt"
    # lr_model.output_weight(best_test_log['weight'], "../output/" + weight_path)

    weight_path = `config.campaign_id` + "_" + "sqlr_best_weight" \
       + "_" + `config.laplace` \
       + "_" + `config.eu_scale` \
       + "_" + `config.ds_ratio` \
       + ".weight"
    lr_model.output_weight(best_test_log['weight'], "../output/" + weight_path)
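
Every example loads train.yzx.txt / test.yzx.txt produced by make-ipinyou-data. A sketch of a reader for that format, assuming the usual layout of one bid record per line (click label y, paid market price z, then sparse one-hot feature indices); Dataset's actual parsing is not shown here:

def parse_yzx_line(line):
    # "y z idx:1 idx:1 ..." -> (click, market_price, feature_indices)
    tokens = line.strip().split()
    y = int(tokens[0])  # click label, 0 or 1
    z = int(tokens[1])  # winning (market) price of the auction
    x = [int(t.split(':')[0]) for t in tokens[2:]]
    return y, z, x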
Example #4
def main():
    if len(sys.argv) < 5:
        print "Usage: python test_eu.py campaign_id laplace eu_scale ds_ratio"
        exit(-1)

    config.campaign_id = int(sys.argv[1]) if int(
        sys.argv[1]) in config.campaign_list else config.campaign_id
    config.laplace = int(
        sys.argv[2]) if int(sys.argv[2]) > 0 else config.laplace
    config.eu_scale = float(
        sys.argv[3]) if float(sys.argv[3]) > 0 else config.eu_scale
    config.ds_ratio = float(sys.argv[4]) if float(sys.argv[4]) > 0 else 0
    print "camp_id\tlaplace\tscale\tds_ratio"
    print ` config.campaign_id ` + "\t" + ` config.laplace ` + "\t" + ` config.eu_scale ` + "\t" + ` config.ds_ratio `

    train_path = config.data_folder + ` config.campaign_id ` + "/train.yzx.txt"
    test_path = config.data_folder + ` config.campaign_id ` + "/test.yzx.txt"
    train_data = Dataset(train_path, config.campaign_id)
    train_data.shuffle()  # make train data shuffled
    test_data = Dataset(test_path, config.campaign_id)
    if config.INTVL:
        IntervalLandscape(train_data, train_data.get_camp_id(), config.laplace,
                          3)
        IntervalLandscape(test_data, test_data.get_camp_id(), config.laplace,
                          3)
    else:
        BidLandscape(train_data, train_data.get_camp_id(), config.laplace)
        BidLandscape(test_data, test_data.get_camp_id(), config.laplace)
    print "Load done."

    # downsampling
    if config.ds_ratio > 0:
        train_data_ds = train_data.down_sampling(config.ds_ratio)
    else:
        train_data_ds = train_data
    print "Down sampled."
    print train_data_ds.get_statistics()

    eu_model = EuModel(train_data_ds, test_data)
    print "campaign v = " + ` eu_model.camp_v `

    # train
    print "Begin training ..."
    for i in range(0, config.eu_train_round):
        eu_model.train()
        eu_model.test()
        print "Round " + ` i + 1 ` + "\t" + ` tool.get_last_log(
            eu_model.test_log)['performance'] `
        if tool.judge_stop(eu_model.test_log):
            break
    print "Train done."

    # eu_2997_3_0.1_0.05.csv
    log_file = `config.campaign_id` + "_eu" \
                + "_" + `config.laplace` \
                + "_" + `config.eu_scale` \
                + "_" + `config.ds_ratio` \
                + ".csv"
    fo = open("../output/" + log_file, 'w')

    print "Begin log ..."
    header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\teu_scale\tnds_ratio"
    best_test_log = eu_model.get_best_test_log()
    best_test_line = `config.campaign_id` + "\t" + "EU\ttest\t" \
                        + tool.gen_performance_line(best_test_log) + "\t" \
                        + `config.laplace` + "\t" + "None" + "\t" + `config.eu_scale` + "\t" + `config.ds_ratio`
    fo.write(header + "\n")
    fo.write(best_test_line + "\n")

    fo.write("\n")

    fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n")
    for i in range(0, len(eu_model.test_log)):
        test_log = eu_model.test_log[i]
        line = `i+1` + "\t" + `test_log['performance']['revenue']` \
                + "\t" + `test_log['performance']['ctr']` \
                + "\t" + `test_log['performance']['cpc']` \
                + "\t" + `test_log['performance']['auc']` \
                + "\t" + `test_log['performance']['rmse']` \
                + "\t" + `test_log['performance']['cpm']` \
                + "\t" + `test_log['performance']['clks']` \
                + "\t" + `test_log['performance']['imps']` \
                + "\t" + `test_log['performance']['bids']`
        fo.write(line + "\n")
    fo.close()
    print "Log done."

    weight_path = `config.campaign_id` + "_" + "eu_best_weight" \
                + "_" + `config.laplace` \
                + "_" + `config.eu_scale` \
                + "_" + `config.ds_ratio` \
                 + ".weight"
    eu_model.output_weight(best_test_log['weight'], "../output/" + weight_path)
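
Example 1 prints "Need calibration." whenever config.ds_ratio > 0, because downsampling negatives inflates predicted CTRs. The standard correction for a model trained with negatives kept at rate w is q = p / (p + (1 - p) / w); a sketch of that correction (whether EuModel applies exactly this form is an assumption):

def calibrate_ctr(p, ds_ratio):
    # undo negative downsampling: p is the raw prediction,
    # ds_ratio the rate at which negatives were kept (0 < ds_ratio <= 1)
    return p / (p + (1.0 - p) / ds_ratio)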
Example #5
def main():
    if len(sys.argv) < 8:
        print "Usage: python test_tri.py campaign_id model_name laplace x_scale ds_ratio budget_prop train_option mkt_alpha mkt_lambda"
        exit(-1)

    config.campaign_id = int(sys.argv[1])
    model_name = sys.argv[2]
    if model_name not in config.model_list:
        print "Wrong model name."
        exit(-1)
    config.model_name = model_name
    config.laplace = int(sys.argv[3])
    config.eu_scale = float(sys.argv[4])
    config.ds_ratio = float(sys.argv[5]) if float(sys.argv[5]) > 0 else 0
    config.budget_prop = int(sys.argv[6])
    train_option = int(sys.argv[7])

    print "cam_id\tmodel\tlaplace\tscale\tds_ratio\tbudget_prop\ttrain_option\tmkt_alpha\tmkt_lambda"
    print `config.campaign_id` + "\t" + `model_name` \
 + "\t" + `config.laplace` + "\t" + `config.eu_scale` \
 + "\t" + `config.ds_ratio` + "\t" + `config.budget_prop` \
 + "\t" + `train_option` + "\t" + `config.market_alpha` + "\t" + `config.market_lambda`

    train_path = config.data_folder + ` config.campaign_id ` + "/train.yzx.txt"
    test_path = config.data_folder + ` config.campaign_id ` + "/test.yzx.txt"
    train_data = Dataset(train_path, config.campaign_id)
    train_data.shuffle()  # make train data shuffled
    test_data = Dataset(test_path, config.campaign_id)

    # downsampling
    train_data_ds = train_data.down_sampling(
        config.ds_ratio) if config.ds_ratio > 0 else train_data
    print train_data_ds.get_statistics()
    print "Down sampled."

    # eu_model mkt_model tri_model
    eu_model = EuModel(train_data_ds, test_data)
    mkt_model = LinMarket(train_data, test_data)
    tri_model = TriModel(train_data_ds, test_data, eu_model, mkt_model,
                         model_name)

    eu_model.bid_strategy = tri_model.bid_strategy
    mkt_model.set_camp_v(eu_model.camp_v)  # NOTE: nds (down-sampled) camp value
    mkt_model.set_ctr_model(eu_model)
    mkt_model.set_bid_strategy(eu_model.get_bid_strategy())
    print "campaign v = " + ` eu_model.camp_v `

    # bid landscape initialization
    # pre-train
    # 1. CTR pre-train
    if train_option in {1, 3}:
        # temporarily init counting-based landscape
        BidLandscape(train_data, train_data.get_camp_id(), config.laplace)
        train_data_ds.init_landscape(train_data.get_landscape())
        eu_model.train()
        eu_model.test()
        #print "Round 0" + "\t" + `tool.get_last_log(eu_model.test_log)['performance']`
        # add back parameterized landscape

    # 2. MKT pre-train
    if train_option > 1:
        mkt_model.train()
        train_anlp = mkt_model.calc_total_anlp(train_data)
        test_anlp = mkt_model.calc_total_anlp(test_data)
        #print "Market Model pre-train ANLP train: %.3f, test: %.3f." % (train_anlp, test_anlp)

    train_data_ds.set_landscape(mkt_model)
    train_data.set_landscape(mkt_model)

    # train
    print "Begin training ..."
    config.PARAM_MARKET = True
    for i in range(0, config.em_round):
        #print "Tri Round starts:"
        recent_mkt_weight = []
        recent_ctr_weight = []
        for j in range(0, config.eu_train_round):
            mkt_model.joint_train()
            test_anlp = mkt_model.calc_total_anlp(test_data)
            print "Tri Round " + ` i + 1 ` + " Round " + ` j + 1 ` + " test_anlp: " + ` test_anlp `
            eu_model.train()
            eu_model.test()
            print "Tri Round " + ` i + 1 ` + " Round " + ` j + 1 ` + "\t" + ` tool.get_last_log(
                eu_model.test_log)['performance'] `
            if j + 1 > 3:
                del recent_mkt_weight[0]
                del recent_ctr_weight[0]
            recent_mkt_weight.append(copy.deepcopy(mkt_model.weight))
            recent_ctr_weight.append(copy.deepcopy(eu_model.weight))
            if tool.judge_stop(eu_model.test_log):
                break
        mkt_model.weight = recent_mkt_weight[0]
        eu_model.weight = recent_ctr_weight[0]
        tri_model.train()
        print "Tri Round " + ` i + 1 ` + "\t" + ` tool.get_last_log(
            tri_model.em_log)['performance'] `
        if tool.judge_stop(tri_model.em_log):
            break
    print "Train done."

    # em_rr_2997_3_0.1_0.csv
    log_file = "tri_" + model_name \
                + "_" + `config.campaign_id` \
                + "_" + `config.budget_prop` \
                + "_" + `config.laplace` \
                + "_" + `config.eu_scale` \
                + "_" + `config.ds_ratio` \
                + ".csv"
    fo = open("../../output/Tri" + log_file, 'w')

    print "Begin log ..."
    header = "camp_id\tmodel\tdataset\trevenue\troi\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tscale\tds_ratio\tbudget_prop\tem_round\tmu"
    best_em_log = tri_model.get_best_log(tri_model.em_log)
    best_em_line = `config.campaign_id` + "\t" + "em"+model_name + "\ttest\t" \
                    + tool.gen_performance_line(best_em_log) + "\t" \
                    + `config.laplace` + "\t" + "None" + "\t" + `config.eu_scale` + "\t" \
                    + (`config.ds_ratio` if config.ds_ratio>0 else "None") + "\t" \
                    + `config.budget_prop` +"\t" \
                    + `len(tri_model.em_log)` + "\t" + `best_em_log['mu']`

    fo.write(header + "\n")
    fo.write(best_em_line + "\n")

    fo.write("Test with Budget Constraints\n")

    # # reset mu
    # em_model.mu = best_em_log['mu']
    # em_model.bid_strategy.set_mu(em_model.mu)
    # # replay
    # fo.write("prop\trevenue\troi\tctr\tcpc\tauc\trmse\tcpm\timps\tclks\n")
    # for prop in config.budget_props:
    #     performance = em_model.replay(best_em_log['weight'], em_model.test_data, prop)
    #     fo.write(`prop`); fo.write("\t")
    #     fo.write(`performance['revenue']`); fo.write("\t")
    #     fo.write(`performance['roi']`); fo.write("\t")
    #     fo.write(`performance['ctr']`); fo.write("\t")
    #     fo.write(`performance['cpc']`); fo.write("\t")
    #     fo.write(`performance['auc']`); fo.write("\t")
    #     fo.write(`performance['rmse']`); fo.write("\t")
    #     fo.write(`performance['cpm']`); fo.write("\t")
    #     fo.write(`performance['imps']`); fo.write("\t")
    #     fo.write(`performance['clks']`); fo.write("\t")
    #     fo.write("\n")

    fo.write("\n")

    fo.write("Round\trevenue\troi\tcpc\tctr\tauc\trmse\timps\ttruncate\tmu\n")
    for i in range(0, len(tri_model.em_log)):
        tri_log = tri_model.em_log[i]
        line = `i+1` + "\t" + `tri_log['performance']['revenue']` + "\t" \
                + `tri_log['performance']['roi']` + "\t" \
                + `tri_log['performance']['cpc']` + "\t" \
                + `tri_log['performance']['ctr']` + "\t" \
                + `tri_log['performance']['auc']` + "\t" \
                + `tri_log['performance']['rmse']` + "\t" \
                + `tri_log['performance']['imps']` + "\t" \
                + `tri_log['weight'][0]` + "\t" \
                + `tri_log['mu']`
        fo.write(line + "\n")
    fo.write("\n")
    for i in range(0, len(tri_model.test_log)):
        test_log = tri_model.test_log[i]
        line = `i+1` + "\t" + `test_log['performance']['revenue']` + "\t" \
                + `test_log['performance']['roi']` + "\t" \
                + `test_log['performance']['cpc']` + "\t" \
                + `test_log['performance']['ctr']` + "\t" \
                + `test_log['performance']['auc']` + "\t" \
                + `test_log['performance']['rmse']` + "\t" \
                + `test_log['performance']['imps']` + "\t" \
                + `test_log['weight'][0]`
        if 'm' in test_log:
            line = line + "\tm"
        fo.write(line + "\n")

    fo.close()
    print "Log done."
Example #6
def main():
    if len(sys.argv) < 6:
        print "Usage: python test_stat_bin.py campaign_id laplace eu_scale ds_ratio train_option (DEBUG)"
        exit(-1)
    
    weight_path_dict = {'1458': 'campaign_1458_alpha_1e-9_beta_1e-4.txt',
                        '2259': 'campaign_2259_alpha_1e-9_beta_5e-3.txt',
                        '2261': 'campaign_2261_alpha_5e-9_beta_1e-4.txt',
                        '2821': 'campaign_2821_alpha_1e-9_beta_5e-7.txt',
                        '2997': 'campaign_2997_alpha_5e-9_beta_1e-7.txt',
                        '3358': 'campaign_3358_alpha_1e-9_beta_1e-8.txt',
                        '3386': 'campaign_3386_alpha_5e-10_beta_5e-5.txt',
                        '3427': 'campaign_3427_alpha_5e-10_beta_1e-2.txt',
                        '3476': 'campaign_3476_alpha_5e-10_beta_5e-3.txt'}

    config.campaign_id = int(sys.argv[1]) if int(sys.argv[1]) in config.campaign_list else config.campaign_id
    config.laplace = int(sys.argv[2]) if int(sys.argv[2]) > 0 else config.laplace
    config.eu_scale = float(sys.argv[3]) if float(sys.argv[3]) > 0 else config.eu_scale
    config.ds_ratio = float(sys.argv[4]) if float(sys.argv[4]) > 0 else 0
    if len(sys.argv) > 8:
        # bool() of any non-empty string is True, so compare explicitly
        config.DEBUG = sys.argv[8] not in ('0', 'False', 'false')
    
    options = {0, 1, 2, 3}
    option_dict = {
        0: 'directly joint train',
        1: 'CTR pre-train',
        2: 'MKT pre-train',
        3: 'all pre-train'}
    train_option = int(sys.argv[5])
    if train_option not in options:
        print "ERROR: Train Option Error!"
        exit(-1)
    config.mkt_weight_path = '../lin_weight/' + weight_path_dict[`config.campaign_id`]
    print "camp_id\tlaplace\tscale\tds_ratio\ttrain_option\tmkt_alpha\tmkt_lambda"
    print `config.campaign_id` + "\t" \
    		+ `config.laplace` + "\t" \
    		+ `config.eu_scale` + "\t" \
    		+ `config.ds_ratio` + "\t" \
    		+ option_dict[train_option] + "\t" \
    		+ `config.mkt_weight_path`

    train_path = config.data_folder + `config.campaign_id` + "/train.yzx.txt"
    test_path = config.data_folder + `config.campaign_id` + "/test.yzx.txt"
    train_data = Dataset(train_path, config.campaign_id)
    train_data.shuffle() # make train data shuffled
    test_data = Dataset(test_path, config.campaign_id)
    print "Dataset load done."

    # downsampling
    if config.ds_ratio > 0:
        train_data_ds = train_data.down_sampling(config.ds_ratio)
    else:
        train_data_ds = train_data
    print "Down sampled."
    print train_data_ds.get_statistics()

    eu_model = EuModel(train_data_ds, test_data)
    mkt_model = LinMarket(train_data, test_data)
    if mkt_model.load_weight(config.mkt_weight_path):
        test_anlp = mkt_model.calc_total_anlp(test_data)
        print "Market model weight loaded. The overall ANLP = " + `test_anlp`
    mkt_model.set_camp_v(eu_model.camp_v)  # NOTE: nds (down-sampled) camp value
    mkt_model.set_ctr_model(eu_model)
    mkt_model.set_bid_strategy(eu_model.get_bid_strategy())
    print "campaign v = " + `eu_model.camp_v`

    # bid landscape initialization
    # pre-train
    # 1. CTR pre-train
    if train_option in {1, 3}:
        # temporarily init counting-based landscape
        BidLandscape(train_data, train_data.get_camp_id(), config.laplace)
        train_data_ds.init_landscape(train_data.get_landscape())
        eu_model.train()
        eu_model.test()
        print "Round 0" + "\t" + `tool.get_last_log(eu_model.test_log)['performance']`
        # add back parameterized landscape

    # 2. MKT pre-train
    if train_option > 1:
        mkt_model.train()
        train_anlp = mkt_model.calc_total_anlp(train_data)
        test_anlp = mkt_model.calc_total_anlp(test_data)
        print "Market Model pre-train ANLP train: %.3f, test: %.3f." % (train_anlp, test_anlp)

    train_data_ds.set_landscape(mkt_model)
    train_data.set_landscape(mkt_model)

    # # set up bid landscape model
    # mkt_model = LinMarket(train_data_ds, test_data)
    # if mkt_model.load_weight(weight_path): print "Market parameter loaded."
    # train_data_ds.init_landscape(mkt_model)
    # print mkt_model.weight[0]
    # print "Parameterized landscape initialized."

    # raw_input("pause ...")

    # train
    print "Begin training ..."
    config.PARAM_MARKET = True
    if train_option <= 1:
        test_anlp = 0.0  # no MKT pre-train ran, so there is no ANLP to report yet
    for i in range(0, config.eu_train_round):
        # mkt_model.joint_train()
        print "Round " + `i+1` + " test_anlp: " + `test_anlp`
        eu_model.train()
        eu_model.test()
        print "Round " + `i+1` + "\t" + `tool.get_last_log(eu_model.test_log)['performance']`
        if tool.judge_stop(eu_model.test_log):
            break
    print "Train done."

    # eu_2997_3_0.1_0.05.csv
    log_file = `config.campaign_id` + "_eu" \
                + "_" + `config.laplace` \
                + "_" + `config.eu_scale` \
                + "_" + `config.ds_ratio` \
                + ".stat.csv"
    fo = open("../output/"+log_file, 'w')
    
    print "Begin log ..."
    header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\teu_scale\tnds_ratio"
    best_test_log = eu_model.get_best_test_log()
    best_test_line = `config.campaign_id` + "\t" + "EU-stat\ttest\t" \
                        + tool.gen_performance_line(best_test_log) + "\t" \
                        + `config.laplace` + "\t" + "None" + "\t" + `config.eu_scale` + "\t" + `config.ds_ratio`
    fo.write(header+"\n")
    fo.write(best_test_line+"\n")

    fo.write("\n")

    fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n")
    for i in range(0, len(eu_model.test_log)):
        test_log = eu_model.test_log[i]
        line = `i+1` + "\t" + `test_log['performance']['revenue']` \
                + "\t" + `test_log['performance']['ctr']` \
                + "\t" + `test_log['performance']['cpc']` \
                + "\t" + `test_log['performance']['auc']` \
                + "\t" + `test_log['performance']['rmse']` \
                + "\t" + `test_log['performance']['cpm']` \
                + "\t" + `test_log['performance']['clks']` \
                + "\t" + `test_log['performance']['imps']` \
                + "\t" + `test_log['performance']['bids']`
        fo.write(line + "\n")
    fo.close()
    print "Log done."

    weight_path = `config.campaign_id` + "_" + "eu_best_weight" \
                + "_" + `config.laplace` \
                + "_" + `config.eu_scale` \
                + "_" + `config.ds_ratio` \
                 + ".stat_ctr.weight"
    eu_model.output_weight(best_test_log['weight'], "../output/" + weight_path)
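
For reference, a hypothetical invocation of this script, using the values from the sample log name eu_2997_3_0.1_0.05.csv noted in the code (campaign 2997, laplace 3, eu_scale 0.1, ds_ratio 0.05) plus train_option 3 for full pre-training:

python test_stat_bin.py 2997 3 0.1 0.05 3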
Example #7
def main():
    if len(sys.argv) < 7:
        print "Usage: python test_em.py camp_id model_name laplace x_scale ds_ratio budget_prop"
        exit(-1)

    config.campaign_id = int(sys.argv[1])
    model_name = sys.argv[2]
    if model_name not in config.model_list:
        print "Wrong model name."
        exit(-1)
    config.model_name = model_name
    config.laplace = int(sys.argv[3])
    config.em_scale = float(sys.argv[4])
    config.ds_ratio = float(sys.argv[5]) if float(sys.argv[5]) > 0 else 0
    config.budget_prop = int(sys.argv[6])
    print "camp_id\tmodel\tlaplace\tscale\tds_ratio\tbudget_prop"
    print `config.campaign_id` + "\t" + `model_name` \
            + "\t" + `config.laplace` + "\t" + `config.em_scale` \
            + "\t" + `config.ds_ratio` + "\t" + `config.budget_prop`

    train_path = config.data_folder + ` config.campaign_id ` + "/train.yzx.txt"
    test_path = config.data_folder + ` config.campaign_id ` + "/test.yzx.txt"
    train_data = Dataset(train_path, config.campaign_id)
    train_data.shuffle()  # make train data shuffled
    test_data = Dataset(test_path, config.campaign_id)

    # no interval setting
    BidLandscape(train_data, train_data.get_camp_id(), config.laplace)
    BidLandscape(test_data, test_data.get_camp_id(), config.laplace)
    print "Load done."

    # downsampling
    train_data_ds = train_data.down_sampling(
        config.ds_ratio) if config.ds_ratio > 0 else train_data
    print train_data_ds.get_statistics()
    print "Down sampled."

    em_model = EmModel(train_data_ds, test_data, model_name)
    print "campaign v = " + ` em_model.camp_v `

    # train
    print "Begin training ..."
    for i in range(0, config.em_round):
        em_model.train()
        print "EM Round " + ` i + 1 ` + "\t" + ` tool.get_last_log(
            em_model.em_log)['performance'] `
        if tool.judge_stop(em_model.em_log):
            break
    print "Train done."

    # em_rr_2997_3_0.1_0.csv
    log_file = "em_" + model_name \
                + "_" + `config.campaign_id` \
                + "_" + `config.budget_prop` \
                + "_" + `config.laplace` \
                + "_" + `config.em_scale` \
                + "_" + `config.ds_ratio` \
                + ".csv"
    fo = open("../output/" + log_file, 'w')

    print "Begin log ..."
    header = "camp_id\tmodel\tdataset\trevenue\troi\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tscale\tds_ratio\tbudget_prop\tem_round\tmu"
    best_em_log = em_model.get_best_log(em_model.em_log)
    best_em_line = `config.campaign_id` + "\t" + "em"+model_name + "\ttest\t" \
                    + tool.gen_performance_line(best_em_log) + "\t" \
                    + `config.laplace` + "\t" + "None" + "\t" + `config.em_scale` + "\t" \
                    + (`config.ds_ratio` if config.ds_ratio>0 else "None") + "\t" \
                    + `config.budget_prop` +"\t" \
                    + `len(em_model.em_log)` + "\t" + `best_em_log['mu']`

    fo.write(header + "\n")
    fo.write(best_em_line + "\n")

    fo.write("Test with Budget Constraints\n")

    # # reset mu
    # em_model.mu = best_em_log['mu']
    # em_model.bid_strategy.set_mu(em_model.mu)
    # # replay
    # fo.write("prop\trevenue\troi\tctr\tcpc\tauc\trmse\tcpm\timps\tclks\n")
    # for prop in config.budget_props:
    #     performance = em_model.replay(best_em_log['weight'], em_model.test_data, prop)
    #     fo.write(`prop`); fo.write("\t")
    #     fo.write(`performance['revenue']`); fo.write("\t")
    #     fo.write(`performance['roi']`); fo.write("\t")
    #     fo.write(`performance['ctr']`); fo.write("\t")
    #     fo.write(`performance['cpc']`); fo.write("\t")
    #     fo.write(`performance['auc']`); fo.write("\t")
    #     fo.write(`performance['rmse']`); fo.write("\t")
    #     fo.write(`performance['cpm']`); fo.write("\t")
    #     fo.write(`performance['imps']`); fo.write("\t")
    #     fo.write(`performance['clks']`); fo.write("\t")
    #     fo.write("\n")

    fo.write("\n")

    fo.write("Round\trevenue\troi\tcpc\tctr\tauc\trmse\timps\ttruncate\tmu\n")
    for i in range(0, len(em_model.em_log)):
        em_log = em_model.em_log[i]
        line = `i+1` + "\t" + `em_log['performance']['revenue']` + "\t" \
                + `em_log['performance']['roi']` + "\t" \
                + `em_log['performance']['cpc']` + "\t" \
                + `em_log['performance']['ctr']` + "\t" \
                + `em_log['performance']['auc']` + "\t" \
                + `em_log['performance']['rmse']` + "\t" \
                + `em_log['performance']['imps']` + "\t" \
                + `em_log['weight'][0]` + "\t" \
                + `em_log['mu']`
        fo.write(line + "\n")
    fo.write("\n")
    for i in range(0, len(em_model.test_log)):
        test_log = em_model.test_log[i]
        line = `i+1` + "\t" + `test_log['performance']['revenue']` + "\t" \
                + `test_log['performance']['roi']` + "\t" \
                + `test_log['performance']['cpc']` + "\t" \
                + `test_log['performance']['ctr']` + "\t" \
                + `test_log['performance']['auc']` + "\t" \
                + `test_log['performance']['rmse']` + "\t" \
                + `test_log['performance']['imps']` + "\t" \
                + `test_log['weight'][0]`
        if 'm' in test_log:
            line = line + "\tm"
        fo.write(line + "\n")

    fo.close()
    print "Log done."