Пример #1
0
    def gridSearchBidPrice(self, y_prob, slotprices):
        """Grid-search bid parameters for both linear bid-price strategies.

        Runs ``BidEstimator.gridSearch_bidPrice`` against ``self.gold_val``
        once per strategy and writes each performance table to a file derived
        from ``self.bids_tuning_perf_filepath`` by inserting a
        strategy-specific suffix before the trailing ``.csv`` extension.

        :param y_prob: predicted click probabilities handed to the grid search.
        :param slotprices: slot prices handed to the grid search.
        :return: ``(best_pred_thresh, best_base_bid)`` of the LAST strategy
            searched (``linearBidPrice_variation``); the values found for
            ``linearBidPrice`` are only persisted through its CSV file.
        """
        print("=== Get best bid prices")
        # avg_ctr = ClickEvaluator().compute_avgCTR(self.Y_train)
        avg_ctr = 0.00075  # use fixed ctr from full train set
        print("Train avgCTR = {}".format(avg_ctr))

        bid_estimator = BidEstimator()
        # TODO: could add option for alternate bid strats
        for strategy in ('linearBidPrice', 'linearBidPrice_variation'):
            best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(
                y_prob, avg_ctr, slotprices, self.gold_val, bidpriceest_model=strategy)
            # The dot is escaped and the pattern anchored so that only the
            # file extension is rewritten, never a "<any char>csv" sequence
            # occurring elsewhere in the path.
            perf_path = re.sub(r'\.csv$', '-{}.csv'.format(strategy), self.bids_tuning_perf_filepath)
            ipinyouWriter.ResultWriter().writeResult(perf_path, perf_df)

        return best_pred_thresh, best_base_bid
Пример #2
0
def exeEnsemble_v1(trainDF, targetDF, trainPath, validationPath, targetPath, writeResult2CSV=False):
    """Combine XGBoost and CNN click predictions and evaluate the resulting bids.

    The XGBoost prediction acts as a gate: wherever it is below 0.75 the
    combined prediction is forced to 0, otherwise the CNN prediction is used.
    Bid prices are then derived with ``BidEstimator.linearBidPrice_mConfi``
    and evaluated against ``targetDF``.

    :param trainDF: training frame passed to the XGBoost model.
    :param targetDF: frame to predict and bid on; its ``bidid`` column is read.
    :param trainPath: training data path passed to the CNN model.
    :param validationPath: validation data path passed to the CNN model.
    :param targetPath: test data path passed to the CNN model.
    :param writeResult2CSV: when true, write the bids to
        ``resultEnsemble_v1.csv``.
    """
    # NOTE(review): other code in this file unpacks the results of both model
    # runners as (validation, test) pairs -- confirm the return shape expected
    # here before relying on the element-wise zip below.
    xg_y_pred = exeXGBoostBidModel(validationData=targetDF, trainData=trainDF, writeResult2CSV=False)
    # The CNN is trained from the ``trainPath`` argument.
    cnn_y_pred = exeCNNBidModel(validationDataPath=validationPath, trainDataPath=trainPath, testDataPath=targetPath, writeResult2CSV=False)
    # fm_y_pred = exeFM_SGDBidModel(validationDataOneHot=validateDFonehot, trainDataOneHot=trainDFonehot, validationData=validateDF, writeResult2CSV=True)

    # Use XG's 0 when its prediction is below 0.75, otherwise trust the CNN.
    y_pred = [0 if xg < 0.75 else cnn for xg, cnn in zip(xg_y_pred, cnn_y_pred)]

    # Threshold handed to the bid estimator and reused below to binarise
    # the predictions.
    prune_thresh = 0.2

    be = BidEstimator()
    bidprice = be.linearBidPrice_mConfi(y_pred, 230, 100, prune_thresh)
    # bidprice = be.linearBidPrice_variation(y_pred, 80, 0.2, slotprices=slotprices, prune_thresh=prune_thresh)

    # Pair every bid id with its bid price in a two-column frame.
    bids = np.stack([targetDF['bidid'], bidprice], axis=1)
    bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

    if writeResult2CSV:
        ipinyouWriter.ResultWriter().writeResult("resultEnsemble_v1.csv", bids)

    myEvaluator = Evaluator.Evaluator()
    myEvaluator.computePerformanceMetricsDF(6250*1000, bids, targetDF)

    # Force the predictions to 1 and 0 for the F1 score
    y_pred = [1 if i >= prune_thresh else 0 for i in y_pred]
    ce = Evaluator.ClickEvaluator()
    ce.printClickPredictionScore(y_pred, targetDF)
Пример #3
0
def exeLogisticRegressionBidModel_v2(validationReader=None, trainReader=None, writeResult2CSV=False):
    """Train ``LinearBidModel_v2`` on one-hot data and evaluate its bids.

    :param validationReader: reader exposing ``getOneHotData(train_cols=...)``
        and ``getDataFrame()``. Required despite the ``None`` default.
    :param trainReader: reader exposing ``getOneHotData()``. Required despite
        the ``None`` default.
    :param writeResult2CSV: when true, write the bids to
        ``resultLogisticRegressionBidModel.csv``.
    :return: the predictions returned by ``LinearBidModel_v2.getBidPrice`` for
        the validation set.
    """
    print("============ LogisticRegressionBidModel_v2")
    trainOneHotData, trainY = trainReader.getOneHotData()
    # The training columns are passed along when encoding the validation set.
    # ``Index.tolist()`` is used because ``Index.get_values()`` has been
    # removed from current pandas releases.
    validationOneHotData, valY = validationReader.getOneHotData(
        train_cols=trainOneHotData.columns.tolist())

    X_train = trainOneHotData
    Y_train = trainY['click']
    X_val = validationOneHotData
    Y_val = valY['click']

    lbm = LinearBidModel_v2(cBudget=110, avgCTR=0.2)
    lbm.trainModel(X_train, Y_train)
    # lbm.gridSearchandCrossValidate(X_train, Y_train)
    # print (validationReader.getDataFrame().info())
    v_df = validationReader.getDataFrame()

    y_pred, bids = lbm.getBidPrice(X_val, v_df)
    if writeResult2CSV:
        ipinyouWriter.ResultWriter().writeResult("resultLogisticRegressionBidModel.csv", bids)

    myEvaluator = Evaluator()
    myEvaluator.computePerformanceMetricsDF(6250 * 1000, bids, v_df)
    myEvaluator.printResult()

    return y_pred
Пример #4
0
def exeGaussianRandomBidModel(validationData, trainData=None, writeResult2CSV=False):
    """Bid with ``BidModels.GaussianRandomBidModel`` and print its metrics.

    :param validationData: data to bid on; its ``bidid`` attribute is passed
        to the model and the whole object is passed to the evaluator.
    :param trainData: accepted but not used by this function.
    :param writeResult2CSV: when true, write the bids to
        ``resultGaussianRandomBidModel.csv``.
    """
    gaussian_model = BidModels.GaussianRandomBidModel()
    gaussian_bids = gaussian_model.getBidPrice(validationData.bidid)

    if writeResult2CSV:
        writer = ipinyouWriter.ResultWriter()
        writer.writeResult("resultGaussianRandomBidModel.csv", gaussian_bids)

    evaluator = Evaluator()
    evaluator.computePerformanceMetricsDF(6250 * 1000, gaussian_bids, validationData)
    evaluator.printResult()
Пример #5
0
def exeUniformRandomBidModel(validationData, trainData=None, writeResult2CSV=False):
    """Bid with ``BidModels.UniformRandomBidModel`` and print its metrics.

    :param validationData: data to bid on; its ``bidid`` attribute is passed
        to the model and the whole object is passed to the evaluator.
    :param trainData: accepted but not used by this function.
    :param writeResult2CSV: when true, write the bids to
        ``resultUniformRandomBidModel.csv``.
    """
    # 300 is the upper bound for the random bidding range.
    # TODO: could train this too in a range.
    uniform_model = BidModels.UniformRandomBidModel(300)
    uniform_bids = uniform_model.getBidPrice(validationData.bidid)

    if writeResult2CSV:
        writer = ipinyouWriter.ResultWriter()
        writer.writeResult("resultUniformRandomBidModel.csv", uniform_bids)

    evaluator = Evaluator()
    evaluator.computePerformanceMetricsDF(6250 * 1000, uniform_bids, validationData)
    evaluator.printResult()
Пример #6
0
def exeConstantBidModel(validationData, trainData=None, train=False, writeResult2CSV=False):
    """Bid with ``BidModels.ConstantBidModel`` and print its metrics.

    :param validationData: data to bid on; its ``bidid`` attribute is passed
        to the model and the whole object is passed to the evaluator.
    :param trainData: data handed to ``trainModel``; only read when ``train``
        is true.
    :param train: when true, call ``trainModel`` over the search range
        ``[1, 300]`` before bidding; otherwise the model keeps the
        ``defaultbid=77`` it was constructed with.
    :param writeResult2CSV: when true, write the bids to
        ``resultConstantBidModel.csv``.
    """
    constant_model = BidModels.ConstantBidModel(defaultbid=77)
    if train:
        training_budget = int(6250*1000*8.88)
        constant_model.trainModel(trainData, searchRange=[1, 300], budget=training_budget)

    constant_bids = constant_model.getBidPrice(validationData.bidid)

    if writeResult2CSV:
        writer = ipinyouWriter.ResultWriter()
        writer.writeResult("resultConstantBidModel.csv", constant_bids)

    evaluator = Evaluator()
    evaluator.computePerformanceMetricsDF(6250 * 1000, constant_bids, validationData)
    evaluator.printResult()
Пример #7
0
def exeXGBoostBidModel(validationData, trainData=None, testData=None, writeResult2CSV=False, testMode=True):
    """Train ``XGBoostBidModel`` and return its predictions.

    The feature columns are every column of ``trainData`` except the
    label/identifier columns listed in ``unwanted_Column``; columns from that
    list which are absent from ``trainData`` are simply ignored.

    :param validationData: data to bid on and to predict for.
    :param trainData: training data; iterating it must yield its column
        names. Required despite the ``None`` default.
    :param testData: second data set to predict for; it is passed to
        ``getY_Pred`` as given.
    :param writeResult2CSV: when true, write the validation bids to
        ``resultXGBoostBidModel.csv``.
    :param testMode: when false, additionally compute and print the
        performance metrics of the validation bids.
    :return: tuple ``(getY_Pred(validationData), getY_Pred(testData))``.
    """
    Y_column = 'click'
    unwanted_Column = ['click', 'bidid', 'bidprice', 'payprice', 'userid', 'IP', 'url', 'creative', 'keypage']
    # Features come from the ``trainData`` argument, keeping column order.
    X_column = [column for column in trainData if column not in unwanted_Column]

    xgd = XGBoostBidModel(X_column, Y_column)
    xgd.trainModel(trainData)
    bids = xgd.getBidPrice(validationData)

    if writeResult2CSV:
        ipinyouWriter.ResultWriter().writeResult("resultXGBoostBidModel.csv", bids)

    if not testMode:
        myEvaluator = Evaluator()
        myEvaluator.computePerformanceMetricsDF(6250 * 1000, bids, validationData)
        myEvaluator.printResult()

    return xgd.getY_Pred(validationData), xgd.getY_Pred(testData)
Пример #8
0
def exeLogisticRegressionBidModel(validationData=None, trainData=None, writeResult2CSV=False):
    """Train the formula-based logistic regression bid model and evaluate it.

    The right-hand side of the regression formula is built by joining every
    column of ``trainData`` with ``' + '``, except the label/identifier
    columns listed in ``unwanted_Column``; columns from that list which are
    absent from ``trainData`` are simply ignored.

    :param validationData: data to bid on and to evaluate against. Required
        despite the ``None`` default.
    :param trainData: training data; iterating it must yield its column
        names. Required despite the ``None`` default.
    :param writeResult2CSV: when true, write the bids to
        ``LRbidModelresult.csv``.
    """
    # Get regressionFormulaX
    unwanted_Column = ['click', 'bidid', 'bidprice', 'payprice', 'userid', 'IP', 'url', 'creative', 'keypage']
    X_column = [column for column in trainData if column not in unwanted_Column]
    final_x = ' + '.join(X_column)

    lrBidModel = LinearBidModel.LinearBidModel(regressionFormulaY='click', regressionFormulaX=final_x, cBudget=272.412385 * 1000, avgCTR=0.2, modelType='logisticregression')
    print(type(validationData))
    lrBidModel.trainModel(trainData, retrain=True, modelFile="LogisticRegression.pkl")
    # lrBidModel.gridSearchandCrossValidate(trainData.getDataFrame())

    bids = lrBidModel.getBidPrice(validationData)
    if writeResult2CSV:
        ipinyouWriter.ResultWriter().writeResult("LRbidModelresult.csv", bids)
    myEvaluator = Evaluator.Evaluator()
    myEvaluator.computePerformanceMetricsDF(6250*1000, bids, validationData)
    myEvaluator.printResult()
Пример #9
0
def exeEnsemble_Weighted(trainDF, validateDF, testDF,
                   trainPath, validationPath, testPath,
                   trainReader, validateReader, testReader,
                   writeResult2CSV=False):
    '''
    Blend the XGBoost and CNN click predictions with fixed weights
    (0.7 * XGBoost + 0.3 * CNN), report the AUC of each model and of the
    blend on the validation set, grid-search the linear bid price on the
    validation set and write the resulting bids for the test set.

    All artefacts (ROC images, probability histograms, tuning table, test
    bids) are written under ``./SavedEnsembleInfo/`` with a shared timestamp.

    :param trainDF: training frame passed to the XGBoost model.
    :param validateDF: validation frame; its ``click`` and ``slotprice``
        columns are read.
    :param testDF: test frame; its ``bidid`` and ``slotprice`` columns are read.
    :param trainPath: training data path passed to the CNN model.
    :param validationPath: validation data path passed to the CNN model.
    :param testPath: test data path passed to the CNN model.
    :param trainReader: only referenced by the disabled LR/FM experiments.
    :param validateReader: only referenced by the disabled LR/FM experiments.
    :param testReader: only referenced by the disabled LR/FM experiments.
    :param writeResult2CSV: not consulted in this part of the function; the
        result files below are always written.
    '''
    xg_val_y_pred, xg_test_y_pred = exeXGBoostBidModel(validationData=validateDF, trainData=trainDF, testData=testDF, writeResult2CSV=False)
    cnn_val_y_pred, cnn_test_y_pred = exeCNNBidModel(validationDataPath=validationPath, trainDataPath=trainPath, testDataPath=testPath, writeResult2CSV=False)
    # Disabled ensemble members:
    # lr_y_pred = exeLogisticRegressionBidModel_v2(validationReader=validationReader, trainReader=trainReader, writeResult2CSV=False)
    # fm_y_pred = exeFMBidModel(trainReader=trainReader, validationReader=validateReader, testReader=testReader, writeResult2CSV=False)

    # Weights tried so far and the validation AUC recorded for each:
    #   xg 0.4 / cnn 0.4 / lr 0.05 / fm 0.15 -> 87.80
    #   xg 0.6 / lr 0.1 / fm 0.3 (no CNN)    -> 0.874 (previously 0.861)
    #   xg 0.5 / cnn 0.5 / lr 0.05 / fm 0.15 -> 0.8760
    #   xg 0.6 / cnn 0.4                     -> 0.8810
    #   xg 0.5 / cnn 0.5                     -> 0.8797
    #   xg 0.7 / cnn 0.3                     -> 0.8840  (in use)
    #   xg 0.8 / cnn 0.2                     -> 0.8836
    val_y_pred = [(xg * 0.7 + cnn * 0.3) for xg, cnn in zip(xg_val_y_pred, cnn_val_y_pred)]  # AUC 0.8840

    # One timestamp shared by every file written during this run.
    timestamp = str(time.strftime("%Y%m%d-%H%M%S"))

    print("XGBoost AUC:")
    ClickEvaluator().clickROC(validateDF['click'], xg_val_y_pred, imgpath="./SavedEnsembleInfo/XGBoost_AUC-" + timestamp + ".jpg")
    print("CNN AUC:")
    ClickEvaluator().clickROC(validateDF['click'], cnn_val_y_pred, imgpath="./SavedEnsembleInfo/CNN_AUC-" + timestamp + ".jpg")
    # print("Logistic AUC:")
    # ClickEvaluator().clickROC(validateDF['click'], lr_y_pred, imgpath="./SavedEnsembleInfo/LogisticR_AUC-" + timestamp + ".jpg")
    # print("FastFM AUC:")
    # ClickEvaluator().clickROC(validateDF['click'], fm_y_pred, imgpath="./SavedEnsembleInfo/FastFM_AUC-" + timestamp + ".jpg")

    print("Ensemble AUC:")
    ClickEvaluator().clickROC(validateDF['click'], val_y_pred, imgpath="./SavedEnsembleInfo/ensemble_weighted_AUC-" + timestamp + ".jpg",
                                                           showGraph=False)

    # Converted to an array so it can be indexed with the boolean click masks.
    val_y_pred = np.array(val_y_pred)

    # Predicted click probabilities of the rows that really were clicks
    click1 = val_y_pred[validateDF.click == 1]
    n, bins, patches = ClickEvaluator().clickProbHistogram(pred_prob=click1, color='g',
                                                           title='Predicted probabilities for clicks=1',
                                                           imgpath="./SavedEnsembleInfo/ensemble_weighted-click1-" + timestamp + ".jpg",
                                                           showGraph=False)

    # click=0 prediction as click=1 probabilities
    click0 = val_y_pred[validateDF.click == 0]
    n, bins, patches = ClickEvaluator().clickProbHistogram(pred_prob=click0, color='r',
                                                           title='Predicted probabilities for clicks=0',
                                                           imgpath="./SavedEnsembleInfo/ensemble_weighted-click0-" + timestamp + ".jpg",
                                                           showGraph=False)

    ### Bid price model evaluations
    # Same weights as for the validation predictions above.
    test_y_pred = [(xg * 0.7 + cnn * 0.3) for xg, cnn in zip(xg_test_y_pred, cnn_test_y_pred)]

    # ``.values`` is used because ``as_matrix()`` has been removed from
    # current pandas releases.
    slotprices_val = validateDF['slotprice'].values.astype(int)
    slotprices_test = testDF['slotprice'].values.astype(int)

    print("=== Get best bid prices on validation set")
    # avg_ctr = ClickEvaluator().compute_avgCTR(trainDF.click)
    # TODO override with complete train set avg ctr
    avg_ctr = 0.00075
    print("Train avgCTR = {}".format(avg_ctr))

    bid_estimator = BidEstimator()
    print("== linearBidPrice")
    best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(val_y_pred, avg_ctr, slotprices_val,
                                                                                 validateDF,
                                                                                 bidpriceest_model='linearBidPrice')
    ipinyouWriter.ResultWriter().writeResult("./SavedEnsembleInfo/ensemble_weighted-linearBidPrice-" + timestamp + ".csv", perf_df)
    print("= linearBidPrice estimate test bids")
    bids = bid_estimator.linearBidPrice(test_y_pred, best_base_bid, avg_ctr)
    # Format bids into a frame of bidid/bidprice, reusing testDF's index so
    # that the two columns line up row by row.
    bids_df = pd.concat([testDF['bidid'], pd.DataFrame(bids, columns=['bidprice'], index=testDF['bidid'].index)], axis=1)
    ipinyouWriter.ResultWriter().writeResult("./SavedEnsembleInfo/ensemble_weighted-testbids-" + timestamp + ".csv", bids_df)