Example #1
import numpy as np

from pyltr.metrics import NDCG


# Reference (non-vectorized) computation of per-sample LambdaRank gradients and
# the associated RankNet/LambdaRank costs, aggregated over all query groups.
def lambdarank_ref(qids, y, y_pred, metric):
    n_samples = y.shape[0]
    ranknet_cost = 0
    lambdarank_cost = 0
    discrete_metric = 0
    pair_count = 0
    lambdas = np.zeros(qids.shape)
    metric = NDCG(k=7)  # NOTE: replaces the metric passed in as an argument with NDCG@7
    n_queries = 0
    for qid, a, b in get_groups(qids):
        (r, l, d, lambdas[a:b], _, _,
         p) = calc_lambdas(qid, y[a:b], y_pred[a:b], metric)
        ranknet_cost += r
        lambdarank_cost += l
        discrete_metric += d
        pair_count += p
        n_queries += 1

    # Normalize: costs and lambdas per 100 samples, the metric per query
    lambdarank_cost *= 100.0 / n_samples
    ranknet_cost *= 100.0 / n_samples
    discrete_metric /= n_queries
    lambdas *= 100.0 / n_samples

    print(pair_count, n_queries, ranknet_cost, lambdarank_cost, discrete_metric)
    # print(lambdas)
    return (ranknet_cost, lambdarank_cost, discrete_metric, lambdas)
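
Example #1 depends on two helpers that are not shown: get_groups, which iterates over contiguous query blocks, and calc_lambdas, which returns the per-query costs and lambdas. Below is a minimal, hypothetical stand-in for the grouping helper, assuming qids is sorted so that all rows of one query are contiguous (pyltr ships a comparable utility); calc_lambdas is left out because its body is not part of the example.

# Hypothetical stand-in for the unshown grouping helper; yields (qid, start, end)
# slices and assumes all rows of a query are contiguous in qids.
def get_groups(qids):
    prev_qid, start = None, 0
    for i, qid in enumerate(qids):
        if qid != prev_qid:
            if prev_qid is not None:
                yield prev_qid, start, i
            prev_qid, start = qid, i
    if prev_qid is not None:
        yield prev_qid, start, len(qids)
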
def train_lambda_mart(df_train):
    x_train = df_train.drop(["target_score", "srch_id", "prop_id"], axis=1)
    y_train = df_train["target_score"]
    query_ids = df_train["srch_id"].copy()

    print("Fitting LambdaMART...")
    model = LambdaMART(metric=NDCG(len(df_train)), n_estimators=100, verbose=1)
    model.fit(x_train, y_train, query_ids)

    print_feature_importances(x_train, model)

    return model
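
print_feature_importances is another helper the example leaves out. A plausible sketch, assuming the fitted model exposes a scikit-learn-style feature_importances_ attribute (the helper name and output format are guesses, not part of pyltr):

def print_feature_importances(x_train, model):
    # Pair each column with its importance and print them, largest first.
    ranked = sorted(zip(model.feature_importances_, x_train.columns), reverse=True)
    for importance, name in ranked:
        print("%-40s %.4f" % (name, importance))
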
Example #3
from pyltr.metrics import NDCG
from pyltr.models import LambdaMART
from sklearn.ensemble import GradientBoostingRegressor


def getmodel(grad, X_train, y_train, query_ids):
    if grad:
        model = GradientBoostingRegressor(n_estimators=100, verbose=1)
        model.fit(X_train, y_train)
    else:
        #X_train = X_train.drop(["srch_id"],axis=1)
        query_ids = query_ids.copy()
        model = LambdaMART(metric=NDCG(len(X_train)),
                           n_estimators=100,
                           verbose=1)
        model.fit(X_train, y_train, query_ids)
    return model
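
A minimal usage sketch for getmodel, assuming a DataFrame laid out like the one in Example #1 (the column names here are placeholders, not fixed by the example):

X_train = df_train.drop(["target_score", "srch_id", "prop_id"], axis=1)
y_train = df_train["target_score"]
query_ids = df_train["srch_id"]

# grad=True trains a plain GradientBoostingRegressor baseline;
# grad=False trains the query-aware LambdaMART model.
model = getmodel(False, X_train, y_train, query_ids)
predictions = model.predict(X_train)
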
Example #4
# LambdaMART
from pyltr.models import LambdaMART
from pyltr.metrics import NDCG
from pyltr.models.monitors import ValidationMonitor

train_['rel'] = train_['booking_bool'] * 5 + train_[
    'click_bool']  # relevance should have a maximum of 5
train_.loc[
    train_['rel'] == 6,
    'rel'] = 5  # 6 can only occur if click_bool and booking_bool are both 1, so cap at 5
# add srch_id so it can serve as the index of the new training frame
m2_rnn.append('srch_id')
LambdaMART_train = train_[m2_rnn].set_index('srch_id').sort_index()
qids_val = LambdaMART_train.index

metric = NDCG(k=100)
monitor = ValidationMonitor(LambdaMART_train,
                            train_['rel'],
                            qids_val.values,
                            metric=metric)
model = LambdaMART(metric=metric,
                   max_depth=10,
                   n_estimators=1000,
                   learning_rate=.1,
                   verbose=1,
                   max_features=0.5,
                   query_subsample=0.5,
                   max_leaf_nodes=10,
                   min_samples_leaf=64)
model.fit(train_[m2_rnn], train_['rel'], qids_val, monitor=monitor)


def calculate_ndcg(truth, prediction):
    print("Calculating score...")
    return NDCG(k=len(truth)).calc_mean(truth["srch_id"].values,
                                        truth["target_score"].values,
                                        prediction["result"].values)
Example #6
import pickle

import pandas as pd
from pyltr.models.lambdamart import LambdaMART
from pyltr.metrics import NDCG

print("Reading train_0.csv ...")
train = pd.read_csv("data/train/train_0.csv").sort_values("srch_id")
print("Finished reading")
qids = train["srch_id"].copy()
train_Y = train["target"]
train_X = train.drop([
    "target", "srch_id", "click_bool", "booking_bool", "gross_bookings_usd",
    "position"
],
                     axis=1)

model = LambdaMART(metric=NDCG(len(train)),
                   n_estimators=200,
                   min_samples_leaf=6,
                   max_depth=10,
                   max_leaf_nodes=7,
                   verbose=1)
del train
print("Fitting lambdaMART ...")
model.fit(train_X, train_Y, qids)

outfile = "data/lambdamart_EST10_SL20_D10_LN7"
print("Dumping model ...")
pickle.dump(model, open(outfile, 'wb'))
print("Model dumped")

print("Calculating feature importances ...")
Example #7
# In[3]:


import numpy as np
from pyltr.metrics import NDCG
from pyltr.models import LambdaMART
from pyltr.models.monitors import ValidationMonitor

y_train = np.asarray(y_train)
qids_train = np.asarray(qids_train)
y_val = np.asarray(y_val)
qids_val = np.asarray(qids_val)
y_test = np.asarray(y_test)
qids_test = np.asarray(qids_test)


# In[4]:


metric = NDCG(k=40)

monitor = ValidationMonitor(x_val, y_val, qids_val, metric=metric, stop_after=50)


# In[5]:


model = LambdaMART(metric=metric, max_depth=4, n_estimators=450,
                   learning_rate=.04, verbose=1, max_features=25,
                   min_samples_split=1000, min_samples_leaf=200,
                   max_leaf_nodes=20)
model.fit(x_train, y_train, qids_train, monitor=monitor)


# In[ ]:
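
The notebook stops after fitting. A typical follow-up cell, assuming x_test is laid out like the training features and qids_test is grouped by query, would score the held-out split with the same metric:

pred_test = model.predict(x_test)
print("Test NDCG@40:", metric.calc_mean(qids_test, y_test, pred_test))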