Example #1
import pickle

import implicit
from scipy.sparse import load_npz
from implicit.evaluation import (train_test_split, precision_at_k,
                                 mean_average_precision_at_k)


def model():
    '''computes p@k and map@k evaluation metrics and saves model'''
    sparse_item_user = load_npz(
        "/Users/maxmaiberger/Documents/board-game-recommender/import/Data/test_data_saved/sparse_item_user.npz"
    )

    train, test = train_test_split(sparse_item_user, train_percentage=0.8)

    model = implicit.als.AlternatingLeastSquares(factors=100,
                                                 regularization=0.1,
                                                 iterations=20,
                                                 calculate_training_loss=False)
    model.fit(train)

    with open(
            '/Users/maxmaiberger/Documents/board-game-recommender/import/Data/test_data_saved/model.sav',
            'wb') as pickle_out:
        pickle.dump(model, pickle_out)

    p_at_k = precision_at_k(model,
                            train_user_items=train,
                            test_user_items=test,
                            K=10)
    m_at_k = mean_average_precision_at_k(model, train, test, K=10)
    print('precision at k:', p_at_k)
    print('mean average precision at k:', m_at_k)

    return p_at_k, m_at_k
Example #2
import pickle

import implicit
from scipy.sparse import load_npz
from implicit.evaluation import (train_test_split, precision_at_k,
                                 mean_average_precision_at_k)


def model(sparse_user_item_file_path='files/sparse_user_item.npz'):
    """Computes p@k and map@k evaluation metrics and saves model.

    Args:
        sparse_user_item_file_path (str): file location for a scipy.sparse.csr_matrix sparse user * item matrix

    Returns:
        p_at_k (float): precision @ k recommendations, with k=10
        m_at_k (float): mean average precision @ k recommendations, with k=10
    """
    sparse_user_item = load_npz(sparse_user_item_file_path)

    train, test = train_test_split(sparse_user_item, train_percentage=0.8)

    model = implicit.als.AlternatingLeastSquares(factors=100,
                                                 regularization=0.1,
                                                 iterations=100,
                                                 calculate_training_loss=False)
    model.fit(train)

    with open('files/model.sav', 'wb') as pickle_out:
        pickle.dump(model, pickle_out)

    p_at_k = precision_at_k(model,
                            train_user_items=train,
                            test_user_items=test,
                            K=10)
    map_at_k = mean_average_precision_at_k(model, train, test, K=10)

    return p_at_k, map_at_k
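
A minimal call sketch for the function above, assuming files/sparse_user_item.npz exists on disk:

if __name__ == '__main__':
    p_at_k, map_at_k = model()
    print('precision@10:', p_at_k)
    print('MAP@10:', map_at_k)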
Example #3
from implicit.evaluation import mean_average_precision_at_k, ndcg_at_k


# the model is passed in explicitly instead of being read from a global
def train_als(model, trn, tst):
    model.fit(trn, show_progress=True)
    MAP = mean_average_precision_at_k(model, trn, tst, K=10,
                                      show_progress=True, num_threads=1)
    NDCG = ndcg_at_k(model, trn, tst, K=10,
                     show_progress=True, num_threads=1)
    print("MAP is %.4f and NDCG is %.4f" % (MAP, NDCG))
    return model
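
A hedged usage sketch for train_als; the plays.npz file name and the hyperparameter values are assumptions, not part of the original snippet:

import implicit
from scipy.sparse import load_npz
from implicit.evaluation import train_test_split

plays = load_npz('plays.npz')  # hypothetical interaction matrix
trn, tst = train_test_split(plays, train_percentage=0.8)
als = implicit.als.AlternatingLeastSquares(factors=100, regularization=0.1,
                                           iterations=20)
als = train_als(als, trn, tst)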
Example #4
    def evaloutput(self, K=10):
        '''loads a pickled model and recomputes p@K and MAP@K on a fresh split'''
        with open('model_NR.sav', 'rb') as pickle_in:
            model = pickle.load(pickle_in)
        sparse_user_item = load_npz("sparse_user_item_NR.npz")
        train, test = train_test_split(sparse_user_item, train_percentage=0.8)
        print("test", test.shape)
        print("train", train.shape)
        p_at_k = precision_at_k(model, train, test, K=K)
        m_at_k = mean_average_precision_at_k(model, train, test, K=K)

        return p_at_k, m_at_k
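
The four-space indentation shows evaloutput is a class method; a hypothetical wrapper class, only to make the fragment callable:

class Recommender:
    pass  # evaloutput from the snippet above is defined on this class

# once the method is attached:
# p_at_k, m_at_k = Recommender().evaloutput(K=10)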
Example #5
def model():
    '''computes p@k and map@k evaluation metrics and saves model'''
    sparse_item_user = load_npz("sparse_item_user.npz")

    train, test = train_test_split(sparse_item_user, train_percentage=0.8)

    model = implicit.als.AlternatingLeastSquares(factors=100,
                                                 regularization=0.1,
                                                 iterations=20,
                                                 calculate_training_loss=False)
    model.fit(train)

    with open('model.sav', 'wb') as pickle_out:
        pickle.dump(model, pickle_out)

    p_at_k = precision_at_k(model,
                            train_user_items=train,
                            test_user_items=test,
                            K=10)
    m_at_k = mean_average_precision_at_k(model, train, test, K=10)

    return p_at_k, m_at_k
Example #6
def learningCurve(model, train, test, epochs, outFile=None,
                  k=5, showProgress=True, numThreads=12):
    # if not userIndex:
    #     userIndex = range(train.shape[0])
    prevEpoch = 0

    pAtK = []
    MAPatK = []
    NDCGatK = []
    AUCatK = []

    headers = ["epochs", f"p@{k}", f"MAP@{k}", f"NDCG@{k}", f"AUC@{k}"]
    printLog(headers, header=True, outFile=outFile)

    for epoch in epochs:
        model.iterations = epoch - prevEpoch
        # assumes implicit's ALS keeps its factors between fit() calls, so
        # setting `iterations` to the epoch delta continues training
        model.fit(train, show_progress=showProgress)
        pAtK.append(precision_at_k(model, train.T.tocsr(), test.T.tocsr(),
                                   K=k, show_progress=showProgress,
                                   num_threads=numThreads))
        MAPatK.append(mean_average_precision_at_k(model, train.T.tocsr(),
                                                  test.T.tocsr(), K=k,
                                                  show_progress=showProgress,
                                                  num_threads=numThreads))
        NDCGatK.append(ndcg_at_k(model, train.T.tocsr(), test.T.tocsr(),
                                 K=k, show_progress=showProgress,
                                 num_threads=numThreads))
        AUCatK.append(AUC_at_k(model, train.T.tocsr(), test.T.tocsr(),
                               K=k, show_progress=showProgress,
                               num_threads=numThreads))
        row = [epoch, pAtK[-1], MAPatK[-1], NDCGatK[-1], AUCatK[-1]]
        printLog(row, outFile=outFile)
        prevEpoch = epoch

    return model, pAtK, MAPatK, NDCGatK, AUCatK
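
A sketch of driving learningCurve with a doubling epoch schedule; train and test are the item-user split from the other examples, and the schedule and factor count are assumptions:

import implicit

epochs = [1, 2, 4, 8, 16, 32]  # cumulative epoch checkpoints
als = implicit.als.AlternatingLeastSquares(factors=64, regularization=0.1)
als, pAtK, MAPatK, NDCGatK, AUCatK = learningCurve(
    als, train, test, epochs, outFile='learning_curve.log', k=5)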
Example #7
    trainTscr = train.T.tocsr()
    testTscr = test.T.tocsr()

    k = args.k

    print(f"Computing p@{k} ...", flush=True)
    t0 = time()
    pAtK = precision_at_k(model, trainTscr, testTscr, K=k,
                          show_progress=args.progressBar,
                          num_threads=args.numThreads)
    print(f"Δt: {time() - t0:5.1f}s")
    print(f"Computing MAP@{k} ...", flush=True)
    t0 = time()
    MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k,
                                         show_progress=args.progressBar,
                                         num_threads=args.numThreads)
    print(f"Δt: {time() - t0:5.1f}s")
    print(f"Computing NDCG@{k} ...", flush=True)
    t0 = time()
    NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k,
                        show_progress=args.progressBar,
                        num_threads=args.numThreads)
    AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k,
                      show_progress=args.progressBar,
                      num_threads=args.numThreads)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}"
          f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True)
Example #8
def run(modelName, datasetName, factorCt, k, λ, α,
        maxIters, showProgress, useGPU, threadCt):

    if modelName == 'als':
        model = getModel(modelName, volubility=2,
                         params={'factors': factorCt,
                                 'regularization': λ,
                                 'iterations': maxIters,
                                 'use_gpu': useGPU})
    else:
        model = getModel(modelName, volubility=2,
                         params={'factors': factorCt,
                                 'regularization': λ,
                                 'alpha': α,
                                 'iterations': maxIters,
                                 'use_gpu': useGPU})

    artists, users, plays = fetchDataset(datasetName, volubility=2)

    print(artists.shape, users.shape, plays.shape, flush=True)

    if issubclass(model.__class__, AlternatingLeastSquares):
        # let's weight these models by bm25_weight
        print("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building approximate recommend index
        model.approximate_recommend = False

    # print(asctime(localtime()))
    # t0 = time()
    plays = plays.tocsr()
    # print(f"Δt: {time() - t0:5.1f}s")

    train, test = train_test_split(plays, train_percentage=0.8)

    print("Training model")
    print(asctime(localtime()), flush=True)
    t0 = time()

    model.fit(train, show_progress=showProgress)
    print(f"Δt: {time() - t0:5.1f}s", flush=True)

    trainTscr = train.T.tocsr()
    testTscr = test.T.tocsr()

    print(f"Computing p@{k} ...", flush=True)
    t0 = time()
    pAtK = precision_at_k(model, trainTscr, testTscr, K=k,
                          show_progress=showProgress,
                          num_threads=threadCt)
    ex.log_scalar(f"p@{k}", pAtK)
    print(f"Δt: {time() - t0:5.1f}s")
    print(f"Computing MAP@{k} ...", flush=True)
    t0 = time()
    MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k,
                                         show_progress=showProgress,
                                         num_threads=threadCt)
    ex.log_scalar(f"MAP@{k}", MAPatK)
    print(f"Δt: {time() - t0:5.1f}s")
    print(f"Computing NDCG@{k} ...", flush=True)
    t0 = time()
    NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k,
                        show_progress=showProgress,
                        num_threads=threadCt)
    ex.log_scalar(f"NDCG@{k}", NDCGatK)
    AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k,
                      show_progress=showProgress,
                      num_threads=threadCt)
    ex.log_scalar(f"AUC@{k}", AUCatK)
    print(f"Δt: {time() - t0:5.1f}s")
    print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}"
          f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True)
Example #9
        model = get_model(model_name)
        # if we're training an ALS based model, weight input for last.fm
        # by bm25
        if issubclass(model.__class__, AlternatingLeastSquares):
            if USE_BUILTIN_DATA:
                bm25_play = bm25_weight(plays, K1=100, B=0.8)
                train_plays, test_plays = train_test_split(bm25_play)
                # let's weight these models by bm25_weight
                logging.debug("weighting matrix by bm25_weight")

                # also disable building approximate recommend index
                # model.approximate_similar_items = False
            else:
                # apply a log weighting to approximate confidence levels;
                # the paper doesn't specify an alpha value, so just guess alpha=1
                alpha = 1
                train_plays.data = 1 + np.log(alpha * train_plays.data)
                test_plays.data = 1 + np.log(alpha * test_plays.data)
        elif USE_BUILTIN_DATA:
            train_plays, test_plays = train_test_split(plays)

        model.fit(train_plays)
        MAPk = mean_average_precision_at_k(model,
                                           train_plays.transpose().tocsr(),
                                           test_plays.transpose().tocsr(),
                                           K=5,
                                           num_threads=0)
        MAPk_scores.append(MAPk)

        print("MAP for " + str(model_name) + " is: " + str(MAPk))