Example #1
def worker(fold, n_users, n_items, dataset_dir):
    traFilePath = dataset_dir + 'ratings__' + str(fold + 1) + '_tra.txt'
    trasR = loadSparseR(n_users, n_items, traFilePath)

    print(
        dataset_dir.split('/')[-2] + ':', trasR.shape, trasR.nnz,
        '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    tra_tuple = np.array([(user, item, trasR[user, item])
                          for user, item in np.asarray(trasR.nonzero()).T
                          ])  # triad

    tstFilePath = dataset_dir + 'ratings__' + str(fold + 1) + '_tst.txt'
    tstsR = loadSparseR(n_users, n_items, tstFilePath)
    tst_tuple = np.array([(user, item, tstsR[user, item])
                          for user, item in np.asarray(tstsR.nonzero()).T
                          ])  # triad

    sampler = Sampler(trasR=trasR, negRatio=.0, batch_size=batch_size)
    mf = MF(n_users, n_items, eval_metrics, range_of_ratings, reg, n_factors,
            batch_size)
    scores = mf.train(fold + 1, tra_tuple, tst_tuple, sampler)

    print('fold=%d:' % fold,
          ','.join(['%s' % eval_metric for eval_metric in eval_metrics]), '=',
          ','.join(['%.6f' % (score) for score in scores]))

    return scores
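
The worker above is written to be mapped over folds. Below is a minimal sketch of driving it in parallel; it assumes five folds and assumes `n_users`, `n_items`, and `dataset_dir` are defined at module level, like the other globals (`batch_size`, `eval_metrics`, etc.) the function already relies on.

from functools import partial
from multiprocessing import Pool

import numpy as np

if __name__ == '__main__':
    n_folds = 5  # assumption; the data files are named ratings__<fold>_tra.txt
    with Pool(n_folds) as pool:
        # each process trains and evaluates one fold
        fold_scores = pool.map(
            partial(worker, n_users=n_users, n_items=n_items,
                    dataset_dir=dataset_dir),
            range(n_folds))
    # average each evaluation metric across folds
    print('mean scores:', np.mean(fold_scores, axis=0))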
def train():
    print('Preprocessing raw data')
    preprocessor = Preprocessor()
    preprocessor.preprocess()

    dataset = Dataset(preprocessor)

    print('Training MF')
    mf = MF(preprocessor, dataset)
    mf.train_or_load_if_exists()

    print('Building I2I')
    i2i = Item2Item(dataset)

    print('Generating candidates')
    candidate_generator = CandidateGenerator(preprocessor, dataset, mf, i2i)
    X_train, y_train, q_train, q_train_reader = candidate_generator.generate_train()
    X_val, y_val, q_val, q_val_reader = candidate_generator.generate_val()

    import pickle
    try:
        with open('puke.pkl', 'wb') as f:
            pickle.dump((X_train, y_train, q_train, q_train_reader,
                         X_val, y_val, q_val, q_val_reader), f)
    except Exception:  # a bare except would also swallow KeyboardInterrupt/SystemExit
        print("Couldn't save puke")

    print('Training ranker')
    ranker = Ranker()
    ranker.train(X_train, y_train, q_train, X_val, y_val, q_val)
    ranker.save()

    print('Validating ranker')
    rank_scores = ranker.rank(X_val)
    print('ndcg', dataset.validate_ndcg(y_val, q_val, q_val_reader, rank_scores))
def inference():
    preprocessor = Preprocessor(first_time=False)
    preprocessor.preprocess()
    dataset = Dataset(preprocessor)
    mf = MF(preprocessor, dataset)
    mf.load()
    i2i = Item2Item(dataset)
    candidate_generator = CandidateGenerator(preprocessor, dataset, mf, i2i)
    ranker = Ranker()
    ranker.load()

    X_submit, X_article_nums, q_submit, q_reader = candidate_generator.generate_submit()
    import pickle  # the import in train() is function-local, so it is repeated here
    try:
        with open('submit_puke.pkl', 'wb') as f:
            pickle.dump((X_submit, X_article_nums, q_submit, q_reader), f)
    except Exception:
        print("Couldn't save submit_puke")

    # X_submit, X_article_nums, q_submit, q_reader = pickle.load(open('submit_puke.pkl', 'rb'))

    rank_scores = ranker.rank(X_submit)
    base = 0
    entire_articles = []
    not_heavy_items = set(range(1, article_count+1)) - set(preprocessor.heavy_items)
    not_heavy_items = sorted(not_heavy_items)
    cut = 50

    random.seed(0)
    with result_path.open('w') as fout:
        for group_size, reader in tqdm(zip(q_submit, q_reader), total=len(q_submit)):
            articles = X_article_nums[base:base+group_size]
            scores = rank_scores[base:base+group_size]

            articles = [a for _, a in sorted(zip(scores, articles), key=lambda x: x[0], reverse=True)]
            articles = articles[:cut]
            from_followable = candidate_generator.get_readers_followable_articles(reader)
            # from_keywords = candidate_generator.get_readers_keyword_articles(reader)
            for item in from_followable:
                if len(articles) >= cut + 15:
                    break
                if item in articles:
                    continue
                articles.append(item)
            while len(articles) < 100:
                item = random.choice(not_heavy_items)
                if item not in articles:
                    articles.append(item)
            entire_articles.extend(articles)

            reader_str = preprocessor.num2reader[reader]
            article_strs = map(preprocessor.num2article.get, articles)

            fout.write('%s %s\n' % (reader_str, ' '.join(article_strs)))

            base += group_size
    print('Entropy of candidates = ', entropy(entire_articles))
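
The `entropy` helper used in the final print is not part of this excerpt. A plausible sketch, assuming it measures the Shannon entropy (in bits) of the empirical distribution of recommended article ids:

import math
from collections import Counter

def entropy(items):
    # empirical distribution of the recommended ids -> Shannon entropy in bits
    counts = Counter(items)
    total = float(len(items))
    return -sum((c / total) * math.log2(c / total) for c in counts.values())

Higher entropy means the submitted recommendations are spread more evenly across articles rather than concentrated on a few popular ones.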
Example #4
def train(reg):
    logdir = 'logs/mf/numpy'
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    print("Loading data...")
    movie_titles, ratings, rating_indices, n_users, n_items = get_netflix_data(n_samples=1000000)
    print("number of users with ratings: {}".format(len(np.unique(rating_indices[:,0]))))
    print("number of movies with ratings: {}".format(len(np.unique(rating_indices[:,1]))))

    method = 'als'
    if reg:
        print("Performing cross validation with reg: {}.".format(reg))
    else:
        print("Finding optimal regularization penalty.")
    reg_vals = [0.01, 0.1, 1, 10]
    best_reg = 0
    mean_loss = 0.0
    n_splits = 5
    n_features = 15
    loss_path = np.zeros((len(reg_vals), n_splits))
    kf = KFold(n_splits=n_splits, shuffle=True)
    kf.get_n_splits(rating_indices)
    for k, (train_index, test_index) in enumerate(kf.split(rating_indices)):
        print("Fold {}".format(k))
        train_indices, test_indices = rating_indices[train_index], rating_indices[test_index]
        train_indices = (train_indices[:,0], train_indices[:,1], train_indices[:,2])
        test_indices = (test_indices[:,0], test_indices[:,1], test_indices[:,2])
        if reg:
            start = time.time() 
            model = MF(n_users, n_items, n_features, method=method)
            model.fit(train_indices, verbose=1) 
            acc, loss = model.predict(test_indices)
            print("val_loss: {:.4f} - val_acc: {:.4f}".format(loss, acc))
            mean_loss = (mean_loss*k + loss) / (k+1)
        else:
            # use a distinct loop variable so it does not shadow the `reg` argument;
            # otherwise every fold after the first would take the `if reg:` branch
            for i, reg_val in enumerate(reg_vals):
                print("lambda: {}".format(reg_val))
                start = time.time()
                # NOTE: reg_val is presumably meant to be passed to MF here;
                # the original call never used it
                model = MF(n_users, n_items, n_features, method=method)
                model.fit(train_indices, verbose=1)
                acc, loss = model.predict(test_indices)
                print("val_loss: {:.4f} - val_acc: {:.4f}".format(loss, acc))
                loss_path[i, k] = loss
    if reg:
        print("mean loss: {:.4f}".format(mean_loss))
    else:
        loss_means = np.mean(loss_path, axis=1)
        print(loss_means)
        best_reg = reg_vals[np.argmin(loss_means)]
        best_loss = np.amin(loss_means)
        print("best lambda: {} - loss: {}".format(best_reg, best_loss))
    print("Successfully finished training MF. See logs directory.")
Example #5
def train_and_save(data, save_to='db'):
    start = time.time()
    print("> Training the NMF model over", data.shape, "items")
    mf = MF(data, K=20, alpha=0.001, beta=0.01, iterations=800)
    mf.train()
    saved_model = mf.full_matrix()
    end = time.time()
    print("> Elapsed Time to Train = ", end - start)
    if save_to == 'pickle':
        np.save('NMF', saved_model)  # writes NMF.npy in NumPy's binary format
    elif save_to == 'db':
        savetodb(saved_model)
    return 0
Example #6
def matrix_factorization():
    prefix = 'Data/'

    # ------------------------------- Learning ------------------------------- #
    # Load training data
    training_user_movie_pairs = base.load_from_csv(
        os.path.join(prefix, 'data_train.csv'))
    training_labels = base.load_from_csv(
        os.path.join(prefix, 'output_train.csv'))

    # Concatenating data
    user_movie_rating_triplets = np.hstack(
        (training_user_movie_pairs, training_labels.reshape((-1, 1))))

    # Build the learning matrix
    rating_matrix = base.build_rating_matrix(user_movie_rating_triplets)

    # Build the model
    model = MF(rating_matrix, K=30, alpha=1e-5, beta=0.02, iterations=2000)
    with base.measure_time('Training'):
        print('Training matrix factorization...')
        model.train()

    # Save the predicted matrix
    predicted_matrix = np.matrix(model.full_matrix())
    with open('predicted_matrix.txt', 'wb') as f:
        for line in predicted_matrix:
            np.savetxt(f, line, fmt='%.5f')

    # -----------------------Submission: Running model on provided test_set---------------------------- #
    df = pd.read_csv("Data/data_test.csv")
    R = pd.read_csv('predicted_matrix.txt', sep=" ", header=None)
    R = R.values
    users = df['user_id'].values
    movies = df['movie_id'].values
    ratings = []
    for u, m in zip(users, movies):
        if (R[u - 1][m - 1] > 5.00):
            ratings.append(5.00)
        else:
            ratings.append(R[u - 1][m - 1])

    fname = base.make_submission(ratings, df.values.squeeze(),
                                 'MatrixFactorization')
    print('Submission file "{}" successfully written'.format(fname))
Example #7
    def __init__(self):

        # load data
        print('start load data')
        data = pd.read_csv('data/movielens/user_ratedmovies.dat', delimiter='\t')
        item_info = pd.read_csv('data/movielens/movies.dat', delimiter='\t')
        self.itemid2name = dict(zip(item_info['id'].tolist(), item_info['title'].tolist()))

        N = len(set(data['userID'].tolist()))  # number of user
        M = len(set(data['movieID'].tolist()))  # number of movie

        rating_matrix = np.zeros([N, M])
        userid2index = {}
        itemid2index = {}
        userid2itemindexes = {}

        for i, row in data.iterrows():
            userid = row['userID']
            itemid = row['movieID']
            rating = row['rating']
            # print userid, itemid, rating
            if userid in userid2index:
                userindex = userid2index[userid]
                userid2itemindexes[userid].append(itemid)
            else:
                userindex = len(userid2index)
                userid2index[userid] = userindex
                userid2itemindexes[userid] = [itemid]

            if itemid in itemid2index:
                itemindex = itemid2index[itemid]
            else:
                itemindex = len(itemid2index)
                itemid2index[itemid] = itemindex

            rating_matrix[userindex, itemindex] = rating

        self.userid2itemindexes = userid2itemindexes
        self.userid2index = userid2index
        self.itemid2index = itemid2index
        self.index2userid = {y: x for x, y in userid2index.items()}
        self.index2itemid = {y: x for x, y in itemid2index.items()}

        # np.nonzero returns (row_indices, col_indices) in that order
        nonzero_row, nonzero_col = rating_matrix.nonzero()
        inds = list(zip(nonzero_row.tolist(), nonzero_col.tolist()))  # list() so it survives reuse on Python 3
        print('finish load data')
        K = 10
        alpha = 0.0001
        lam = 0.01
        self.mf = MF(rating_matrix, inds, K, alpha, lam)
        self.is_training = False
        self.losses = []
        self.epochs = []
Example #8
def doTrain(K, alpha, beta, gamma, iterations, maxError):
    print('>>> K=' + str(K) + ', alpha=' + str(alpha) + ', beta=' + str(beta) +
          ', gamma=' + str(gamma) + ', iterations=' + str(iterations) +
          ', maxError=' + str(maxError))
    inCsv = 'ml-latest-small/ratings.csv'
    inTrainCsv = 'ml-latest-small/trainRatings.csv'
    outModel = 'trainedModel.pkl'

    ratings = readCsv(inCsv)
    trainSubset = readCsv(inTrainCsv)
    maxUserId, maxMovieId = getMaxIds(ratings)
    R = getRatingsMatrix(trainSubset, maxUserId, maxMovieId)
    mf = MF(R, K, alpha, beta, gamma, iterations, maxError)

    print('Training...')
    training_process = mf.train()
    print('Done. Mse = ' + str(mf.get_mse()))

    print('Serializing model to ' + outModel)
    with open(outModel, 'wb') as output:
        pickle.dump(mf, output, pickle.HIGHEST_PROTOCOL)
    print('Done serializing model to ' + outModel)
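
For completeness, deserializing the model is the mirror of the dump above; a minimal sketch, assuming the `MF` class definition is importable when unpickling:

import pickle

with open('trainedModel.pkl', 'rb') as f:
    mf = pickle.load(f)
print('Loaded model. Mse = ' + str(mf.get_mse()))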
Example #9
def run(path_str, comb='', K=10):
    if path_str in ['ratings_only']:
        use_topK = False
    else:
        use_topK = True

    sim_filename = dir_ + 'sim_res/path_count/%s.res' % path_str
    if path_str == 'ratings_only':
        sim_filename = dir_ + 'ratings.txt'
    if use_topK:
        sim_filename = dir_ + 'sim_res/path_count/%s_top%s.res' % (path_str,
                                                                   topK)
    if comb:
        sim_filename = dir_ + 'sim_res/path_count/combs/%s_%s_top%s.res' % (
            path_str, comb, topK)
    start_time = time.time()
    data = np.loadtxt(sim_filename)
    uids = set(data[:, 0].flatten())
    bids = set(data[:, 1].flatten())
    uid2ind = {int(v): k for k, v in enumerate(uids)}
    ind2uid = reverse_map(uid2ind)
    bid2ind = {int(v): k for k, v in enumerate(bids)}
    ind2bid = reverse_map(bid2ind)

    data[:, 0] = [uid2ind[int(r)] for r in data[:, 0]]
    data[:, 1] = [bid2ind[int(r)] for r in data[:, 1]]

    print('finish load data from %s, cost %.2f seconds, users: %s, items=%s' % (
        sim_filename, time.time() - start_time, len(uids), len(bids)))

    eps, lamb, iters = 10, 10, 500
    print('start generate mf features, (K, eps, reg, iters) = (%s, %s, %s, %s)' % (
        K, eps, lamb, iters))
    mf = MF(data=data,
            train_data=data,
            test_data=[],
            K=K,
            eps=eps,
            lamb=lamb,
            max_iter=iters,
            call_logger=logger)
    U, V = mf.run()
    start_time = time.time()
    wfilename = dir_ + 'mf_features/path_count/%s_user.dat' % (path_str)
    rank_dir = dir_ + 'mf_features/path_count/ranks/%s/' % K
    if K != 10 and not os.path.isdir(rank_dir):
        os.makedirs(rank_dir)

    if use_topK:
        #wfilename = dir_ + 'mf_features/path_count/%s_top%s_user.dat' % (path_str, topK)
        wfilename = dir_ + 'mf_features/path_count/%s_top%s_user.dat' % (
            path_str, topK)
    else:
        wfilename = dir_ + 'mf_features/path_count/%s_user.dat' % (path_str)

    fw = open(wfilename, 'w+')
    res = []
    for ind, fs in enumerate(U):
        row = []
        row.append(ind2uid[ind])
        row.extend(fs.flatten())
        res.append('\t'.join([str(t) for t in row]))

    fw.write('\n'.join(res))
    fw.close()
    print('User-Features: %s saved in %s, cost %.2f seconds' % (
        U.shape, wfilename, time.time() - start_time))

    start_time = time.time()
    wfilename = dir_ + 'mf_features/path_count/%s_item.dat' % (path_str)
    if use_topK:
        #wfilename = dir_ + 'mf_features/path_count/%s_top%s_item.dat' % (path_str, topK)
        wfilename = dir_ + 'mf_features/path_count/%s_top%s_item.dat' % (
            path_str, topK)
    else:
        wfilename = dir_ + 'mf_features/path_count/%s_item.dat' % (path_str)

    fw = open(wfilename, 'w+')
    res = []
    for ind, fs in enumerate(V):
        row = []
        row.append(ind2bid[ind])
        row.extend(fs.flatten())
        res.append('\t'.join([str(t) for t in row]))

    fw.write('\n'.join(res))
    fw.close()
    print('Item-Features: %s saved in %s, cost %.2f seconds' % (
        V.shape, wfilename, time.time() - start_time))
Example #10
import numpy as np
from mf import MF

# A rating matrix with ratings from 5 users on 4 items
# zero entries are unknown values
R = np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])

# Perform training and obtain the user and item matrices
mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20)
training_process = mf.train()
print(mf.P)
print(mf.Q)
print(mf.full_matrix())
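
None of the examples on this page include the `MF` class itself. For orientation, here is a minimal, hypothetical sketch consistent with the constructor `MF(R, K, alpha, beta, iterations)` and the `train()` / `full_matrix()` calls used above: biased matrix factorization trained by stochastic gradient descent, where `alpha` is the learning rate and `beta` the L2 penalty. This is an illustration of the technique, not the implementation behind these examples.

import numpy as np

class MF:
    def __init__(self, R, K, alpha, beta, iterations):
        self.R = R                          # rating matrix, 0 = unknown
        self.num_users, self.num_items = R.shape
        self.K = K                          # number of latent factors
        self.alpha = alpha                  # SGD learning rate
        self.beta = beta                    # L2 regularization weight
        self.iterations = iterations

    def train(self):
        # latent factor matrices and bias terms
        self.P = np.random.normal(scale=1. / self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1. / self.K, size=(self.num_items, self.K))
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[self.R > 0])   # global mean of observed ratings
        samples = [(u, i, self.R[u, i])
                   for u in range(self.num_users)
                   for i in range(self.num_items)
                   if self.R[u, i] > 0]
        history = []
        for it in range(self.iterations):
            np.random.shuffle(samples)
            for u, i, r in samples:
                e = r - self.get_rating(u, i)   # prediction error
                self.b_u[u] += self.alpha * (e - self.beta * self.b_u[u])
                self.b_i[i] += self.alpha * (e - self.beta * self.b_i[i])
                P_u = self.P[u, :].copy()
                self.P[u, :] += self.alpha * (e * self.Q[i, :] - self.beta * self.P[u, :])
                self.Q[i, :] += self.alpha * (e * P_u - self.beta * self.Q[i, :])
            mse = np.mean([(r - self.get_rating(u, i)) ** 2 for u, i, r in samples])
            history.append((it, mse))
        return history

    def get_rating(self, u, i):
        return self.b + self.b_u[u] + self.b_i[i] + self.P[u, :].dot(self.Q[i, :])

    def full_matrix(self):
        # global bias + user/item biases + factor interactions
        return (self.b + self.b_u[:, np.newaxis] + self.b_i[np.newaxis, :]
                + self.P.dot(self.Q.T))

Note that `train()` returns `(iteration, mse)` pairs, which matches how later examples unpack `training_process` for plotting.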
Example #11
                    sep='::', 
                    engine='python', 
                    encoding='latin-1',
                    names=['user_id', 'gender', 'age', 'occupation', 'zipcode'])
# users['age_desc'] = users['age'].apply(lambda x: AGES[x])
# users['occ_desc'] = users['occupation'].apply(lambda x: OCCUPATIONS[x])
# print (len(users), 'descriptions of', max_userid, 'users loaded.')
user_train = users.to_numpy()  # DataFrame.as_matrix() was removed in pandas 1.0
all_users = user_train[:, :4]
all_users[all_users[:,1]=='M', 1] = 1
all_users[all_users[:,1]=='F', 1] = 0
print(all_users)



rs = MF(rate_train, K=100, lam=.1, print_every=10, learning_rate=0.75, max_iter=100, user_based=1)
# print("X0:\n", rs.X)
# print("rate_test:\n", rate_test)
# in_file = open("MF.obj", "rb") # opening for [r]eading as [b]inary
# rs = pickle.load(in_file) # if you only wanted to read 512 bytes, do .read(512)
# in_file.close()
# print(type(rs))

rs.fit()

file_mf = open('MF_1m.obj', 'wb')
pickle.dump(rs, file_mf)
file_mf.close()
print(type(rs))
# print("X1:\n", rs.X)
print("utility:\n", rs.X.dot(rs.W) + rs.mu)
Example #12
R1 = np.array(acq_data)
# assumed: mask a copy so the original R1 survives for evaluation
# (the excerpt uses R below without defining it)
R = R1.copy()
#Set the number of values to replace. For example 20%:
prop = int(R.size * 0.2)
#Randomly choose indices of the numpy array:
i = [np.random.choice(range(R.shape[0])) for _ in range(prop)]
j = [np.random.choice(range(R.shape[1])) for _ in range(prop)]
#Change values with 0
R[i, j] = 0
print("Original:\n", R1)
print("Test Set:\n", R)
R = np.rint(R)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(R, R1)
print("RMSE=", mse**0.5)
print("\nTraining ...\n")
mf = MF(R, K=2, alpha=0.01, beta=0.01, iterations=100)
training_process = mf.train()
L = np.rint(mf.full_matrix())
print("\nDone\n")
x = [x for x, y in training_process]
y = [y for x, y in training_process]
x = x[::10]
y = y[::10]
import matplotlib.pyplot as plt  # assumed import, not shown in the excerpt
plt.figure(figsize=(16, 4))
plt.plot(x, np.sqrt(y))
plt.xticks(x, x)
print("Minimizing Error on Training Set:\n")
plt.xlabel("Iterations")
plt.ylabel("Root Mean Square Error")
plt.grid(axis="y")
print("Learnt=\n", mf.full_matrix())
Example #13
    rating_matrix[userindex, itemindex] = rating

index2userid = {y: x for x, y in userid2index.items()}
index2itemid = {y: x for x, y in itemid2index.items()}

nonzero_row, nonzero_col = rating_matrix.nonzero()
inds = list(zip(nonzero_row.tolist(), nonzero_col.tolist()))  # list() so it survives reuse on Python 3

import sys

sys.path.append('../tpmrec/')

from mf import MF

mf = MF(rating_matrix, inds, 10, 0.0001, 0.01)

mf.train(10)

for userindex in range(1000):
    userid = index2userid[userindex]
    if len(userid2itemindexes[userid]) > 20:
        continue
    pr = mf.predict()
    user_predict = pr[userindex, :]
    top_item_indexes = np.argsort(user_predict)[::-1][:10]
    print "userid = ", userid
    for itemid in userid2itemindexes[userid]:
        print itemid, itemid2name[itemid]
    print "recommend item"
    for itemindex in top_item_indexes:
Example #14
import pandas as pd
import numpy as np
from mf import MF

df_train = pd.read_csv('all/train.csv')
df_train = df_train[0:10000]
R = np.array(
    df_train.pivot(index='User', columns='Track', values='Rating').fillna(0))
d_mf = MF(R, K=20, alpha=0.001, beta=0.01, iterations=100)
training_process = d_mf.train()
print()
print("P x Q:")
print(d_mf.full_matrix())
print()
Example #15
        self.P = np.vstack([self.P, new_lf])

        print('in MF')
        print(len(self.P))

    def remove_new_user(self):
        self.P = np.delete(self.P, 168, 0)

    def predict(self):
        prediction_mat = np.matmul(self.P, self.Q.T)
        return prediction_mat


rating_data = np.load('rating_data.npy')
mf = MF(rating_data, rating_data)
_, _ = mf.train(epoch=20, verbose=False)

num_total_rest = 80

restaurants = pickle.load(open('restaurants.dict', 'rb'))


@app.route('/get_restaurants')
def get_restaurants():
    num_sample_rest = 5
    rand_ints = []
    while len(rand_ints) < num_sample_rest:
        rand_int = randrange(num_total_rest)
        if rand_int not in rand_ints:
            rand_ints.append(rand_int)
Example #16
def run(path_str, K=10):
    if path_str in ['ratings_only']:
        use_topK = False
    else:
        use_topK = True

    sim_filename = os.path.join(data_dir,
                                'sim_res/path_count/%s.res' % path_str)
    if path_str == 'ratings_only':
        sim_filename = os.path.join(data_dir, 'tuples/ratings.txt')
    elif use_topK:
        sim_filename = os.path.join(
            data_dir, 'sim_res/path_count/%s_top%s.res' % (path_str, topK))

    start_time = time.time()
    data = np.loadtxt(sim_filename, dtype=str, delimiter="\t")  # np.str was removed in NumPy 1.24
    uids = set(data[:, 0].flatten())
    bids = set(data[:, 1].flatten())
    # uid2ind = {v: k for k, v in enumerate(uids)}
    uid2ind = {int(v): k for k, v in enumerate(uids)}
    ind2uid = reverse_map(uid2ind)
    # bid2ind = {v: k for k, v in enumerate(bids)}
    bid2ind = {int(v): k for k, v in enumerate(bids)}
    ind2bid = reverse_map(bid2ind)

    data[:, 0] = [uid2ind[int(r)] for r in data[:, 0]]
    data[:, 1] = [bid2ind[int(r)] for r in data[:, 1]]

    # data[:, 0] = [uid2ind[r] for r in data[:, 0]]
    # data[:, 1] = [bid2ind[r] for r in data[:, 1]]

    print('finish load data from %s, cost %.2f seconds, users: %s, items=%s' %
          (sim_filename, time.time() - start_time, len(uids), len(bids)))
    # must convert data type to float
    data = data.astype(dtype=float)  # np.float was removed in NumPy 1.24
    print("data shape: ", data.shape, data.dtype)

    eps, lamb, iters = 10, 10, 500
    print(
        'start generate mf features, (K, eps, reg, iters) = (%s, %s, %s, %s)' %
        (K, eps, lamb, iters))
    mf = MF(data=data,
            train_data=data,
            test_data=[],
            K=K,
            eps=eps,
            lamb=lamb,
            max_iter=iters,
            call_logger=logger)
    U, V = mf.run()

    start_time = time.time()
    wfilename = os.path.join(data_dir,
                             'mf_features/path_count/%s_user.dat' % (path_str))
    if use_topK:
        wfilename = os.path.join(
            data_dir,
            'mf_features/path_count/%s_top%s_user.dat' % (path_str, topK))

    fw = open(wfilename, 'w+')
    res = []
    for ind, fs in enumerate(U):
        row = []
        row.append(ind2uid[ind])
        row.extend(fs.flatten())
        res.append('\t'.join([str(t) for t in row]))

    fw.write('\n'.join(res))
    fw.close()
    print('User-Features: %s saved in %s, cost %.2f seconds' %
          (U.shape, wfilename, time.time() - start_time))

    start_time = time.time()
    wfilename = os.path.join(data_dir,
                             'mf_features/path_count/%s_item.dat' % (path_str))
    if use_topK:
        wfilename = os.path.join(
            data_dir,
            'mf_features/path_count/%s_top%s_item.dat' % (path_str, topK))

    fw = open(wfilename, 'w+')
    res = []
    for ind, fs in enumerate(V):
        row = []
        row.append(ind2bid[ind])
        row.extend(fs.flatten())
        res.append('\t'.join([str(t) for t in row]))

    fw.write('\n'.join(res))
    fw.close()
    print('Item-Features: %s saved in %s, cost %.2f seconds' %
          (V.shape, wfilename, time.time() - start_time))
Example #17
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])
R1 = R.copy()  # assumed: keep an unmasked copy for evaluation (the excerpt prints R1 below)
#Set the number of values to replace. For example 20%:
prop = int(R.size * 0.2)
#Randomly choose indices of the numpy array:
i = [np.random.choice(range(R.shape[0])) for _ in range(prop)]
j = [np.random.choice(range(R.shape[1])) for _ in range(prop)]
#Change values with 0
R[i, j] = 0
print("Original:\n", R1)
print("Test Set:\n", R)
R = np.rint(R)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(R, R1)
print("MSE=", mse**0.5)
print("\nTraining ...\n")
mf = MF(R, K=10000, alpha=0.01, beta=0.01, iterations=10000)
training_process = mf.train()
L = np.rint(mf.full_matrix())
print("Learnt=\n", L)
print("\nFinding Error on test set...\n")
msef = 0.0
count = 0
for i1 in range(len(i)):
    for i2 in range(len(j)):
        if R1.item(i[i1], j[i2]) != 0:
            msef = msef + (R1.item((i[i1], j[i2])) - L.item((i[i1], j[i2])))**2
            count += 1
# average over the entries actually compared, not over every index pair
msef = msef / count if count else 0.0
print("RMSE f=", msef**0.5)
Example #18
import pickle

from mf import MF
from recommender.utils.dataprocess import ratings, meanRatings

userCount = ratings['userId'].max()
movieCount = ratings['movieIndex'].max() + 1

mf = MF(movieCount,
        userCount,
        meanRatings,
        alpha=0.01,
        reg=0.01,
        iterations=20,
        K=20)
mf.train(ratings)

pickle.dump(mf, open('pkl/mfModel.pkl', 'wb'))