def evaluate(u: np.ndarray, v: np.ndarray, test_X, mode: str):
    """Compute the mean squared error of the factors (u, v) on the test set.

    :param u: user factor array
    :param v: item factor array
    :param test_X: held-out ratings as a scipy sparse matrix
        (``.toarray()`` and ``.getnnz()`` are called on it)
    :param mode: ``'sparse'`` selects ALSSparse; any other value uses the
        dense ALS on ``test_X.toarray()``
    :return: total squared error divided by the number of stored ratings
    """
    print(u.shape, v.shape, test_X.shape, mode)
    if mode == 'sparse':
        model = ALSSparse(u, v, test_X)
    else:
        model = ALS(u, v, test_X.toarray())
    # normalize by the number of explicit ratings, not the full matrix size
    mse = model.function_eval() / test_X.getnnz()
    print("Test set MSE:", mse)
    return mse
def myfunction(seed):
    """Run one ALS experiment with the given seed and return its metrics.

    NOTE(review): references a free variable ``self`` — this def is a
    fragment of a method/closure body; confirm the enclosing scope in the
    full file.

    :param seed: RNG seed injected into the ALS constructor arguments
    :return: copy of a dict mapping metric names to per-step value lists
    """
    # work on a copy so the shared input dict is never mutated
    params = dict(self.input_dict)
    params['seed'] = seed
    als = ALS(**params)  # ** allows to pass arguments as dict
    als.learningManager.run_experiment()
    # result is a dict with keys as metric_strs and values as list of
    # that metric per learning step
    metrics = als.learningManager.get_performance_results()
    return metrics.copy()
def main_func(m, n, k, V, USER_MATRIX_RANK, MF_RANK):
    """Build a synthetic low-rank rating matrix, mask part of it, factorize
    it with ALS and return the RMSE on the unobserved entries.

    :param m: number of rows (users)
    :param n: number of columns (items)
    :param k: latent dimension of the user matrix
    :param V: item factor matrix — assumed shape (k, n); TODO confirm caller
    :param USER_MATRIX_RANK: target rank for the generated user matrix
    :param MF_RANK: target rank for the true rating matrix
    :return: RMSE between the ALS reconstruction and the ground truth on
        the masked (unobserved) entries
    """
    # 1. build a user matrix with the requested rank
    M = np.random.rand(m, k)
    U = alter_rank(M, m, k, USER_MATRIX_RANK)
    # 2. generate the true rating matrix (noise-free; the noisy variant
    #    with variance 0.1 is kept below for reference)
    # R = np.random.rand(m, n) * .1 + np.dot(U, V)
    R = np.dot(U, V)
    R = alter_rank(R, m, n, MF_RANK)
    # 3. sample some values out
    mask_prob = .1
    mask = generate_mask(mask_prob, m, n)
    # 4. matrix factorization, guess actual values
    lam = .5  # regularization strength (fixed misspelled 'lamba')
    R_hat = ALS(R, mask, k, lam)
    rmse = calc_unobserved_rmse(U, V, R_hat, mask)
    return rmse
def run(self, n_reps, n_als_to_perform, n_als_performed, n_jobs=4):
    """Run ALS ``n_reps`` times with identical parameters except the seed,
    collecting per-run performance dicts into ``self.results``.

    :param n_reps: number of repetitions of als to perform with the
    exact same parameters (except seed)
    :param n_als_to_perform: total ALS runs planned, forwarded to the
        learning manager for progress reporting
    :param n_als_performed: number of ALS runs already completed before
        this call; incremented once per repetition
    :param n_jobs: number of cores to use (currently unused — kept for
        backward compatibility with the removed joblib parallel path)
    :return: nothing, self.results gets new values
    """
    self.results = []
    for seed in range(n_reps):
        # use a per-iteration copy so the caller's input_dict is not
        # mutated (the old code wrote 'seed' into self.input_dict)
        input_dict = self.input_dict.copy()
        input_dict['seed'] = seed
        als = ALS(**input_dict)  # ** allows to pass arguments as dict
        als.learningManager.run_experiment(
            n_als_performed=n_als_performed,
            n_als_to_perform=n_als_to_perform)
        n_als_performed += 1
        # result is a dict with keys as metric_strs and values as list
        # of that metric per learning step
        result = als.learningManager.get_performance_results()
        self.results.append(result)
def run_experiment(data: MovieLensDataset, sparse=True, grad_sensibility=1e-8,
                   param_sensibility=1e-16, num_experiments=1, warmup=0,
                   workers=8):
    """Fit ALS on a train/test split of the MovieLens data, average stats
    over ``num_experiments`` runs, report train/test MSE and dump results
    to a JSON file.

    :param data: dataset providing n_users, n_movies, n_ratings, path and
        the train/test split methods
    :param sparse: use the sparse ALS implementation and sparse matrices
    :param grad_sensibility: gradient-norm stopping threshold for fit()
    :param param_sensibility: parameter-change stopping threshold for fit()
    :param num_experiments: number of measured runs to average (must be >= 1,
        otherwise the final stats would reference an unfitted model)
    :param warmup: number of unmeasured warm-up fits before timing
    :param workers: unused here; kept for the disabled parallel split path
    :return: the last fitted ALS model
    """
    date = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
    suffix = "sparse" if sparse else "full"
    # try to load cached matrices first
    try:
        print("Loading train and test split from /tmp/..")
        trainX = load_matrix(f'trainX_{suffix}', sparse)
        testX = load_matrix(f'testX_{suffix}', sparse)
    except Exception:
        # narrowed from a bare `except:` (which would also swallow
        # KeyboardInterrupt/SystemExit); load_matrix's failure type is
        # not visible here — TODO tighten to the actual exception
        print("Loading failed, generating train-test split now..")
        # 5% test size
        test_set_size = data.n_ratings // 20
        # trainX, testX = data.train_test_split(test_set_size, workers)
        trainX, testX = data.train_test_split_simple(test_set_size)
        print("Saving train and test set to /tmp/ first..")
        save_matrix(f'trainX_{suffix}', trainX)
        save_matrix(f'testX_{suffix}', testX)

    # optional warmup: fit and discard results before timing
    for _ in range(warmup):
        u = init_vector(data.n_users, normalize=True)
        v = init_vector(data.n_movies, normalize=True)
        args = [u, v, trainX]
        als = ALSSparse(*args) if sparse else ALS(*args)
        u, v = als.fit(eps_g=grad_sensibility)

    stats = {}
    start = time.time()
    for i in range(num_experiments):
        u = init_vector(data.n_users, normalize=True)
        v = init_vector(data.n_movies, normalize=True)
        args = [u, v, trainX]
        als = ALSSparse(*args) if sparse else ALS(*args)
        # run Alternating Least Squares algorithm
        u, v = als.fit(eps_g=grad_sensibility, eps_params=param_sensibility)
        # running average of the per-run stats
        stats = average_stats(stats, als.stats, i + 1)
    end = time.time()

    # additional context info not depending on experiment results
    stats['number_of_ratings'] = trainX.getnnz(
    ) if sparse else np.count_nonzero(trainX)
    stats['dataset_path'] = data.path
    stats['grad_sensibility'] = grad_sensibility
    stats['param_sensibility'] = param_sensibility
    stats['theta_diff_sensibility'] = 1e-10
    stats['num_experiments'] = num_experiments
    stats['warmup_cycles'] = warmup
    stats['experiments_total_runtime'] = end - start
    stats['date'] = date
    # uses the model from the last measured run
    stats['train_mse'] = als.function_eval() / stats['number_of_ratings']
    print("Train Mean Squared error is:", stats['train_mse'])
    # free memory before testing
    del trainX
    del data
    # test on test set
    test_mse = evaluate(als.u, als.v, testX, "sparse" if sparse else "full")
    stats['test_mse'] = test_mse
    # save results
    print("Saving results..")
    with open(f'data/als_{suffix}_{date}.json', 'w') as f:
        json.dump(stats, f, indent=4)
    return als
# NOTE(review): truncated fragment — the beginning of the enclosing
# loader function (presumably load_movie_ratings) and the body of the
# final `else:` are outside this chunk; indentation below is
# reconstructed and must be checked against the original file.
            ]
            row = head + temp
            # print(head)
            # print(row)
            data.append(row)
        else:
            # header line: reset and record the leading id (strips the
            # trailing character, presumably ':' — verify file format)
            if len(head) > 0:
                head = []
            head.append(int(temp[0][:-1]))
    f.close()
    return data


# module-level driver: load the ratings matrix and fit an ALS model
X = load_movie_ratings()
model = ALS()
model.fit(X, k=3, max_iter=6)


def writeto(file_name, contant):
    # Append str(contant) followed by a newline to file_name.
    with open(file_name, "a+") as f:
        f.writelines(str(contant))
        f.writelines("\n")


def format(x):
    # Clamp a predicted rating into [1.0, 5.0].
    # NOTE(review): the `else` branch is cut off in this chunk —
    # presumably it keeps/returns x unchanged; confirm in the full file.
    if x < 1:
        x = 1.0
    elif x > 5:
        x = 5.0
    else:
from aucReader import Dataloader

# --- manual switches: exactly one of user/attention/als should be 1 ---
test = 0       # 1 = run against the small sample dataset
user = 0       # 1 = plain user RNN model
als = 0        # 1 = ALS baseline model
attention = 1  # 1 = attention user RNN (current selection)

# NOTE(review): Config is not imported in this chunk — presumably
# imported earlier in the file; verify.
config = Config()
if test:
    config.data_path = '../data/ml-1m/sample.csv'
    config.num_users = 50
    config.train_path = '../data/ml-1m/train2'

dl = Dataloader(config)

# Select and build the model; each branch imports its module lazily so
# only the chosen implementation's dependencies are loaded.
if user:
    from userRnn import UserRNN
    print('user model test:%d' % test)
    model = UserRNN(config, dl)
elif attention:
    from attURnn import AttUserRNN
    print('attention model test:%d' % test)
    model = AttUserRNN(config, dl)
elif als:
    from als import ALS
    print('als model test:%d' % test)
    model = ALS(config, dl)
else:
    from rnnRS import RnnRs
    print('rnn model test:%d' % test)
    model = RnnRs(config, dl)

model.train_and_evaluate()