# Experiment setup: result buffers, a random user vector, the LSE_soft model
# and a random set of item (arm) features with their true payoffs.
pseudo_num = 1

# One zero-filled buffer of shape (loop, iteration) per tracked quantity.
_result_shape = (loop, iteration)
linucb_regret_matrix = np.zeros(_result_shape)
lints_regret_matrix = np.zeros(_result_shape)
giro_regret_matrix = np.zeros(_result_shape)
lse_soft_regret_matrix = np.zeros(_result_shape)
online_regret_matrix = np.zeros(_result_shape)
offline_prob_matrix = np.zeros(_result_shape)
online_prob_matrix = np.zeros(_result_shape)
online_beta_matrix = np.zeros(_result_shape)

# Standard-normal user vector, rescaled to unit Euclidean norm.
user_feature = np.random.normal(size=dimension)
user_feature /= np.linalg.norm(user_feature)

# train model
lse_soft_model = LSE_soft(
    dimension,
    iteration,
    item_num,
    user_feature,
    alpha,
    sigma,
    step_size_beta,
    step_size_gamma,
    weight1,
    beta,
    gamma,
)
# The offline training call below is left disabled, as in the original:
# lse_soft_regret_list_train, lse_soft_beta_list_train=lse_soft_model.train(train_loops, item_num)

# test data
# NOTE(review): the first two `item_features` assignments are overwritten by
# the third one, so only the last draw is used downstream. They are kept here
# because they still consume values from NumPy's global RNG; dropping them
# would change every later random draw. Confirm whether that matters before
# removing them.
# NOTE(review): `Normalizer` is presumably sklearn.preprocessing.Normalizer
# (row-wise L2 scaling by default) -- confirm against the file's imports.
item_features = np.random.multivariate_normal(
    mean=np.zeros(dimension),
    cov=np.linalg.pinv(np.identity(dimension)),
    size=item_num,
)
item_features = Normalizer().fit_transform(item_features)
item_features = Normalizer().fit_transform(
    np.random.normal(size=(item_num, dimension))
)

# Payoff of every item for this user, and the index of the best one.
true_payoffs = np.dot(item_features, user_feature)
best_arm = np.argmax(true_payoffs)
weight1 = 0.01 loop = 1 train_loops = 100 beta = 5 gamma = 0 loop_num = 10 user_feature = np.random.normal(size=dimension) user_feature = user_feature / np.linalg.norm(user_feature) beta_matrix = np.zeros((loop_num, train_loops)) regret_matrix = np.zeros((loop_num, train_loops)) for l in range(loop_num): lse_soft_model = LSE_soft(dimension, iteration, item_num, user_feature, alpha, sigma, step_size_beta, step_size_gamma, weight1, beta, gamma) lse_soft_regret_list_train, lse_soft_beta_list_train, lse_soft_prob_matrix, lse_soft_beta_gradient = lse_soft_model.train( train_loops, item_num) beta_matrix[l] = lse_soft_beta_list_train regret_matrix[l] = lse_soft_regret_list_train beta_mean = np.mean(beta_matrix, axis=0) beta_std = beta_matrix.std(0) regret_mean = np.mean(regret_matrix, axis=0) regret_std = regret_matrix.std(0) np.save( path + 'lse_soft_offline_beta_mean_item_%s_d_%s_t_%s.npy' % (item_num, dimension, phase_num), beta_mean) np.save(