def experiments_run():
    """Evaluate a random-slate baseline for every configured slate size."""
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    (df_train, df_test, df_train_matrix, df_test_matrix,
     movies_categories, titles) = split_dataset(configs)

    test_loader = DataLoader(
        NoAdditionalInfoTestDataLoader(df_test, df_test_matrix),
        batch_size=configs['test_batch_size'],
        shuffle=True,
        num_workers=4,
        drop_last=True)

    movie_count = len(df_train_matrix.columns)
    all_movies = np.arange(movie_count)

    for slate_size in configs['slate_size']:
        # Re-seed so every slate size is evaluated from an identical RNG state.
        set_seeds(configs['seed'])
        print(f'Test for {slate_size}')

        model = RandomSlateGeneration(slate_size, all_movies,
                                      configs['test_batch_size'])
        builder = ExperimentBuilderRandom(model, test_loader, movie_count,
                                          movies_categories, titles, configs)
        builder.run_experiment()
def experiments_run():
    """Build pointwise train/test loaders and basic dataset statistics.

    NOTE(review): the function ends right after computing ``total_users``
    without building a model or running anything — this looks truncated;
    confirm against the original file.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    (df_train, df_test, df_train_matrix,
     df_test_matrix, movies_categories) = split_dataset(configs)

    # Training examples are augmented with negative samples per positive one.
    train_loader = DataLoader(
        PointwiseDataLoader(df_train, df_train_matrix,
                            configs['neg_sample_per_training_example']),
        batch_size=configs['train_batch_size'],
        shuffle=True, num_workers=4, drop_last=True)

    # Evaluation keeps every user and a fixed order.
    test_loader = DataLoader(
        UserIndexTestDataLoader(df_test, df_test_matrix, df_train_matrix),
        batch_size=configs['test_batch_size'],
        shuffle=False, num_workers=4, drop_last=False)

    total_movies = len(df_train_matrix.columns)
    total_users = len(df_train_matrix.index)
def experiments_run():
    """Train or evaluate a ListCVAE slate-generation model.

    Reads hyper-parameters from the JSON config, builds the model with its
    encoder/decoder/prior sub-networks, then either runs evaluation only
    (when ``load_model`` is set) or a full training experiment.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    train_loader, test_loader, data_configs, movie_categories, titles = get_data_loaders(
        configs, False)

    # One dim maximizes utility; 'diverse' adds a dim for genre maximization.
    response_vector_dims = 2 if configs['diverse'] else 1

    # Fall back to CPU so the script also runs on machines without CUDA
    # (the original unconditionally requested "cuda" and crashed otherwise).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encoder_params = Parameters(configs['enc_batch_norm'],
                                configs['enc_dropout'], configs['enc_act'])
    decoder_params = Parameters(configs['dec_batch_norm'],
                                configs['dec_dropout'], configs['dec_act'])
    prior_params = Parameters(configs['prior_batch_norm'],
                              configs['prior_dropout'], configs['prior_act'])

    # The GDPP diversity loss is active only when its weight is positive.
    gdpp_active = configs['gdpp_weight'] > 0

    model = ListCVAE(train_loader.dataset.number_of_movies,
                     configs['slate_size'], response_vector_dims,
                     configs['embed_dims'], configs['encoder_dims'],
                     configs['latent_dims'], configs['decoder_dims'],
                     configs['prior_dims'], device, encoder_params,
                     decoder_params, prior_params, gdpp_active)
    print(model)

    experiment_builder = ExperimentBuilderCVAE(
        model, train_loader, test_loader, data_configs['number_of_movies'],
        movie_categories, titles, configs)

    if configs['load_model']:
        experiment_builder.run_evaluation()
    else:
        experiment_builder.run_experiment()
def experiments_run():
    """Train and evaluate a GreedyMLP ranker on pointwise samples."""
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    (df_train, df_test, df_train_matrix,
     df_test_matrix, movies_categories) = split_dataset(configs)

    # Training examples are augmented with negative samples per positive one.
    train_loader = DataLoader(
        PointwiseDataLoader(df_train, df_train_matrix,
                            configs['neg_sample_per_training_example']),
        batch_size=configs['train_batch_size'],
        shuffle=True, num_workers=4, drop_last=True)

    # Evaluation keeps every user and a fixed order.
    test_loader = DataLoader(
        UserIndexTestDataLoader(df_test, df_test_matrix, df_train_matrix),
        batch_size=configs['test_batch_size'],
        shuffle=False, num_workers=4, drop_last=False)

    total_movies = len(df_train_matrix.columns)
    total_users = len(df_train_matrix.index)

    model = GreedyMLP(total_users, total_movies,
                      configs['hidden_layers_dims'],
                      configs['use_bias'], configs['dropout'])
    print(model)

    experiment_builder = GreedyMLPExperimentBuilder(
        model, train_loader, test_loader, total_movies, configs,
        print_learnable_parameters=False)
    experiment_builder.run_experiment()
def experiments_run():
    """Set up and run the fully connected GAN slate-generation experiment."""
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    train_loader, test_loader, data_configs = get_data_loaders(configs, True)
    number_of_movies = train_loader.dataset.number_of_movies
    print('number of movies: ', number_of_movies)

    # Single response dimension: utility only.
    response_vector_dims = 1

    generator = Generator(number_of_movies, configs['slate_size'],
                          configs['embed_dims'], configs['noise_hidden_dims'],
                          configs['hidden_layers_dims_gen'],
                          response_vector_dims, configs['gen_dropout'])
    print(generator)

    discriminator = Discriminator(number_of_movies, configs['slate_size'],
                                  configs['embed_dims'],
                                  configs['hidden_layers_dims_dis'],
                                  response_vector_dims, configs['dis_dropout'])
    print(discriminator)

    experiment_builder = FullyConnectedGANExperimentBuilder(
        generator, discriminator, train_loader, test_loader, configs,
        print_learnable_parameters=True)
    experiment_builder.run_experiment()
def experiments_run():
    """Fit an implicit-ALS model and report slate metrics per slate size.

    Trains AlternatingLeastSquares on the train interaction matrix, then for
    each configured slate size computes precision / hit-ratio / category
    coverage, movie diversity, and a histogram keyed by ``titles`` values.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])
    df_train, df_test, df_train_matrix, df_test_matrix, movies_categories, titles = split_dataset(
        configs)
    test_dataset = UserIndexTestDataLoader(df_test, df_test_matrix, df_train_matrix)
    test_loader = DataLoader(test_dataset, batch_size=configs['test_batch_size'],
                             shuffle=True, num_workers=4, drop_last=False)
    model = implicit.als.AlternatingLeastSquares(
        regularization=configs['weight_decay'], iterations=50,
        factors=configs['embed_dims'])
    # 'a' is the transposed (item x user) matrix fed to fit();
    # 'temp' keeps the user x item orientation for recommend_all().
    # NOTE(review): implicit's expected fit() orientation changed across
    # versions (item-user vs user-item) — confirm against the pinned version.
    a = sparse.coo_matrix(df_train_matrix.to_numpy().T)
    temp = sparse.csr_matrix(df_train_matrix.to_numpy())
    # train the model on a sparse matrix of item/user/confidence weights
    model.fit(a)
    for slate_size in configs['slate_size']:
        print(f'Test for {slate_size}')
        # Top-N recommendation indices for every user, row-indexed by user id.
        recommendations = model.recommend_all(temp, N=slate_size)
        predicted_slates = []
        ground_truth_slates = []
        for values in test_loader:
            # values[0]: batch of user indices; values[1]: their interaction rows.
            for value in values[0]:
                predicted_slates.append(recommendations[int(value)])
            ground_truth_slate = values[1].cpu()
            # Non-zero (user, movie) pairs; [:, 1] relies on torch.nonzero's
            # 2-D (index, dim) layout when np.nonzero dispatches to the tensor.
            ground_truth_indexes = np.nonzero(ground_truth_slate)
            # Group the movie indices by user: split at the cumulative counts
            # of each distinct user index ([: -1] drops the trailing boundary).
            grouped_ground_truth = np.split(
                ground_truth_indexes[:, 1],
                np.cumsum(
                    np.unique(ground_truth_indexes[:, 0],
                              return_counts=True)[1])[:-1])
            ground_truth_slates.extend(grouped_ground_truth)
        predicted_slates = torch.from_numpy(np.vstack(predicted_slates))
        precision, hr, cc = precision_hit_coverage_ratio(
            predicted_slates, ground_truth_slates, movies_categories)
        diversity = movie_diversity(predicted_slates, len(df_train_matrix.columns))
        # Count years
        # NOTE(review): 'titles' appears to map movie index -> year here
        # (its values are used as histogram keys) — confirm with split_dataset.
        years_dict = {}
        all_years = np.unique(titles)
        for year in all_years:
            years_dict[year] = 0
        for predicted_slate in list(predicted_slates):
            for predicted_movie in predicted_slate:
                years_dict[titles[predicted_movie]] += 1
        print(years_dict)
        print(precision, hr, cc)
        print(diversity)