예제 #1
0
def experiments_run():
    """Run the random slate experiment once per configured slate size.

    The configuration comes from ``extract_args_from_json`` and the data
    from ``split_dataset``. A single test ``DataLoader`` is shared by all
    runs. For each entry of ``configs['slate_size']`` the seeds are reset,
    a ``RandomSlateGeneration`` model is created and handed to an
    ``ExperimentBuilderRandom`` whose ``run_experiment`` is then called.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    (df_train, df_test, df_train_matrix, df_test_matrix,
     movies_categories, titles) = split_dataset(configs)

    # Incomplete final batches are dropped, so every batch has exactly
    # ``test_batch_size`` rows (the same size the model below is given).
    loader_options = {
        'batch_size': configs['test_batch_size'],
        'shuffle': True,
        'num_workers': 4,
        'drop_last': True,
    }
    test_loader = DataLoader(
        NoAdditionalInfoTestDataLoader(df_test, df_test_matrix),
        **loader_options)

    # One candidate index per column of the training matrix.
    number_of_movies = len(df_train_matrix.columns)
    all_movies = np.arange(number_of_movies)

    for slate_size in configs['slate_size']:
        # Re-seed so each slate size starts from the same random state.
        set_seeds(configs['seed'])
        print(f'Test for {slate_size}')

        random_model = RandomSlateGeneration(
            slate_size, all_movies, configs['test_batch_size'])

        ExperimentBuilderRandom(
            random_model, test_loader, number_of_movies,
            movies_categories, titles, configs).run_experiment()
예제 #2
0
def experiments_run():
    """Prepare the pointwise training loader and the user-index test loader.

    Loads the configuration, seeds the run, splits the dataset and wraps
    the resulting datasets in ``DataLoader`` objects. The movie and user
    totals are taken from the training matrix (columns and index
    respectively). Nothing is returned, and the visible body ends right
    after computing those totals.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    (df_train, df_test, df_train_matrix, df_test_matrix,
     movies_categories) = split_dataset(configs)

    # Training batches: shuffled, incomplete last batch discarded.
    train_loader = DataLoader(
        PointwiseDataLoader(df_train, df_train_matrix,
                            configs['neg_sample_per_training_example']),
        batch_size=configs['train_batch_size'],
        shuffle=True,
        num_workers=4,
        drop_last=True,
    )

    # Test batches: fixed order, every sample kept.
    test_loader = DataLoader(
        UserIndexTestDataLoader(df_test, df_test_matrix, df_train_matrix),
        batch_size=configs['test_batch_size'],
        shuffle=False,
        num_workers=4,
        drop_last=False,
    )

    # NOTE(review): neither total is used in the visible part of the
    # function; the snippet appears to be cut off here.
    total_movies = len(df_train_matrix.columns)
    total_users = len(df_train_matrix.index)
예제 #3
0
def experiments_run():
    """Build a ``ListCVAE`` model and either evaluate or train it.

    The configuration is read with ``extract_args_from_json`` and the data
    loaders come from ``get_data_loaders``. When ``configs['load_model']``
    is truthy the experiment builder's ``run_evaluation`` is called,
    otherwise ``run_experiment``.

    The model is placed on CUDA when it is available and on the CPU
    otherwise, so the script can still run on machines without a GPU.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    train_loader, test_loader, data_configs, movie_categories, titles = get_data_loaders(
        configs, False)

    # One dimension maximises utility; the diverse setting adds a second
    # dimension for genre maximisation.
    response_vector_dims = 2 if configs['diverse'] else 1

    # Prefer the GPU, but fall back to the CPU instead of failing later.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        print('CUDA is not available; falling back to CPU.')
        device = torch.device("cpu")

    encoder_params = Parameters(configs['enc_batch_norm'],
                                configs['enc_dropout'], configs['enc_act'])
    decoder_params = Parameters(configs['dec_batch_norm'],
                                configs['dec_dropout'], configs['dec_act'])
    prior_params = Parameters(configs['prior_batch_norm'],
                              configs['prior_dropout'], configs['prior_act'])

    # The GDPP term is only enabled when it has a positive weight.
    gdpp_active = configs['gdpp_weight'] > 0

    model = ListCVAE(train_loader.dataset.number_of_movies,
                     configs['slate_size'], response_vector_dims,
                     configs['embed_dims'], configs['encoder_dims'],
                     configs['latent_dims'], configs['decoder_dims'],
                     configs['prior_dims'], device, encoder_params,
                     decoder_params, prior_params, gdpp_active)

    print(model)

    experiment_builder = ExperimentBuilderCVAE(
        model, train_loader, test_loader, data_configs['number_of_movies'],
        movie_categories, titles, configs)

    if configs['load_model']:
        experiment_builder.run_evaluation()
    else:
        experiment_builder.run_experiment()
예제 #4
0
def experiments_run():
    """Train and evaluate a ``GreedyMLP`` model on the pointwise dataset.

    Loads the configuration, seeds the run, splits the dataset and builds
    the train/test ``DataLoader`` objects. The model is sized from the
    training matrix (index length for users, column count for movies) and
    passed to a ``GreedyMLPExperimentBuilder`` whose ``run_experiment`` is
    then called.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    (df_train, df_test, df_train_matrix, df_test_matrix,
     movies_categories) = split_dataset(configs)

    # Training data: shuffled, incomplete last batch discarded.
    train_set = PointwiseDataLoader(
        df_train, df_train_matrix, configs['neg_sample_per_training_example'])
    train_options = {
        'batch_size': configs['train_batch_size'],
        'shuffle': True,
        'num_workers': 4,
        'drop_last': True,
    }
    train_loader = DataLoader(train_set, **train_options)

    # Test data: fixed order, every sample kept.
    test_set = UserIndexTestDataLoader(df_test, df_test_matrix,
                                       df_train_matrix)
    test_options = {
        'batch_size': configs['test_batch_size'],
        'shuffle': False,
        'num_workers': 4,
        'drop_last': False,
    }
    test_loader = DataLoader(test_set, **test_options)

    total_movies = len(df_train_matrix.columns)
    total_users = len(df_train_matrix.index)

    model = GreedyMLP(
        total_users,
        total_movies,
        configs['hidden_layers_dims'],
        configs['use_bias'],
        configs['dropout'],
    )
    print(model)

    GreedyMLPExperimentBuilder(
        model, train_loader, test_loader, total_movies, configs,
        print_learnable_parameters=False).run_experiment()
예제 #5
0
def experiments_run():
    """Build a generator/discriminator pair and run the GAN experiment.

    Loads the configuration, seeds the run and obtains the data loaders
    from ``get_data_loaders``. Both networks are sized from the training
    dataset's ``number_of_movies`` and use a one-dimensional response
    vector. They are passed to a ``FullyConnectedGANExperimentBuilder``
    whose ``run_experiment`` is then called.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    train_loader, test_loader, data_configs = get_data_loaders(configs, True)

    number_of_movies = train_loader.dataset.number_of_movies
    print('number of movies: ', number_of_movies)

    response_vector_dims = 1

    generator = Generator(
        number_of_movies,
        configs['slate_size'],
        configs['embed_dims'],
        configs['noise_hidden_dims'],
        configs['hidden_layers_dims_gen'],
        response_vector_dims,
        configs['gen_dropout'],
    )
    print(generator)

    discriminator = Discriminator(
        number_of_movies,
        configs['slate_size'],
        configs['embed_dims'],
        configs['hidden_layers_dims_dis'],
        response_vector_dims,
        configs['dis_dropout'],
    )
    print(discriminator)

    FullyConnectedGANExperimentBuilder(
        generator, discriminator, train_loader, test_loader, configs,
        print_learnable_parameters=True).run_experiment()
예제 #6
0
def experiments_run():
    """Fit an implicit ALS model and report slate metrics per slate size.

    The model is fitted once on the transposed training matrix. For every
    entry of ``configs['slate_size']`` recommendations are generated for
    all rows of the training matrix, collected for the entries yielded by
    the test loader, and compared against the ground truth from the same
    batches. The function prints a per-title-value count of the
    recommended movies, then precision, hit ratio and category coverage,
    then diversity.
    """
    configs = extract_args_from_json()
    print(configs)
    set_seeds(configs['seed'])

    (df_train, df_test, df_train_matrix, df_test_matrix,
     movies_categories, titles) = split_dataset(configs)

    test_loader = DataLoader(
        UserIndexTestDataLoader(df_test, df_test_matrix, df_train_matrix),
        batch_size=configs['test_batch_size'],
        shuffle=True,
        num_workers=4,
        drop_last=False,
    )

    model = implicit.als.AlternatingLeastSquares(
        regularization=configs['weight_decay'],
        iterations=50,
        factors=configs['embed_dims'])

    # Convert the training matrix once; both sparse views derive from it.
    dense_train = df_train_matrix.to_numpy()
    fit_matrix = sparse.coo_matrix(dense_train.T)
    recommend_matrix = sparse.csr_matrix(dense_train)

    # train the model on a sparse matrix of item/user/confidence weights
    model.fit(fit_matrix)

    for slate_size in configs['slate_size']:
        print(f'Test for {slate_size}')

        recommendations = model.recommend_all(recommend_matrix, N=slate_size)

        predicted_rows = []
        ground_truth_slates = []

        for batch in test_loader:
            # presumably batch[0] holds user indexes into ``recommendations``
            # (the dataset is a UserIndexTestDataLoader) - verify.
            predicted_rows.extend(
                recommendations[int(entry)] for entry in batch[0])

            # (row, column) positions of the non-zero ground-truth entries.
            truth_indexes = np.nonzero(batch[1].cpu())
            # Cut the column indexes wherever the row index changes, giving
            # one group of columns per row that has a non-zero entry.
            row_counts = np.unique(truth_indexes[:, 0],
                                   return_counts=True)[1]
            split_points = np.cumsum(row_counts)[:-1]
            ground_truth_slates.extend(
                np.split(truth_indexes[:, 1], split_points))

        predicted_slates = torch.from_numpy(np.vstack(predicted_rows))

        precision, hr, cc = precision_hit_coverage_ratio(
            predicted_slates, ground_truth_slates, movies_categories)
        diversity = movie_diversity(predicted_slates,
                                    len(df_train_matrix.columns))

        # Count how often each distinct value of ``titles`` is recommended
        # (the original comment calls these values years).
        years_dict = dict.fromkeys(np.unique(titles), 0)

        for slate in predicted_slates:
            for movie in slate:
                years_dict[titles[movie]] += 1

        print(years_dict)

        print(precision, hr, cc)
        print(diversity)