def shuffle_in_unison(a, b):
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b
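

# A vectorized equivalent of the loop above, offered as a sketch: indexing both
# arrays with one shared NumPy permutation keeps the rows of a and b paired.
# (Assumes `np` is the numpy module, as elsewhere in this file.)
def shuffle_in_unison_vectorized(a, b):
    assert len(a) == len(b)
    permutation = np.random.permutation(len(a))
    return a[permutation], b[permutation]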


if __name__ == "__main__":
    # print(1)
    X_train, X_test, y_train, y_test = prepare_data(m1matrix_2)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m1_train_2.data')
    array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m1_test_2.data')

    X_train, X_test, y_train, y_test = prepare_data(m2matrix_2)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m2_train_2.data')
    array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m2_test_2.data')

    X_train, X_test, y_train, y_test = prepare_data(m4matrix_2)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m4_train_2.data')
    array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m4_test_2.data')

    X_train, X_test, y_train, y_test = prepare_data(m8matrix_2)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m8_train_2.data')
    array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m8_test_2.data')
Example #2
        abundant_id_list = np.load(os.path.join(tensorflow_data_3_dir, 'top250_movie_pos.npy'))
        X_test = result_matrix[abundant_id_list,]
        y_test = movie_rates_y[abundant_id_list,]
        X_train = np.delete(result_matrix, abundant_id_list, 0)
        y_train = np.delete(movie_rates_y, abundant_id_list, 0)
        print(X_train.shape, y_train.shape)

        array2file(X_train, y_train, SVD3_output_dir + '\\NeuMF\\NeuMF_train_{}.data'.format(n))
        array2file(X_test, y_test, SVD3_output_dir + '\\NeuMF\\NeuMF_test_{}.data'.format(n))

    print('NeuMF extension finish')
    '''

    # MLP
    matrix = np.load(os.path.join(workdir, 'SVD_3', 'baselines', 'MLP_result_mt_ori.npy'))
    movie_rates_y = np.load(os.path.join(tensorflow_data_3_dir, "movie_rates.npy"))

    for n in range(5, 25, 5):
        result_matrix, extension_matrix, sort_index_matrix = tag_extension_2(matrix, n)
        # Hold out the rows listed in top250_movie_pos.npy (the abundant, top-250 movies) as the test set.
        abundant_id_list = np.load(os.path.join(tensorflow_data_3_dir, 'top250_movie_pos.npy'))
        X_test = result_matrix[abundant_id_list,]
        y_test = movie_rates_y[abundant_id_list,]
        X_train = np.delete(result_matrix, abundant_id_list, 0)
        y_train = np.delete(movie_rates_y, abundant_id_list, 0)
        print(X_train.shape, y_train.shape)

        array2file(X_train, y_train, SVD3_output_dir + '\\MLP\\MLP_train_{}.data'.format(n))
        array2file(X_test, y_test, SVD3_output_dir + '\\MLP\\MLP_test_{}.data'.format(n))

    print('MLP extension finish')
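
The loop above holds out the rows indexed by top250_movie_pos.npy as the test set and trains on the remaining rows. A minimal, self-contained sketch of that split pattern (the function and argument names here are illustrative, not taken from the original project):

import numpy as np

def holdout_split(X, y, test_idx):
    # Rows listed in test_idx form the test set; all remaining rows are training data.
    X_test, y_test = X[test_idx], y[test_idx]
    X_train = np.delete(X, test_idx, axis=0)
    y_train = np.delete(y, test_idx, axis=0)
    return X_train, y_train, X_test, y_test
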
Example #3
    all_movie_pos_file_Path = os.path.join(workdir, "all_movie_matrix_dict.json")
    with open(all_movie_pos_file_Path, mode='r', encoding='utf-8') as all_movie_pos_file:
        all_movie_pos_dict = json.load(all_movie_pos_file)

    most_popular_file_Path = os.path.join(redundancy_output_dir, "most_popular_movie.json")
    with open(most_popular_file_Path, mode='r', encoding='utf-8') as most_popular_file:
        most_popular_movie_dict = json.load(most_popular_file)
    most_popular_movie_list = most_popular_movie_dict.keys()

    X_train, X_test, y_train, y_test = prepare_data(matrix, movie_rates_y,
                                                    most_popular_movie_list,
                                                    all_movie_pos_dict)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train,
               SVD_output_dir + '\\svm\\k{}_train.data'.format(k))
    array2file(X_test, y_test,
               SVD_output_dir + '\\svm\\k{}_test.data'.format(k))

    tagFile = os.path.join(redundancy_output_dir, 'tag.txt')
    tag_list = loadTagList(tagFile)
    SVD_movie_result = os.path.join(SVD_output_dir,
                                    "SVD_movie_result_{}.json".format(k))
    matrix2json(extension_matrix, all_movie_pos_dict, tag_list,
                SVD_movie_result)
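
One portability note for these scripts: the output paths are built by hand with '\\' separators (e.g. SVD_output_dir + '\\svm\\...'), which ties them to Windows. os.path.join produces the same paths on any platform; a small sketch (the helper name is illustrative):

import os

def svm_paths(output_dir, k):
    # Portable equivalent of output_dir + '\\svm\\k{}_train.data'.format(k)
    train_path = os.path.join(output_dir, 'svm', 'k{}_train.data'.format(k))
    test_path = os.path.join(output_dir, 'svm', 'k{}_test.data'.format(k))
    return train_path, test_path
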
Example #4
    y_test = np.delete(y, id_list, 0)
    print(X_train.shape, y_train.shape)
    return X_train, y_train, X_test, y_test


def shuffle_in_unison(a, b):
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b


if __name__ == "__main__":
    # print(1)
    # X_train, X_test, y_train, y_test = prepare_data(m1matrix_2)
    # X_train, y_train = shuffle_in_unison(X_train, y_train)
    # array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m1_train_2.data')
    # array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m1_test_2.data')

    print("prepare r1matrix data")
    abundant_movie_matrix = os.path.join(redundancy2_output_dir, 'r1matrix_2.npy')
    abundant_movie_rates = os.path.join(tensorflow_data_dir, 'abundant_movie_rates.npy')
    X_train, y_train, X_test, y_test = get_60_split(abundant_movie_matrix, abundant_movie_rates)
    array2file(X_train, y_train, redundancy2_output_dir + '\\svm\\m1_train_addi.data')
    array2file(X_test, y_test, redundancy2_output_dir + '\\svm\\m1_test_addi.data')