import numpy as np


def shuffle_in_unison(a, b):
    """Shuffle two equal-length arrays with the same random permutation."""
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b


if __name__ == "__main__":
    # print(1)
    # Build shuffled train/test files for each redundancy matrix (m1, m2, m4, m8).
    X_train, X_test, y_train, y_test = prepare_data(m1matrix_2)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m1_train_2.data')
    array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m1_test_2.data')

    X_train, X_test, y_train, y_test = prepare_data(m2matrix_2)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m2_train_2.data')
    array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m2_test_2.data')

    X_train, X_test, y_train, y_test = prepare_data(m4matrix_2)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m4_train_2.data')
    array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m4_test_2.data')

    X_train, X_test, y_train, y_test = prepare_data(m8matrix_2)
    X_train, y_train = shuffle_in_unison(X_train, y_train)
    array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m8_train_2.data')
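# A minimal alternative sketch (not part of the original script): the same joint
# shuffle can be done with a single fancy-indexing step, assuming a and b are
# NumPy arrays of equal length along axis 0. It avoids the explicit Python loop
# and the pre-allocated output buffers.
def shuffle_in_unison_vectorized(a, b):
    assert len(a) == len(b)
    permutation = np.random.permutation(len(a))
    # Index both arrays with the same permutation so rows stay paired.
    return a[permutation], b[permutation]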
    abundant_id_list = np.load(os.path.join(tensorflow_data_3_dir, 'top250_movie_pos.npy'))
    # Rows for the top-250 (abundant) movies become the test set; the remaining rows train.
    X_test = result_matrix[abundant_id_list,]
    y_test = movie_rates_y[abundant_id_list,]
    X_train = np.delete(result_matrix, abundant_id_list, 0)
    y_train = np.delete(movie_rates_y, abundant_id_list, 0)
    print(X_train.shape, y_train.shape)
    array2file(X_train, y_train, SVD3_output_dir + '\\NeuMF\\NeuMF_train_{}.data'.format(n))
    array2file(X_test, y_test, SVD3_output_dir + '\\NeuMF\\NeuMF_test_{}.data'.format(n))
print('NeuMF extension finish')
'''

# MLP
matrix = np.load(os.path.join(workdir, 'SVD_3', 'baselines', 'MLP_result_mt_ori.npy'))
movie_rates_y = np.load(os.path.join(tensorflow_data_3_dir, "movie_rates.npy"))
for n in range(5, 25, 5):
    result_matrix, extension_matrix, sort_index_matrix = tag_extension_2(matrix, n)
    abundant_id_list = np.load(os.path.join(tensorflow_data_3_dir, 'top250_movie_pos.npy'))
    X_test = result_matrix[abundant_id_list,]
    y_test = movie_rates_y[abundant_id_list,]
    X_train = np.delete(result_matrix, abundant_id_list, 0)
    y_train = np.delete(movie_rates_y, abundant_id_list, 0)
    print(X_train.shape, y_train.shape)
    array2file(X_train, y_train, SVD3_output_dir + '\\MLP\\MLP_train_{}.data'.format(n))
    array2file(X_test, y_test, SVD3_output_dir + '\\MLP\\MLP_test_{}.data'.format(n))
print('MLP extension finish')
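# A minimal standalone sketch (hypothetical helper, not part of the original script)
# of the hold-out pattern used above: rows listed in an index array become the test
# set and every remaining row becomes the training set.
def holdout_split_sketch(features, targets, held_out_rows):
    # held_out_rows plays the role of the top250_movie_pos.npy index list.
    X_test = features[held_out_rows]
    y_test = targets[held_out_rows]
    X_train = np.delete(features, held_out_rows, 0)
    y_train = np.delete(targets, held_out_rows, 0)
    return X_train, y_train, X_test, y_test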
all_movie_pos_file_Path = os.path.join(workdir, "all_movie_matrix_dict.json")
all_movie_pos_file = open(all_movie_pos_file_Path, encoding='utf-8', mode='r')
all_movie_pos_dict = json.load(all_movie_pos_file)

most_popular_file_Path = os.path.join(redundancy_output_dir, "most_popular_movie.json")
most_popular_file = open(most_popular_file_Path, encoding='utf-8', mode='r')
most_popular_movie_dict = json.load(most_popular_file)
most_popular_movie_list = most_popular_movie_dict.keys()

X_train, X_test, y_train, y_test = prepare_data(matrix, movie_rates_y, most_popular_movie_list, all_movie_pos_dict)
X_train, y_train = shuffle_in_unison(X_train, y_train)
array2file(X_train, y_train, SVD_output_dir + '\\svm\\k{}_train.data'.format(k))
array2file(X_test, y_test, SVD_output_dir + '\\svm\\k{}_test.data'.format(k))

tagFile = os.path.join(redundancy_output_dir, 'tag.txt')
tag_list = loadTagList(tagFile)
SVD_movie_result = os.path.join(SVD_output_dir, "SVD_movie_result_{}.json".format(k))
matrix2json(extension_matrix, all_movie_pos_dict, tag_list, SVD_movie_result)
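# A minimal sketch (hypothetical helper, not the original code path) showing the same
# JSON loading with a context manager, so the file handles opened above are not left
# open after reading.
def load_json_sketch(path):
    with open(path, encoding='utf-8', mode='r') as f:
        return json.load(f)
# e.g. all_movie_pos_dict = load_json_sketch(all_movie_pos_file_Path)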
    # Tail of the split helper: the rows in id_list become the test set.
    y_test = np.delete(y, id_list, 0)
    print(X_train.shape, y_train.shape)
    return X_train, y_train, X_test, y_test


def shuffle_in_unison(a, b):
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b


if __name__ == "__main__":
    # print(1)
    # X_train, X_test, y_train, y_test = prepare_data(m1matrix_2)
    # X_train, y_train = shuffle_in_unison(X_train, y_train)
    # array2file(X_train, y_train, redundancy_output_dir + '\\svm\\m1_train_2.data')
    # array2file(X_test, y_test, redundancy_output_dir + '\\svm\\m1_test_2.data')

    # Write the additional r1matrix train/test split for the SVM baseline.
    print("prepare r1matrix data")
    abundant_movie_matrix = os.path.join(redundancy2_output_dir, 'r1matrix_2.npy')
    abundant_movie_rates = os.path.join(tensorflow_data_dir, 'abundant_movie_rates.npy')
    X_train, y_train, X_test, y_test = get_60_split(abundant_movie_matrix, abundant_movie_rates)
    array2file(X_train, y_train, redundancy2_output_dir + '\\svm\\m1_train_addi.data')
    array2file(X_test, y_test, redundancy2_output_dir + '\\svm\\m1_test_addi.data')
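# A hypothetical sketch of what a 60/40 split helper in the shape of get_60_split
# could look like, assuming it loads the two .npy files and holds out a random 40%
# of rows as the test set; the real get_60_split in this project may differ.
def get_60_split_sketch(matrix_path, rates_path, train_fraction=0.6):
    X = np.load(matrix_path)
    y = np.load(rates_path)
    # Shuffle the row indices, then treat everything past the train fraction as test ids.
    id_list = np.random.permutation(len(X))[int(train_fraction * len(X)):]
    X_test = X[id_list]
    y_test = y[id_list]
    X_train = np.delete(X, id_list, 0)
    y_train = np.delete(y, id_list, 0)
    print(X_train.shape, y_train.shape)
    return X_train, y_train, X_test, y_test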