def test_import_dense_values_mat():
    """
    Test that the values of a dense array are unchanged after saving
    to and loading from the .mat format.
    """
    path = '/tmp/test.mat'
    original = np.random.rand(3, 2)
    export_data(path, original)
    restored = import_data(path)
    assert np.array_equal(original, restored)
def test_import_dense_type_mat():
    """
    Test that the dtype of a dense array is unchanged after saving
    to and loading from the .mat format.
    """
    path = '/tmp/test.mat'
    original = np.random.rand(3, 2)
    export_data(path, original)
    restored = import_data(path)
    assert original.dtype == restored.dtype
def test_import_sparse_values_mat():
    """
    Test that the values of a sparse (CSR) matrix are unchanged after
    saving to and loading from the .mat format.
    """
    path = '/tmp/test.mat'
    original = sps.csr_matrix(np.random.rand(3, 2))
    export_data(path, original)
    # Compare densified copies: the check is on values, not storage layout.
    expected = original.toarray()
    restored = import_data(path).toarray()
    assert np.array_equal(expected, restored)
def test_import_type_sparsetxt():
    """
    Test that the dtype of a sparse (CSR) matrix is unchanged after
    saving to and loading from the .sparsetxt format.
    """
    path = '/tmp/test.sparsetxt'
    original = sps.csr_matrix(np.random.rand(3, 2))
    export_data(path, original)
    restored = import_data(path)
    assert original.dtype == restored.dtype
def test_import_type_densetxt():
    """
    Test that the dtype of a dense array is unchanged after saving
    to and loading from the .densetxt format.
    """
    path = '/tmp/test.densetxt'
    original = np.random.rand(7, 11)
    export_data(path, original)
    restored = import_data(path)
    assert original.dtype == restored.dtype
def test_import_values_dense():
    """
    Test that the values of a dense array are unchanged after saving
    to and loading from the .dense format.
    """
    path = '/tmp/test.dense'
    original = np.random.rand(7, 11)
    export_data(path, original)
    restored = import_data(path)
    assert np.array_equal(original, restored)
# Optional side-information files that accompany the ratings matrix.
parser.add_argument('-item_list', default=[], nargs='+',
                    help='One or more item datafiles to use in the split.')
parser.add_argument('-user_list', default=[], nargs='+',
                    help='List of user datafiles to use in the split.')
args = parser.parse_args()

# slash ambivalent: accept directory arguments with or without a trailing
# separator so the string concatenations below always form valid paths.
if not args.readpath.endswith(slash):
    args.readpath += slash
if not args.writepath.endswith(slash):
    args.writepath += slash
loader.makedirs(args.writepath, cold=True)

# read utility matrix and independently shuffle rows and columns
ratings = loader.import_data(args.readpath + args.ratings)
num_users, num_items = ratings.shape[0], ratings.shape[1]
num_ratings = ratings.getnnz()

# random.shuffle needs a mutable sequence. A bare range() is immutable on
# Python 3 and raises TypeError, so materialise the indices as lists first
# (on Python 2 this is a no-op copy).
item_order = list(range(num_items))
user_order = list(range(num_users))
random.shuffle(item_order)
random.shuffle(user_order)

# save shuffled orders so the permutation can be reproduced or undone later
scipy.io.savemat(args.writepath + 'item_order.mat',
                 {'data': np.array(item_order)})
scipy.io.savemat(args.writepath + 'user_order.mat',
                 {'data': np.array(user_order)})

# apply the column permutation first, then the row permutation
ratings = ratings[:, item_order]
ratings = ratings[user_order, :]

# 5% of the stored ratings.
# NOTE(review): this is a float; confirm downstream code does not use it
# directly as an index or slice bound.
split_size = num_ratings*0.05
else: zipcodes.append(0) loader.export_data(args.outpath + 'features_sex.index', loader.HotIndex(numpy.array(sexes), 2)) loader.export_data(args.outpath + 'features_occ.index', loader.HotIndex(numpy.array(occupations), len(occlist) -1)) loader.export_data(args.outpath + 'features_zip.index', loader.HotIndex(numpy.array(zipcodes), 1000)) loader.export_data(args.outpath + 'features_age.mat', numpy.array(ages)) # inspect processed data sex = loader.imatload(args.outpath + 'features_sex.index') print('sex:') print(sex.vec.shape, sex.dim) print(sex.vec[0:10]) print('\n') occ = loader.imatload(args.outpath + 'features_occ.index') print('occ:') print(occ.vec.shape, occ.dim) print(occ.vec[0:10]) print('\n') zip = loader.imatload(args.outpath + 'features_zip.index') print('zip:') print(zip.vec.shape, zip.dim) print(zip.vec[0:10]) print('\n') age = loader.import_data(args.outpath + 'features_age.mat') print('age:') print(age.shape) print(age[0:10])
# NOTE(review): the statements down to `genre.append(...)` look like the tail
# of a per-record loop whose header lies above this chunk -- restore their
# indentation when merging into the full file.
date = line[2].split('-')
month.append(monthmap[date[1]])
# year released
year.append(float(int(date[2])))
# every column from index 5 onward is converted to float and kept as one row
genres = [float(flag) for flag in line[5:]]
genre.append(genres)

# Persist the month as a 12-way HotIndex, and year / genre as matrices.
loader.export_data(args.outpath + 'features_month.index',
                   loader.HotIndex(numpy.array(month), 12))
loader.export_data(args.outpath + 'features_year.mat', numpy.array(year))
loader.export_data(args.outpath + 'features_genre.mat', numpy.array(genre))

# inspect processed data: reload each file and print its shape and first rows
# (the accumulator names are rebound to the reloaded objects here)
month = loader.import_data(args.outpath + 'features_month.index')
print('month:')
print(month.vec.shape, month.dim)
print(month.vec[0:20])
print('\n')

year = loader.import_data(args.outpath + 'features_year.mat')
print('year:')
print(year.shape)
print(year[0:10])
print('\n')

genre = loader.import_data(args.outpath + 'features_genre.mat')
print('genre:')
print(genre.shape)
print(genre[0:10])
import numpy
from antk.core import loader
import scipy.sparse as sps
from numpy.linalg import norm

# Per-item genre matrix, the item index of each data split, and the per-item
# binary document-term matrix.
genres = loader.import_data(
    '/home/aarontuor/data/ml100k/item/features_genre.mat')
dev_item = loader.import_data(
    '/home/aarontuor/data/ml100k/dev/features_item.index')
train_item = loader.import_data(
    '/home/aarontuor/data/ml100k/train/features_item.index')
test_item = loader.import_data(
    '/home/aarontuor/data/ml100k/test/features_item.index')
words = loader.import_data(
    '/home/aarontuor/data/ml100k/item/features_bin_doc_term.mat')

# Divide every genre row by the square of its own norm.
# NOTE(review): a row whose norm is 0 would divide by zero here -- confirm
# that every item has at least one genre set.
row_norms = norm(genres, axis=1, keepdims=True)
genre_dist = genres / (row_norms * row_norms)

# Select the feature rows addressed by each split's index vector.
devgenre = genres[dev_item.vec]
testgenre = genres[test_item.vec]
traingenre = genres[train_item.vec]
devwords = words[dev_item.vec]
testwords = words[test_item.vec]
trainwords = words[train_item.vec]

# Report the shape of every selected matrix, genres first, then words.
for selected in (devgenre, testgenre, traingenre,
                 devwords, testwords, trainwords):
    print(selected.shape)