示例#1
0
def test_import_dense_values_mat():
    """
    Round-trip a random dense 3x2 array through a .mat file and check
    that the values read back equal the values written.
    """
    mat_path = '/tmp/test.mat'
    original = np.random.rand(3, 2)
    export_data(mat_path, original)
    restored = import_data(mat_path)
    assert np.array_equal(original, restored)
示例#2
0
def test_import_dense_type_mat():
    """
    Round-trip a random dense 3x2 array through a .mat file and check
    that the dtype read back matches the dtype written.
    """
    mat_path = '/tmp/test.mat'
    original = np.random.rand(3, 2)
    export_data(mat_path, original)
    restored = import_data(mat_path)
    assert original.dtype == restored.dtype
示例#3
0
def test_import_sparse_values_mat():
    """
    Round-trip a random 3x2 CSR matrix through a .mat file and check
    that the densified values read back equal the densified values written.
    """
    mat_path = '/tmp/test.mat'
    original = sps.csr_matrix(np.random.rand(3, 2))
    export_data(mat_path, original)
    restored = import_data(mat_path)
    assert np.array_equal(original.toarray(), restored.toarray())
示例#4
0
def test_import_type_sparsetxt():
    """
    Round-trip a random 3x2 CSR matrix through a .sparsetxt file and check
    that the dtype read back matches the dtype written.
    """
    txt_path = '/tmp/test.sparsetxt'
    original = sps.csr_matrix(np.random.rand(3, 2))
    export_data(txt_path, original)
    restored = import_data(txt_path)
    assert original.dtype == restored.dtype
示例#5
0
def test_import_type_densetxt():
    """
    Round-trip a random dense 7x11 array through a .densetxt file and check
    that the dtype read back matches the dtype written.
    """
    txt_path = '/tmp/test.densetxt'
    original = np.random.rand(7, 11)
    export_data(txt_path, original)
    restored = import_data(txt_path)
    assert original.dtype == restored.dtype
示例#6
0
def test_import_values_dense():
    """
    Round-trip a random dense 7x11 array through a .dense file and check
    that the values read back equal the values written.
    """
    dense_path = '/tmp/test.dense'
    original = np.random.rand(7, 11)
    export_data(dense_path, original)
    restored = import_data(dense_path)
    assert np.array_equal(original, restored)
示例#7
0
# Optional lists of item / user feature files that accompany the ratings
# matrix in the split.
parser.add_argument('-item_list', default=[], nargs='+',
                    help='One or more item datafiles to use in the split.')
parser.add_argument('-user_list', default=[], nargs='+',
                    help='List of user datafiles to use in the split.')
args = parser.parse_args()

# slash ambivalent: accept the read/write paths with or without a trailing
# separator (`slash` is defined earlier in the file) so that plain string
# concatenation with a file name works below.
if not args.readpath.endswith(slash):
    args.readpath += slash
if not args.writepath.endswith(slash):
    args.writepath += slash

# NOTE(review): the meaning of `cold=True` is defined by loader.makedirs,
# which is not visible here -- confirm before changing.
loader.makedirs(args.writepath, cold=True)

# read utility matrix and independently shuffle rows and columns
ratings = loader.import_data(args.readpath + args.ratings)
num_users = ratings.shape[0]
num_items = ratings.shape[1]
num_ratings = ratings.getnnz()
# random.shuffle works in place and needs a mutable sequence. On Python 3
# `range` is immutable, so the orders are materialized as lists first
# (on Python 2 this is just a harmless copy).
item_order = list(range(num_items))
user_order = list(range(num_users))
random.shuffle(item_order)
random.shuffle(user_order)

# save shuffled orders
scipy.io.savemat(args.writepath+'item_order.mat', {'data': np.array(item_order)})
scipy.io.savemat(args.writepath+'user_order.mat', {'data': np.array(user_order)})
# Apply the permutations: columns (items) first, then rows (users).
ratings = ratings[:, item_order]
ratings = ratings[user_order, :]
# 5% of the stored ratings; note this is a float, not an integer count.
split_size = num_ratings*0.05
示例#8
0
            else:
                zipcodes.append(0)

    loader.export_data(args.outpath + 'features_sex.index', loader.HotIndex(numpy.array(sexes), 2))
    loader.export_data(args.outpath + 'features_occ.index', loader.HotIndex(numpy.array(occupations), len(occlist) -1))
    loader.export_data(args.outpath + 'features_zip.index', loader.HotIndex(numpy.array(zipcodes), 1000))
    loader.export_data(args.outpath + 'features_age.mat', numpy.array(ages))

    # inspect processed data
    sex = loader.imatload(args.outpath + 'features_sex.index')
    print('sex:')
    print(sex.vec.shape, sex.dim)
    print(sex.vec[0:10])
    print('\n')

    occ = loader.imatload(args.outpath + 'features_occ.index')
    print('occ:')
    print(occ.vec.shape, occ.dim)
    print(occ.vec[0:10])
    print('\n')

    zip = loader.imatload(args.outpath + 'features_zip.index')
    print('zip:')
    print(zip.vec.shape, zip.dim)
    print(zip.vec[0:10])
    print('\n')

    age = loader.import_data(args.outpath + 'features_age.mat')
    print('age:')
    print(age.shape)
    print(age[0:10])
示例#9
0
            date = line[2].split('-')
            month.append(monthmap[date[1]])
            # year released==================================
            year.append(float(int(date[2])))
            genres = line[5:len(line)]
            for i in range(len(genres)):
                genres[i] = float(genres[i])
            genre.append(genres)

    # print(month_matrix.vec.shape, month_matrix.dim, genre_matrix.vec.shape, genre_matrix.dim, year_matrix.shape)
    loader.export_data(args.outpath + 'features_month.index', loader.HotIndex(numpy.array(month), 12))
    loader.export_data(args.outpath + 'features_year.mat', numpy.array(year))
    loader.export_data(args.outpath + 'features_genre.mat', numpy.array(genre))

    # inspect processed data
    month = loader.import_data(args.outpath + 'features_month.index')
    print('month:')
    print(month.vec.shape, month.dim)
    print(month.vec[0:20])
    print('\n')

    year = loader.import_data(args.outpath + 'features_year.mat')
    print('year:')
    print(year.shape)
    print(year[0:10])
    print('\n')

    genre = loader.import_data(args.outpath + 'features_genre.mat')
    print('genre:')
    print(genre.shape)
    print(genre[0:10])
示例#10
0
import numpy
from antk.core import loader
import scipy.sparse as sps
from numpy.linalg import norm
# Load the genre features, the item index of each split and the binary
# document-term features from the ml100k data folder.
genres = loader.import_data('/home/aarontuor/data/ml100k/item/features_genre.mat')
dev_item = loader.import_data('/home/aarontuor/data/ml100k/dev/features_item.index')
train_item = loader.import_data('/home/aarontuor/data/ml100k/train/features_item.index')
test_item = loader.import_data('/home/aarontuor/data/ml100k/test/features_item.index')
words = loader.import_data('/home/aarontuor/data/ml100k/item/features_bin_doc_term.mat')

# Divide every genre row by the square of its Euclidean norm.
_row_norm = norm(genres, axis=1, keepdims=True)
genre_dist = genres / (_row_norm * _row_norm)

# Pick the genre and word rows belonging to the items of each split.
devgenre, devwords = genres[dev_item.vec], words[dev_item.vec]
testgenre, testwords = genres[test_item.vec], words[test_item.vec]
traingenre, trainwords = genres[train_item.vec], words[train_item.vec]

# Report the resulting shapes: genre features first, then word features,
# each in dev / test / train order.
for _subset in (devgenre, testgenre, traingenre):
    print(_subset.shape)

for _subset in (devwords, testwords, trainwords):
    print(_subset.shape)