Example #1
    batch_size = args.batch_size
    optimizer = 'sgd'
    use_sparse = not args.use_dense
    factor_size = args.factor_size
    dummy_iter = args.dummy_iter
    print_every = args.print_every

    momentum = 0.9
    ctx = mx.gpu(0) if args.use_gpu else mx.cpu(0)
    learning_rate = 0.1

    # prepare dataset and iterators
    max_user = MOVIELENS['max_user']
    max_movies = MOVIELENS['max_movie']
    data_dir = os.path.join(os.getcwd(), 'data')
    get_movielens_data(data_dir, MOVIELENS['dataset'])
    train_iter = get_movielens_iter(MOVIELENS['train'], batch_size, dummy_iter)
    val_iter = get_movielens_iter(MOVIELENS['val'], batch_size, dummy_iter)

    # construct the model
    net = matrix_fact_net(factor_size, factor_size, max_user, max_movies, sparse_embed=use_sparse)

    # initialize the module
    mod = mx.module.Module(symbol=net, context=ctx, data_names=['user', 'item'],
                           label_names=['score'])
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
    mod.init_params(initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
    optim = mx.optimizer.create(optimizer, learning_rate=learning_rate, momentum=momentum,
                                wd=1e-4, rescale_grad=1.0/batch_size)
    mod.init_optimizer(optimizer=optim)
    # use MSE as the metric
    metric = mx.metric.create('MSE')  # assumed completion for the truncated snippet
Example #2
    model = Model(
        inputs=[positive_item_input, negative_item_input, user_input],
        outputs=loss)
    model.compile(loss=identity_loss, optimizer=Adam())

    return model


if __name__ == '__main__':

    latent_dim = 100
    num_epochs = 10

    # Read data
    train, test = data.get_movielens_data()
    num_users, num_items = train.shape

    # Prepare the test triplets
    test_uid, test_pid, test_nid = data.get_triplets(test)

    model = build_model(num_users, num_items, latent_dim)

    # Print the model structure
    print(model.summary())

    # Sanity check, should be around 0.5
    print('AUC before training %s' % metrics.full_auc(model, test))

    for epoch in range(num_epochs):
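        # (assumed sketch of the truncated loop body) resample training triplets,
        # run one pass of training, then report test AUC
        uid, pid, nid = data.get_triplets(train)
        X = {'user_input': uid,
             'positive_item_input': pid,
             'negative_item_input': nid}
        model.fit(X, np.ones(len(uid)), batch_size=64, epochs=1, verbose=0, shuffle=True)
        print('AUC after epoch %s: %s' % (epoch, metrics.full_auc(model, test)))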
Example #3
    loss = model.predict({
        'user_input': user_features,
        'positive_item_input': positive_item_features,
        'negative_item_input': negative_item_features
    })['triplet_loss']

    return (loss > 0).mean()


if __name__ == '__main__':

    num_epochs = 5

    # Read data
    train, test = data.get_movielens_data()
    num_users, num_items = train.shape

    # Prepare the test triplets
    test_uid, test_pid, test_nid = data.get_triplets(test)
    test_user_features, test_positive_item_features, test_negative_item_features = data.get_dense_triplets(
        test_uid, test_pid, test_nid, num_users, num_items)

    # Sample triplets from the training data
    uid, pid, nid = data.get_triplets(train)
    user_features, positive_item_features, negative_item_features = data.get_dense_triplets(
        uid, pid, nid, num_users, num_items)

    model = get_graph(num_users, num_items, 256)

    # Print the model structure
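    print(model.summary())  # assumed completion, mirroring Example #2 above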
Example #4
from lightfm import LightFM
import data
import sys
import numpy as np

train, test = data.get_movielens_positive_data('/data/sidana/recnet_draft/' +
                                               sys.argv[1] + '/lightfm')
uid_train, pid_train, nid_train = data.get_triplets(train)
uid_test, pid_test, nid_test = data.get_triplets(test)

model = LightFM(no_components=14,
                loss='logistic',
                learning_rate=0.01,
                item_alpha=0.0001,
                user_alpha=0.0001)
model.fit(train, epochs=10, num_threads=1)

train_all, test_all = data.get_movielens_data('/data/sidana/recnet_draft/' +
                                              sys.argv[1] + '/lightfm')
uid_all_train, pid_all_train, nid_all_train = data.get_triplets(train_all)
uid_all_test, pid_all_test, nid_all_test = data.get_triplets(test_all)

export_basename = '/data/sidana/recnet_draft/' + sys.argv[1] + '/lightfm_all/vectors/'
export_pred = open(export_basename + 'pr', 'w')
export_true = open(export_basename + 'gt', 'w')

for u in uid_test:
    #num_items_all_test_u = test_all.getrow(u).indices.shape[0]
    #items_all_test_u = test_all.getrow(u).indices
    known_positives = test.getrow(u).indices
    scores = model.predict(u, np.unique(test_all.col))
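    # (assumed continuation) rank items by predicted score, then dump predictions
    # and ground truth for this user to the export files opened above
    ranked_items = np.unique(test_all.col)[np.argsort(-scores)]
    export_pred.write(' '.join(str(i) for i in ranked_items) + '\n')
    export_true.write(' '.join(str(i) for i in known_positives) + '\n')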
Example #5
from lightfm import LightFM
import data
import sys
import numpy as np


train, test = data.get_movielens_positive_data('/data/sidana/ijcai_competetion/data_analysis/')
uid_train, pid_train, nid_train = data.get_triplets(train)
uid_test, pid_test, nid_test = data.get_triplets(test)

model = LightFM(no_components=14,
                loss='logistic',
                learning_rate=0.01,
                item_alpha=0.0001,
                user_alpha=0.0001)
model.fit(train, epochs=10, num_threads=1)

train_all, test_all = data.get_movielens_data('/data/sidana/ijcai_competetion/data_analysis/')
uid_all_train, pid_all_train, nid_all_train = data.get_triplets(train_all)
uid_all_test, pid_all_test, nid_all_test = data.get_triplets(test_all)

export_basename = '/data/sidana/ijcai_competetion/baselines/vectors/'
export_pred = open(export_basename + 'pr', 'w')
export_true = open(export_basename + 'gt', 'w')
export_scores = open(export_basename + 'rec', 'w')

for u in uid_test:
    num_items_all_test_u = test_all.getrow(u).indices.shape[0]
    items_all_test_u = test_all.getrow(u).indices
    known_positives = test.getrow(u).indices
    scores = model.predict(u, items_all_test_u)

    # write scores for diversity metrics
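    export_scores.write(' '.join(str(s) for s in scores) + '\n')  # assumed continuation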
Example #6
    num_epoch = args.num_epoch
    batch_size = args.batch_size
    optimizer = 'sgd'
    use_sparse = not args.use_dense
    factor_size = args.factor_size
    dummy_iter = args.dummy_iter
    print_every = args.print_every

    momentum = 0.9
    ctx = mx.gpu(0) if args.use_gpu else mx.cpu(0)
    learning_rate = 0.1

    # prepare dataset and iterators
    max_user = MOVIELENS['max_user']
    max_movies = MOVIELENS['max_movie']
    get_movielens_data(MOVIELENS['dataset'])
    train_iter = get_movielens_iter(MOVIELENS['train'], batch_size, dummy_iter)
    val_iter = get_movielens_iter(MOVIELENS['val'], batch_size, dummy_iter)

    # construct the model
    net = matrix_fact_net(factor_size, factor_size, max_user, max_movies, sparse_embed=use_sparse)

    # initialize the module
    mod = mx.module.Module(symbol=net, context=ctx, data_names=['user', 'item'],
                           label_names=['score'])
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
    mod.init_params(initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
    optim = mx.optimizer.create(optimizer, learning_rate=learning_rate, momentum=momentum,
                                wd=1e-4, rescale_grad=1.0/batch_size)
    mod.init_optimizer(optimizer=optim)
    # use MSE as the metric
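    metric = mx.metric.create('MSE')  # assumed completion for the truncated snippet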
Example #7
    batch_size = args.batch_size  # added: batch_size is used by the iterators below
    optimizer = 'sgd'
    factor_size = args.factor_size
    log_interval = args.log_interval

    momentum = 0.9
    ctx = [mx.gpu(int(i))
           for i in args.gpus.split(',')] if args.gpus else [mx.cpu()]
    learning_rate = 0.1
    mx.random.seed(args.seed)
    np.random.seed(args.seed)

    # prepare dataset and iterators
    max_user = MOVIELENS['max_user']
    max_movies = MOVIELENS['max_movie']
    data_dir = os.path.join(os.getcwd(), 'data')
    get_movielens_data(data_dir, MOVIELENS['dataset'])
    train_iter = get_movielens_iter(MOVIELENS['train'], batch_size)
    val_iter = get_movielens_iter(MOVIELENS['val'], batch_size)

    # construct the model
    net = matrix_fact_net(factor_size,
                          factor_size,
                          max_user,
                          max_movies,
                          dense=args.dense)

    # initialize the module
    mod = mx.module.Module(net,
                           context=ctx,
                           data_names=['user', 'item'],
                           label_names=['score'])
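    # (assumed continuation, mirroring Examples #1 and #6) bind the module and
    # initialize parameters before training
    mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
    mod.init_params(initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))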