Example #1
import copy
import os
import time

import numpy as np
import pandas as pd
import tensorflow as tf

# Project-local helpers (Dataset, Ourmodel, parse_args, generate_input_fast,
# get_i_us, eval, ...) are assumed to be importable from the surrounding project.


def training(n_epochs=40,
             batch_size=256,
             num_neg=4,
             resample=True,
             verbose=10):
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    data = Dataset(args.path)
    idx_images, idx2item = data.item2image_u2a()
    #idx_images, idx2item = data.item2image()
    #idx_images = idx_images [:, -1:, :]
    #print idx_images[0]
    #exit(0)
    users = data.train['UserId'].unique()
    user2idx = pd.Series(data=np.arange(len(users)), index=users)
    item2idx = pd.Series(data=np.arange(len(idx2item)), index=idx2item)
    n_items, n_factors, embedding = idx_images.shape

    print(n_items, len(idx2item))
    print("the shape of images is", idx_images.shape)
    model = Ourmodel(n_items, len(users), n_factors, idx_images, embedding,
                     0.01)
    model.build_graph()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        print "initialized"
        if args.pretrain == True:
            print "load the pretrain model"
            saver.restore(sess,
                          "./model2/music_GCNEncoder_model.ckpt")  # usermap
            #saver.restore(sess, "./model/usermap.ckpt")
            eval(sess, model, data, item2idx, user2idx, idx2item, users, True)
            #return
        else:
            start_time = time.time()
            train_l, train_r = generate_input_fast(data.train, item2idx,
                                                   user2idx, num_neg)
            print "generating data needed for 1 epoch", time.time(
            ) - start_time, 's'
            print "training for learning usermape"
            for epoch in range(n_epochs):
                if resample:
                    #user_negs = data.neg_sample_pop(idx2item, pop, pop_items)
                    #user_negs = data.neg_sample(idx2item)
                    train_l, train_r = generate_input_fast(
                        data.train, item2idx, user2idx, num_neg)
                losses, lrs, l2_losss = [], [], []
                start = 0
                while start < train_l.shape[0]:
                    feed_dict = {
                        model.user_input: train_l[start:start + batch_size],
                        model.item_input: train_r[start:start + batch_size],
                        model.global_step: epoch,
                    }
                    _, loss, lr, l2_loss = sess.run(
                        [model.optimizer, model.loss, model.learning_rate,
                         model.l2_loss], feed_dict)
                    start += batch_size
                    losses.append(loss)
                    lrs.append(lr)
                    l2_losss.append(l2_loss)
                print "epoch", epoch, ", loss", np.array(losses).mean(
                ), np.array(lrs).mean(), np.array(l2_losss).mean()
                #print lrs
                #eval(sess, model, data, item2idx, user2idx, idx2item, users, False)
                if (epoch + 1) % verbose == 0:
                    eval(sess, model, data, item2idx, user2idx, idx2item,
                         users)
                    #train_l, train_r = generate_input(data.train, item2idx, user2idx, num_neg)
                    #save(model.user_bias.eval(), 'user_bias.epoch' + str(epoch+1))
            #saver.save(sess, "./model2/music_GCNEncoder_model.ckpt")

        print "training for completion for itemmaps by using usermap"
        ## get usermap and complete the lossed ens of the items
        # get usermap embedding and itemmap embedding
        user_map_embedding = model.read_usermap_value().eval(
        )  # shape (n, k, d)
        item_map_embedding = copy.copy(idx_images)
        # get the item-user connection, idx
        i_us = get_i_us(data.train, item2idx, user2idx)
        # do completion: for items that appear in the training interactions, an
        # item-map slot whose embedding sums to a non-positive value is treated
        # as missing and replaced by the mean of that slot across the user maps
        # of the users connected to the item
        for iidx in range(item_map_embedding.shape[0]):
            item_map = item_map_embedding[iidx]
            item_flag = np.sum(item_map, axis=-1)
            for k in range(item_flag.shape[0]):
                if iidx in i_us and item_flag[k] <= 0.0:
                    item_map_embedding[iidx, k] = np.mean(
                        user_map_embedding[i_us[iidx], k], axis=0)
        # write the completed item-map embedding back into the graph
        #print item_map_embedding[0,:3,]
        sess.run(tf.assign(model.item_map_embedding, item_map_embedding))

        for epoch in range(n_epochs):
            if resample:
                #user_negs = data.neg_sample_pop(idx2item, pop, pop_items)
                #user_negs = data.neg_sample(idx2item)
                train_l, train_r = generate_input_fast(data.train, item2idx,
                                                       user2idx, num_neg)
            losses, lrs, l2_losss = [], [], []
            start = 0
            while start < train_l.shape[0]:
                feed_dict = {
                    model.user_input: train_l[start:start + batch_size],
                    model.item_input: train_r[start:start + batch_size],
                    model.global_step: epoch,
                }
                _, loss, lr, l2_loss = sess.run(
                    [model.optimizer, model.loss, model.learning_rate,
                     model.l2_loss], feed_dict)
                start += batch_size
                losses.append(loss)
                lrs.append(lr)
                l2_losss.append(l2_loss)
            print "epoch", epoch, ", loss", np.array(losses).mean(), np.array(
                lrs).mean(), np.array(l2_losss).mean()
            #print lrs
            #eval(sess, model, data, item2idx, user2idx, idx2item, users, False)
            if (epoch + 1) % verbose == 0:
                eval(sess, model, data, item2idx, user2idx, idx2item, users)
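
The completion step above indexes user_map_embedding with i_us[iidx], so get_i_us (not shown here) presumably returns a mapping from each item index to the indices of the users that interacted with it in data.train. Below is a minimal sketch under that assumption; the 'ItemId' column name is itself an assumption, since only 'UserId' appears in the snippets.

# Hypothetical reconstruction of get_i_us (not part of the original snippet).
# Assumed behavior: map each item index to the array of user indices that
# interacted with that item in the training DataFrame.
import numpy as np


def get_i_us(train, item2idx, user2idx):
    i_us = {}
    for _, row in train.iterrows():
        iidx = int(item2idx[row['ItemId']])  # assumed item-id column name
        uidx = int(user2idx[row['UserId']])
        i_us.setdefault(iidx, []).append(uidx)
    # arrays allow the fancy indexing used during completion,
    # e.g. user_map_embedding[i_us[iidx], k]
    return {iidx: np.asarray(uidxs) for iidx, uidxs in i_us.items()}
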
Example #2
import time

import numpy as np
import pandas as pd
import tensorflow as tf

# Project-local helpers (Dataset, Ourmodel, parse_args, generate_input_fast,
# eval, ...) are assumed to be importable from the surrounding project.


def training(n_epochs=40,
             batch_size=256,
             num_neg=4,
             resample=True,
             verbose=10):
    args = parse_args()
    data = Dataset('../data/ml-1m/')
    idx_images, idx2item = data.item2image_u2a()
    #idx_images, idx2item = data.item2image()
    #idx_images = idx_images [:, -1:, :]
    #print idx_images[0]
    users = data.train['UserId'].unique()
    user2idx = pd.Series(data=np.arange(len(users)), index=users)
    item2idx = pd.Series(data=np.arange(len(idx2item)), index=idx2item)
    start_time = time.time()
    train_l, train_r = generate_input_fast(data.train, item2idx, user2idx,
                                           num_neg)
    print "generating data needed for 1 epoch", time.time() - start_time, 's'
    n_items, n_factors, embedding = idx_images.shape

    print n_items, len(idx2item)
    print "the shape of images is", idx_images.shape
    model = Ourmodel(n_items, len(users), n_factors, idx_images, embedding,
                     0.01)
    model.build_graph()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        print "initialized"

        #initialize for training batches
        #feed_dict[model.user_input] = train_l
        #feed_dict[model.item_input] = train_r

        #eval(sess, model, data, item2idx, user2idx, idx2item, users, False)
        #save(model.user_bias.eval(), 'user_bias.epoch0')
        #save_UI_simMap(sess, model, train_l, train_r, 'train_DT.pkl')
        if args.pretrain:
            print("loading the pretrained model")
            saver.restore(sess, "./model/model.ckpt")
            #eval(sess, model, data, item2idx, user2idx, idx2item, users)
            #save_UI_simMap(sess, model, train_l, train_r, 'train_DT.pkl')
            eval(sess, model, data, item2idx, user2idx, idx2item, users, True)
            return

        for epoch in range(n_epochs):
            if resample:
                #user_negs = data.neg_sample_pop(idx2item, pop, pop_items)
                #user_negs = data.neg_sample(idx2item)
                train_l, train_r = generate_input_fast(data.train, item2idx,
                                                       user2idx, num_neg)
            losses, lrs, l2_losss = [], [], []
            start = 0
            while start < train_l.shape[0]:
                feed_dict = {
                    model.user_input: train_l[start:start + batch_size],
                    model.item_input: train_r[start:start + batch_size],
                    model.global_step: epoch,
                }
                # print(epoch, start, start + batch_size)
                # print(train_l[start:start + batch_size])
                # print(train_r[start:start + batch_size])
                _, loss, lr, l2_loss = sess.run(
                    [model.optimizer, model.loss, model.learning_rate,
                     model.l2_loss], feed_dict)
                # print(loss)
                start += batch_size
                losses.append(loss)
                lrs.append(lr)
                l2_losss.append(l2_loss)
            print "epoch", epoch, ", loss", np.array(losses).mean(), np.array(
                lrs).mean(), np.array(l2_losss).mean()
            #print lrs
            #eval(sess, model, data, item2idx, user2idx, idx2item, users, False)
            if (epoch + 1) % verbose == 0:
                eval(sess, model, data, item2idx, user2idx, idx2item, users)
                #train_l, train_r = generate_input(data.train, item2idx, user2idx, num_neg)
                #save(model.user_bias.eval(), 'user_bias.epoch' + str(epoch+1))
        #save_UI_simMap(sess, model, train_l, train_r, 'train_DT.pkl')
        #eval(sess, model, data, item2idx, user2idx, idx2item, users, True)
        #eval(sess, model, data, item2idx, user2idx, idx2item, users)
        saver.save(sess, "./model/usermap.ckpt")
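
Neither example includes parse_args. The sketch below covers only the attributes the two training() variants actually read (args.path, args.gpu, args.pretrain); the flag types, defaults, and help strings are assumptions rather than the original definitions.

# Hypothetical sketch of parse_args(); defaults and descriptions are assumed.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="Train the model.")
    parser.add_argument('--path', default='../data/ml-1m/',
                        help='directory containing the dataset (assumed default)')
    parser.add_argument('--gpu', default='0',
                        help='value assigned to CUDA_VISIBLE_DEVICES')
    parser.add_argument('--pretrain', action='store_true',
                        help='restore a saved checkpoint and run evaluation only')
    return parser.parse_args()

Under these assumptions, a run such as `python train.py --gpu 0 --pretrain` would restore the saved checkpoint and evaluate it instead of training from scratch (the script name train.py is also hypothetical).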