def training(n_epochs=40, batch_size=256, num_neg=4, resample=True, verbose=10):
    """Train the model in two stages inside one TensorFlow session.

    Stage one either restores ``./model2/music_GCNEncoder_model.ckpt`` and
    evaluates it (when ``args.pretrain == True``) or optimises the model for
    ``n_epochs`` epochs.

    Stage two builds a completed copy of ``idx_images``: for every item that
    appears in the item->users mapping, each slot whose values sum to a
    non-positive number is overwritten with the mean of the same slot of the
    user map over that item's users.  The completed array is assigned to
    ``model.item_map_embedding`` and the model is optimised for another
    ``n_epochs`` epochs.

    Side effect: sets ``CUDA_VISIBLE_DEVICES`` from ``args.gpu``.

    Args:
        n_epochs: number of epochs run in each stage.
        batch_size: number of rows of the training arrays fed per optimiser
            step; must be positive.
        num_neg: forwarded unchanged to ``generate_input_fast``
            (presumably negatives sampled per positive -- TODO confirm).
        resample: when equal to ``True`` the training arrays are regenerated
            at the start of every epoch.
        verbose: the model is evaluated whenever ``(epoch + 1) % verbose == 0``.

    Raises:
        ValueError: if ``batch_size`` is not positive (the batching loop
            could otherwise never advance).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    data = Dataset(args.path)
    idx_images, idx2item = data.item2image_u2a()

    # Map raw user / item identifiers to contiguous integer positions.
    users = data.train['UserId'].unique()
    user2idx = pd.Series(data=np.arange(len(users)), index=users)
    item2idx = pd.Series(data=np.arange(len(idx2item)), index=idx2item)

    n_items, n_factors, embedding = idx_images.shape
    # Single-argument print() emits the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("%s %s" % (n_items, len(idx2item)))
    print("the shape of images is %s" % (idx_images.shape,))

    model = Ourmodel(n_items, len(users), n_factors, idx_images, embedding, 0.01)
    model.build_graph()

    # Deliberately an equality test rather than truthiness, so that non-bool
    # values passed for ``resample`` keep selecting the same branch as before.
    resample_each_epoch = (resample == True)

    def sample_pairs():
        # Build one epoch's worth of training arrays.
        return generate_input_fast(data.train, item2idx, user2idx, num_neg)

    def run_epochs(sess, train_l, train_r):
        # Run n_epochs of mini-batch optimisation, evaluating periodically.
        for epoch in range(n_epochs):
            if resample_each_epoch:
                train_l, train_r = sample_pairs()

            losses, lrs, l2_losses = [], [], []
            for start in range(0, train_l.shape[0], batch_size):
                stop = start + batch_size
                feed_dict = {
                    model.user_input: train_l[start:stop],
                    model.item_input: train_r[start:stop],
                    # The epoch index is what gets fed as the global step.
                    model.global_step: epoch,
                }
                _, loss, lr, l2_loss = sess.run(
                    [model.optimizer, model.loss,
                     model.learning_rate, model.l2_loss],
                    feed_dict)
                losses.append(loss)
                lrs.append(lr)
                l2_losses.append(l2_loss)

            print("epoch %s , loss %s %s %s" % (
                epoch,
                np.array(losses).mean(),
                np.array(lrs).mean(),
                np.array(l2_losses).mean()))

            if (epoch + 1) % verbose == 0:
                eval(sess, model, data, item2idx, user2idx, idx2item, users)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        print("initialized")

        # NOTE(review): the original indentation was lost; the restore branch
        # is assumed to fall through to stage two (its ``return`` was
        # commented out) -- confirm.
        if args.pretrain == True:
            print("load the pretrain model")
            saver.restore(sess, "./model2/music_GCNEncoder_model.ckpt")  # usermap
            eval(sess, model, data, item2idx, user2idx, idx2item, users, True)
            # Stage two needs training arrays.  With per-epoch resampling they
            # are generated inside run_epochs; otherwise sample once here so
            # stage two does not hit an unassigned ``train_l``.
            train_l = train_r = None
            if not resample_each_epoch:
                train_l, train_r = sample_pairs()
        else:
            start_time = time.time()
            train_l, train_r = sample_pairs()
            print("generating data needed for 1 epoch %s s"
                  % (time.time() - start_time))
            print("training for learning usermape")
            run_epochs(sess, train_l, train_r)

        print("training for completion for itemmaps by using usermap")
        user_map_embedding = model.read_usermap_value().eval()  # shape (n, k, d)
        # Work on a copy: idx_images itself was handed to Ourmodel above.
        item_map_embedding = copy.copy(idx_images)
        # Item index -> indices of the users connected to that item.
        i_us = get_i_us(data.train, item2idx, user2idx)

        for iidx in range(item_map_embedding.shape[0]):
            if iidx not in i_us:
                # No connected users: nothing to average, leave the item as is.
                continue
            linked_users = i_us[iidx]
            # Per-slot sums are taken once, before any slot of this item is
            # overwritten.
            slot_sums = np.sum(item_map_embedding[iidx], axis=-1)
            for k in range(slot_sums.shape[0]):
                if slot_sums[k] <= 0.0:
                    item_map_embedding[iidx, k] = np.mean(
                        user_map_embedding[linked_users, k], axis=0)

        # Push the completed item map into the graph before stage-two training.
        sess.run(tf.assign(model.item_map_embedding, item_map_embedding))
        run_epochs(sess, train_l, train_r)
def training(n_epochs=40, batch_size=256, num_neg=4, resample=True, verbose=10):
    """Fit the model on the data under ``../data/ml-1m/`` and checkpoint it.

    When ``args.pretrain == True`` the checkpoint ``./model/model.ckpt`` is
    restored and evaluated, and the function returns without training.
    Otherwise the model is optimised for ``n_epochs`` epochs in mini-batches
    and saved to ``./model/usermap.ckpt``.

    Args:
        n_epochs: number of training epochs.
        batch_size: number of rows of the training arrays fed per step.
        num_neg: forwarded unchanged to ``generate_input_fast``
            (presumably negatives sampled per positive -- TODO confirm).
        resample: when equal to ``True`` the training arrays are regenerated
            at the start of every epoch.
        verbose: the model is evaluated whenever ``(epoch + 1) % verbose == 0``.
    """
    args = parse_args()
    data = Dataset('../data/ml-1m/')
    idx_images, idx2item = data.item2image_u2a()

    # Contiguous integer positions for raw user / item identifiers.
    users = data.train['UserId'].unique()
    user2idx = pd.Series(data=np.arange(len(users)), index=users)
    item2idx = pd.Series(data=np.arange(len(idx2item)), index=idx2item)

    tic = time.time()
    user_feed, item_feed = generate_input_fast(
        data.train, item2idx, user2idx, num_neg)
    # Single-argument print() emits the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("generating data needed for 1 epoch %s s" % (time.time() - tic))

    n_items, n_factors, embedding = idx_images.shape
    print("%s %s" % (n_items, len(idx2item)))
    print("the shape of images is %s" % (idx_images.shape,))

    model = Ourmodel(n_items, len(users), n_factors, idx_images, embedding, 0.01)
    model.build_graph()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        print("initialized")

        if args.pretrain == True:
            # Evaluation-only path: restore, evaluate, and stop.
            print("load the pretrain model")
            saver.restore(sess, "./model/model.ckpt")
            eval(sess, model, data, item2idx, user2idx, idx2item, users, True)
            return

        for epoch in range(n_epochs):
            if resample == True:
                user_feed, item_feed = generate_input_fast(
                    data.train, item2idx, user2idx, num_neg)

            epoch_losses = []
            epoch_lrs = []
            epoch_l2 = []
            n_rows = user_feed.shape[0]
            offset = 0
            while offset < n_rows:
                upper = offset + batch_size
                feed_dict = {
                    model.user_input: user_feed[offset:upper],
                    model.item_input: item_feed[offset:upper],
                    # The epoch index is what gets fed as the global step.
                    model.global_step: epoch,
                }
                step_out = sess.run(
                    [model.optimizer, model.loss,
                     model.learning_rate, model.l2_loss],
                    feed_dict)
                epoch_losses.append(step_out[1])
                epoch_lrs.append(step_out[2])
                epoch_l2.append(step_out[3])
                offset = upper

            print("epoch %s , loss %s %s %s" % (
                epoch,
                np.array(epoch_losses).mean(),
                np.array(epoch_lrs).mean(),
                np.array(epoch_l2).mean()))

            if (epoch + 1) % verbose == 0:
                eval(sess, model, data, item2idx, user2idx, idx2item, users)

        # NOTE(review): the original indentation was lost; the checkpoint is
        # assumed to be written once, after the last epoch -- confirm.
        saver.save(sess, "./model/usermap.ckpt")