def __init__(self, user_size, item_size, size, batch_size, learning_rate,
             learning_rate_decay_factor, user_attributes=None,
             item_attributes=None, item_ind2logit_ind=None,
             logit_ind2item_ind=None, n_input_items=0, loss_function='ce',
             logit_size_test=None, dropout=1.0, top_N_items=100,
             use_sep_item=True, n_sampled=None, output_feat=1,
             indices_item=None, dtype=tf.float32):
    self.user_size = user_size
    self.item_size = item_size
    self.top_N_items = top_N_items

    if user_attributes is not None:
        user_attributes.set_model_size(size)
    self.user_attributes = user_attributes
    if item_attributes is not None:
        item_attributes.set_model_size(size)
    self.item_attributes = item_attributes

    self.item_ind2logit_ind = item_ind2logit_ind
    self.logit_ind2item_ind = logit_ind2item_ind
    if logit_ind2item_ind is not None:
        self.logit_size = len(logit_ind2item_ind)
    if indices_item is not None:
        self.indices_item = indices_item
    else:
        self.indices_item = range(self.logit_size)
    self.logit_size_test = logit_size_test

    self.loss_function = loss_function
    self.n_input_items = n_input_items
    self.n_sampled = n_sampled
    self.batch_size = batch_size

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    self.att_emb = None
    self.dtype = dtype

    mb = self.batch_size
    # item_target is the mapped (logit-space) item index; item_id_target is
    # the original item id.
    self.item_target = tf.placeholder(tf.int32, shape=[mb], name="item")
    self.item_id_target = tf.placeholder(tf.int32, shape=[mb], name="item_id")

    self.dropout = dropout
    self.keep_prob = tf.constant(dropout, dtype=dtype)
    # self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    n_input = max(n_input_items, 1)
    m = embed_attribute.EmbeddingAttribute(
        user_attributes, item_attributes, mb, self.n_sampled, n_input,
        use_sep_item, item_ind2logit_ind, logit_ind2item_ind)
    self.att_emb = m

    embedded_user, _ = m.get_batch_user(1.0, False)
    embedded_items = []
    for i in range(n_input):
        embedded_item, _ = m.get_batch_item('input{}'.format(i), batch_size)
        embedded_item = tf.reduce_mean(embedded_item, 0)
        embedded_items.append(embedded_item)

    print("non-sampled prediction")
    input_embed = tf.reduce_mean([embedded_user, embedded_items[0]], 0)
    input_embed = tf.nn.dropout(input_embed, self.keep_prob)
    logits = m.get_prediction(input_embed, output_feat=output_feat)

    if self.n_input_items == 0:
        input_embed_test = embedded_user
    else:
        # Two cases: 1) n items; 2) end_line item.
        # input_embed_test = [embedded_user] + embedded_items
        # input_embed_test = tf.reduce_mean(input_embed_test, 0)
        input_embed_test = [embedded_user] + [tf.reduce_mean(embedded_items, 0)]
        input_embed_test = tf.reduce_mean(input_embed_test, 0)
    logits_test = m.get_prediction(input_embed_test, output_feat=output_feat)

    # Mini-batch (sampled) version.
    print("sampled prediction")
    if self.n_sampled is not None:
        sampled_logits = m.get_prediction(input_embed, 'sampled',
                                          output_feat=output_feat)
        # embedded_item, item_b = m.get_sampled_item(self.n_sampled)
        # sampled_logits = tf.matmul(embedded_user,
        #                            tf.transpose(embedded_item)) + item_b
        target_score = m.get_target_score(input_embed, self.item_id_target)

    loss = self.loss_function
    if loss in ['warp', 'ce', 'bbpr']:
        batch_loss = m.compute_loss(logits, self.item_target, loss)
        batch_loss_test = m.compute_loss(logits_test, self.item_target, loss)
    elif loss in ['mw']:
        batch_loss = m.compute_loss(sampled_logits, target_score, loss)
        batch_loss_eval = m.compute_loss(logits, self.item_target, 'warp')
    else:
        print("not implemented!")
        exit(-1)
    if loss in ['warp', 'mw', 'bbpr']:
        self.set_mask, self.reset_mask = m.get_warp_mask()

    self.loss = tf.reduce_mean(batch_loss)
    # self.loss_eval = tf.reduce_mean(batch_loss_eval) if loss == 'mw' else self.loss
    # NOTE: batch_loss_test is only defined for the non-sampled losses above.
    self.loss_test = tf.reduce_mean(batch_loss_test)

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    opt = tf.train.AdagradOptimizer(self.learning_rate)
    # opt = tf.train.AdamOptimizer(self.learning_rate)
    gradients = tf.gradients(self.loss, params)
    self.updates = opt.apply_gradients(zip(gradients, params),
                                       global_step=self.global_step)

    self.output = logits_test
    values, self.indices = tf.nn.top_k(self.output, self.top_N_items,
                                       sorted=True)
    self.saver = tf.train.Saver(tf.global_variables())
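# --- Illustrative usage sketch (comments only; not part of the original code).
# The enclosing class name `AttributeMF` is an assumption for the example, and
# u_attr / i_attr stand for attribute objects built elsewhere (see get_data).
#
#   model = AttributeMF(user_size=10000, item_size=50000, size=32,
#                       batch_size=64, learning_rate=0.1,
#                       learning_rate_decay_factor=0.95,
#                       user_attributes=u_attr, item_attributes=i_attr,
#                       item_ind2logit_ind=item_ind2logit_ind,
#                       logit_ind2item_ind=logit_ind2item_ind,
#                       n_input_items=1, loss_function='ce', n_sampled=1024)
#   # model.output holds the test-time logits; model.indices the top-N logit indices.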
def __init__(self, user_size, item_size, size, num_layers, batch_size,
             learning_rate, learning_rate_decay_factor, user_attributes=None,
             item_attributes=None, item_ind2logit_ind=None,
             logit_ind2item_ind=None, loss_function='ce', GPU=None,
             logit_size_test=None, nonlinear=None, dropout=1.0,
             n_sampled=None, indices_item=None, dtype=tf.float32,
             top_N_items=100, hidden_size=500):
    self.user_size = user_size
    self.item_size = item_size
    self.top_N_items = top_N_items

    if user_attributes is not None:
        user_attributes.set_model_size(size)
    self.user_attributes = user_attributes
    if item_attributes is not None:
        item_attributes.set_model_size(size)
    self.item_attributes = item_attributes

    self.item_ind2logit_ind = item_ind2logit_ind
    self.logit_ind2item_ind = logit_ind2item_ind
    if logit_ind2item_ind is not None:
        self.logit_size = len(logit_ind2item_ind)
    if indices_item is not None:
        self.indices_item = indices_item
    else:
        self.indices_item = range(self.logit_size)
    self.logit_size_test = logit_size_test

    self.nonlinear = nonlinear
    self.loss_function = loss_function
    self.n_sampled = n_sampled
    self.batch_size = batch_size

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    self.att_emb = None
    self.dtype = dtype
    self.data_length = None
    self.train_permutation = None
    self.start_index = None

    mb = self.batch_size
    # item_target is the mapped (logit-space) item index; item_id_target is
    # the original item id.
    self.item_target = tf.placeholder(tf.int32, shape=[mb], name="item")
    self.item_id_target = tf.placeholder(tf.int32, shape=[mb], name="item_id")

    self.dropout = dropout
    self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    m = embed_attribute.EmbeddingAttribute(
        user_attributes, item_attributes, mb, self.n_sampled, 0, False,
        item_ind2logit_ind, logit_ind2item_ind)
    self.att_emb = m

    embedded_user, user_b = m.get_batch_user(self.keep_prob, False)

    if self.nonlinear in ['relu', 'tanh']:
        # Optional two-layer MLP on top of the user embedding.
        act = tf.nn.relu if self.nonlinear == 'relu' else tf.tanh
        w1 = tf.get_variable('w1', [size, hidden_size], dtype=self.dtype)
        b1 = tf.get_variable('b1', [hidden_size], dtype=self.dtype)
        w2 = tf.get_variable('w2', [hidden_size, size], dtype=self.dtype)
        b2 = tf.get_variable('b2', [size], dtype=self.dtype)

        embedded_user, user_b = m.get_batch_user(1.0, False)
        h0 = tf.nn.dropout(act(embedded_user), self.keep_prob)
        h1 = act(tf.matmul(h0, w1) + b1)
        h1 = tf.nn.dropout(h1, self.keep_prob)
        h2 = act(tf.matmul(h1, w2) + b2)
        embedded_user = tf.nn.dropout(h2, self.keep_prob)

    pos_embs_item, pos_item_b = m.get_batch_item('pos', batch_size)
    pos_embs_item = tf.reduce_mean(pos_embs_item, 0)
    neg_embs_item, neg_item_b = m.get_batch_item('neg', batch_size)
    neg_embs_item = tf.reduce_mean(neg_embs_item, 0)
    # print('debug: user, item dim', embedded_user.get_shape(), neg_embs_item.get_shape())

    print("construct positive/negative items/scores \n(for bpr loss, AUC)")
    self.pos_score = tf.reduce_sum(tf.multiply(embedded_user, pos_embs_item), 1) + pos_item_b
    self.neg_score = tf.reduce_sum(tf.multiply(embedded_user, neg_embs_item), 1) + neg_item_b
    neg_pos = self.neg_score - self.pos_score
    self.auc = 0.5 - 0.5 * tf.reduce_mean(tf.sign(neg_pos))

    # Mini-batch (sampled) version.
    if self.n_sampled is not None:
        print("sampled prediction")
        sampled_logits = m.get_prediction(embedded_user, 'sampled')
        # embedded_item, item_b = m.get_sampled_item(self.n_sampled)
        # sampled_logits = tf.matmul(embedded_user,
        #                            tf.transpose(embedded_item)) + item_b
        target_score = m.get_target_score(embedded_user, self.item_id_target)

    print("non-sampled prediction")
    logits = m.get_prediction(embedded_user)

    loss = self.loss_function
    if loss in ['warp', 'ce', 'rs', 'rs-sig', 'bbpr']:
        batch_loss = m.compute_loss(logits, self.item_target, loss)
    elif loss in ['warp_eval']:
        batch_loss, batch_rank = m.compute_loss(logits, self.item_target, loss)
    elif loss in ['mw']:
        # batch_loss = m.compute_loss(sampled_logits, self.pos_score, loss)
        batch_loss = m.compute_loss(sampled_logits, target_score, loss)
        batch_loss_eval = m.compute_loss(logits, self.item_target, 'warp')
    elif loss in ['bpr', 'bpr-hinge']:
        batch_loss = m.compute_loss(neg_pos, self.item_target, loss)
    else:
        print("not implemented!")
        exit(-1)
    if loss in ['warp', 'warp_eval', 'mw', 'rs', 'rs-sig', 'bbpr']:
        self.set_mask, self.reset_mask = m.get_warp_mask()

    self.loss = tf.reduce_mean(batch_loss)
    self.batch_loss = batch_loss
    if loss in ['warp_eval']:
        self.batch_rank = batch_rank
    self.loss_eval = tf.reduce_mean(batch_loss_eval) if loss == 'mw' else self.loss

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    opt = tf.train.AdagradOptimizer(self.learning_rate)
    # opt = tf.train.AdamOptimizer(self.learning_rate)
    gradients = tf.gradients(self.loss, params)
    self.updates = opt.apply_gradients(zip(gradients, params),
                                       global_step=self.global_step)

    self.output = logits
    values, self.indices = tf.nn.top_k(self.output, self.top_N_items,
                                       sorted=True)
    self.saver = tf.train.Saver(tf.global_variables())
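# --- Illustrative training-step sketch (comments only; not part of the original
# code). The class name `AttributeMLP` and the feed-dict construction are
# assumptions; only keep_prob, dropout, updates, loss, and auc come from the
# constructor above, and the EmbeddingAttribute object is expected to supply
# the remaining input placeholders.
#
#   model = AttributeMLP(..., nonlinear='relu', dropout=0.5, loss_function='bpr')
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       feed = {...}  # user/item batches from model.att_emb, plus item targets
#       feed[model.keep_prob] = model.dropout
#       _, step_loss, auc = sess.run([model.updates, model.loss, model.auc], feed)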
def get_data(raw_data, data_dir=FLAGS.data_dir, combine_att=FLAGS.combine_att,
             logits_size_tr=FLAGS.item_vocab_size,
             thresh=FLAGS.vocab_min_thresh,
             use_user_feature=FLAGS.use_user_feature, test=FLAGS.test,
             mylog=mylog, use_item_feature=FLAGS.use_item_feature,
             recommend=False):
    (data_tr, data_va, u_attr, i_attr, item_ind2logit_ind, logit_ind2item_ind,
     user_index, item_index) = read_attributed_data(
        raw_data_dir=raw_data,
        data_dir=data_dir,
        combine_att=combine_att,
        logits_size_tr=logits_size_tr,
        thresh=thresh,
        use_user_feature=use_user_feature,
        use_item_feature=use_item_feature,
        test=test,
        mylog=mylog)

    # Remove unk.
    data_tr = [p for p in data_tr if (p[1] in item_ind2logit_ind)]

    # Remove items before week 40.
    if FLAGS.after40:
        data_tr = [p for p in data_tr if (to_week(p[2]) >= 40)]

    # Item frequency (for sampling).
    item_population, p_item = item_frequency(data_tr, FLAGS.power)

    # UNK and START
    # print(len(item_ind2logit_ind))
    # print(len(logit_ind2item_ind))
    # print(len(item_index))
    START_ID = len(item_index)
    # START_ID = i_attr.get_item_last_index()
    item_ind2logit_ind[START_ID] = 0

    seq_all = form_sequence(data_tr, maxlen=FLAGS.L)
    seq_tr0, seq_va0 = split_train_dev(seq_all, ratio=0.05)

    # Calculate buckets.
    global _buckets
    _buckets = calculate_buckets(seq_tr0 + seq_va0, FLAGS.L, FLAGS.n_bucket)
    _buckets = sorted(_buckets)

    # Split into buckets.
    seq_tr = split_buckets(seq_tr0, _buckets)
    seq_va = split_buckets(seq_va0, _buckets)

    # Get test data.
    if recommend:
        from evaluate import Evaluation as Evaluate
        evaluation = Evaluate(raw_data, test=test)
        uids = evaluation.get_uinds()  # abuse of 'uids': actually uinds
        seq_test = form_sequence_prediction(seq_all, uids, FLAGS.L, START_ID)
        _buckets = calculate_buckets(seq_test, FLAGS.L, FLAGS.n_bucket)
        _buckets = sorted(_buckets)
        seq_test = split_buckets(seq_test, _buckets)
    else:
        seq_test = []
        evaluation = None
        uids = []

    # Create embedAttr.
    devices = get_device_address(FLAGS.N)
    with tf.device(devices[0]):
        u_attr.set_model_size(FLAGS.size)
        i_attr.set_model_size(FLAGS.size)
        # if not FLAGS.use_item_feature:
        #     mylog("NOT using item attributes")
        #     i_attr.num_features_cat = 1
        #     i_attr.num_features_mulhot = 0
        # if not FLAGS.use_user_feature:
        #     mylog("NOT using user attributes")
        #     u_attr.num_features_cat = 1
        #     u_attr.num_features_mulhot = 0
        embAttr = embed_attribute.EmbeddingAttribute(
            u_attr, i_attr, FLAGS.batch_size, FLAGS.n_sampled, _buckets[-1],
            FLAGS.use_sep_item, item_ind2logit_ind, logit_ind2item_ind,
            devices=devices)

    if FLAGS.loss in ['warp', 'mw']:
        prepare_warp(embAttr, seq_tr0, seq_va0)

    return (seq_tr, seq_va, seq_test, embAttr, START_ID, item_population,
            p_item, evaluation, uids, user_index, item_index,
            logit_ind2item_ind)
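# --- Illustrative call sketch (comments only; not part of the original code).
# Assumes FLAGS have already been parsed and `raw_data` points to the raw data
# directory; the left-hand names simply mirror the return tuple of get_data.
#
#   (seq_tr, seq_va, seq_test, embAttr, START_ID, item_population, p_item,
#    evaluation, uids, user_index, item_index,
#    logit_ind2item_ind) = get_data(raw_data, recommend=False)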