def __init__(self, queue_size, epoch_num, vocabulary_size,
             vocabulary_block_num, hash_feature_id, factor_num,
             init_value_range, loss_type, optimizer, batch_size,
             factor_lambda, bias_lambda):
    """Build the file queue, bookkeeping variables and FM training graph.

    Args:
        queue_size: Capacity handed to the shared ``tf.FIFOQueue`` that
            carries ``(epoch_id, is_training, data_file, weight_file)``
            tuples.
        epoch_num: Number of epochs; one training and one validation
            ``ModelStat`` is created per epoch.
        vocabulary_size: Total vocabulary size. It is split across
            ``vocabulary_block_num`` variables and also forwarded to
            ``fm_ops.fm_parser``.
        vocabulary_block_num: Number of parameter blocks (variables) the
            vocabulary is split into.
        hash_feature_id: Forwarded unchanged to ``fm_ops.fm_parser``.
        factor_num: Each parameter row has ``factor_num + 1`` columns.
        init_value_range: Parameters are initialised uniformly in
            ``[-init_value_range, init_value_range]``.
        loss_type: ``'logistic'`` or ``'mse'``. Any other value leaves
            ``self.loss`` set to ``None``.
        optimizer: Object exposing ``minimize(loss)``, or ``None`` to skip
            building the training op (``self.opt`` is then not set).
        batch_size: Forwarded unchanged to ``fm_ops.fm_parser``.
        factor_lambda: Forwarded unchanged to ``fm_ops.fm_scorer``.
        bias_lambda: Forwarded unchanged to ``fm_ops.fm_scorer``.

    Raises:
        ValueError: If an optimizer is supplied together with a
            ``loss_type`` that is neither ``'logistic'`` nor ``'mse'``.
    """
    with self.main_ps_device():
        # shared_name lets every process that builds this graph refer to
        # the same underlying queue resource.
        self.file_queue = tf.FIFOQueue(
            queue_size,
            [tf.int32, tf.bool, tf.string, tf.string],
            shared_name='global_queue')

    with self.default_device():
        # NOTE(review): the extent of this device scope was reconstructed
        # from unindented source -- confirm that all remaining graph
        # construction is meant to live under default_device().
        self.finished_worker_num = tf.Variable(0)
        self.incre_finshed_worker_num = self.finished_worker_num.assign_add(
            1, True)
        self.model_loaded = tf.Variable(False)
        self.set_model_loaded = self.model_loaded.assign(True)

        # Keep training/validation creation interleaved per epoch so the
        # order in which ModelStat objects are constructed is unchanged.
        self.training_stat = []
        self.validation_stat = []
        for epoch in range(epoch_num):
            self.training_stat.append(ModelStat('training_%d' % epoch))
            self.validation_stat.append(ModelStat('validation_%d' % epoch))

        # Placeholders describing one unit of work on the file queue.
        self.epoch_id = tf.placeholder(dtype=tf.int32)
        self.is_training = tf.placeholder(dtype=tf.bool)
        self.data_file = tf.placeholder(dtype=tf.string)
        self.weight_file = tf.placeholder(dtype=tf.string)
        self.file_enqueue_op = self.file_queue.enqueue(
            (self.epoch_id, self.is_training, self.data_file,
             self.weight_file))
        self.file_dequeue_op = self.file_queue.dequeue()
        self.file_close_queue_op = self.file_queue.close()

        # Floor division keeps the block size an exact integer regardless
        # of whether "/" means integer or true division in this interpreter.
        vocab_size_per_block = int(
            vocabulary_size // vocabulary_block_num + 1)
        self.vocab_blocks = [
            tf.Variable(
                tf.random_uniform(
                    [vocab_size_per_block, factor_num + 1],
                    -init_value_range, init_value_range),
                name='vocab_block_%d' % block)
            for block in range(vocabulary_block_num)
        ]

        self.file_id = tf.placeholder(dtype=tf.int32)
        (labels, weights, ori_ids, feature_ids, feature_vals,
         feature_poses) = fm_ops.fm_parser(
             self.file_id, self.data_file, self.weight_file, batch_size,
             vocabulary_size, hash_feature_id)
        self.example_num = tf.size(labels)

        local_params = tf.nn.embedding_lookup(self.vocab_blocks, ori_ids)
        self.pred_score, reg_score = fm_ops.fm_scorer(
            feature_ids, local_params, feature_vals, feature_poses,
            factor_lambda, bias_lambda)

        if loss_type == 'logistic':
            self.loss = tf.reduce_sum(
                weights * tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.pred_score, labels=labels))
        elif loss_type == 'mse':
            self.loss = tf.reduce_sum(
                weights * tf.square(self.pred_score - labels))
        else:
            self.loss = None

        if optimizer is not None:
            if self.loss is None:
                # Without this check the failure is an opaque TypeError
                # from adding None to a tensor.
                raise ValueError(
                    "Unsupported loss_type %r: expected 'logistic' or "
                    "'mse' when an optimizer is supplied." % (loss_type,))
            self.opt = optimizer.minimize(self.loss + reg_score)

        # global_variables_initializer() is the non-deprecated equivalent
        # of initialize_all_variables().
        self.init_vars = tf.global_variables_initializer()
        # Only the parameter blocks are checkpointed.
        self.saver = tf.train.Saver(self.vocab_blocks)
def predict(sess, predict_files, score_path, model_path, model_version,
            batch_size, vocabulary_size, hash_feature_id):
    """Score every file in ``predict_files`` with an exported SavedModel.

    The SavedModel under ``model_path/model_version`` is loaded into
    ``sess`` and the tensors named by its ``'predict_score'`` signature are
    looked up. Each input file is then read batch by batch through
    ``fm_ops.fm_parser`` and one predicted score per line is written to
    ``<score_path>/<basename of the input file>.score``.

    Args:
        sess: Session the model is loaded into. It is entered as a context
            manager for the duration of the call.
        predict_files: Iterable of paths of the data files to score.
        score_path: Output directory; created (including missing parents)
            if it does not exist.
        model_path: Base directory of the exported model.
        model_version: Sub-directory of ``model_path`` holding the export.
        batch_size: Forwarded unchanged to ``fm_ops.fm_parser``.
        vocabulary_size: Forwarded unchanged to ``fm_ops.fm_parser``.
        hash_feature_id: Forwarded unchanged to ``fm_ops.fm_parser``.

    Returns:
        None. Results are written to the score files.
    """
    with sess as session:
        if not os.path.exists(score_path):
            os.makedirs(score_path)

        export_path = os.path.join(tf.compat.as_bytes(model_path),
                                   tf.compat.as_bytes(model_version))
        meta_graph_def = tf.saved_model.loader.load(
            session, [tf.saved_model.tag_constants.SERVING], export_path)

        # Resolve the scoring inputs/outputs through the exported signature
        # rather than hard-coding tensor names.
        signature = meta_graph_def.signature_def['predict_score']
        graph = session.graph
        ori_ids = graph.get_tensor_by_name(signature.inputs['ori_ids'].name)
        feature_ids = graph.get_tensor_by_name(
            signature.inputs['feature_ids'].name)
        feature_vals = graph.get_tensor_by_name(
            signature.inputs['feature_vals'].name)
        feature_poses = graph.get_tensor_by_name(
            signature.inputs['feature_pos'].name)
        pred_score = graph.get_tensor_by_name(
            signature.outputs['pred_score'].name)

        file_id = tf.placeholder(dtype=tf.int32)
        data_file = tf.placeholder(dtype=tf.string)
        weight_file = tf.placeholder(dtype=tf.string)

        # Build the parser once: all of its inputs are placeholders or
        # call-level constants, so creating it per file only grows the
        # graph. NOTE(review): assumes the parser keys its reading state on
        # file_id, as the single shared parser op in the training model
        # suggests -- confirm against the fm_parser kernel.
        (labels_t, _weights_t, ori_ids_t, feature_ids_t, feature_vals_t,
         feature_poses_t) = fm_ops.fm_parser(
             file_id, data_file, weight_file, batch_size, vocabulary_size,
             hash_feature_id)
        batch_fetches = [labels_t, ori_ids_t, feature_ids_t,
                         feature_vals_t, feature_poses_t]

        try:
            for fid, fname in enumerate(predict_files):
                score_file = os.path.join(
                    score_path, os.path.basename(fname) + '.score')
                print('Start processing %s, scores written to %s ...'
                      % (fname, score_file))
                # No weight file is used for prediction.
                parser_feed = {file_id: fid, data_file: fname,
                               weight_file: ''}
                with open(score_file, 'w') as out:
                    while True:
                        (labels_, ori_ids_, feature_ids_, feature_vals_,
                         feature_poses_) = session.run(
                             batch_fetches, feed_dict=parser_feed)
                        # An empty batch marks the end of the file. Use
                        # len(): truth-testing an array is ambiguous.
                        if len(labels_) == 0:
                            break
                        instance_score = session.run(
                            pred_score,
                            {ori_ids: ori_ids_,
                             feature_ids: feature_ids_,
                             feature_vals: feature_vals_,
                             feature_poses: feature_poses_})
                        out.writelines(
                            str(score) + '\n' for score in instance_score)
        except tf.errors.OutOfRangeError:
            # Deliberate best-effort stop, kept from the original code.
            # NOTE(review): presumably signals exhausted input; note that
            # it also skips any files not yet processed -- confirm intent.
            pass